Run util/openssl-format-source on the Curve448 code

Reviewed-by: Bernd Edlinger <bernd.edlinger@hotmail.de> (Merged from https://github.com/openssl/openssl/pull/5105)
2025-05-06 06:29:42 +00:00 · 2017-12-04 11:38:58 +00:00 · 2017-12-04 11:38:58 +00:00 · 205fd63881
commit 205fd63881
parent 1308e022e1
29 changed files with 4140 additions and 3030 deletions
--- a/crypto/ec/curve448/arch_32/arch_intrinsics.h
+++ b/crypto/ec/curve448/arch_32/arch_intrinsics.h
@ -11,20 +11,21 @@
 */
 #ifndef __ARCH_ARCH_32_ARCH_INTRINSICS_H__
-#define __ARCH_ARCH_32_ARCH_INTRINSICS_H__
+# define __ARCH_ARCH_32_ARCH_INTRINSICS_H__
-#define ARCH_WORD_BITS 32
+# define ARCH_WORD_BITS 32
-static __inline__ __attribute((always_inline,unused))
+static __inline__ __attribute((always_inline, unused))
-uint32_t word_is_zero(uint32_t a) {
+uint32_t word_is_zero(uint32_t a)
 {
    /* let's hope the compiler isn't clever enough to optimize this. */
-    return (((uint64_t)a)-1)>>32;
+    return (((uint64_t)a) - 1) >> 32;
 }
-static __inline__ __attribute((always_inline,unused))
+static __inline__ __attribute((always_inline, unused))
-uint64_t widemul(uint32_t a, uint32_t b) {
+uint64_t widemul(uint32_t a, uint32_t b)
 {
    return ((uint64_t)a) * b;
 }
 #endif                          /* __ARCH_ARM_32_ARCH_INTRINSICS_H__ */
--- a/crypto/ec/curve448/arch_32/f_impl.c
+++ b/crypto/ec/curve448/arch_32/f_impl.c
@ -14,54 +14,51 @@
 #if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) && !I_HATE_UNROLLED_LOOPS) \
     || defined(DECAF_FORCE_UNROLL)
-#define REPEAT8(_x) _x _x _x _x _x _x _x _x
+# define REPEAT8(_x) _x _x _x _x _x _x _x _x
-#define FOR_LIMB(_i,_start,_end,_x) do { _i=_start; REPEAT8( if (_i<_end) { _x; } _i++;) } while (0)
+# define FOR_LIMB(_i,_start,_end,_x) do { _i=_start; REPEAT8( if (_i<_end) { _x; } _i++;) } while (0)
 #else
-#define FOR_LIMB(_i,_start,_end,_x) do { for (_i=_start; _i<_end; _i++) _x; } while (0)
+# define FOR_LIMB(_i,_start,_end,_x) do { for (_i=_start; _i<_end; _i++) _x; } while (0)
 #endif
-void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { 
+void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
 {
    const uint32_t *a = as->limb, *b = bs->limb;
    uint32_t *c = cs->limb;
    uint64_t accum0 = 0, accum1 = 0, accum2 = 0;
-    uint32_t mask = (1<<28) - 1;  
+    uint32_t mask = (1 << 28) - 1;
    uint32_t aa[8], bb[8];
-    int i,j;
+    int i, j;
-    for (i=0; i<8; i++) {
+    for (i = 0; i < 8; i++) {
-        aa[i] = a[i] + a[i+8];
+        aa[i] = a[i] + a[i + 8];
-        bb[i] = b[i] + b[i+8];
+        bb[i] = b[i] + b[i + 8];
    }
-    FOR_LIMB(j,0,8,{
+    FOR_LIMB(j, 0, 8, {
             accum2 = 0;
-    
+             FOR_LIMB(i, 0, j + 1, {
-        FOR_LIMB (i,0,j+1,{
+                      accum2 += widemul(a[j - i], b[i]);
-            accum2 += widemul(a[j-i],b[i]);
+                      accum1 += widemul(aa[j - i], bb[i]);
-            accum1 += widemul(aa[j-i],bb[i]);
+                      accum0 += widemul(a[8 + j - i], b[8 + i]);
-            accum0 += widemul(a[8+j-i], b[8+i]);
+                      }
-        });
+             ); accum1 -= accum2; accum0 += accum2;
        accum1 -= accum2;
        accum0 += accum2;
             accum2 = 0;
-    
+             FOR_LIMB(i, j + 1, 8, {
-        FOR_LIMB (i,j+1,8,{
+                      accum0 -=
-            accum0 -= widemul(a[8+j-i], b[i]);
+                      widemul(a[8 + j - i], b[i]);
-            accum2 += widemul(aa[8+j-i], bb[i]);
+                      accum2 +=
-            accum1 += widemul(a[16+j-i], b[8+i]);
+                      widemul(aa[8 + j - i],
-        });
+                              bb[i]);
-
+                      accum1 += widemul(a[16 + j - i], b[8 + i]);
                      }
             );
             accum1 += accum2;
             accum0 += accum2;
             c[j] = ((uint32_t)(accum0)) & mask;
-        c[j+8] = ((uint32_t)(accum1)) & mask;
+             c[j + 8] = ((uint32_t)(accum1)) & mask;
-
+             accum0 >>= 28; accum1 >>= 28;
        accum0 >>= 28;
        accum1 >>= 28;
             });
    accum0 += accum1;
@ -76,21 +73,20 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
    c[1] += ((uint32_t)(accum1));
 }
-void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
+void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
 {
    const uint32_t *a = as->limb;
    uint32_t *c = cs->limb;
    uint64_t accum0 = 0, accum8 = 0;
-    uint32_t mask = (1<<28)-1;  
+    uint32_t mask = (1 << 28) - 1;
    int i;
-    assert(b<1<<28);
+    assert(b < 1 << 28);
    FOR_LIMB(i,0,8,{
        accum0 += widemul(b, a[i]);
        accum8 += widemul(b, a[i+8]);
    FOR_LIMB(i, 0, 8, {
             accum0 += widemul(b, a[i]); accum8 += widemul(b, a[i + 8]);
             c[i] = accum0 & mask; accum0 >>= 28;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
+             c[i + 8] = accum8 & mask; accum8 >>= 28;
             });
    accum0 += accum8 + c[8];
@ -102,7 +98,7 @@ void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
    c[1] += accum8 >> 28;
 }
-void gf_sqr (gf_s *__restrict__ cs, const gf as) {
+void gf_sqr(gf_s * __restrict__ cs, const gf as)
-    gf_mul(cs,as,as); /* Performs better with a dedicated square */
+{
    gf_mul(cs, as, as);         /* Performs better with a dedicated square */
 }
--- a/crypto/ec/curve448/arch_32/f_impl.h
+++ b/crypto/ec/curve448/arch_32/f_impl.h
@ -16,40 +16,43 @@
 #define LIMB_PLACE_VALUE(i) 28
-void gf_add_RAW (gf out, const gf a, const gf b) {
+void gf_add_RAW(gf out, const gf a, const gf b)
 {
    unsigned int i;
-    for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
+    for (i = 0; i < sizeof(*out) / sizeof(out->limb[0]); i++) {
        out->limb[i] = a->limb[i] + b->limb[i];
    }
 }
-void gf_sub_RAW (gf out, const gf a, const gf b) {
+void gf_sub_RAW(gf out, const gf a, const gf b)
 {
    unsigned int i;
-    for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
+    for (i = 0; i < sizeof(*out) / sizeof(out->limb[0]); i++) {
        out->limb[i] = a->limb[i] - b->limb[i];
    }
 }
-void gf_bias (gf a, int amt) {
+void gf_bias(gf a, int amt)
 {
    unsigned int i;
-    uint32_t co1 = ((1<<28)-1)*amt, co2 = co1-amt;
+    uint32_t co1 = ((1 << 28) - 1) * amt, co2 = co1 - amt;
-    for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) {
+    for (i = 0; i < sizeof(*a) / sizeof(a->limb[0]); i++) {
-        a->limb[i] += (i==sizeof(*a)/sizeof(a->limb[0])/2) ? co2 : co1;
+        a->limb[i] += (i == sizeof(*a) / sizeof(a->limb[0]) / 2) ? co2 : co1;
    }
 }
-void gf_weak_reduce (gf a) {
+void gf_weak_reduce(gf a)
-    uint32_t mask = (1<<28) - 1;
+{
    uint32_t mask = (1 << 28) - 1;
    uint32_t tmp = a->limb[15] >> 28;
    unsigned int i;
    a->limb[8] += tmp;
-    for (i=15; i>0; i--) {
+    for (i = 15; i > 0; i--) {
-        a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>28);
+        a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 28);
    }
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }
--- a/crypto/ec/curve448/arch_arm_32/arch_intrinsics.h
+++ b/crypto/ec/curve448/arch_arm_32/arch_intrinsics.h
@ -11,22 +11,26 @@
 */
 #ifndef __ARCH_ARM_32_ARCH_INTRINSICS_H__
-#define __ARCH_ARM_32_ARCH_INTRINSICS_H__
+# define __ARCH_ARM_32_ARCH_INTRINSICS_H__
-#define ARCH_WORD_BITS 32
+# define ARCH_WORD_BITS 32
-static __inline__ __attribute((always_inline,unused))
+static __inline__ __attribute((always_inline, unused))
-uint32_t word_is_zero(uint32_t a) {
+uint32_t word_is_zero(uint32_t a)
 {
    uint32_t ret;
-    asm("subs %0, %1, #1;\n\tsbc %0, %0, %0" : "=r"(ret) : "r"(a) : "cc");
+ asm("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
    return ret;
 }
-static __inline__ __attribute((always_inline,unused))
+static __inline__ __attribute((always_inline, unused))
-uint64_t widemul(uint32_t a, uint32_t b) {
+uint64_t widemul(uint32_t a, uint32_t b)
-    /* Could be UMULL, but it's hard to express to CC that the registers must be different */
+{
    /*
     * Could be UMULL, but it's hard to express to CC that the registers must
     * be different
     */
    return ((uint64_t)a) * b;
 }
 #endif                          /* __ARCH_ARM_32_ARCH_INTRINSICS_H__ */
--- a/crypto/ec/curve448/arch_arm_32/f_impl.c
+++ b/crypto/ec/curve448/arch_arm_32/f_impl.c
@ -12,100 +12,89 @@
 #include "f_field.h"
-static inline void __attribute__((gnu_inline,always_inline))
+static inline void __attribute__ ((gnu_inline, always_inline))
-smlal (
+    smlal(uint64_t *acc, const uint32_t a, const uint32_t b)
-    uint64_t *acc,
+{
    const uint32_t a,
    const uint32_t b
 ) {
 #ifdef  __ARMEL__
-    uint32_t lo = *acc, hi = (*acc)>>32;
+    uint32_t lo = *acc, hi = (*acc) >> 32;
-    __asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
+    __asm__ __volatile__("smlal %[lo], %[hi], %[a], %[b]":[lo] "+&r"(lo),
-        : [lo]"+&r"(lo), [hi]"+&r"(hi)
+                         [hi] "+&r"(hi)
-        : [a]"r"(a), [b]"r"(b));
+                         :[a] "r"(a),[b] "r"(b));
-    *acc = lo + (((uint64_t)hi)<<32);
+    *acc = lo + (((uint64_t)hi) << 32);
 #else
-    *acc += (int64_t)(int32_t)a * (int64_t)(int32_t)b;
+    *acc += (int64_t)(int32_t)a *(int64_t)(int32_t)b;
 #endif
 }
-static inline void __attribute__((gnu_inline,always_inline))
+static inline void __attribute__ ((gnu_inline, always_inline))
-smlal2 (
+    smlal2(uint64_t *acc, const uint32_t a, const uint32_t b)
-    uint64_t *acc,
+{
    const uint32_t a,
    const uint32_t b
 ) {
 #ifdef __ARMEL__
-    uint32_t lo = *acc, hi = (*acc)>>32;
+    uint32_t lo = *acc, hi = (*acc) >> 32;
-    __asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
+    __asm__ __volatile__("smlal %[lo], %[hi], %[a], %[b]":[lo] "+&r"(lo),
-        : [lo]"+&r"(lo), [hi]"+&r"(hi)
+                         [hi] "+&r"(hi)
-        : [a]"r"(a), [b]"r"(2*b));
+                         :[a] "r"(a),[b] "r"(2 * b));
-    *acc = lo + (((uint64_t)hi)<<32);
+    *acc = lo + (((uint64_t)hi) << 32);
 #else
-    *acc += (int64_t)(int32_t)a * (int64_t)(int32_t)(b * 2);
+    *acc += (int64_t)(int32_t)a *(int64_t)(int32_t)(b * 2);
 #endif
 }
-static inline void __attribute__((gnu_inline,always_inline))
+static inline void __attribute__ ((gnu_inline, always_inline))
-smull (
+    smull(uint64_t *acc, const uint32_t a, const uint32_t b)
-    uint64_t *acc,
+{
    const uint32_t a,
    const uint32_t b
 ) {
 #ifdef __ARMEL__
    uint32_t lo, hi;
-    __asm__ __volatile__ ("smull %[lo], %[hi], %[a], %[b]"
+    __asm__ __volatile__("smull %[lo], %[hi], %[a], %[b]":[lo] "=&r"(lo),
-        : [lo]"=&r"(lo), [hi]"=&r"(hi)
+                         [hi] "=&r"(hi)
-        : [a]"r"(a), [b]"r"(b));
+                         :[a] "r"(a),[b] "r"(b));
-    *acc = lo + (((uint64_t)hi)<<32);
+    *acc = lo + (((uint64_t)hi) << 32);
 #else
-    *acc = (int64_t)(int32_t)a * (int64_t)(int32_t)b;
+    *acc = (int64_t)(int32_t)a *(int64_t)(int32_t)b;
 #endif
 }
-static inline void __attribute__((gnu_inline,always_inline))
+static inline void __attribute__ ((gnu_inline, always_inline))
-smull2 (
+    smull2(uint64_t *acc, const uint32_t a, const uint32_t b)
-    uint64_t *acc,
+{
    const uint32_t a,
    const uint32_t b
 ) {
 #ifdef __ARMEL__
    uint32_t lo, hi;
    __asm__ /*__volatile__*/ ("smull %[lo], %[hi], %[a], %[b]"
-        : [lo]"=&r"(lo), [hi]"=&r"(hi)
+ :                           [lo] "=&r"(lo),[hi] "=&r"(hi)
-        : [a]"r"(a), [b]"r"(2*b));
+ :                           [a] "r"(a),[b] "r"(2 * b));
-    *acc = lo + (((uint64_t)hi)<<32);
+    *acc = lo + (((uint64_t)hi) << 32);
 #else
-    *acc = (int64_t)(int32_t)a * (int64_t)(int32_t)(b * 2);
+    *acc = (int64_t)(int32_t)a *(int64_t)(int32_t)(b * 2);
 #endif
 }
-void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
+void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
 {
    const uint32_t *a = as->limb, *b = bs->limb;
    uint32_t *c = cs->limb;
    uint64_t accum0 = 0, accum1 = 0, accum2, accum3, accumC0, accumC1;
-    uint32_t mask = (1<<28) - 1;  
+    uint32_t mask = (1 << 28) - 1;
    uint32_t aa[8], bm[8];
    int i;
-    for (i=0; i<8; i++) {
+    for (i = 0; i < 8; i++) {
-        aa[i] = a[i] + a[i+8];
+        aa[i] = a[i] + a[i + 8];
-        bm[i] = b[i] - b[i+8];
+        bm[i] = b[i] - b[i + 8];
    }
-    uint32_t ax,bx;
+    uint32_t ax, bx;
    {
        /* t^3 terms */
        smull(&accum1, ax = aa[1], bx = b[15]);
@ -284,7 +273,6 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
        smlal(&accum0, ax, bx = b[8]);
        smlal(&accum2, ax = aa[5], bx);
        smlal(&accum0, ax = a[13], bx = b[7]);
        smlal(&accum2, ax = a[14], bx);
        smlal(&accum0, ax, bx = b[6]);
@ -307,7 +295,6 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
        smlal(&accum1, ax, bx = b[0]);
        smlal(&accum3, ax = a[13], bx);
        smlal(&accum1, ax = a[5], bx = bm[7]);
        smlal(&accum3, ax = a[6], bx);
        smlal(&accum1, ax, bx = bm[6]);
@ -365,7 +352,6 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
        smlal(&accum0, ax, bx = b[8]);
        smlal(&accum2, ax = aa[7], bx);
        smlal(&accum0, ax = a[15], bx = b[7]);
        /* t terms */
@ -388,7 +374,6 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
        smlal(&accum1, ax, bx = b[0]);
        smlal(&accum3, ax = a[15], bx);
        smlal(&accum1, ax = a[7], bx = bm[7]);
        /* 1 terms */
@ -435,21 +420,22 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
    c[1] += ((uint32_t)(accum1));
 }
-void gf_sqr (gf_s *__restrict__ cs, const gf as) {
+void gf_sqr(gf_s * __restrict__ cs, const gf as)
 {
    const uint32_t *a = as->limb;
    uint32_t *c = cs->limb;
    uint64_t accum0 = 0, accum1 = 0, accum2, accum3, accumC0, accumC1, tmp;
-    uint32_t mask = (1<<28) - 1;  
+    uint32_t mask = (1 << 28) - 1;
    uint32_t bm[8];
    int i;
-    for (i=0; i<8; i++) {
+    for (i = 0; i < 8; i++) {
-        bm[i] = a[i] - a[i+8];
+        bm[i] = a[i] - a[i + 8];
    }
-    uint32_t ax,bx;
+    uint32_t ax, bx;
    {
        /* t^3 terms */
        smull2(&accum1, ax = a[9], bx = a[15]);
@ -498,8 +484,12 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
        smlal2(&accum2, ax = bm[0], bx = bm[1]);
        smlal(&accum0, ax, ax);
-        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
+        tmp = -accum3;
-        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
+        accum3 = tmp - accum2;
        accum2 = tmp;
        tmp = -accum1;
        accum1 = tmp - accum0;
        accum0 = tmp;
        accum2 += accum0 >> 28;
        accum3 += accum1 >> 28;
@ -560,9 +550,12 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
        smlal2(&accum2, ax = bm[1], bx);
        smlal(&accum0, ax, ax);
-        
+        tmp = -accum3;
-        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
+        accum3 = tmp - accum2;
-        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
+        accum2 = tmp;
        tmp = -accum1;
        accum1 = tmp - accum0;
        accum0 = tmp;
        accum0 += accumC0;
        accum1 += accumC1;
@ -596,7 +589,6 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
        smlal2(&accum2, ax = a[10], bx);
        smlal(&accum0, ax, ax);
        smlal2(&accum0, ax = a[5], bx = a[7]);
        smlal2(&accum2, ax = a[6], bx);
        smlal(&accum0, ax, ax);
@ -630,9 +622,12 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
        smlal2(&accum2, ax = bm[2], bx);
        smlal(&accum0, ax, ax);
-        
+        tmp = -accum3;
-        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
+        accum3 = tmp - accum2;
-        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
+        accum2 = tmp;
        tmp = -accum1;
        accum1 = tmp - accum0;
        accum0 = tmp;
        accum0 += accumC0;
        accum1 += accumC1;
@ -664,7 +659,6 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
        smlal2(&accum2, ax = a[11], bx);
        smlal(&accum0, ax, ax);
        smlal(&accum0, ax = a[7], bx = a[7]);
        /* t terms */
@ -699,9 +693,12 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
        smlal2(&accum2, ax = bm[3], bx);
        smlal(&accum0, ax, ax);
-        tmp = -accum3; accum3 = tmp-accum2; accum2 = tmp;
+        tmp = -accum3;
-        tmp = -accum1; accum1 = tmp-accum0; accum0 = tmp;
+        accum3 = tmp - accum2;
-        
+        accum2 = tmp;
        tmp = -accum1;
        accum1 = tmp - accum0;
        accum0 = tmp;
        accum0 += accumC0;
        accum1 += accumC1;
@ -729,12 +726,9 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    c[1] += ((uint32_t)(accum1));
 }
-void gf_mulw_unsigned (
+void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
-    gf_s *__restrict__ cs,
+{
-    const gf as,
+    uint32_t mask = (1ull << 28) - 1;
    uint32_t b
 ) {
    uint32_t mask = (1ull<<28)-1;  
    assert(b <= mask);
    const uint32_t *a = as->limb;
@ -745,75 +739,99 @@ void gf_mulw_unsigned (
    int i;
    uint32_t c0, c8, n0, n8;
-    c0 = a[0]; c8 = a[8];
+    c0 = a[0];
    c8 = a[8];
    accum0 = widemul(b, c0);
    accum8 = widemul(b, c8);
-    c[0] = accum0 & mask; accum0 >>= 28;
+    c[0] = accum0 & mask;
-    c[8] = accum8 & mask; accum8 >>= 28;
+    accum0 >>= 28;
    c[8] = accum8 & mask;
    accum8 >>= 28;
-    i=1;
+    i = 1;
    {
-        n0 = a[i]; n8 = a[i+8];
+        n0 = a[i];
        n8 = a[i + 8];
        smlal(&accum0, b, n0);
        smlal(&accum8, b, n8);
-        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i] = accum0 & mask;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
+        accum0 >>= 28;
        c[i + 8] = accum8 & mask;
        accum8 >>= 28;
        i++;
    }
    {
-        c0 = a[i]; c8 = a[i+8];
+        c0 = a[i];
        c8 = a[i + 8];
        smlal(&accum0, b, c0);
        smlal(&accum8, b, c8);
-        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i] = accum0 & mask;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
+        accum0 >>= 28;
        c[i + 8] = accum8 & mask;
        accum8 >>= 28;
        i++;
    }
    {
-        n0 = a[i]; n8 = a[i+8];
+        n0 = a[i];
        n8 = a[i + 8];
        smlal(&accum0, b, n0);
        smlal(&accum8, b, n8);
-        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i] = accum0 & mask;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
+        accum0 >>= 28;
        c[i + 8] = accum8 & mask;
        accum8 >>= 28;
        i++;
    }
    {
-        c0 = a[i]; c8 = a[i+8];
+        c0 = a[i];
        c8 = a[i + 8];
        smlal(&accum0, b, c0);
        smlal(&accum8, b, c8);
-        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i] = accum0 & mask;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
+        accum0 >>= 28;
        c[i + 8] = accum8 & mask;
        accum8 >>= 28;
        i++;
    }
    {
-        n0 = a[i]; n8 = a[i+8];
+        n0 = a[i];
        n8 = a[i + 8];
        smlal(&accum0, b, n0);
        smlal(&accum8, b, n8);
-        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i] = accum0 & mask;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
+        accum0 >>= 28;
        c[i + 8] = accum8 & mask;
        accum8 >>= 28;
        i++;
    }
    {
-        c0 = a[i]; c8 = a[i+8];
+        c0 = a[i];
        c8 = a[i + 8];
        smlal(&accum0, b, c0);
        smlal(&accum8, b, c8);
-        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i] = accum0 & mask;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
+        accum0 >>= 28;
        c[i + 8] = accum8 & mask;
        accum8 >>= 28;
        i++;
    }
    {
-        n0 = a[i]; n8 = a[i+8];
+        n0 = a[i];
        n8 = a[i + 8];
        smlal(&accum0, b, n0);
        smlal(&accum8, b, n8);
-        c[i] = accum0 & mask; accum0 >>= 28;
+        c[i] = accum0 & mask;
-        c[i+8] = accum8 & mask; accum8 >>= 28;
+        accum0 >>= 28;
        c[i + 8] = accum8 & mask;
        accum8 >>= 28;
        i++;
    }
--- a/crypto/ec/curve448/arch_arm_32/f_impl.h
+++ b/crypto/ec/curve448/arch_arm_32/f_impl.h
@ -17,45 +17,49 @@
 #define LIMB_PLACE_VALUE(i) 28
-void gf_add_RAW (gf out, const gf a, const gf b) {
+void gf_add_RAW(gf out, const gf a, const gf b)
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
+{
-        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
+    for (unsigned int i = 0; i < sizeof(*out) / sizeof(uint32xn_t); i++) {
        ((uint32xn_t *) out)[i] =
            ((const uint32xn_t *)a)[i] + ((const uint32xn_t *)b)[i];
    }
    /*
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
+     * for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-        out->limb[i] = a->limb[i] + b->limb[i];
+     * out->limb[i] = a->limb[i] + b->limb[i]; }
    }
     */
 }
-void gf_sub_RAW (gf out, const gf a, const gf b) {
+void gf_sub_RAW(gf out, const gf a, const gf b)
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
+{
-        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
+    for (unsigned int i = 0; i < sizeof(*out) / sizeof(uint32xn_t); i++) {
        ((uint32xn_t *) out)[i] =
            ((const uint32xn_t *)a)[i] - ((const uint32xn_t *)b)[i];
    }
    /*
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
+     * for (unsigned int i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-        out->limb[i] = a->limb[i] - b->limb[i];
+     * out->limb[i] = a->limb[i] - b->limb[i]; }
    }
     */
 }
-void gf_bias (gf a, int amt) {
+void gf_bias(gf a, int amt)
-    uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
+{
-    uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
+    uint32_t co1 = ((1ull << 28) - 1) * amt, co2 = co1 - amt;
-    uint32x4_t *aa = (uint32x4_t*) a;
+    uint32x4_t lo = { co1, co1, co1, co1 }, hi = {
    co2, co1, co1, co1};
    uint32x4_t *aa = (uint32x4_t *) a;
    aa[0] += lo;
    aa[1] += lo;
    aa[2] += hi;
    aa[3] += lo;
 }
-void gf_weak_reduce (gf a) {
+void gf_weak_reduce(gf a)
-    uint64_t mask = (1ull<<28) - 1;
+{
    uint64_t mask = (1ull << 28) - 1;
    uint64_t tmp = a->limb[15] >> 28;
    a->limb[8] += tmp;
-    for (unsigned int i=15; i>0; i--) {
+    for (unsigned int i = 15; i > 0; i--) {
-        a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>28);
+        a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 28);
    }
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }
--- a/crypto/ec/curve448/arch_neon/arch_intrinsics.h
+++ b/crypto/ec/curve448/arch_neon/arch_intrinsics.h
@ -11,22 +11,26 @@
 */
 #ifndef __ARCH_NEON_ARCH_INTRINSICS_H__
-#define __ARCH_NEON_ARCH_INTRINSICS_H__
+# define __ARCH_NEON_ARCH_INTRINSICS_H__
-#define ARCH_WORD_BITS 32
+# define ARCH_WORD_BITS 32
-static __inline__ __attribute((always_inline,unused))
+static __inline__ __attribute((always_inline, unused))
-uint32_t word_is_zero(uint32_t a) {
+uint32_t word_is_zero(uint32_t a)
 {
    uint32_t ret;
-    __asm__("subs %0, %1, #1;\n\tsbc %0, %0, %0" : "=r"(ret) : "r"(a) : "cc");
+ __asm__("subs %0, %1, #1;\n\tsbc %0, %0, %0": "=r"(ret): "r"(a):"cc");
    return ret;
 }
-static __inline__ __attribute((always_inline,unused))
+static __inline__ __attribute((always_inline, unused))
-uint64_t widemul(uint32_t a, uint32_t b) {
+uint64_t widemul(uint32_t a, uint32_t b)
-    /* Could be UMULL, but it's hard to express to CC that the registers must be different */
+{
    /*
     * Could be UMULL, but it's hard to express to CC that the registers must
     * be different
     */
    return ((uint64_t)a) * b;
 }
 #endif                          /* __ARCH_NEON_ARCH_INTRINSICS_H__ */
--- a/crypto/ec/curve448/arch_neon/f_impl.c
+++ b/crypto/ec/curve448/arch_neon/f_impl.c
--- a/crypto/ec/curve448/arch_neon/f_impl.h
+++ b/crypto/ec/curve448/arch_neon/f_impl.h
@ -15,7 +15,7 @@
 #define USE_NEON_PERM 1
 #define LIMBHI(x) ((x##ull)>>28)
 #define LIMBLO(x) ((x##ull)&((1ull<<28)-1))
-#  define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
+#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
    {{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \
      LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \
      LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \
@ -23,42 +23,47 @@
 #define LIMB_PLACE_VALUE(i) 28
-void gf_add_RAW (gf out, const gf a, const gf b) {
+void gf_add_RAW(gf out, const gf a, const gf b)
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
+{
-        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
+    for (unsigned int i = 0; i < sizeof(*out) / sizeof(uint32xn_t); i++) {
        ((uint32xn_t *) out)[i] =
            ((const uint32xn_t *)a)[i] + ((const uint32xn_t *)b)[i];
    }
 }
-void gf_sub_RAW (gf out, const gf a, const gf b) {
+void gf_sub_RAW(gf out, const gf a, const gf b)
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
+{
-        ((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
+    for (unsigned int i = 0; i < sizeof(*out) / sizeof(uint32xn_t); i++) {
        ((uint32xn_t *) out)[i] =
            ((const uint32xn_t *)a)[i] - ((const uint32xn_t *)b)[i];
    }
    /*
-    unsigned int i;
+     * unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-    for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
+     * out->limb[i] = a->limb[i] - b->limb[i]; }
        out->limb[i] = a->limb[i] - b->limb[i];
    }
     */
 }
-void gf_bias (gf a, int amt) {
+void gf_bias(gf a, int amt)
-    uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
+{
-    uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1};
+    uint32_t co1 = ((1ull << 28) - 1) * amt, co2 = co1 - amt;
-    uint32x4_t *aa = (uint32x4_t*) a;
+    uint32x4_t lo = { co1, co2, co1, co1 }, hi = {
    co1, co1, co1, co1};
    uint32x4_t *aa = (uint32x4_t *) a;
    aa[0] += lo;
    aa[1] += hi;
    aa[2] += hi;
    aa[3] += hi;
 }
-void gf_weak_reduce (gf a) {
+void gf_weak_reduce(gf a)
 {
-    uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1},
+    uint32x2_t *aa = (uint32x2_t *) a, vmask = {
-       tmp = vshr_n_u32(aa[7],28);
+    (1ull << 28) - 1, (1ull << 28) - 1}, vm2 = {
    0, -1}, tmp = vshr_n_u32(aa[7], 28);
-    for (unsigned int i=7; i>=1; i--) {
+    for (unsigned int i = 7; i >= 1; i--) {
-        aa[i] = vsra_n_u32(aa[i] & vmask, aa[i-1], 28);
+        aa[i] = vsra_n_u32(aa[i] & vmask, aa[i - 1], 28);
    }
-    aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2);
+    aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp & vm2);
 }
--- a/crypto/ec/curve448/arch_ref64/arch_intrinsics.h
+++ b/crypto/ec/curve448/arch_ref64/arch_intrinsics.h
@ -11,20 +11,21 @@
 */
 #ifndef __ARCH_REF64_ARCH_INTRINSICS_H__
-#define __ARCH_REF64_ARCH_INTRINSICS_H__
+# define __ARCH_REF64_ARCH_INTRINSICS_H__
-#define ARCH_WORD_BITS 64
+# define ARCH_WORD_BITS 64
-static __inline__ __attribute((always_inline,unused))
+static __inline__ __attribute((always_inline, unused))
-uint64_t word_is_zero(uint64_t a) {
+uint64_t word_is_zero(uint64_t a)
 {
    /* let's hope the compiler isn't clever enough to optimize this. */
-    return (((__uint128_t)a)-1)>>64;
+    return (((__uint128_t) a) - 1) >> 64;
 }
-static __inline__ __attribute((always_inline,unused))
+static __inline__ __attribute((always_inline, unused))
-__uint128_t widemul(uint64_t a, uint64_t b) {
+__uint128_t widemul(uint64_t a, uint64_t b)
-    return ((__uint128_t)a) * b; 
+{
    return ((__uint128_t) a) * b;
 }
 #endif                          /* ARCH_REF64_ARCH_INTRINSICS_H__ */
--- a/crypto/ec/curve448/arch_ref64/f_impl.c
+++ b/crypto/ec/curve448/arch_ref64/f_impl.c
@ -11,48 +11,49 @@
 */
 #include "f_field.h"
-void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
+void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
 {
    const uint64_t *a = as->limb, *b = bs->limb;
    uint64_t *c = cs->limb;
    __uint128_t accum0 = 0, accum1 = 0, accum2;
-    uint64_t mask = (1ull<<56) - 1;  
+    uint64_t mask = (1ull << 56) - 1;
    uint64_t aa[4], bb[4], bbb[4];
    unsigned int i;
-    for (i=0; i<4; i++) {
+    for (i = 0; i < 4; i++) {
-        aa[i]  = a[i] + a[i+4];
+        aa[i] = a[i] + a[i + 4];
-        bb[i]  = b[i] + b[i+4];
+        bb[i] = b[i] + b[i + 4];
-        bbb[i] = bb[i] + b[i+4];
+        bbb[i] = bb[i] + b[i + 4];
    }
    int I_HATE_UNROLLED_LOOPS = 0;
    if (I_HATE_UNROLLED_LOOPS) {
-        /* The compiler probably won't unroll this,
+        /*
-         * so it's like 80% slower.
+         * The compiler probably won't unroll this, so it's like 80% slower.
         */
-        for (i=0; i<4; i++) {
+        for (i = 0; i < 4; i++) {
            accum2 = 0;
            unsigned int j;
-            for (j=0; j<=i; j++) {
+            for (j = 0; j <= i; j++) {
-                accum2 += widemul(a[j],   b[i-j]);
+                accum2 += widemul(a[j], b[i - j]);
-                accum1 += widemul(aa[j], bb[i-j]);
+                accum1 += widemul(aa[j], bb[i - j]);
-                accum0 += widemul(a[j+4], b[i-j+4]);
+                accum0 += widemul(a[j + 4], b[i - j + 4]);
            }
-            for (; j<4; j++) {
+            for (; j < 4; j++) {
-                accum2 += widemul(a[j],   b[i-j+8]);
+                accum2 += widemul(a[j], b[i - j + 8]);
-                accum1 += widemul(aa[j], bbb[i-j+4]);
+                accum1 += widemul(aa[j], bbb[i - j + 4]);
-                accum0 += widemul(a[j+4], bb[i-j+4]);
+                accum0 += widemul(a[j + 4], bb[i - j + 4]);
            }
            accum1 -= accum2;
            accum0 += accum2;
            c[i] = ((uint64_t)(accum0)) & mask;
-            c[i+4] = ((uint64_t)(accum1)) & mask;
+            c[i + 4] = ((uint64_t)(accum1)) & mask;
            accum0 >>= 56;
            accum1 >>= 56;
@ -172,19 +173,22 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
    c[1] += ((uint64_t)(accum1));
 }
-void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
+void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
 {
    const uint64_t *a = as->limb;
    uint64_t *c = cs->limb;
    __uint128_t accum0 = 0, accum4 = 0;
-    uint64_t mask = (1ull<<56) - 1;  
+    uint64_t mask = (1ull << 56) - 1;
    int i;
-    for (i=0; i<4; i++) {
+    for (i = 0; i < 4; i++) {
        accum0 += widemul(b, a[i]);
-        accum4 += widemul(b, a[i+4]);
+        accum4 += widemul(b, a[i + 4]);
-        c[i]   = accum0 & mask; accum0 >>= 56;
+        c[i] = accum0 & mask;
-        c[i+4] = accum4 & mask; accum4 >>= 56;
+        accum0 >>= 56;
        c[i + 4] = accum4 & mask;
        accum4 >>= 56;
    }
    accum0 += accum4 + c[4];
@ -196,24 +200,25 @@ void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
    c[1] += accum4 >> 56;
 }
-void gf_sqr (gf_s *__restrict__ cs, const gf as) {
+void gf_sqr(gf_s * __restrict__ cs, const gf as)
 {
    const uint64_t *a = as->limb;
    uint64_t *c = cs->limb;
    __uint128_t accum0 = 0, accum1 = 0, accum2;
-    uint64_t mask = (1ull<<56) - 1;  
+    uint64_t mask = (1ull << 56) - 1;
    uint64_t aa[4];
    /* For some reason clang doesn't vectorize this without prompting? */
    unsigned int i;
-    for (i=0; i<4; i++) {
+    for (i = 0; i < 4; i++) {
-        aa[i] = a[i] + a[i+4];
+        aa[i] = a[i] + a[i + 4];
    }
-    accum2  = widemul(a[0],a[3]);
+    accum2 = widemul(a[0], a[3]);
-    accum0  = widemul(aa[0],aa[3]);
+    accum0 = widemul(aa[0], aa[3]);
-    accum1  = widemul(a[4],a[7]);
+    accum1 = widemul(a[4], a[7]);
    accum2 += widemul(a[1], a[2]);
    accum0 += widemul(aa[1], aa[2]);
@ -222,21 +227,21 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    accum0 -= accum2;
    accum1 += accum2;
-    c[3] = ((uint64_t)(accum1))<<1 & mask;
+    c[3] = ((uint64_t)(accum1)) << 1 & mask;
-    c[7] = ((uint64_t)(accum0))<<1 & mask;
+    c[7] = ((uint64_t)(accum0)) << 1 & mask;
    accum0 >>= 55;
    accum1 >>= 55;
-    accum0 += widemul(2*aa[1],aa[3]);
+    accum0 += widemul(2 * aa[1], aa[3]);
-    accum1 += widemul(2*a[5], a[7]);
+    accum1 += widemul(2 * a[5], a[7]);
    accum0 += widemul(aa[2], aa[2]);
    accum1 += accum0;
-    accum0 -= widemul(2*a[1], a[3]);
+    accum0 -= widemul(2 * a[1], a[3]);
    accum1 += widemul(a[6], a[6]);
-    accum2 = widemul(a[0],a[0]);
+    accum2 = widemul(a[0], a[0]);
    accum1 -= accum2;
    accum0 += accum2;
@ -250,16 +255,16 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    accum0 >>= 56;
    accum1 >>= 56;
-    accum2  = widemul(2*aa[2],aa[3]);
+    accum2 = widemul(2 * aa[2], aa[3]);
-    accum0 -= widemul(2*a[2], a[3]);
+    accum0 -= widemul(2 * a[2], a[3]);
-    accum1 += widemul(2*a[6], a[7]);
+    accum1 += widemul(2 * a[6], a[7]);
    accum1 += accum2;
    accum0 += accum2;
-    accum2  = widemul(2*a[0],a[1]);
+    accum2 = widemul(2 * a[0], a[1]);
-    accum1 += widemul(2*aa[0], aa[1]);
+    accum1 += widemul(2 * aa[0], aa[1]);
-    accum0 += widemul(2*a[4], a[5]);
+    accum0 += widemul(2 * a[4], a[5]);
    accum1 -= accum2;
    accum0 += accum2;
@ -270,16 +275,16 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    accum0 >>= 56;
    accum1 >>= 56;
-    accum2  = widemul(aa[3],aa[3]);
+    accum2 = widemul(aa[3], aa[3]);
    accum0 -= widemul(a[3], a[3]);
    accum1 += widemul(a[7], a[7]);
    accum1 += accum2;
    accum0 += accum2;
-    accum2  = widemul(2*a[0],a[2]);
+    accum2 = widemul(2 * a[0], a[2]);
-    accum1 += widemul(2*aa[0], aa[2]);
+    accum1 += widemul(2 * aa[0], aa[2]);
-    accum0 += widemul(2*a[4], a[6]);
+    accum0 += widemul(2 * a[4], a[6]);
    accum2 += widemul(a[1], a[1]);
    accum1 += widemul(aa[1], aa[1]);
@ -306,4 +311,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1));
    c[0] += ((uint64_t)(accum1));
 }
--- a/crypto/ec/curve448/arch_ref64/f_impl.h
+++ b/crypto/ec/curve448/arch_ref64/f_impl.h
@ -15,32 +15,36 @@
 #define LIMB_PLACE_VALUE(i) 56
-void gf_add_RAW (gf out, const gf a, const gf b) {
+void gf_add_RAW(gf out, const gf a, const gf b)
-    for (unsigned int i=0; i<8; i++) {
+{
    for (unsigned int i = 0; i < 8; i++) {
        out->limb[i] = a->limb[i] + b->limb[i];
    }
    gf_weak_reduce(out);
 }
-void gf_sub_RAW (gf out, const gf a, const gf b) {
+void gf_sub_RAW(gf out, const gf a, const gf b)
-    uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2;
+{
-    for (unsigned int i=0; i<8; i++) {
+    uint64_t co1 = ((1ull << 56) - 1) * 2, co2 = co1 - 2;
-        out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1);
+    for (unsigned int i = 0; i < 8; i++) {
        out->limb[i] = a->limb[i] - b->limb[i] + ((i == 4) ? co2 : co1);
    }
    gf_weak_reduce(out);
 }
-void gf_bias (gf a, int amt) {
+void gf_bias(gf a, int amt)
-    (void) a;
+{
-    (void) amt;
+    (void)a;
    (void)amt;
 }
-void gf_weak_reduce (gf a) {
+void gf_weak_reduce(gf a)
-    uint64_t mask = (1ull<<56) - 1;
+{
    uint64_t mask = (1ull << 56) - 1;
    uint64_t tmp = a->limb[7] >> 56;
    a->limb[4] += tmp;
-    for (unsigned int i=7; i>0; i--) {
+    for (unsigned int i = 7; i > 0; i--) {
-        a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
+        a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 56);
    }
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }
--- a/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h
+++ b/crypto/ec/curve448/arch_x86_64/arch_intrinsics.h
@ -10,303 +10,292 @@
 * Originally written by Mike Hamburg
 */
 #ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__
-#define __ARCH_X86_64_ARCH_INTRINSICS_H__
+# define __ARCH_X86_64_ARCH_INTRINSICS_H__
-#define ARCH_WORD_BITS 64
+# define ARCH_WORD_BITS 64
-#include <openssl/e_os2.h>
+# include <openssl/e_os2.h>
 /* FUTURE: autogenerate */
-static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) {
+static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b)
-  uint64_t c,d;
+{
-  #ifndef __BMI2__
+    uint64_t c, d;
 # ifndef __BMI2__
    __asm__ volatile
-          ("movq %[a], %%rax;"
+     ("movq %[a], %%rax;" "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
-           "mulq %[b];"
+      :[b] "m"(*b),[a] "m"(*a)
-           : [c]"=&a"(c), [d]"=d"(d)
+      :"cc");
-           : [b]"m"(*b), [a]"m"(*a)
+# else
           : "cc");
  #else
    __asm__ volatile
-          ("movq %[a], %%rdx;"
+     ("movq %[a], %%rdx;" "mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
-           "mulx %[b], %[c], %[d];"
+      :[b] "m"(*b),[a] "m"(*a)
-           : [c]"=r"(c), [d]"=r"(d)
+      :"rdx");
-           : [b]"m"(*b), [a]"m"(*a)
+# endif
-           : "rdx");
+    return (((__uint128_t) (d)) << 64) | c;
  #endif
  return (((__uint128_t)(d))<<64) | c;
 }
-static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) {
+static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b)
-  uint64_t c,d;
+{
-  #ifndef __BMI2__
+    uint64_t c, d;
 # ifndef __BMI2__
    __asm__ volatile
-          ("movq %[a], %%rax;"
+     ("movq %[a], %%rax;" "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
-           "mulq %[b];"
+      :[b] "m"(*b),[a] "r"(a)
-           : [c]"=&a"(c), [d]"=d"(d)
+      :"cc");
-           : [b]"m"(*b), [a]"r"(a)
+# else
           : "cc");
  #else
    __asm__ volatile
-          ("mulx %[b], %[c], %[d];"
+     ("mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
-           : [c]"=r"(c), [d]"=r"(d)
+      :[b] "m"(*b),[a] "d"(a));
-           : [b]"m"(*b), [a]"d"(a));
+# endif
-  #endif
+    return (((__uint128_t) (d)) << 64) | c;
  return (((__uint128_t)(d))<<64) | c;
 }
-static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b) {
+static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b)
-  uint64_t c,d;
+{
-  #ifndef __BMI2__
+    uint64_t c, d;
 # ifndef __BMI2__
    __asm__ volatile
-          ("mulq %[b];"
+     ("mulq %[b];":[c] "=a"(c),[d] "=d"(d)
-           : [c]"=a"(c), [d]"=d"(d)
+      :[b] "r"(b), "a"(a)
-           : [b]"r"(b), "a"(a)
+      :"cc");
-           : "cc");
+# else
  #else
    __asm__ volatile
-          ("mulx %[b], %[c], %[d];"
+     ("mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
-           : [c]"=r"(c), [d]"=r"(d)
+      :[b] "r"(b),[a] "d"(a));
-           : [b]"r"(b), [a]"d"(a));
+# endif
-  #endif
+    return (((__uint128_t) (d)) << 64) | c;
  return (((__uint128_t)(d))<<64) | c;
 }
-static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) {
+static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b)
-  uint64_t c,d;
+{
-  #ifndef __BMI2__
+    uint64_t c, d;
 # ifndef __BMI2__
    __asm__ volatile
     ("movq %[a], %%rax; "
-           "addq %%rax, %%rax; "
+      "addq %%rax, %%rax; " "mulq %[b];":[c] "=&a"(c),[d] "=d"(d)
-           "mulq %[b];"
+      :[b] "m"(*b),[a] "m"(*a)
-           : [c]"=&a"(c), [d]"=d"(d)
+      :"cc");
-           : [b]"m"(*b), [a]"m"(*a)
+# else
           : "cc");
  #else
    __asm__ volatile
     ("movq %[a], %%rdx;"
-           "leaq (,%%rdx,2), %%rdx;"
+      "leaq (,%%rdx,2), %%rdx;" "mulx %[b], %[c], %[d];":[c] "=r"(c),[d] "=r"(d)
-           "mulx %[b], %[c], %[d];"
+      :[b] "m"(*b),[a] "m"(*a)
-           : [c]"=r"(c), [d]"=r"(d)
+      :"rdx");
-           : [b]"m"(*b), [a]"m"(*a)
+# endif
-           : "rdx");
+    return (((__uint128_t) (d)) << 64) | c;
  #endif
  return (((__uint128_t)(d))<<64) | c;
 }
-static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
+static __inline__ void mac(__uint128_t * acc, const uint64_t *a,
-  uint64_t lo = *acc, hi = *acc>>64;
+                           const uint64_t *b)
 {
    uint64_t lo = *acc, hi = *acc >> 64;
-  #ifdef __BMI2__
+# ifdef __BMI2__
-      uint64_t c,d;
+    uint64_t c, d;
    __asm__ volatile
     ("movq %[a], %%rdx; "
      "mulx %[b], %[c], %[d]; "
      "addq %[c], %[lo]; "
-           "adcq %[d], %[hi]; "
+      "adcq %[d], %[hi]; ":[c] "=&r"(c),[d] "=&r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-           : [c]"=&r"(c), [d]"=&r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+      :[b] "m"(*b),[a] "m"(*a)
-           : [b]"m"(*b), [a]"m"(*a)
+      :"rdx", "cc");
-           : "rdx", "cc");
+# else
  #else
    __asm__ volatile
     ("movq %[a], %%rax; "
      "mulq %[b]; "
-           "addq %%rax, %[lo]; "
+      "addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
-           "adcq %%rdx, %[hi]; "
+      :[b] "m"(*b),[a] "m"(*a)
-           : [lo]"+r"(lo), [hi]"+r"(hi)
+      :"rax", "rdx", "cc");
-           : [b]"m"(*b), [a]"m"(*a)
+# endif
           : "rax", "rdx", "cc");
  #endif
-  *acc = (((__uint128_t)(hi))<<64) | lo;
+    *acc = (((__uint128_t) (hi)) << 64) | lo;
 }
-static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2, const uint64_t *a, const uint64_t *b) {
+static __inline__ void macac(__uint128_t * acc, __uint128_t * acc2,
-  uint64_t lo = *acc, hi = *acc>>64;
+                             const uint64_t *a, const uint64_t *b)
-  uint64_t lo2 = *acc2, hi2 = *acc2>>64;
+{
    uint64_t lo = *acc, hi = *acc >> 64;
    uint64_t lo2 = *acc2, hi2 = *acc2 >> 64;
-  #ifdef __BMI2__
+# ifdef __BMI2__
-      uint64_t c,d;
+    uint64_t c, d;
    __asm__ volatile
     ("movq %[a], %%rdx; "
      "mulx %[b], %[c], %[d]; "
      "addq %[c], %[lo]; "
      "adcq %[d], %[hi]; "
      "addq %[c], %[lo2]; "
-           "adcq %[d], %[hi2]; "
+      "adcq %[d], %[hi2]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi),
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
+      [lo2] "+r"(lo2),[hi2] "+r"(hi2)
-           : [b]"m"(*b), [a]"m"(*a)
+      :[b] "m"(*b),[a] "m"(*a)
-           : "rdx", "cc");
+      :"rdx", "cc");
-  #else
+# else
    __asm__ volatile
     ("movq %[a], %%rax; "
      "mulq %[b]; "
      "addq %%rax, %[lo]; "
      "adcq %%rdx, %[hi]; "
      "addq %%rax, %[lo2]; "
-           "adcq %%rdx, %[hi2]; "
+      "adcq %%rdx, %[hi2]; ":[lo] "+r"(lo),[hi] "+r"(hi),[lo2] "+r"(lo2),
-           : [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
+      [hi2] "+r"(hi2)
-           : [b]"m"(*b), [a]"m"(*a)
+      :[b] "m"(*b),[a] "m"(*a)
-           : "rax", "rdx", "cc");
+      :"rax", "rdx", "cc");
-  #endif
+# endif
-  *acc = (((__uint128_t)(hi))<<64) | lo;
+    *acc = (((__uint128_t) (hi)) << 64) | lo;
-  *acc2 = (((__uint128_t)(hi2))<<64) | lo2;
+    *acc2 = (((__uint128_t) (hi2)) << 64) | lo2;
 }
-static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) {
+static __inline__ void mac_rm(__uint128_t * acc, uint64_t a, const uint64_t *b)
-  uint64_t lo = *acc, hi = *acc>>64;
+{
    uint64_t lo = *acc, hi = *acc >> 64;
-  #ifdef __BMI2__
+# ifdef __BMI2__
-      uint64_t c,d;
+    uint64_t c, d;
    __asm__ volatile
     ("mulx %[b], %[c], %[d]; "
      "addq %[c], %[lo]; "
-           "adcq %[d], %[hi]; "
+      "adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+      :[b] "m"(*b),[a] "d"(a)
-           : [b]"m"(*b), [a]"d"(a)
+      :"cc");
-           : "cc");
+# else
  #else
    __asm__ volatile
     ("movq %[a], %%rax; "
      "mulq %[b]; "
-           "addq %%rax, %[lo]; "
+      "addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
-           "adcq %%rdx, %[hi]; "
+      :[b] "m"(*b),[a] "r"(a)
-           : [lo]"+r"(lo), [hi]"+r"(hi)
+      :"rax", "rdx", "cc");
-           : [b]"m"(*b), [a]"r"(a)
+# endif
           : "rax", "rdx", "cc");
  #endif
-  *acc = (((__uint128_t)(hi))<<64) | lo;
+    *acc = (((__uint128_t) (hi)) << 64) | lo;
 }
-static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b) {
+static __inline__ void mac_rr(__uint128_t * acc, uint64_t a, const uint64_t b)
-  uint64_t lo = *acc, hi = *acc>>64;
+{
    uint64_t lo = *acc, hi = *acc >> 64;
-  #ifdef __BMI2__
+# ifdef __BMI2__
-      uint64_t c,d;
+    uint64_t c, d;
    __asm__ volatile
     ("mulx %[b], %[c], %[d]; "
      "addq %[c], %[lo]; "
-           "adcq %[d], %[hi]; "
+      "adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+      :[b] "r"(b),[a] "d"(a)
-           : [b]"r"(b), [a]"d"(a)
+      :"cc");
-           : "cc");
+# else
  #else
    __asm__ volatile
     ("mulq %[b]; "
      "addq %%rax, %[lo]; "
-           "adcq %%rdx, %[hi]; "
+      "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi), "+a"(a)
-           : [lo]"+r"(lo), [hi]"+r"(hi), "+a"(a)
+      :[b] "r"(b)
-           : [b]"r"(b)
+      :"rdx", "cc");
-           : "rdx", "cc");
+# endif
  #endif
-  *acc = (((__uint128_t)(hi))<<64) | lo;
+    *acc = (((__uint128_t) (hi)) << 64) | lo;
 }
-static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
+static __inline__ void mac2(__uint128_t * acc, const uint64_t *a,
-  uint64_t lo = *acc, hi = *acc>>64;
+                            const uint64_t *b)
 {
    uint64_t lo = *acc, hi = *acc >> 64;
-  #ifdef __BMI2__
+# ifdef __BMI2__
-      uint64_t c,d;
+    uint64_t c, d;
    __asm__ volatile
     ("movq %[a], %%rdx; "
      "addq %%rdx, %%rdx; "
      "mulx %[b], %[c], %[d]; "
      "addq %[c], %[lo]; "
-           "adcq %[d], %[hi]; "
+      "adcq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+      :[b] "m"(*b),[a] "m"(*a)
-           : [b]"m"(*b), [a]"m"(*a)
+      :"rdx", "cc");
-           : "rdx", "cc");
+# else
  #else
    __asm__ volatile
     ("movq %[a], %%rax; "
      "addq %%rax, %%rax; "
      "mulq %[b]; "
-           "addq %%rax, %[lo]; "
+      "addq %%rax, %[lo]; " "adcq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
-           "adcq %%rdx, %[hi]; "
+      :[b] "m"(*b),[a] "m"(*a)
-           : [lo]"+r"(lo), [hi]"+r"(hi)
+      :"rax", "rdx", "cc");
-           : [b]"m"(*b), [a]"m"(*a)
+# endif
           : "rax", "rdx", "cc");
  #endif
-  *acc = (((__uint128_t)(hi))<<64) | lo;
+    *acc = (((__uint128_t) (hi)) << 64) | lo;
 }
-static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
+static __inline__ void msb(__uint128_t * acc, const uint64_t *a,
-  uint64_t lo = *acc, hi = *acc>>64;
+                           const uint64_t *b)
-  #ifdef __BMI2__
+{
-      uint64_t c,d;
+    uint64_t lo = *acc, hi = *acc >> 64;
 # ifdef __BMI2__
    uint64_t c, d;
    __asm__ volatile
     ("movq %[a], %%rdx; "
      "mulx %[b], %[c], %[d]; "
      "subq %[c], %[lo]; "
-           "sbbq %[d], %[hi]; "
+      "sbbq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+      :[b] "m"(*b),[a] "m"(*a)
-           : [b]"m"(*b), [a]"m"(*a)
+      :"rdx", "cc");
-           : "rdx", "cc");
+# else
  #else
    __asm__ volatile
     ("movq %[a], %%rax; "
      "mulq %[b]; "
-           "subq %%rax, %[lo]; "
+      "subq %%rax, %[lo]; " "sbbq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
-           "sbbq %%rdx, %[hi]; "
+      :[b] "m"(*b),[a] "m"(*a)
-           : [lo]"+r"(lo), [hi]"+r"(hi)
+      :"rax", "rdx", "cc");
-           : [b]"m"(*b), [a]"m"(*a)
+# endif
-           : "rax", "rdx", "cc");
+    *acc = (((__uint128_t) (hi)) << 64) | lo;
  #endif
  *acc = (((__uint128_t)(hi))<<64) | lo;
 }
-static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
+static __inline__ void msb2(__uint128_t * acc, const uint64_t *a,
-  uint64_t lo = *acc, hi = *acc>>64;
+                            const uint64_t *b)
-  #ifdef __BMI2__
+{
-      uint64_t c,d;
+    uint64_t lo = *acc, hi = *acc >> 64;
 # ifdef __BMI2__
    uint64_t c, d;
    __asm__ volatile
     ("movq %[a], %%rdx; "
      "addq %%rdx, %%rdx; "
      "mulx %[b], %[c], %[d]; "
      "subq %[c], %[lo]; "
-           "sbbq %[d], %[hi]; "
+      "sbbq %[d], %[hi]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-           : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+      :[b] "m"(*b),[a] "m"(*a)
-           : [b]"m"(*b), [a]"m"(*a)
+      :"rdx", "cc");
-           : "rdx", "cc");
+# else
  #else
    __asm__ volatile
     ("movq %[a], %%rax; "
      "addq %%rax, %%rax; "
      "mulq %[b]; "
-           "subq %%rax, %[lo]; "
+      "subq %%rax, %[lo]; " "sbbq %%rdx, %[hi]; ":[lo] "+r"(lo),[hi] "+r"(hi)
-           "sbbq %%rdx, %[hi]; "
+      :[b] "m"(*b),[a] "m"(*a)
-           : [lo]"+r"(lo), [hi]"+r"(hi)
+      :"rax", "rdx", "cc");
-           : [b]"m"(*b), [a]"m"(*a)
+# endif
-           : "rax", "rdx", "cc");
+    *acc = (((__uint128_t) (hi)) << 64) | lo;
  #endif
  *acc = (((__uint128_t)(hi))<<64) | lo;
 }
-static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
+static __inline__ void mrs(__uint128_t * acc, const uint64_t *a,
-  uint64_t c,d, lo = *acc, hi = *acc>>64;
+                           const uint64_t *b)
 {
    uint64_t c, d, lo = *acc, hi = *acc >> 64;
    __asm__ volatile
     ("movq %[a], %%rdx; "
      "mulx %[b], %[c], %[d]; "
      "subq %[lo], %[c]; "
-       "sbbq %[hi], %[d]; "
+      "sbbq %[hi], %[d]; ":[c] "=r"(c),[d] "=r"(d),[lo] "+r"(lo),[hi] "+r"(hi)
-       : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
+      :[b] "m"(*b),[a] "m"(*a)
-       : [b]"m"(*b), [a]"m"(*a)
+      :"rdx", "cc");
-       : "rdx", "cc");
+    *acc = (((__uint128_t) (d)) << 64) | c;
  *acc = (((__uint128_t)(d))<<64) | c;
 }
-static __inline__ uint64_t word_is_zero(uint64_t x) {
+static __inline__ uint64_t word_is_zero(uint64_t x)
-  __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x));
+{
    __asm__ volatile ("neg %0; sbb %0, %0;":"+r" (x));
    return ~x;
 }
-static inline uint64_t shrld(__uint128_t x, int n) {
+static inline uint64_t shrld(__uint128_t x, int n)
-    return x>>n;
+{
    return x >> n;
 }
 #endif                          /* __ARCH_X86_64_ARCH_INTRINSICS_H__ */
--- a/crypto/ec/curve448/arch_x86_64/f_impl.c
+++ b/crypto/ec/curve448/arch_x86_64/f_impl.c
@ -12,32 +12,34 @@
 #include "f_field.h"
-void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
+void gf_mul(gf_s * __restrict__ cs, const gf as, const gf bs)
 {
    const uint64_t *a = as->limb, *b = bs->limb;
    uint64_t *c = cs->limb;
    __uint128_t accum0 = 0, accum1 = 0, accum2;
-    uint64_t mask = (1ull<<56) - 1;  
+    uint64_t mask = (1ull << 56) - 1;
    uint64_t aa[4] VECTOR_ALIGNED, bb[4] VECTOR_ALIGNED, bbb[4] VECTOR_ALIGNED;
    /* For some reason clang doesn't vectorize this without prompting? */
    unsigned int i;
-    for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) {
+    for (i = 0; i < sizeof(aa) / sizeof(uint64xn_t); i++) {
-        ((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i];
+        ((uint64xn_t *) aa)[i] =
-        ((uint64xn_t*)bb)[i] = ((const uint64xn_t*)b)[i] + ((const uint64xn_t*)(&b[4]))[i]; 
+            ((const uint64xn_t *)a)[i] + ((const uint64xn_t *)(&a[4]))[i];
-        ((uint64xn_t*)bbb)[i] = ((const uint64xn_t*)bb)[i] + ((const uint64xn_t*)(&b[4]))[i];     
+        ((uint64xn_t *) bb)[i] =
            ((const uint64xn_t *)b)[i] + ((const uint64xn_t *)(&b[4]))[i];
        ((uint64xn_t *) bbb)[i] =
            ((const uint64xn_t *)bb)[i] + ((const uint64xn_t *)(&b[4]))[i];
    }
    /*
-    for (int i=0; i<4; i++) {
+     * for (int i=0; i<4; i++) { aa[i] = a[i] + a[i+4]; bb[i] = b[i] + b[i+4];
-    aa[i] = a[i] + a[i+4];
+     * }
    bb[i] = b[i] + b[i+4];
    }
     */
-    accum2  = widemul(&a[0],&b[3]);
+    accum2 = widemul(&a[0], &b[3]);
-    accum0  = widemul(&aa[0],&bb[3]);
+    accum0 = widemul(&aa[0], &bb[3]);
-    accum1  = widemul(&a[4],&b[7]);
+    accum1 = widemul(&a[4], &b[7]);
    mac(&accum2, &a[1], &b[2]);
    mac(&accum0, &aa[1], &bb[2]);
@ -60,14 +62,14 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
    accum0 >>= 56;
    accum1 >>= 56;
-    mac(&accum0, &aa[1],&bb[3]);
+    mac(&accum0, &aa[1], &bb[3]);
    mac(&accum1, &a[5], &b[7]);
    mac(&accum0, &aa[2], &bb[2]);
    mac(&accum1, &a[6], &b[6]);
    mac(&accum0, &aa[3], &bb[1]);
    accum1 += accum0;
-    accum2 = widemul(&a[0],&b[0]);
+    accum2 = widemul(&a[0], &b[0]);
    accum1 -= accum2;
    accum0 += accum2;
@ -84,7 +86,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
    accum0 >>= 56;
    accum1 >>= 56;
-    accum2  = widemul(&a[2],&b[7]);
+    accum2 = widemul(&a[2], &b[7]);
    mac(&accum0, &a[6], &bb[3]);
    mac(&accum1, &aa[2], &bbb[3]);
@ -92,7 +94,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
    mac(&accum0, &a[7], &bb[2]);
    mac(&accum1, &aa[3], &bbb[2]);
-    mac(&accum2, &a[0],&b[1]);
+    mac(&accum2, &a[0], &b[1]);
    mac(&accum1, &aa[0], &bb[1]);
    mac(&accum0, &a[4], &b[5]);
@ -109,11 +111,11 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
    accum0 >>= 56;
    accum1 >>= 56;
-    accum2  = widemul(&a[3],&b[7]);
+    accum2 = widemul(&a[3], &b[7]);
    mac(&accum0, &a[7], &bb[3]);
    mac(&accum1, &aa[3], &bbb[3]);
-    mac(&accum2, &a[0],&b[2]);
+    mac(&accum2, &a[0], &b[2]);
    mac(&accum1, &aa[0], &bb[2]);
    mac(&accum0, &a[4], &b[6]);
@ -147,36 +149,45 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
    c[0] += ((uint64_t)(accum1));
 }
-void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
+void gf_mulw_unsigned(gf_s * __restrict__ cs, const gf as, uint32_t b)
 {
    const uint64_t *a = as->limb;
    uint64_t *c = cs->limb;
    __uint128_t accum0, accum4;
-    uint64_t mask = (1ull<<56) - 1;  
+    uint64_t mask = (1ull << 56) - 1;
    accum0 = widemul_rm(b, &a[0]);
    accum4 = widemul_rm(b, &a[4]);
-    c[0] = accum0 & mask; accum0 >>= 56;
+    c[0] = accum0 & mask;
-    c[4] = accum4 & mask; accum4 >>= 56;
+    accum0 >>= 56;
    c[4] = accum4 & mask;
    accum4 >>= 56;
    mac_rm(&accum0, b, &a[1]);
    mac_rm(&accum4, b, &a[5]);
-    c[1] = accum0 & mask; accum0 >>= 56;
+    c[1] = accum0 & mask;
-    c[5] = accum4 & mask; accum4 >>= 56;
+    accum0 >>= 56;
    c[5] = accum4 & mask;
    accum4 >>= 56;
    mac_rm(&accum0, b, &a[2]);
    mac_rm(&accum4, b, &a[6]);
-    c[2] = accum0 & mask; accum0 >>= 56;
+    c[2] = accum0 & mask;
-    c[6] = accum4 & mask; accum4 >>= 56;
+    accum0 >>= 56;
    c[6] = accum4 & mask;
    accum4 >>= 56;
    mac_rm(&accum0, b, &a[3]);
    mac_rm(&accum4, b, &a[7]);
-    c[3] = accum0 & mask; accum0 >>= 56;
+    c[3] = accum0 & mask;
-    c[7] = accum4 & mask; accum4 >>= 56;
+    accum0 >>= 56;
    c[7] = accum4 & mask;
    accum4 >>= 56;
    accum0 += accum4 + c[4];
    c[4] = accum0 & mask;
@ -187,24 +198,26 @@ void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
    c[1] += accum4 >> 56;
 }
-void gf_sqr (gf_s *__restrict__ cs, const gf as) {
+void gf_sqr(gf_s * __restrict__ cs, const gf as)
 {
    const uint64_t *a = as->limb;
    uint64_t *c = cs->limb;
    __uint128_t accum0 = 0, accum1 = 0, accum2;
-    uint64_t mask = (1ull<<56) - 1;  
+    uint64_t mask = (1ull << 56) - 1;
    uint64_t aa[4] VECTOR_ALIGNED;
    /* For some reason clang doesn't vectorize this without prompting? */
    unsigned int i;
-    for (i=0; i<sizeof(aa)/sizeof(uint64xn_t); i++) {
+    for (i = 0; i < sizeof(aa) / sizeof(uint64xn_t); i++) {
-      ((uint64xn_t*)aa)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)(&a[4]))[i];
+        ((uint64xn_t *) aa)[i] =
            ((const uint64xn_t *)a)[i] + ((const uint64xn_t *)(&a[4]))[i];
    }
-    accum2  = widemul(&a[0],&a[3]);
+    accum2 = widemul(&a[0], &a[3]);
-    accum0  = widemul(&aa[0],&aa[3]);
+    accum0 = widemul(&aa[0], &aa[3]);
-    accum1  = widemul(&a[4],&a[7]);
+    accum1 = widemul(&a[4], &a[7]);
    mac(&accum2, &a[1], &a[2]);
    mac(&accum0, &aa[1], &aa[2]);
@ -213,13 +226,13 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    accum0 -= accum2;
    accum1 += accum2;
-    c[3] = ((uint64_t)(accum1))<<1 & mask;
+    c[3] = ((uint64_t)(accum1)) << 1 & mask;
-    c[7] = ((uint64_t)(accum0))<<1 & mask;
+    c[7] = ((uint64_t)(accum0)) << 1 & mask;
    accum0 >>= 55;
    accum1 >>= 55;
-    mac2(&accum0, &aa[1],&aa[3]);
+    mac2(&accum0, &aa[1], &aa[3]);
    mac2(&accum1, &a[5], &a[7]);
    mac(&accum0, &aa[2], &aa[2]);
    accum1 += accum0;
@ -227,7 +240,7 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    msb2(&accum0, &a[1], &a[3]);
    mac(&accum1, &a[6], &a[6]);
-    accum2 = widemul(&a[0],&a[0]);
+    accum2 = widemul(&a[0], &a[0]);
    accum1 -= accum2;
    accum0 += accum2;
@ -241,14 +254,14 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    accum0 >>= 56;
    accum1 >>= 56;
-    accum2  = widemul2(&aa[2],&aa[3]);
+    accum2 = widemul2(&aa[2], &aa[3]);
    msb2(&accum0, &a[2], &a[3]);
    mac2(&accum1, &a[6], &a[7]);
    accum1 += accum2;
    accum0 += accum2;
-    accum2  = widemul2(&a[0],&a[1]);
+    accum2 = widemul2(&a[0], &a[1]);
    mac2(&accum1, &aa[0], &aa[1]);
    mac2(&accum0, &a[4], &a[5]);
@ -261,14 +274,14 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    accum0 >>= 56;
    accum1 >>= 56;
-    accum2  = widemul(&aa[3],&aa[3]);
+    accum2 = widemul(&aa[3], &aa[3]);
    msb(&accum0, &a[3], &a[3]);
    mac(&accum1, &a[7], &a[7]);
    accum1 += accum2;
    accum0 += accum2;
-    accum2  = widemul2(&a[0],&a[2]);
+    accum2 = widemul2(&a[0], &a[2]);
    mac2(&accum1, &aa[0], &aa[2]);
    mac2(&accum0, &a[4], &a[6]);
--- a/crypto/ec/curve448/arch_x86_64/f_impl.h
+++ b/crypto/ec/curve448/arch_x86_64/f_impl.h
@ -14,60 +14,63 @@
 #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
 #define LIMB_PLACE_VALUE(i) 56
-void gf_add_RAW (gf out, const gf a, const gf b) {
+void gf_add_RAW(gf out, const gf a, const gf b)
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
+{
-        ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
+    for (unsigned int i = 0; i < sizeof(*out) / sizeof(uint64xn_t); i++) {
        ((uint64xn_t *) out)[i] =
            ((const uint64xn_t *)a)[i] + ((const uint64xn_t *)b)[i];
    }
    /*
-    unsigned int i;
+     * unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-    for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
+     * out->limb[i] = a->limb[i] + b->limb[i]; }
        out->limb[i] = a->limb[i] + b->limb[i];
    }
     */
 }
-void gf_sub_RAW (gf out, const gf a, const gf b) {
+void gf_sub_RAW(gf out, const gf a, const gf b)
-    for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
+{
-        ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
+    for (unsigned int i = 0; i < sizeof(*out) / sizeof(uint64xn_t); i++) {
        ((uint64xn_t *) out)[i] =
            ((const uint64xn_t *)a)[i] - ((const uint64xn_t *)b)[i];
    }
    /*
-    unsigned int i;
+     * unsigned int i; for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
-    for (i=0; i<sizeof(*out)/sizeof(out->limb[0]); i++) {
+     * out->limb[i] = a->limb[i] - b->limb[i]; }
        out->limb[i] = a->limb[i] - b->limb[i];
    }
     */
 }
-void gf_bias (gf a, int amt) {
+void gf_bias(gf a, int amt)
-    uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
+{
    uint64_t co1 = ((1ull << 56) - 1) * amt, co2 = co1 - amt;
 #if __AVX2__
-    uint64x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
+    uint64x4_t lo = { co1, co1, co1, co1 }, hi = {
-    uint64x4_t *aa = (uint64x4_t*) a;
+    co2, co1, co1, co1};
    uint64x4_t *aa = (uint64x4_t *) a;
    aa[0] += lo;
    aa[1] += hi;
 #elif __SSE2__
-    uint64x2_t lo = {co1,co1}, hi = {co2,co1};
+    uint64x2_t lo = { co1, co1 }, hi = {
-    uint64x2_t *aa = (uint64x2_t*) a;
+    co2, co1};
    uint64x2_t *aa = (uint64x2_t *) a;
    aa[0] += lo;
    aa[1] += lo;
    aa[2] += hi;
    aa[3] += lo;
 #else
-    for (unsigned int i=0; i<sizeof(*a)/sizeof(uint64_t); i++) {
+    for (unsigned int i = 0; i < sizeof(*a) / sizeof(uint64_t); i++) {
-        a->limb[i] += (i==4) ? co2 : co1;
+        a->limb[i] += (i == 4) ? co2 : co1;
    }
 #endif
 }
-void gf_weak_reduce (gf a) {
+void gf_weak_reduce(gf a)
 {
    /* PERF: use pshufb/palignr if anyone cares about speed of this */
-    uint64_t mask = (1ull<<56) - 1;
+    uint64_t mask = (1ull << 56) - 1;
    uint64_t tmp = a->limb[7] >> 56;
    a->limb[4] += tmp;
-    for (unsigned int i=7; i>0; i--) {
+    for (unsigned int i = 7; i > 0; i--) {
-        a->limb[i] = (a->limb[i] & mask) + (a->limb[i-1]>>56);
+        a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 56);
    }
    a->limb[0] = (a->limb[0] & mask) + tmp;
 }
--- a/crypto/ec/curve448/constant_time.h
+++ b/crypto/ec/curve448/constant_time.h
@ -11,10 +11,10 @@
 */
 #ifndef __CONSTANT_TIME_H__
-#define __CONSTANT_TIME_H__ 1
+# define __CONSTANT_TIME_H__ 1
-#include "word.h"
+# include "word.h"
-#include <string.h>
+# include <string.h>
 /*
 * Constant-time operations on hopefully-compile-time-sized memory
@ -36,20 +36,19 @@
 * Instead, we're putting our trust in the loop unroller and unswitcher.
 */
 /**
 * Unaligned big (vector?) register.
 */
 typedef struct {
    big_register_t unaligned;
-} __attribute__((packed)) unaligned_br_t;
+} __attribute__ ((packed)) unaligned_br_t;
 /**
 * Unaligned word register, for architectures where that matters.
 */
 typedef struct {
    word_t unaligned;
-} __attribute__((packed)) unaligned_word_t;
+} __attribute__ ((packed)) unaligned_word_t;
 /**
 * @brief Constant-time conditional swap.
@ -60,62 +59,58 @@ typedef struct {
 * as their sizes, if the CPU cares about that sort of thing.
 */
 static __inline__ void
-__attribute__((unused,always_inline))
+    __attribute__ ((unused, always_inline))
-constant_time_cond_swap (
+    constant_time_cond_swap(void *__restrict__ a_,
-    void *__restrict__ a_,
+                        void *__restrict__ b_, word_t elem_bytes, mask_t doswap)
-    void *__restrict__ b_,
+{
    word_t elem_bytes,
    mask_t doswap
 ) {
    word_t k;
    unsigned char *a = (unsigned char *)a_;
    unsigned char *b = (unsigned char *)b_;
    big_register_t br_mask = br_set_to_mask(doswap);
-    for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
+    for (k = 0; k <= elem_bytes - sizeof(big_register_t);
         k += sizeof(big_register_t)) {
        if (elem_bytes % sizeof(big_register_t)) {
            /* unaligned */
            big_register_t xor =
-                ((unaligned_br_t*)(&a[k]))->unaligned
+                ((unaligned_br_t *) (&a[k]))->unaligned
-              ^ ((unaligned_br_t*)(&b[k]))->unaligned;
+                ^ ((unaligned_br_t *) (&b[k]))->unaligned;
            xor &= br_mask;
-            ((unaligned_br_t*)(&a[k]))->unaligned ^= xor;
+            ((unaligned_br_t *) (&a[k]))->unaligned ^= xor;
-            ((unaligned_br_t*)(&b[k]))->unaligned ^= xor;
+            ((unaligned_br_t *) (&b[k]))->unaligned ^= xor;
        } else {
            /* aligned */
-            big_register_t xor =
+            big_register_t xor = *((big_register_t *) (&a[k]))
-                *((big_register_t*)(&a[k]))
+                ^ *((big_register_t *) (&b[k]));
              ^ *((big_register_t*)(&b[k]));
            xor &= br_mask;
-            *((big_register_t*)(&a[k])) ^= xor;
+            *((big_register_t *) (&a[k])) ^= xor;
-            *((big_register_t*)(&b[k])) ^= xor;
+            *((big_register_t *) (&b[k])) ^= xor;
        }
    }
    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
-        for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
+        for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
            if (elem_bytes % sizeof(word_t)) {
                /* unaligned */
                word_t xor =
-                    ((unaligned_word_t*)(&a[k]))->unaligned
+                    ((unaligned_word_t *) (&a[k]))->unaligned
-                  ^ ((unaligned_word_t*)(&b[k]))->unaligned;
+                    ^ ((unaligned_word_t *) (&b[k]))->unaligned;
                xor &= doswap;
-                ((unaligned_word_t*)(&a[k]))->unaligned ^= xor;
+                ((unaligned_word_t *) (&a[k]))->unaligned ^= xor;
-                ((unaligned_word_t*)(&b[k]))->unaligned ^= xor;
+                ((unaligned_word_t *) (&b[k]))->unaligned ^= xor;
            } else {
                /* aligned */
-                word_t xor =
+                word_t xor = *((word_t *) (&a[k]))
-                    *((word_t*)(&a[k]))
+                    ^ *((word_t *) (&b[k]));
                  ^ *((word_t*)(&b[k]));
                xor &= doswap;
-                *((word_t*)(&a[k])) ^= xor;
+                *((word_t *) (&a[k])) ^= xor;
-                *((word_t*)(&b[k])) ^= xor;
+                *((word_t *) (&b[k])) ^= xor;
            }
        }
    }
    if (elem_bytes % sizeof(word_t)) {
-        for (; k<elem_bytes; k+=1) {
+        for (; k < elem_bytes; k += 1) {
            unsigned char xor = a[k] ^ b[k];
            xor &= doswap;
            a[k] ^= xor;
@ -133,53 +128,60 @@ constant_time_cond_swap (
 * The table and output must not alias.
 */
 static __inline__ void
-__attribute__((unused,always_inline))
+    __attribute__ ((unused, always_inline))
-constant_time_lookup (
+    constant_time_lookup(void *__restrict__ out_,
    void *__restrict__ out_,
                     const void *table_,
-    word_t elem_bytes,
+                     word_t elem_bytes, word_t n_table, word_t idx)
-    word_t n_table,
+{
    word_t idx
 ) {
    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
    /* Can't do pointer arithmetic on void* */
    unsigned char *out = (unsigned char *)out_;
    const unsigned char *table = (const unsigned char *)table_;
-    word_t j,k;
+    word_t j, k;
    memset(out, 0, elem_bytes);
-    for (j=0; j<n_table; j++, big_i-=big_one) {        
+    for (j = 0; j < n_table; j++, big_i -= big_one) {
        big_register_t br_mask = br_is_zero(big_i);
        word_t mask;
-        for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
+        for (k = 0; k <= elem_bytes - sizeof(big_register_t);
             k += sizeof(big_register_t)) {
            if (elem_bytes % sizeof(big_register_t)) {
                /* unaligned */
-                ((unaligned_br_t *)(out+k))->unaligned
+                ((unaligned_br_t *) (out + k))->unaligned
-			|= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned;
+                    |=
                    br_mask &
                    ((const unaligned_br_t
                      *)(&table[k + j * elem_bytes]))->unaligned;
            } else {
                /* aligned */
-                *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]);
+                *(big_register_t *) (out + k) |=
                    br_mask & *(const big_register_t
                                *)(&table[k + j * elem_bytes]);
            }
        }
-        mask = word_is_zero(idx^j);
+        mask = word_is_zero(idx ^ j);
        if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
-            for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
+            for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
                if (elem_bytes % sizeof(word_t)) {
                    /* input unaligned, output aligned */
-                    *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned;
+                    *(word_t *) (out + k) |=
                        mask &
                        ((const unaligned_word_t
                          *)(&table[k + j * elem_bytes]))->unaligned;
                } else {
                    /* aligned */
-                    *(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]);
+                    *(word_t *) (out + k) |=
                        mask & *(const word_t *)(&table[k + j * elem_bytes]);
                }
            }
        }
        if (elem_bytes % sizeof(word_t)) {
-            for (; k<elem_bytes; k+=1) {
+            for (; k < elem_bytes; k += 1) {
-                out[k] |= mask & table[k+j*elem_bytes];
+                out[k] |= mask & table[k + j * elem_bytes];
            }
        }
    }
@ -195,15 +197,12 @@ constant_time_lookup (
 * input, it must be equal and not partially overlap.
 */
 static __inline__ void
-__attribute__((unused,always_inline))
+    __attribute__ ((unused, always_inline))
-constant_time_select (
+    constant_time_select(void *a_,
    void *a_,
                     const void *bFalse_,
                     const void *bTrue_,
-    word_t elem_bytes,
+                     word_t elem_bytes, mask_t mask, size_t alignment_bytes)
-    mask_t mask,
+{
    size_t alignment_bytes
 ) {
    unsigned char *a = (unsigned char *)a_;
    const unsigned char *bTrue = (const unsigned char *)bTrue_;
    const unsigned char *bFalse = (const unsigned char *)bFalse_;
@ -212,39 +211,41 @@ constant_time_select (
    alignment_bytes |= elem_bytes;
-    for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {
+    for (k = 0; k <= elem_bytes - sizeof(big_register_t);
         k += sizeof(big_register_t)) {
        if (alignment_bytes % sizeof(big_register_t)) {
            /* unaligned */
-            ((unaligned_br_t*)(&a[k]))->unaligned =
+            ((unaligned_br_t *) (&a[k]))->unaligned =
-		  ( br_mask & ((const unaligned_br_t*)(&bTrue [k]))->unaligned)
+                (br_mask & ((const unaligned_br_t *)(&bTrue[k]))->unaligned)
-		| (~br_mask & ((const unaligned_br_t*)(&bFalse[k]))->unaligned);
+                | (~br_mask &
                   ((const unaligned_br_t *)(&bFalse[k]))->unaligned);
        } else {
            /* aligned */
-            *(big_register_t *)(a+k) =
+            *(big_register_t *) (a + k) =
-		  ( br_mask & *(const big_register_t*)(&bTrue [k]))
+                (br_mask & *(const big_register_t *)(&bTrue[k]))
-		| (~br_mask & *(const big_register_t*)(&bFalse[k]));
+                | (~br_mask & *(const big_register_t *)(&bFalse[k]));
        }
    }
    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
-        for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) {
+        for (; k <= elem_bytes - sizeof(word_t); k += sizeof(word_t)) {
            if (alignment_bytes % sizeof(word_t)) {
                /* unaligned */
-                ((unaligned_word_t*)(&a[k]))->unaligned =
+                ((unaligned_word_t *) (&a[k]))->unaligned =
-		    ( mask & ((const unaligned_word_t*)(&bTrue [k]))->unaligned)
+                    (mask & ((const unaligned_word_t *)(&bTrue[k]))->unaligned)
-		  | (~mask & ((const unaligned_word_t*)(&bFalse[k]))->unaligned);
+                    | (~mask &
                       ((const unaligned_word_t *)(&bFalse[k]))->unaligned);
            } else {
                /* aligned */
-                *(word_t *)(a+k) =
+                *(word_t *) (a + k) = (mask & *(const word_t *)(&bTrue[k]))
-		    ( mask & *(const word_t*)(&bTrue [k]))
+                    | (~mask & *(const word_t *)(&bFalse[k]));
 		  | (~mask & *(const word_t*)(&bFalse[k]));
            }
        }
    }
    if (elem_bytes % sizeof(word_t)) {
-        for (; k<elem_bytes; k+=1) {
+        for (; k < elem_bytes; k += 1) {
-            a[k] = ( mask & bTrue[k]) | (~mask & bFalse[k]);
+            a[k] = (mask & bTrue[k]) | (~mask & bFalse[k]);
        }
    }
 }
--- a/crypto/ec/curve448/curve448.c
+++ b/crypto/ec/curve448/curve448.c
--- a/crypto/ec/curve448/curve448_tables.c
+++ b/crypto/ec/curve448/curve448_tables.c
--- a/crypto/ec/curve448/curve448_test.c
+++ b/crypto/ec/curve448/curve448_test.c
@ -76,20 +76,27 @@ const uint8_t out_u3[3][56] = {
     0x11, 0xd9, 0xdc, 0x14, 0xfd, 0x4b, 0xf6, 0x7a, 0xf3, 0x07, 0x65, 0xc2,
     0xae, 0x2b, 0x84, 0x6a, 0x4d, 0x23, 0xa8, 0xcd, 0x0d, 0xb8, 0x97, 0x08,
     0x62, 0x39, 0x49, 0x2c, 0xaf, 0x35, 0x0b, 0x51, 0xf8, 0x33, 0x86, 0x8b,
-        0x9b, 0xc2, 0xb3, 0xbc, 0xa9, 0xcf, 0x41, 0x13
+     0x9b, 0xc2, 0xb3, 0xbc, 0xa9, 0xcf, 0x41, 0x13}, {
-    }, {
+                                                       0xaa, 0x3b, 0x47, 0x49,
-        0xaa, 0x3b, 0x47, 0x49, 0xd5, 0x5b, 0x9d, 0xaf, 0x1e, 0x5b, 0x00, 0x28,
+                                                       0xd5, 0x5b, 0x9d, 0xaf,
-        0x88, 0x26, 0xc4, 0x67, 0x27, 0x4c, 0xe3, 0xeb, 0xbd, 0xd5, 0xc1, 0x7b,
+                                                       0x1e, 0x5b, 0x00, 0x28,
-        0x97, 0x5e, 0x09, 0xd4, 0xaf, 0x6c, 0x67, 0xcf, 0x10, 0xd0, 0x87, 0x20,
+                                                       0x88, 0x26, 0xc4, 0x67,
-        0x2d, 0xb8, 0x82, 0x86, 0xe2, 0xb7, 0x9f, 0xce, 0xea, 0x3e, 0xc3, 0x53,
+                                                       0x27, 0x4c, 0xe3, 0xeb,
-        0xef, 0x54, 0xfa, 0xa2, 0x6e, 0x21, 0x9f, 0x38
+                                                       0xbd, 0xd5, 0xc1, 0x7b,
-    }, {
+                                                       0x97, 0x5e, 0x09, 0xd4,
                                                       0xaf, 0x6c, 0x67, 0xcf,
                                                       0x10, 0xd0, 0x87, 0x20,
                                                       0x2d, 0xb8, 0x82, 0x86,
                                                       0xe2, 0xb7, 0x9f, 0xce,
                                                       0xea, 0x3e, 0xc3, 0x53,
                                                       0xef, 0x54, 0xfa, 0xa2,
                                                       0x6e, 0x21, 0x9f, 0x38},
    {
     0x07, 0x7f, 0x45, 0x36, 0x81, 0xca, 0xca, 0x36, 0x93, 0x19, 0x84, 0x20,
     0xbb, 0xe5, 0x15, 0xca, 0xe0, 0x00, 0x24, 0x72, 0x51, 0x9b, 0x3e, 0x67,
     0x66, 0x1a, 0x7e, 0x89, 0xca, 0xb9, 0x46, 0x95, 0xc8, 0xf4, 0xbc, 0xd6,
     0x6e, 0x61, 0xb9, 0xb9, 0xc9, 0x46, 0xda, 0x8d, 0x52, 0x4d, 0xe3, 0xd6,
-        0x9b, 0xd9, 0xd9, 0xd6, 0x6b, 0x99, 0x7e, 0x37
+     0x9b, 0xd9, 0xd9, 0xd6, 0x6b, 0x99, 0x7e, 0x37}
    }
 };
 /* Test vectors from RFC8032 for Ed448 */
@ -590,7 +597,6 @@ static const uint8_t *dohash(EVP_MD_CTX *hashctx, const uint8_t *msg,
    return hashout;
 }
 static int test_eddsa(void)
 {
    uint8_t outsig[114];
@ -614,7 +620,8 @@ static int test_eddsa(void)
        goto err;
    }
-    ED448_sign(outsig, msg3, sizeof(msg3), pubkey3, privkey3, context3, sizeof(context3));
+    ED448_sign(outsig, msg3, sizeof(msg3), pubkey3, privkey3, context3,
               sizeof(context3));
    if (memcmp(sig3, outsig, sizeof(sig3)) != 0) {
        printf("Calculated sig and expected sig differ (3)\n");
        goto err;
@ -683,7 +690,7 @@ int main(int argc, char *argv[])
    int j = -1;
    if (argc != 1 && (argc != 2 || strcmp(argv[1], "-f") != 0)) {
-        printf ("Usage: curve448_test [-f]\n");
+        printf("Usage: curve448_test [-f]\n");
        return 1;
    }
@ -725,7 +732,8 @@ int main(int argc, char *argv[])
        if (i == 1 || i == 1000 || i == 1000000) {
            j++;
            if (memcmp(out, out_u3[j], sizeof(out)) != 0) {
-                printf("Calculated output and expected output differ (3, %ud)\n",
+                printf
                    ("Calculated output and expected output differ (3, %ud)\n",
                     i);
                return 1;
            }
--- a/crypto/ec/curve448/curve448utils.h
+++ b/crypto/ec/curve448/curve448utils.h
@ -11,47 +11,47 @@
 */
 #ifndef __DECAF_COMMON_H__
-#define __DECAF_COMMON_H__ 1
+# define __DECAF_COMMON_H__ 1
-#include <openssl/e_os2.h>
+# include <openssl/e_os2.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
-/* Internal word types.
+/*
- *
+ * Internal word types. Somewhat tricky.  This could be decided separately per
- * Somewhat tricky.  This could be decided separately per platform.  However,
+ * platform.  However, the structs do need to be all the same size and
- * the structs do need to be all the same size and alignment on a given
+ * alignment on a given platform to support dynamic linking, since even if you
- * platform to support dynamic linking, since even if you header was built
+ * header was built with eg arch_neon, you might end up linking a library built
- * with eg arch_neon, you might end up linking a library built with arch_arm32.
+ * with arch_arm32.
 */
-#ifndef DECAF_WORD_BITS
+# ifndef DECAF_WORD_BITS
-    #if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) || (((__UINT_FAST32_MAX__)>>30)>>30))
+#  if (defined(__ILP64__) || defined(__amd64__) || defined(__x86_64__) || (((__UINT_FAST32_MAX__)>>30)>>30))
-        #define DECAF_WORD_BITS 64 /**< The number of bits in a word */
+#   define DECAF_WORD_BITS 64      /**< The number of bits in a word */
-    #else
+#  else
-        #define DECAF_WORD_BITS 32 /**< The number of bits in a word */
+#   define DECAF_WORD_BITS 32      /**< The number of bits in a word */
-    #endif
+#  endif
-#endif
+# endif
-#if DECAF_WORD_BITS == 64
+# if DECAF_WORD_BITS == 64
 typedef uint64_t decaf_word_t;      /**< Word size for internal computations */
 typedef int64_t decaf_sword_t;      /**< Signed word size for internal computations */
 typedef uint64_t decaf_bool_t;      /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
 typedef __uint128_t decaf_dword_t;  /**< Double-word size for internal computations */
 typedef __int128_t decaf_dsword_t;  /**< Signed double-word size for internal computations */
-#elif DECAF_WORD_BITS == 32         /**< The number of bits in a word */
+# elif DECAF_WORD_BITS == 32        /**< The number of bits in a word */
 typedef uint32_t decaf_word_t;      /**< Word size for internal computations */
 typedef int32_t decaf_sword_t;      /**< Signed word size for internal computations */
 typedef uint32_t decaf_bool_t;      /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
 typedef uint64_t decaf_dword_t;     /**< Double-word size for internal computations */
 typedef int64_t decaf_dsword_t;     /**< Signed double-word size for internal computations */
-#else
+# else
-#error "Only supporting DECAF_WORD_BITS = 32 or 64 for now"
+#  error "Only supporting DECAF_WORD_BITS = 32 or 64 for now"
-#endif
+# endif
 /** DECAF_TRUE = -1 so that DECAF_TRUE & x = x */
-static const decaf_bool_t DECAF_TRUE = -(decaf_bool_t)1;
+static const decaf_bool_t DECAF_TRUE = -(decaf_bool_t) 1;
 /** DECAF_FALSE = 0 so that DECAF_FALSE & x = 0 */
 static const decaf_bool_t DECAF_FALSE = 0;
@ -62,18 +62,17 @@ typedef enum {
    DECAF_FAILURE = 0   /**< The operation failed. */
 } decaf_error_t;
 /** Return success if x is true */
-static ossl_inline decaf_error_t
+static ossl_inline decaf_error_t decaf_succeed_if(decaf_bool_t x)
-decaf_succeed_if(decaf_bool_t x) {
+{
-    return (decaf_error_t)x;
+    return (decaf_error_t) x;
 }
 /** Return DECAF_TRUE iff x == DECAF_SUCCESS */
-static ossl_inline decaf_bool_t
+static ossl_inline decaf_bool_t decaf_successful(decaf_error_t e)
-decaf_successful(decaf_error_t e) {
+{
-    decaf_dword_t w = ((decaf_word_t)e) ^  ((decaf_word_t)DECAF_SUCCESS);
+    decaf_dword_t w = ((decaf_word_t) e) ^ ((decaf_word_t) DECAF_SUCCESS);
-    return (w-1)>>DECAF_WORD_BITS;
+    return (w - 1) >> DECAF_WORD_BITS;
 }
 #ifdef __cplusplus
--- a/crypto/ec/curve448/ed448.h
+++ b/crypto/ec/curve448/ed448.h
@ -11,31 +11,31 @@
 */
 #ifndef __DECAF_ED448_H__
-#define __DECAF_ED448_H__ 1
+# define __DECAF_ED448_H__ 1
-#include "point_448.h"
+# include "point_448.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /** Number of bytes in an EdDSA public key. */
-#define DECAF_EDDSA_448_PUBLIC_BYTES 57
+# define DECAF_EDDSA_448_PUBLIC_BYTES 57
 /** Number of bytes in an EdDSA private key. */
-#define DECAF_EDDSA_448_PRIVATE_BYTES DECAF_EDDSA_448_PUBLIC_BYTES
+# define DECAF_EDDSA_448_PRIVATE_BYTES DECAF_EDDSA_448_PUBLIC_BYTES
 /** Number of bytes in an EdDSA private key. */
-#define DECAF_EDDSA_448_SIGNATURE_BYTES (DECAF_EDDSA_448_PUBLIC_BYTES + DECAF_EDDSA_448_PRIVATE_BYTES)
+# define DECAF_EDDSA_448_SIGNATURE_BYTES (DECAF_EDDSA_448_PUBLIC_BYTES + DECAF_EDDSA_448_PRIVATE_BYTES)
 /** Does EdDSA support non-contextual signatures? */
-#define DECAF_EDDSA_448_SUPPORTS_CONTEXTLESS_SIGS 0
+# define DECAF_EDDSA_448_SUPPORTS_CONTEXTLESS_SIGS 0
 /** EdDSA encoding ratio. */
-#define DECAF_448_EDDSA_ENCODE_RATIO 4
+# define DECAF_448_EDDSA_ENCODE_RATIO 4
 /** EdDSA decoding ratio. */
-#define DECAF_448_EDDSA_DECODE_RATIO (4 / 4)
+# define DECAF_448_EDDSA_DECODE_RATIO (4 / 4)
 /**
 * @brief EdDSA key generation.  This function uses a different (non-Decaf)
@ -44,10 +44,13 @@ extern "C" {
 * @param [out] pubkey The public key.
 * @param [in] privkey The private key.
 */
-decaf_error_t decaf_ed448_derive_public_key (
+decaf_error_t decaf_ed448_derive_public_key(uint8_t
-    uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                                            pubkey
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES]
+                                            [DECAF_EDDSA_448_PUBLIC_BYTES],
-);
+                                            const uint8_t
                                            privkey
                                            [DECAF_EDDSA_448_PRIVATE_BYTES]
    );
 /**
 * @brief EdDSA signing.
@ -66,16 +69,16 @@ decaf_error_t decaf_ed448_derive_public_key (
 * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
 * you no seat belt.
 */
-decaf_error_t decaf_ed448_sign (
+decaf_error_t decaf_ed448_sign(uint8_t
-    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                               signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+                               const uint8_t
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                               privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
-    const uint8_t *message,
+                               const uint8_t
-    size_t message_len,
+                               pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    uint8_t prehashed,
+                               const uint8_t *message, size_t message_len,
-    const uint8_t *context,
+                               uint8_t prehashed, const uint8_t *context,
-    size_t context_len
+                               size_t context_len)
-) __attribute__((nonnull(1,2,3)));
+    __attribute__ ((nonnull(1, 2, 3)));
 /**
 * @brief EdDSA signing with prehash.
@ -92,14 +95,17 @@ decaf_error_t decaf_ed448_sign (
 * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
 * you no seat belt.
 */
-decaf_error_t decaf_ed448_sign_prehash (
+decaf_error_t decaf_ed448_sign_prehash(uint8_t
-    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                                       signature
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+                                       [DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                                       const uint8_t
                                       privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
                                       const uint8_t
                                       pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
                                       const uint8_t hash[64],
                                       const uint8_t *context,
-    size_t context_len
+                                       size_t context_len)
-) __attribute__((nonnull(1,2,3,4)));
+    __attribute__ ((nonnull(1, 2, 3, 4)));
 /**
 * @brief EdDSA signature verification.
@ -119,15 +125,14 @@ decaf_error_t decaf_ed448_sign_prehash (
 * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
 * you no seat belt.
 */
-decaf_error_t decaf_ed448_verify (
+decaf_error_t decaf_ed448_verify(const uint8_t
-    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                                 signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                                 const uint8_t
-    const uint8_t *message,
+                                 pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    size_t message_len,
+                                 const uint8_t *message, size_t message_len,
-    uint8_t prehashed,
+                                 uint8_t prehashed, const uint8_t *context,
-    const uint8_t *context,
+                                 uint8_t context_len)
-    uint8_t context_len
+    __attribute__ ((nonnull(1, 2)));
 ) __attribute__((nonnull(1,2)));
 /**
 * @brief EdDSA signature verification.
@ -145,13 +150,15 @@ decaf_error_t decaf_ed448_verify (
 * safe.  The C++ wrapper is designed to make it harder to screw this up, but this C code gives
 * you no seat belt.
 */
-decaf_error_t decaf_ed448_verify_prehash (
+decaf_error_t decaf_ed448_verify_prehash(const uint8_t
-    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                                         signature
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                                         [DECAF_EDDSA_448_SIGNATURE_BYTES],
                                         const uint8_t
                                         pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
                                         const uint8_t hash[64],
                                         const uint8_t *context,
-    uint8_t context_len
+                                         uint8_t context_len)
-) __attribute__((nonnull(1,2)));
+    __attribute__ ((nonnull(1, 2)));
 /**
 * @brief EdDSA point encoding.  Used internally, exposed externally.
@ -177,10 +184,11 @@ decaf_error_t decaf_ed448_verify_prehash (
 * @param [out] enc The encoded point.
 * @param [in] p The point.
 */
-void curve448_point_mul_by_ratio_and_encode_like_eddsa (
+void curve448_point_mul_by_ratio_and_encode_like_eddsa(uint8_t
-    uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES],
+                                                       enc
-    const curve448_point_t p
+                                                       [DECAF_EDDSA_448_PUBLIC_BYTES],
-);
+                                                       const curve448_point_t
                                                       p);
 /**
 * @brief EdDSA point decoding.  Multiplies by DECAF_448_EDDSA_DECODE_RATIO,
@ -191,10 +199,12 @@ void curve448_point_mul_by_ratio_and_encode_like_eddsa (
 * @param [out] enc The encoded point.
 * @param [in] p The point.
 */
-decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio (
+decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio(curve448_point_t
-    curve448_point_t p,
+                                                                p,
-    const uint8_t enc[DECAF_EDDSA_448_PUBLIC_BYTES]
+                                                                const uint8_t
-);
+                                                                enc
                                                                [DECAF_EDDSA_448_PUBLIC_BYTES]
    );
 /**
 * @brief EdDSA to ECDH public key conversion
@ -207,10 +217,10 @@ decaf_error_t curve448_point_decode_like_eddsa_and_mul_by_ratio (
 * @param[out] x The ECDH public key as in RFC7748(point on Montgomery curve)
 * @param[in] ed The EdDSA public key(point on Edwards curve)
 */
-void decaf_ed448_convert_public_key_to_x448 (
+void decaf_ed448_convert_public_key_to_x448(uint8_t x[DECAF_X448_PUBLIC_BYTES],
-    uint8_t x[DECAF_X448_PUBLIC_BYTES],
+                                            const uint8_t
-    const uint8_t ed[DECAF_EDDSA_448_PUBLIC_BYTES]
+                                            ed[DECAF_EDDSA_448_PUBLIC_BYTES]
-);
+    );
 /**
 * @brief EdDSA to ECDH private key conversion
@ -220,10 +230,13 @@ void decaf_ed448_convert_public_key_to_x448 (
 * @param[out] x The ECDH private key as in RFC7748
 * @param[in] ed The EdDSA private key
 */
-decaf_error_t decaf_ed448_convert_private_key_to_x448 (
+decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
-    uint8_t x[DECAF_X448_PRIVATE_BYTES],
+                                                      x
-    const uint8_t ed[DECAF_EDDSA_448_PRIVATE_BYTES]
+                                                      [DECAF_X448_PRIVATE_BYTES],
-);
+                                                      const uint8_t
                                                      ed
                                                      [DECAF_EDDSA_448_PRIVATE_BYTES]
    );
 #ifdef __cplusplus
 } /* extern "C" */
--- a/crypto/ec/curve448/eddsa.c
+++ b/crypto/ec/curve448/eddsa.c
@ -27,12 +27,12 @@
 #if NO_CONTEXT
 const uint8_t NO_CONTEXT_POINTS_HERE = 0;
-const uint8_t * const DECAF_ED448_NO_CONTEXT = &NO_CONTEXT_POINTS_HERE;
+const uint8_t *const DECAF_ED448_NO_CONTEXT = &NO_CONTEXT_POINTS_HERE;
 #endif
-/* EDDSA_BASE_POINT_RATIO = 1 or 2
+/*
- * Because EdDSA25519 is not on E_d but on the isogenous E_sigma_d,
+ * EDDSA_BASE_POINT_RATIO = 1 or 2 Because EdDSA25519 is not on E_d but on the
- * its base point is twice ours.
+ * isogenous E_sigma_d, its base point is twice ours.
 */
 #define EDDSA_BASE_POINT_RATIO (1+EDDSA_USE_SIGMA_ISOGENY) /* TODO: remove */
@ -55,11 +55,10 @@ static decaf_error_t oneshot_hash(uint8_t *out, size_t outlen,
    return DECAF_SUCCESS;
 }
-
+static void clamp(uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES]
-static void clamp (
+    )
-    uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES]
+{
-) {
+    uint8_t hibit = (1 << 0) >> 1;
    uint8_t hibit = (1<<0)>>1;
    /* Blarg */
    secret_scalar_ser[0] &= -COFACTOR;
@ -67,18 +66,17 @@ static void clamp (
        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 1] = 0;
        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 2] |= 0x80;
    } else {
-        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 1] &= hibit-1;
+        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 1] &= hibit - 1;
        secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES - 1] |= hibit;
    }
 }
-static decaf_error_t hash_init_with_dom(
+static decaf_error_t hash_init_with_dom(EVP_MD_CTX *hashctx,
    EVP_MD_CTX *hashctx,
                                        uint8_t prehashed,
                                        uint8_t for_prehash,
                                        const uint8_t *context,
-    size_t context_len
+                                        size_t context_len)
-) {
+{
    const char *dom_s = "SigEd448";
    uint8_t dom[2];
@ -108,24 +106,29 @@ static decaf_error_t hash_init_with_dom(
 }
 /* In this file because it uses the hash */
-decaf_error_t decaf_ed448_convert_private_key_to_x448 (
+decaf_error_t decaf_ed448_convert_private_key_to_x448(uint8_t
-    uint8_t x[DECAF_X448_PRIVATE_BYTES],
+                                                      x
-    const uint8_t ed[DECAF_EDDSA_448_PRIVATE_BYTES]
+                                                      [DECAF_X448_PRIVATE_BYTES],
-) {
+                                                      const uint8_t
                                                      ed
                                                      [DECAF_EDDSA_448_PRIVATE_BYTES]
    )
 {
    /* pass the private key through oneshot_hash function */
    /* and keep the first DECAF_X448_PRIVATE_BYTES bytes */
-    return oneshot_hash(
+    return oneshot_hash(x,
        x,
                        DECAF_X448_PRIVATE_BYTES,
-        ed,
+                        ed, DECAF_EDDSA_448_PRIVATE_BYTES);
        DECAF_EDDSA_448_PRIVATE_BYTES
    );
 }
-decaf_error_t decaf_ed448_derive_public_key (
+decaf_error_t decaf_ed448_derive_public_key(uint8_t
-    uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                                            pubkey
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES]
+                                            [DECAF_EDDSA_448_PUBLIC_BYTES],
-) {
+                                            const uint8_t
                                            privkey
                                            [DECAF_EDDSA_448_PRIVATE_BYTES]
    )
 {
    /* only this much used for keygen */
    uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES];
    curve448_scalar_t secret_scalar;
@ -138,19 +141,22 @@ decaf_error_t decaf_ed448_derive_public_key (
    }
    clamp(secret_scalar_ser);
-    curve448_scalar_decode_long(secret_scalar, secret_scalar_ser, sizeof(secret_scalar_ser));
+    curve448_scalar_decode_long(secret_scalar, secret_scalar_ser,
                                sizeof(secret_scalar_ser));
-    /* Since we are going to mul_by_cofactor during encoding, divide by it here.
+    /*
-     * However, the EdDSA base point is not the same as the decaf base point if
+     * Since we are going to mul_by_cofactor during encoding, divide by it
-     * the sigma isogeny is in use: the EdDSA base point is on Etwist_d/(1-d) and
+     * here. However, the EdDSA base point is not the same as the decaf base
-     * the decaf base point is on Etwist_d, and when converted it effectively
+     * point if the sigma isogeny is in use: the EdDSA base point is on
-     * picks up a factor of 2 from the isogenies.  So we might start at 2 instead of 1. 
+     * Etwist_d/(1-d) and the decaf base point is on Etwist_d, and when
     * converted it effectively picks up a factor of 2 from the isogenies.  So
     * we might start at 2 instead of 1.
     */
-    for (c=1; c<DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1) {
+    for (c = 1; c < DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1) {
-        curve448_scalar_halve(secret_scalar,secret_scalar);
+        curve448_scalar_halve(secret_scalar, secret_scalar);
    }
-    curve448_precomputed_scalarmul(p,curve448_precomputed_base,secret_scalar);
+    curve448_precomputed_scalarmul(p, curve448_precomputed_base, secret_scalar);
    curve448_point_mul_by_ratio_and_encode_like_eddsa(pubkey, p);
@ -162,21 +168,21 @@ decaf_error_t decaf_ed448_derive_public_key (
    return DECAF_SUCCESS;
 }
-decaf_error_t decaf_ed448_sign (
+decaf_error_t decaf_ed448_sign(uint8_t
-    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                               signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+                               const uint8_t
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                               privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
-    const uint8_t *message,
+                               const uint8_t
-    size_t message_len,
+                               pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    uint8_t prehashed,
+                               const uint8_t *message, size_t message_len,
-    const uint8_t *context,
+                               uint8_t prehashed, const uint8_t *context,
-    size_t context_len
+                               size_t context_len)
-) {
+{
    curve448_scalar_t secret_scalar;
    EVP_MD_CTX *hashctx = EVP_MD_CTX_new();
    decaf_error_t ret = DECAF_FAILURE;
    curve448_scalar_t nonce_scalar;
-    uint8_t nonce_point[DECAF_EDDSA_448_PUBLIC_BYTES] = {0};
+    uint8_t nonce_point[DECAF_EDDSA_448_PUBLIC_BYTES] = { 0 };
    unsigned int c;
    curve448_scalar_t challenge_scalar;
@ -188,18 +194,18 @@ decaf_error_t decaf_ed448_sign (
        struct {
            uint8_t secret_scalar_ser[DECAF_EDDSA_448_PRIVATE_BYTES];
            uint8_t seed[DECAF_EDDSA_448_PRIVATE_BYTES];
-        } __attribute__((packed)) expanded;
+        } __attribute__ ((packed)) expanded;
        if (!oneshot_hash((uint8_t *)&expanded, sizeof(expanded), privkey,
                          DECAF_EDDSA_448_PRIVATE_BYTES))
            goto err;
        clamp(expanded.secret_scalar_ser);
-        curve448_scalar_decode_long(secret_scalar, expanded.secret_scalar_ser, sizeof(expanded.secret_scalar_ser));
+        curve448_scalar_decode_long(secret_scalar, expanded.secret_scalar_ser,
                                    sizeof(expanded.secret_scalar_ser));
        /* Hash to create the nonce */
        if (!hash_init_with_dom(hashctx, prehashed, 0, context, context_len)
-                || !EVP_DigestUpdate(hashctx, expanded.seed,
+            || !EVP_DigestUpdate(hashctx, expanded.seed, sizeof(expanded.seed))
                                     sizeof(expanded.seed))
            || !EVP_DigestUpdate(hashctx, message, message_len)) {
            OPENSSL_cleanse(&expanded, sizeof(expanded));
            goto err;
@ -209,7 +215,7 @@ decaf_error_t decaf_ed448_sign (
    /* Decode the nonce */
    {
-        uint8_t nonce[2*DECAF_EDDSA_448_PRIVATE_BYTES];
+        uint8_t nonce[2 * DECAF_EDDSA_448_PRIVATE_BYTES];
        if (!EVP_DigestFinalXOF(hashctx, nonce, sizeof(nonce)))
            goto err;
@ -222,39 +228,41 @@ decaf_error_t decaf_ed448_sign (
        curve448_scalar_t nonce_scalar_2;
        curve448_point_t p;
-        curve448_scalar_halve(nonce_scalar_2,nonce_scalar);
+        curve448_scalar_halve(nonce_scalar_2, nonce_scalar);
        for (c = 2; c < DECAF_448_EDDSA_ENCODE_RATIO; c <<= 1) {
-            curve448_scalar_halve(nonce_scalar_2,nonce_scalar_2);
+            curve448_scalar_halve(nonce_scalar_2, nonce_scalar_2);
        }
-        curve448_precomputed_scalarmul(p,curve448_precomputed_base,nonce_scalar_2);
+        curve448_precomputed_scalarmul(p, curve448_precomputed_base,
                                       nonce_scalar_2);
        curve448_point_mul_by_ratio_and_encode_like_eddsa(nonce_point, p);
        curve448_point_destroy(p);
        curve448_scalar_destroy(nonce_scalar_2);
    }
    {
-        uint8_t challenge[2*DECAF_EDDSA_448_PRIVATE_BYTES];
+        uint8_t challenge[2 * DECAF_EDDSA_448_PRIVATE_BYTES];
        /* Compute the challenge */
        if (!hash_init_with_dom(hashctx, prehashed, 0, context, context_len)
            || !EVP_DigestUpdate(hashctx, nonce_point, sizeof(nonce_point))
-                || !EVP_DigestUpdate(hashctx, pubkey,
+            || !EVP_DigestUpdate(hashctx, pubkey, DECAF_EDDSA_448_PUBLIC_BYTES)
                                     DECAF_EDDSA_448_PUBLIC_BYTES)
            || !EVP_DigestUpdate(hashctx, message, message_len)
            || !EVP_DigestFinalXOF(hashctx, challenge, sizeof(challenge)))
            goto err;
-        curve448_scalar_decode_long(challenge_scalar,challenge,sizeof(challenge));
+        curve448_scalar_decode_long(challenge_scalar, challenge,
-        OPENSSL_cleanse(challenge,sizeof(challenge));
+                                    sizeof(challenge));
        OPENSSL_cleanse(challenge, sizeof(challenge));
    }
-    curve448_scalar_mul(challenge_scalar,challenge_scalar,secret_scalar);
+    curve448_scalar_mul(challenge_scalar, challenge_scalar, secret_scalar);
-    curve448_scalar_add(challenge_scalar,challenge_scalar,nonce_scalar);
+    curve448_scalar_add(challenge_scalar, challenge_scalar, nonce_scalar);
-    OPENSSL_cleanse(signature,DECAF_EDDSA_448_SIGNATURE_BYTES);
+    OPENSSL_cleanse(signature, DECAF_EDDSA_448_SIGNATURE_BYTES);
-    memcpy(signature,nonce_point,sizeof(nonce_point));
+    memcpy(signature, nonce_point, sizeof(nonce_point));
-    curve448_scalar_encode(&signature[DECAF_EDDSA_448_PUBLIC_BYTES],challenge_scalar);
+    curve448_scalar_encode(&signature[DECAF_EDDSA_448_PUBLIC_BYTES],
                           challenge_scalar);
    curve448_scalar_destroy(secret_scalar);
    curve448_scalar_destroy(nonce_scalar);
@ -266,52 +274,59 @@ decaf_error_t decaf_ed448_sign (
    return ret;
 }
-
+decaf_error_t decaf_ed448_sign_prehash(uint8_t
-decaf_error_t decaf_ed448_sign_prehash (
+                                       signature
-    uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                                       [DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
+                                       const uint8_t
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                                       privkey[DECAF_EDDSA_448_PRIVATE_BYTES],
                                       const uint8_t
                                       pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
                                       const uint8_t hash[64],
                                       const uint8_t *context,
-    size_t context_len
+                                       size_t context_len)
-) {
+{
-    return decaf_ed448_sign(signature,privkey,pubkey,hash,64,1,context,
+    return decaf_ed448_sign(signature, privkey, pubkey, hash, 64, 1, context,
                            context_len);
-    /*OPENSSL_cleanse(hash,sizeof(hash));*/
+    /*
     * OPENSSL_cleanse(hash,sizeof(hash));
     */
 }
-decaf_error_t decaf_ed448_verify (
+decaf_error_t decaf_ed448_verify(const uint8_t
-    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                                 signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                                 const uint8_t
-    const uint8_t *message,
+                                 pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
-    size_t message_len,
+                                 const uint8_t *message, size_t message_len,
-    uint8_t prehashed,
+                                 uint8_t prehashed, const uint8_t *context,
-    const uint8_t *context,
+                                 uint8_t context_len)
-    uint8_t context_len
+{
 ) { 
    curve448_point_t pk_point, r_point;
-    decaf_error_t error = curve448_point_decode_like_eddsa_and_mul_by_ratio(pk_point,pubkey);
+    decaf_error_t error =
        curve448_point_decode_like_eddsa_and_mul_by_ratio(pk_point, pubkey);
    curve448_scalar_t challenge_scalar;
    curve448_scalar_t response_scalar;
    unsigned int c;
-    if (DECAF_SUCCESS != error) { return error; }
+    if (DECAF_SUCCESS != error) {
        return error;
    }
-    error = curve448_point_decode_like_eddsa_and_mul_by_ratio(r_point,signature);
+    error =
-    if (DECAF_SUCCESS != error) { return error; }
+        curve448_point_decode_like_eddsa_and_mul_by_ratio(r_point, signature);
    if (DECAF_SUCCESS != error) {
        return error;
    }
    {
        /* Compute the challenge */
        EVP_MD_CTX *hashctx = EVP_MD_CTX_new();
-        uint8_t challenge[2*DECAF_EDDSA_448_PRIVATE_BYTES];
+        uint8_t challenge[2 * DECAF_EDDSA_448_PRIVATE_BYTES];
        if (hashctx == NULL
-                || !hash_init_with_dom(hashctx, prehashed, 0, context,
+            || !hash_init_with_dom(hashctx, prehashed, 0, context, context_len)
                                       context_len)
            || !EVP_DigestUpdate(hashctx, signature,
                                 DECAF_EDDSA_448_PUBLIC_BYTES)
-                || !EVP_DigestUpdate(hashctx, pubkey,
+            || !EVP_DigestUpdate(hashctx, pubkey, DECAF_EDDSA_448_PUBLIC_BYTES)
                                     DECAF_EDDSA_448_PUBLIC_BYTES)
            || !EVP_DigestUpdate(hashctx, message, message_len)
            || !EVP_DigestFinalXOF(hashctx, challenge, sizeof(challenge))) {
            EVP_MD_CTX_free(hashctx);
@ -319,43 +334,42 @@ decaf_error_t decaf_ed448_verify (
        }
        EVP_MD_CTX_free(hashctx);
-        curve448_scalar_decode_long(challenge_scalar,challenge,sizeof(challenge));
+        curve448_scalar_decode_long(challenge_scalar, challenge,
-        OPENSSL_cleanse(challenge,sizeof(challenge));
+                                    sizeof(challenge));
        OPENSSL_cleanse(challenge, sizeof(challenge));
    }
-    curve448_scalar_sub(challenge_scalar, curve448_scalar_zero, challenge_scalar);
+    curve448_scalar_sub(challenge_scalar, curve448_scalar_zero,
                        challenge_scalar);
-    curve448_scalar_decode_long(
+    curve448_scalar_decode_long(response_scalar,
        response_scalar,
                                &signature[DECAF_EDDSA_448_PUBLIC_BYTES],
-        DECAF_EDDSA_448_PRIVATE_BYTES
+                                DECAF_EDDSA_448_PRIVATE_BYTES);
    );
-    for (c=1; c<DECAF_448_EDDSA_DECODE_RATIO; c<<=1) {
+    for (c = 1; c < DECAF_448_EDDSA_DECODE_RATIO; c <<= 1) {
-        curve448_scalar_add(response_scalar,response_scalar,response_scalar);
+        curve448_scalar_add(response_scalar, response_scalar, response_scalar);
    }
    /* pk_point = -c(x(P)) + (cx + k)G = kG */
-    curve448_base_double_scalarmul_non_secret(
+    curve448_base_double_scalarmul_non_secret(pk_point,
        pk_point,
                                              response_scalar,
-        pk_point,
+                                              pk_point, challenge_scalar);
-        challenge_scalar
+    return decaf_succeed_if(curve448_point_eq(pk_point, r_point));
    );
    return decaf_succeed_if(curve448_point_eq(pk_point,r_point));
 }
-
+decaf_error_t decaf_ed448_verify_prehash(const uint8_t
-decaf_error_t decaf_ed448_verify_prehash (
+                                         signature
-    const uint8_t signature[DECAF_EDDSA_448_SIGNATURE_BYTES],
+                                         [DECAF_EDDSA_448_SIGNATURE_BYTES],
-    const uint8_t pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
+                                         const uint8_t
                                         pubkey[DECAF_EDDSA_448_PUBLIC_BYTES],
                                         const uint8_t hash[64],
                                         const uint8_t *context,
-    uint8_t context_len
+                                         uint8_t context_len)
-) {
+{
    decaf_error_t ret;
-    ret = decaf_ed448_verify(signature,pubkey,hash,64,1,context,context_len);
+    ret =
        decaf_ed448_verify(signature, pubkey, hash, 64, 1, context,
                           context_len);
    return ret;
 }
@ -370,7 +384,6 @@ int ED448_sign(uint8_t *out_sig, const uint8_t *message, size_t message_len,
        == DECAF_SUCCESS;
 }
 int ED448_verify(const uint8_t *message, size_t message_len,
                 const uint8_t signature[114], const uint8_t public_key[57],
                 const uint8_t *context, size_t context_len)
--- a/crypto/ec/curve448/f_arithmetic.c
+++ b/crypto/ec/curve448/f_arithmetic.c
@ -12,37 +12,35 @@
 #include "field.h"
-mask_t gf_isr (
+mask_t gf_isr(gf a, const gf x)
-    gf a,
+{
    const gf x
 ) {
    gf L0, L1, L2;
-    gf_sqr  (L1,     x );
+    gf_sqr(L1, x);
-    gf_mul  (L2,     x,   L1 );
+    gf_mul(L2, x, L1);
-    gf_sqr  (L1,   L2 );
+    gf_sqr(L1, L2);
-    gf_mul  (L2,     x,   L1 );
+    gf_mul(L2, x, L1);
-    gf_sqrn (L1,   L2,     3 );
+    gf_sqrn(L1, L2, 3);
-    gf_mul  (L0,   L2,   L1 );
+    gf_mul(L0, L2, L1);
-    gf_sqrn (L1,   L0,     3 );
+    gf_sqrn(L1, L0, 3);
-    gf_mul  (L0,   L2,   L1 );
+    gf_mul(L0, L2, L1);
-    gf_sqrn (L2,   L0,     9 );
+    gf_sqrn(L2, L0, 9);
-    gf_mul  (L1,   L0,   L2 );
+    gf_mul(L1, L0, L2);
-    gf_sqr  (L0,   L1 );
+    gf_sqr(L0, L1);
-    gf_mul  (L2,     x,   L0 );
+    gf_mul(L2, x, L0);
-    gf_sqrn (L0,   L2,    18 );
+    gf_sqrn(L0, L2, 18);
-    gf_mul  (L2,   L1,   L0 );
+    gf_mul(L2, L1, L0);
-    gf_sqrn (L0,   L2,    37 );
+    gf_sqrn(L0, L2, 37);
-    gf_mul  (L1,   L2,   L0 );
+    gf_mul(L1, L2, L0);
-    gf_sqrn (L0,   L1,    37 );
+    gf_sqrn(L0, L1, 37);
-    gf_mul  (L1,   L2,   L0 );
+    gf_mul(L1, L2, L0);
-    gf_sqrn (L0,   L1,   111 );
+    gf_sqrn(L0, L1, 111);
-    gf_mul  (L2,   L1,   L0 );
+    gf_mul(L2, L1, L0);
-    gf_sqr  (L0,   L2 );
+    gf_sqr(L0, L2);
-    gf_mul  (L1,     x,   L0 );
+    gf_mul(L1, x, L0);
-    gf_sqrn (L0,   L1,   223 );
+    gf_sqrn(L0, L1, 223);
-    gf_mul  (L1,   L2,   L0 );
+    gf_mul(L1, L2, L0);
-    gf_sqr  (L2, L1);
+    gf_sqr(L2, L1);
-    gf_mul  (L0, L2, x);
+    gf_mul(L0, L2, x);
-    gf_copy(a,L1);
+    gf_copy(a, L1);
-    return gf_eq(L0,ONE);
+    return gf_eq(L0, ONE);
 }
--- a/crypto/ec/curve448/f_field.h
+++ b/crypto/ec/curve448/f_field.h
@ -11,91 +11,97 @@
 */
 #ifndef __P448_F_FIELD_H__
-#define __P448_F_FIELD_H__ 1
+# define __P448_F_FIELD_H__ 1
-#include "constant_time.h"
+# include "constant_time.h"
-#include <string.h>
+# include <string.h>
-#include <assert.h>
+# include <assert.h>
-#include "word.h"
+# include "word.h"
-#define __DECAF_448_GF_DEFINED__ 1
+# define __DECAF_448_GF_DEFINED__ 1
-#define NLIMBS (64/sizeof(word_t))
+# define NLIMBS (64/sizeof(word_t))
-#define X_SER_BYTES 56
+# define X_SER_BYTES 56
-#define SER_BYTES 56
+# define SER_BYTES 56
 typedef struct gf_448_s {
    word_t limb[NLIMBS];
-} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];
+} __attribute__ ((aligned(32))) gf_448_s, gf_448_t[1];
-#define GF_LIT_LIMB_BITS  56
+# define GF_LIT_LIMB_BITS  56
-#define GF_BITS           448
+# define GF_BITS           448
-#define ZERO              gf_448_ZERO
+# define ZERO              gf_448_ZERO
-#define ONE               gf_448_ONE
+# define ONE               gf_448_ONE
-#define MODULUS           gf_448_MODULUS
+# define MODULUS           gf_448_MODULUS
-#define gf                gf_448_t
+# define gf                gf_448_t
-#define gf_s              gf_448_s
+# define gf_s              gf_448_s
-#define gf_eq             gf_448_eq
+# define gf_eq             gf_448_eq
-#define gf_hibit          gf_448_hibit
+# define gf_hibit          gf_448_hibit
-#define gf_lobit          gf_448_lobit
+# define gf_lobit          gf_448_lobit
-#define gf_copy           gf_448_copy
+# define gf_copy           gf_448_copy
-#define gf_add            gf_448_add
+# define gf_add            gf_448_add
-#define gf_sub            gf_448_sub
+# define gf_sub            gf_448_sub
-#define gf_add_RAW        gf_448_add_RAW
+# define gf_add_RAW        gf_448_add_RAW
-#define gf_sub_RAW        gf_448_sub_RAW
+# define gf_sub_RAW        gf_448_sub_RAW
-#define gf_bias           gf_448_bias
+# define gf_bias           gf_448_bias
-#define gf_weak_reduce    gf_448_weak_reduce
+# define gf_weak_reduce    gf_448_weak_reduce
-#define gf_strong_reduce  gf_448_strong_reduce
+# define gf_strong_reduce  gf_448_strong_reduce
-#define gf_mul            gf_448_mul
+# define gf_mul            gf_448_mul
-#define gf_sqr            gf_448_sqr
+# define gf_sqr            gf_448_sqr
-#define gf_mulw_unsigned  gf_448_mulw_unsigned
+# define gf_mulw_unsigned  gf_448_mulw_unsigned
-#define gf_isr            gf_448_isr
+# define gf_isr            gf_448_isr
-#define gf_serialize      gf_448_serialize
+# define gf_serialize      gf_448_serialize
-#define gf_deserialize    gf_448_deserialize
+# define gf_deserialize    gf_448_deserialize
 /* RFC 7748 support */
-#define X_PUBLIC_BYTES  X_SER_BYTES
+# define X_PUBLIC_BYTES  X_SER_BYTES
-#define X_PRIVATE_BYTES X_PUBLIC_BYTES
+# define X_PRIVATE_BYTES X_PUBLIC_BYTES
-#define X_PRIVATE_BITS  448
+# define X_PRIVATE_BITS  448
-#define INLINE_UNUSED __inline__ __attribute__((unused,always_inline))
+# define INLINE_UNUSED __inline__ __attribute__((unused,always_inline))
 #ifdef __cplusplus
 extern "C" {
 #endif
 /* Defined below in f_impl.h */
-static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; }
+static INLINE_UNUSED void gf_copy(gf out, const gf a)
-static INLINE_UNUSED void gf_add_RAW (gf out, const gf a, const gf b);
+{
-static INLINE_UNUSED void gf_sub_RAW (gf out, const gf a, const gf b);
+    *out = *a;
-static INLINE_UNUSED void gf_bias (gf inout, int amount);
+}
 static INLINE_UNUSED void gf_weak_reduce (gf inout);
-void gf_strong_reduce (gf inout);   
+static INLINE_UNUSED void gf_add_RAW(gf out, const gf a, const gf b);
-void gf_add (gf out, const gf a, const gf b);
+static INLINE_UNUSED void gf_sub_RAW(gf out, const gf a, const gf b);
-void gf_sub (gf out, const gf a, const gf b);
+static INLINE_UNUSED void gf_bias(gf inout, int amount);
-void gf_mul (gf_s *__restrict__ out, const gf a, const gf b);
+static INLINE_UNUSED void gf_weak_reduce(gf inout);
-void gf_mulw_unsigned (gf_s *__restrict__ out, const gf a, uint32_t b);
+
-void gf_sqr (gf_s *__restrict__ out, const gf a);
+void gf_strong_reduce(gf inout);
 void gf_add(gf out, const gf a, const gf b);
 void gf_sub(gf out, const gf a, const gf b);
 void gf_mul(gf_s * __restrict__ out, const gf a, const gf b);
 void gf_mulw_unsigned(gf_s * __restrict__ out, const gf a, uint32_t b);
 void gf_sqr(gf_s * __restrict__ out, const gf a);
 mask_t gf_isr(gf a, const gf x); /** a^2 x = 1, QNR, or 0 if x=0.  Return true if successful */
-mask_t gf_eq (const gf x, const gf y);
+mask_t gf_eq(const gf x, const gf y);
-mask_t gf_lobit (const gf x);
+mask_t gf_lobit(const gf x);
-mask_t gf_hibit (const gf x);
+mask_t gf_hibit(const gf x);
-void gf_serialize (uint8_t *serial, const gf x,int with_highbit);
+void gf_serialize(uint8_t *serial, const gf x, int with_highbit);
-mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES],int with_hibit,uint8_t hi_nmask);
+mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit,
                      uint8_t hi_nmask);
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
-#include "f_impl.h" /* Bring in the inline implementations */
+# include "f_impl.h"            /* Bring in the inline implementations */
-#ifndef LIMBPERM
+# ifndef LIMBPERM
-  #define LIMBPERM(i) (i)
+#  define LIMBPERM(i) (i)
-#endif
+# endif
-#define LIMB_MASK(i) (((1)<<LIMB_PLACE_VALUE(i))-1)
+# define LIMB_MASK(i) (((1)<<LIMB_PLACE_VALUE(i))-1)
-static const gf ZERO = {{{0}}}, ONE = {{{1}}};
+static const gf ZERO = { {{0}} }, ONE = { { {
 1}}};
 #endif                          /* __P448_F_FIELD_H__ */
--- a/crypto/ec/curve448/f_generic.c
+++ b/crypto/ec/curve448/f_generic.c
@ -11,24 +11,29 @@
 */
 #include "field.h"
-static const gf MODULUS = {FIELD_LITERAL(
+static const gf MODULUS =
-    0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff, 0xfffffffffffffe, 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff
+    { FIELD_LITERAL(0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff,
-)};
+                    0xffffffffffffff, 0xfffffffffffffe, 0xffffffffffffff,
                    0xffffffffffffff, 0xffffffffffffff)
 };
 /** Serialize to wire format. */
-void gf_serialize (uint8_t serial[SER_BYTES], const gf x, int with_hibit) {
+void gf_serialize(uint8_t serial[SER_BYTES], const gf x, int with_hibit)
-    unsigned int j=0, fill=0;
+{
    unsigned int j = 0, fill = 0;
    dword_t buffer = 0;
    unsigned int i;
    gf red;
    gf_copy(red, x);
    gf_strong_reduce(red);
-    if (!with_hibit) { assert(gf_hibit(red) == 0); }
+    if (!with_hibit) {
        assert(gf_hibit(red) == 0);
    }
-    UNROLL for (i=0; i<(with_hibit ? X_SER_BYTES : SER_BYTES); i++) {
+    UNROLL for (i = 0; i < (with_hibit ? X_SER_BYTES : SER_BYTES); i++) {
        if (fill < 8 && j < NLIMBS) {
-            buffer |= ((dword_t)red->limb[LIMBPERM(j)]) << fill;
+            buffer |= ((dword_t) red->limb[LIMBPERM(j)]) << fill;
            fill += LIMB_PLACE_VALUE(LIMBPERM(j));
            j++;
        }
@ -39,49 +44,58 @@ void gf_serialize (uint8_t serial[SER_BYTES], const gf x, int with_hibit) {
 }
 /** Return high bit of x = low bit of 2x mod p */
-mask_t gf_hibit(const gf x) {
+mask_t gf_hibit(const gf x)
 {
    gf y;
-    gf_add(y,x,x);
+    gf_add(y, x, x);
    gf_strong_reduce(y);
-    return -(y->limb[0]&1);
+    return -(y->limb[0] & 1);
 }
 /** Return high bit of x = low bit of 2x mod p */
-mask_t gf_lobit(const gf x) {
+mask_t gf_lobit(const gf x)
 {
    gf y;
-    gf_copy(y,x);
+    gf_copy(y, x);
    gf_strong_reduce(y);
-    return -(y->limb[0]&1);
+    return -(y->limb[0] & 1);
 }
 /** Deserialize from wire format; return -1 on success and 0 on failure. */
-mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES], int with_hibit, uint8_t hi_nmask) {
+mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit,
-    unsigned int j=0, fill=0;
+                      uint8_t hi_nmask)
 {
    unsigned int j = 0, fill = 0;
    dword_t buffer = 0;
    dsword_t scarry = 0;
    const unsigned nbytes = with_hibit ? X_SER_BYTES : SER_BYTES;
    unsigned int i;
    mask_t succ;
-    UNROLL for (i=0; i<NLIMBS; i++) {
+    UNROLL for (i = 0; i < NLIMBS; i++) {
        UNROLL while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < nbytes) {
            uint8_t sj = serial[j];
-            if (j==nbytes-1) sj &= ~hi_nmask;
+            if (j == nbytes - 1)
-            buffer |= ((dword_t)sj) << fill;
+                sj &= ~hi_nmask;
            buffer |= ((dword_t) sj) << fill;
            fill += 8;
            j++;
        }
-        x->limb[LIMBPERM(i)] = (i<NLIMBS-1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer;
+        x->limb[LIMBPERM(i)] =
            (i < NLIMBS - 1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer;
        fill -= LIMB_PLACE_VALUE(LIMBPERM(i));
        buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i));
-        scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8*sizeof(word_t));
+        scarry =
            (scarry + x->limb[LIMBPERM(i)] -
             MODULUS->limb[LIMBPERM(i)]) >> (8 * sizeof(word_t));
    }
-    succ = with_hibit ? -(mask_t)1 : ~gf_hibit(x);
+    succ = with_hibit ? -(mask_t) 1 : ~gf_hibit(x);
    return succ & word_is_zero(buffer) & ~word_is_zero(scarry);
 }
 /** Reduce to canonical form. */
-void gf_strong_reduce (gf a) {
+void gf_strong_reduce(gf a)
 {
    dsword_t scarry;
    word_t scarry_0;
    dword_t carry = 0;
@ -94,23 +108,26 @@ void gf_strong_reduce (gf a) {
    /* compute total_value - p.  No need to reduce mod p. */
    scarry = 0;
-    for (i=0; i<NLIMBS; i++) {
+    for (i = 0; i < NLIMBS; i++) {
        scarry = scarry + a->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)];
        a->limb[LIMBPERM(i)] = scarry & LIMB_MASK(LIMBPERM(i));
        scarry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
    }
-    /* uncommon case: it was >= p, so now scarry = 0 and this = x
+    /*
-     * common case: it was < p, so now scarry = -1 and this = x - p + 2^255
+     * uncommon case: it was >= p, so now scarry = 0 and this = x common case:
-     * so let's add back in p.  will carry back off the top for 2^255.
+     * it was < p, so now scarry = -1 and this = x - p + 2^255 so let's add
     * back in p.  will carry back off the top for 2^255.
     */
-    assert(word_is_zero(scarry) | word_is_zero(scarry+1));
+    assert(word_is_zero(scarry) | word_is_zero(scarry + 1));
    scarry_0 = scarry;
    /* add it back */
-    for (i=0; i<NLIMBS; i++) {
+    for (i = 0; i < NLIMBS; i++) {
-        carry = carry + a->limb[LIMBPERM(i)] + (scarry_0 & MODULUS->limb[LIMBPERM(i)]);
+        carry =
            carry + a->limb[LIMBPERM(i)] +
            (scarry_0 & MODULUS->limb[LIMBPERM(i)]);
        a->limb[LIMBPERM(i)] = carry & LIMB_MASK(LIMBPERM(i));
        carry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
    }
@ -119,28 +136,31 @@ void gf_strong_reduce (gf a) {
 }
 /** Subtract two gf elements d=a-b */
-void gf_sub (gf d, const gf a, const gf b) {
+void gf_sub(gf d, const gf a, const gf b)
-    gf_sub_RAW ( d, a, b );
+{
-    gf_bias( d, 2 );
+    gf_sub_RAW(d, a, b);
-    gf_weak_reduce ( d );
+    gf_bias(d, 2);
    gf_weak_reduce(d);
 }
 /** Add two field elements d = a+b */
-void gf_add (gf d, const gf a, const gf b) {
+void gf_add(gf d, const gf a, const gf b)
-    gf_add_RAW ( d, a, b );
+{
-    gf_weak_reduce ( d );
+    gf_add_RAW(d, a, b);
    gf_weak_reduce(d);
 }
 /** Compare a==b */
-mask_t gf_eq(const gf a, const gf b) {
+mask_t gf_eq(const gf a, const gf b)
 {
    gf c;
-    mask_t ret=0;
+    mask_t ret = 0;
    unsigned int i;
-    gf_sub(c,a,b);
+    gf_sub(c, a, b);
    gf_strong_reduce(c);
-    for (i=0; i<NLIMBS; i++) {
+    for (i = 0; i < NLIMBS; i++) {
        ret |= c->limb[LIMBPERM(i)];
    }
--- a/crypto/ec/curve448/field.h
+++ b/crypto/ec/curve448/field.h
@ -11,85 +11,90 @@
 */
 #ifndef __GF_H__
-#define __GF_H__
+# define __GF_H__
-#include "constant_time.h"
+# include "constant_time.h"
-#include "f_field.h"
+# include "f_field.h"
-#include <string.h>
+# include <string.h>
 /** Square x, n times. */
-static ossl_inline void gf_sqrn (
+static ossl_inline void gf_sqrn(gf_s * __restrict__ y, const gf x, int n)
-    gf_s *__restrict__ y,
+{
    const gf x,
    int n
 ) {
    gf tmp;
-    assert(n>0);
+    assert(n > 0);
-    if (n&1) {
+    if (n & 1) {
-        gf_sqr(y,x);
+        gf_sqr(y, x);
        n--;
    } else {
-        gf_sqr(tmp,x);
+        gf_sqr(tmp, x);
-        gf_sqr(y,tmp);
+        gf_sqr(y, tmp);
-        n-=2;
+        n -= 2;
    }
-    for (; n; n-=2) {
+    for (; n; n -= 2) {
-        gf_sqr(tmp,y);
+        gf_sqr(tmp, y);
-        gf_sqr(y,tmp);
+        gf_sqr(y, tmp);
    }
 }
-#define gf_add_nr gf_add_RAW
+# define gf_add_nr gf_add_RAW
 /** Subtract mod p.  Bias by 2 and don't reduce  */
-static ossl_inline void gf_sub_nr ( gf c, const gf a, const gf b ) {
+static ossl_inline void gf_sub_nr(gf c, const gf a, const gf b)
-    gf_sub_RAW(c,a,b);
+{
    gf_sub_RAW(c, a, b);
    gf_bias(c, 2);
-    if (GF_HEADROOM < 3) gf_weak_reduce(c);
+    if (GF_HEADROOM < 3)
        gf_weak_reduce(c);
 }
 /** Subtract mod p. Bias by amt but don't reduce.  */
-static ossl_inline void gf_subx_nr ( gf c, const gf a, const gf b, int amt ) {
+static ossl_inline void gf_subx_nr(gf c, const gf a, const gf b, int amt)
-    gf_sub_RAW(c,a,b);
+{
    gf_sub_RAW(c, a, b);
    gf_bias(c, amt);
-    if (GF_HEADROOM < amt+1) gf_weak_reduce(c);
+    if (GF_HEADROOM < amt + 1)
        gf_weak_reduce(c);
 }
 /** Mul by signed int.  Not constant-time WRT the sign of that int. */
-static ossl_inline void gf_mulw(gf c, const gf a, int32_t w) {
+static ossl_inline void gf_mulw(gf c, const gf a, int32_t w)
-    if (w>0) {
+{
    if (w > 0) {
        gf_mulw_unsigned(c, a, w);
    } else {
        gf_mulw_unsigned(c, a, -w);
-        gf_sub(c,ZERO,c);
+        gf_sub(c, ZERO, c);
    }
 }
 /** Constant time, x = is_z ? z : y */
-static ossl_inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z) {
+static ossl_inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z)
-    constant_time_select(x,y,z,sizeof(gf),is_z,0);
+{
    constant_time_select(x, y, z, sizeof(gf), is_z, 0);
 }
 /** Constant time, if (neg) x=-x; */
-static ossl_inline void gf_cond_neg(gf x, mask_t neg) {
+static ossl_inline void gf_cond_neg(gf x, mask_t neg)
 {
    gf y;
-    gf_sub(y,ZERO,x);
+    gf_sub(y, ZERO, x);
-    gf_cond_sel(x,x,y,neg);
+    gf_cond_sel(x, x, y, neg);
 }
 /** Constant time, if (swap) (x,y) = (y,x); */
-static ossl_inline void
+static ossl_inline void gf_cond_swap(gf x, gf_s * __restrict__ y, mask_t swap)
-gf_cond_swap(gf x, gf_s *__restrict__ y, mask_t swap) {
+{
-    constant_time_cond_swap(x,y,sizeof(gf_s),swap);
+    constant_time_cond_swap(x, y, sizeof(gf_s), swap);
 }
-static ossl_inline void gf_mul_qnr(gf_s *__restrict__ out, const gf x) {
+static ossl_inline void gf_mul_qnr(gf_s * __restrict__ out, const gf x)
-    gf_sub(out,ZERO,x);
+{
    gf_sub(out, ZERO, x);
 }
-static ossl_inline void gf_div_qnr(gf_s *__restrict__ out, const gf x) {
+static ossl_inline void gf_div_qnr(gf_s * __restrict__ out, const gf x)
-    gf_sub(out,ZERO,x);
+{
    gf_sub(out, ZERO, x);
 }
 #endif                          /* __GF_H__ */
--- a/crypto/ec/curve448/point_448.h
+++ b/crypto/ec/curve448/point_448.h
@ -11,52 +11,52 @@
 */
 #ifndef __DECAF_POINT_448_H__
-#define __DECAF_POINT_448_H__ 1
+# define __DECAF_POINT_448_H__ 1
-#include "curve448utils.h"
+# include "curve448utils.h"
-#include "field.h"
+# include "field.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
 /** @cond internal */
-#define DECAF_448_SCALAR_LIMBS ((446-1)/DECAF_WORD_BITS+1)
+# define DECAF_448_SCALAR_LIMBS ((446-1)/DECAF_WORD_BITS+1)
 /** @endcond */
 /** The number of bits in a scalar */
-#define DECAF_448_SCALAR_BITS 446
+# define DECAF_448_SCALAR_BITS 446
 /** Number of bytes in a serialized point. */
-#define DECAF_448_SER_BYTES 56
+# define DECAF_448_SER_BYTES 56
 /** Number of bytes in an elligated point.  For now set the same as SER_BYTES
 * but could be different for other curves.
 */
-#define DECAF_448_HASH_BYTES 56
+# define DECAF_448_HASH_BYTES 56
 /** Number of bytes in a serialized scalar. */
-#define DECAF_448_SCALAR_BYTES 56
+# define DECAF_448_SCALAR_BYTES 56
 /** Number of bits in the "which" field of an elligator inverse */
-#define DECAF_448_INVERT_ELLIGATOR_WHICH_BITS 3
+# define DECAF_448_INVERT_ELLIGATOR_WHICH_BITS 3
 /** The cofactor the curve would have, if we hadn't removed it */
-#define DECAF_448_REMOVED_COFACTOR 4
+# define DECAF_448_REMOVED_COFACTOR 4
 /** X448 encoding ratio. */
-#define DECAF_X448_ENCODE_RATIO 2
+# define DECAF_X448_ENCODE_RATIO 2
 /** Number of bytes in an x448 public key */
-#define DECAF_X448_PUBLIC_BYTES 56
+# define DECAF_X448_PUBLIC_BYTES 56
 /** Number of bytes in an x448 private key */
-#define DECAF_X448_PRIVATE_BYTES 56
+# define DECAF_X448_PRIVATE_BYTES 56
 /** Twisted Edwards extended homogeneous coordinates */
 typedef struct curve448_point_s {
    /** @cond internal */
-    gf_448_t x,y,z,t;
+    gf_448_t x, y, z, t;
    /** @endcond */
 } curve448_point_t[1];
@ -98,10 +98,10 @@ extern const struct curve448_precomputed_s *curve448_precomputed_base;
 * @retval DECAF_FAILURE The scalar was greater than the modulus,
 * and has been reduced modulo that modulus.
 */
-__owur decaf_error_t curve448_scalar_decode (
+__owur decaf_error_t curve448_scalar_decode(curve448_scalar_t out,
-    curve448_scalar_t out,
+                                            const unsigned char
-    const unsigned char ser[DECAF_448_SCALAR_BYTES]
+                                            ser[DECAF_448_SCALAR_BYTES]
-);
+    );
 /**
 * @brief Read a scalar from wire format or from bytes.  Reduces mod
@ -111,11 +111,8 @@ __owur decaf_error_t curve448_scalar_decode (
 * @param [in] ser_len Length of serialized form.
 * @param [out] out Deserialized form.
 */
-void curve448_scalar_decode_long (
+void curve448_scalar_decode_long(curve448_scalar_t out,
-    curve448_scalar_t out,
+                                 const unsigned char *ser, size_t ser_len);
    const unsigned char *ser,
    size_t ser_len
 );
 /**
 * @brief Serialize a scalar to wire format.
@ -123,10 +120,8 @@ void curve448_scalar_decode_long (
 * @param [out] ser Serialized form of a scalar.
 * @param [in] s Deserialized scalar.
 */
-void curve448_scalar_encode (
+void curve448_scalar_encode(unsigned char ser[DECAF_448_SCALAR_BYTES],
-    unsigned char ser[DECAF_448_SCALAR_BYTES],
+                            const curve448_scalar_t s);
    const curve448_scalar_t s
 );
 /**
 * @brief Add two scalars.  The scalars may use the same memory.
@ -134,11 +129,8 @@ void curve448_scalar_encode (
 * @param [in] b Another scalar.
 * @param [out] out a+b.
 */
-void curve448_scalar_add (
+void curve448_scalar_add(curve448_scalar_t out,
-    curve448_scalar_t out,
+                         const curve448_scalar_t a, const curve448_scalar_t b);
    const curve448_scalar_t a,
    const curve448_scalar_t b
 );
 /**
 * @brief Subtract two scalars.  The scalars may use the same memory.
@ -146,11 +138,8 @@ void curve448_scalar_add (
 * @param [in] b Another scalar.
 * @param [out] out a-b.
 */
-void curve448_scalar_sub (
+void curve448_scalar_sub(curve448_scalar_t out,
-    curve448_scalar_t out,
+                         const curve448_scalar_t a, const curve448_scalar_t b);
    const curve448_scalar_t a,
    const curve448_scalar_t b
 );
 /**
 * @brief Multiply two scalars.  The scalars may use the same memory.
@ -158,21 +147,15 @@ void curve448_scalar_sub (
 * @param [in] b Another scalar.
 * @param [out] out a*b.
 */
-void curve448_scalar_mul (
+void curve448_scalar_mul(curve448_scalar_t out,
-    curve448_scalar_t out,
+                         const curve448_scalar_t a, const curve448_scalar_t b);
    const curve448_scalar_t a,
    const curve448_scalar_t b
 );
 /**
 * @brief Halve a scalar.  The scalars may use the same memory.
 * @param [in] a A scalar.
 * @param [out] out a/2.
 */
-void curve448_scalar_halve (
+void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a);
   curve448_scalar_t out,
   const curve448_scalar_t a
 );
 /**
 * @brief Copy a scalar.  The scalars may use the same memory, in which
@ -180,10 +163,9 @@ void curve448_scalar_halve (
 * @param [in] a A scalar.
 * @param [out] out Will become a copy of a.
 */
-static ossl_inline void curve448_scalar_copy (
+static ossl_inline void curve448_scalar_copy(curve448_scalar_t out,
-    curve448_scalar_t out,
+                                             const curve448_scalar_t a)
-    const curve448_scalar_t a
+{
 ) {
    *out = *a;
 }
@ -194,11 +176,10 @@ static ossl_inline void curve448_scalar_copy (
 * @param [out] a A copy of the point.
 * @param [in] b Any point.
 */
-static ossl_inline void curve448_point_copy (
+static ossl_inline void curve448_point_copy(curve448_point_t a,
-    curve448_point_t a,
+                                            const curve448_point_t b)
-    const curve448_point_t b
+{
-) {
+    *a = *b;
    *a=*b;
 }
 /**
@ -210,10 +191,8 @@ static ossl_inline void curve448_point_copy (
 * @retval DECAF_TRUE The points are equal.
 * @retval DECAF_FALSE The points are not equal.
 */
-__owur decaf_bool_t curve448_point_eq (
+__owur decaf_bool_t curve448_point_eq(const curve448_point_t a,
-    const curve448_point_t a,
+                                      const curve448_point_t b);
    const curve448_point_t b
 );
 /**
 * @brief Double a point.  Equivalent to
@ -222,10 +201,7 @@ __owur decaf_bool_t curve448_point_eq (
 * @param [out] two_a The sum a+a.
 * @param [in] a A point.
 */
-void curve448_point_double (
+void curve448_point_double(curve448_point_t two_a, const curve448_point_t a);
    curve448_point_t two_a,
    const curve448_point_t a
 );
 /**
 * @brief RFC 7748 Diffie-Hellman scalarmul.  This function uses a different
@ -239,11 +215,10 @@ void curve448_point_double (
 * @retval DECAF_FAILURE The scalarmul didn't succeed, because the base
 * point is in a small subgroup.
 */
-__owur decaf_error_t decaf_x448 (
+__owur decaf_error_t decaf_x448(uint8_t out[DECAF_X448_PUBLIC_BYTES],
    uint8_t out[DECAF_X448_PUBLIC_BYTES],
                                const uint8_t base[DECAF_X448_PUBLIC_BYTES],
                                const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
-);
+    );
 /**
 * @brief Multiply a point by DECAF_X448_ENCODE_RATIO,
@ -265,10 +240,10 @@ __owur decaf_error_t decaf_x448 (
 * @param [out] out The scaled and encoded point.
 * @param [in] p The point to be scaled and encoded.
 */
-void curve448_point_mul_by_ratio_and_encode_like_x448 (
+void curve448_point_mul_by_ratio_and_encode_like_x448(uint8_t
-    uint8_t out[DECAF_X448_PUBLIC_BYTES],
+                                                      out
-    const curve448_point_t p
+                                                      [DECAF_X448_PUBLIC_BYTES],
-);
+                                                      const curve448_point_t p);
 /** The base point for X448 Diffie-Hellman */
 extern const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES];
@ -283,11 +258,9 @@ extern const uint8_t decaf_x448_base_point[DECAF_X448_PUBLIC_BYTES];
 * @param [out] scaled The scaled point base*scalar
 * @param [in] scalar The scalar to multiply by.
 */
-void decaf_x448_derive_public_key (
+void decaf_x448_derive_public_key(uint8_t out[DECAF_X448_PUBLIC_BYTES],
    uint8_t out[DECAF_X448_PUBLIC_BYTES],
                                  const uint8_t scalar[DECAF_X448_PRIVATE_BYTES]
-);
+    );
 /**
 * @brief Multiply a precomputed base point by a scalar:
@ -300,12 +273,9 @@ void decaf_x448_derive_public_key (
 * @param [in] base The point to be scaled.
 * @param [in] scalar The scalar to multiply by.
 */
-void curve448_precomputed_scalarmul (
+void curve448_precomputed_scalarmul(curve448_point_t scaled,
-    curve448_point_t scaled,
+                                    const curve448_precomputed_s * base,
-    const curve448_precomputed_s *base,
+                                    const curve448_scalar_t scalar);
    const curve448_scalar_t scalar
 );
 /**
 * @brief Multiply two base points by two scalars:
@ -322,12 +292,10 @@ void curve448_precomputed_scalarmul (
 * @warning: This function takes variable time, and may leak the scalars
 * used.  It is designed for signature verification.
 */
-void curve448_base_double_scalarmul_non_secret (
+void curve448_base_double_scalarmul_non_secret(curve448_point_t combo,
    curve448_point_t combo,
                                               const curve448_scalar_t scalar1,
                                               const curve448_point_t base2,
-    const curve448_scalar_t scalar2
+                                               const curve448_scalar_t scalar2);
 );
 /**
 * @brief Test that a point is valid, for debugging purposes.
@ -336,23 +304,17 @@ void curve448_base_double_scalarmul_non_secret (
 * @retval DECAF_TRUE The point is valid.
 * @retval DECAF_FALSE The point is invalid.
 */
-__owur decaf_bool_t curve448_point_valid (
+__owur decaf_bool_t curve448_point_valid(const curve448_point_t to_test);
    const curve448_point_t to_test
 );
 /**
 * @brief Overwrite scalar with zeros.
 */
-void curve448_scalar_destroy (
+void curve448_scalar_destroy(curve448_scalar_t scalar);
    curve448_scalar_t scalar
 );
 /**
 * @brief Overwrite point with zeros.
 */
-void curve448_point_destroy (
+void curve448_point_destroy(curve448_point_t point);
    curve448_point_t point
 );
 #ifdef __cplusplus
 } /* extern "C" */
--- a/crypto/ec/curve448/scalar.c
+++ b/crypto/ec/curve448/scalar.c
@ -15,63 +15,72 @@
 #include "constant_time.h"
 #include "point_448.h"
-static const decaf_word_t MONTGOMERY_FACTOR = (decaf_word_t)0x3bd440fae918bc5;
+static const decaf_word_t MONTGOMERY_FACTOR = (decaf_word_t) 0x3bd440fae918bc5;
-static const curve448_scalar_t sc_p = {{{
+static const curve448_scalar_t sc_p = { {{
-    SC_LIMB(0x2378c292ab5844f3), SC_LIMB(0x216cc2728dc58f55), SC_LIMB(0xc44edb49aed63690), SC_LIMB(0xffffffff7cca23e9), SC_LIMB(0xffffffffffffffff), SC_LIMB(0xffffffffffffffff), SC_LIMB(0x3fffffffffffffff)
+                                          SC_LIMB(0x2378c292ab5844f3),
-}}}, sc_r2 = {{{
+                                          SC_LIMB(0x216cc2728dc58f55),
-    SC_LIMB(0xe3539257049b9b60), SC_LIMB(0x7af32c4bc1b195d9), SC_LIMB(0x0d66de2388ea1859), SC_LIMB(0xae17cf725ee4d838), SC_LIMB(0x1a9cc14ba3c47c44), SC_LIMB(0x2052bcb7e4d070af), SC_LIMB(0x3402a939f823b729)
+                                          SC_LIMB(0xc44edb49aed63690),
                                          SC_LIMB(0xffffffff7cca23e9),
                                          SC_LIMB(0xffffffffffffffff),
                                          SC_LIMB(0xffffffffffffffff),
                                          SC_LIMB(0x3fffffffffffffff)
                                          }}
 }, sc_r2 = { { {
            SC_LIMB(0xe3539257049b9b60), SC_LIMB(0x7af32c4bc1b195d9),
                SC_LIMB(0x0d66de2388ea1859), SC_LIMB(0xae17cf725ee4d838),
                SC_LIMB(0x1a9cc14ba3c47c44), SC_LIMB(0x2052bcb7e4d070af),
                SC_LIMB(0x3402a939f823b729)
 }}};
 /* End of template stuff */
 #define WBITS DECAF_WORD_BITS   /* NB this may be different from ARCH_WORD_BITS */
-const curve448_scalar_t curve448_scalar_one = {{{1}}}, curve448_scalar_zero = {{{0}}};
+const curve448_scalar_t curve448_scalar_one = { {{1}} }, curve448_scalar_zero = { { {
 0}}};
 /** {extra,accum} - sub +? p
 * Must have extra <= 1
 */
-static void sc_subx(
+static void sc_subx(curve448_scalar_t out,
    curve448_scalar_t out,
                    const decaf_word_t accum[DECAF_448_SCALAR_LIMBS],
                    const curve448_scalar_t sub,
-    const curve448_scalar_t p,
+                    const curve448_scalar_t p, decaf_word_t extra)
-    decaf_word_t extra
+{
 ) {
    decaf_dsword_t chain = 0;
    unsigned int i;
    decaf_word_t borrow;
-    for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
        chain = (chain + accum[i]) - sub->limb[i];
        out->limb[i] = chain;
        chain >>= WBITS;
    }
-    borrow = chain+extra; /* = 0 or -1 */
+    borrow = chain + extra;     /* = 0 or -1 */
    chain = 0;
-    for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
        chain = (chain + out->limb[i]) + (p->limb[i] & borrow);
        out->limb[i] = chain;
        chain >>= WBITS;
    }
 }
-static void sc_montmul (
+static void sc_montmul(curve448_scalar_t out,
-    curve448_scalar_t out,
+                       const curve448_scalar_t a, const curve448_scalar_t b)
-    const curve448_scalar_t a,
+{
-    const curve448_scalar_t b
+    unsigned int i, j;
-) {
+    decaf_word_t accum[DECAF_448_SCALAR_LIMBS + 1] = { 0 };
    unsigned int i,j;
    decaf_word_t accum[DECAF_448_SCALAR_LIMBS+1] = {0};
    decaf_word_t hi_carry = 0;
-    for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
        decaf_word_t mand = a->limb[i];
        const decaf_word_t *mier = b->limb;
        decaf_dword_t chain = 0;
-        for (j=0; j<DECAF_448_SCALAR_LIMBS; j++) {
+        for (j = 0; j < DECAF_448_SCALAR_LIMBS; j++) {
-            chain += ((decaf_dword_t)mand)*mier[j] + accum[j];
+            chain += ((decaf_dword_t) mand) * mier[j] + accum[j];
            accum[j] = chain;
            chain >>= WBITS;
        }
@ -80,45 +89,40 @@ static void sc_montmul (
        mand = accum[0] * MONTGOMERY_FACTOR;
        chain = 0;
        mier = sc_p->limb;
-        for (j=0; j<DECAF_448_SCALAR_LIMBS; j++) {
+        for (j = 0; j < DECAF_448_SCALAR_LIMBS; j++) {
-            chain += (decaf_dword_t)mand*mier[j] + accum[j];
+            chain += (decaf_dword_t) mand *mier[j] + accum[j];
-            if (j) accum[j-1] = chain;
+            if (j)
                accum[j - 1] = chain;
            chain >>= WBITS;
        }
        chain += accum[j];
        chain += hi_carry;
-        accum[j-1] = chain;
+        accum[j - 1] = chain;
        hi_carry = chain >> WBITS;
    }
    sc_subx(out, accum, sc_p, sc_p, hi_carry);
 }
-void curve448_scalar_mul (
+void curve448_scalar_mul(curve448_scalar_t out,
-    curve448_scalar_t out,
+                         const curve448_scalar_t a, const curve448_scalar_t b)
-    const curve448_scalar_t a,
+{
-    const curve448_scalar_t b
+    sc_montmul(out, a, b);
-) {
+    sc_montmul(out, out, sc_r2);
    sc_montmul(out,a,b);
    sc_montmul(out,out,sc_r2);
 }
-void curve448_scalar_sub (
+void curve448_scalar_sub(curve448_scalar_t out,
-    curve448_scalar_t out,
+                         const curve448_scalar_t a, const curve448_scalar_t b)
-    const curve448_scalar_t a,
+{
    const curve448_scalar_t b
 ) {
    sc_subx(out, a->limb, b, sc_p, 0);
 }
-void curve448_scalar_add (
+void curve448_scalar_add(curve448_scalar_t out,
-    curve448_scalar_t out,
+                         const curve448_scalar_t a, const curve448_scalar_t b)
-    const curve448_scalar_t a,
+{
    const curve448_scalar_t b
 ) {
    decaf_dword_t chain = 0;
    unsigned int i;
-    for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
        chain = (chain + a->limb[i]) + b->limb[i];
        out->limb[i] = chain;
        chain >>= WBITS;
@ -126,50 +130,47 @@ void curve448_scalar_add (
    sc_subx(out, out->limb, sc_p, sc_p, chain);
 }
-static ossl_inline void scalar_decode_short (
+static ossl_inline void scalar_decode_short(curve448_scalar_t s,
    curve448_scalar_t s,
                                            const unsigned char *ser,
-    unsigned int nbytes
+                                            unsigned int nbytes)
-) {
+{
-    unsigned int i,j,k=0;
+    unsigned int i, j, k = 0;
-    for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
        decaf_word_t out = 0;
-        for (j=0; j<sizeof(decaf_word_t) && k<nbytes; j++,k++) {
+        for (j = 0; j < sizeof(decaf_word_t) && k < nbytes; j++, k++) {
-            out |= ((decaf_word_t)ser[k])<<(8*j);
+            out |= ((decaf_word_t) ser[k]) << (8 * j);
        }
        s->limb[i] = out;
    }
 }
-decaf_error_t curve448_scalar_decode(
+decaf_error_t curve448_scalar_decode(curve448_scalar_t s,
-    curve448_scalar_t s,
+                                     const unsigned char
-    const unsigned char ser[DECAF_448_SCALAR_BYTES]
+                                     ser[DECAF_448_SCALAR_BYTES]
-) {
+    )
 {
    unsigned int i;
    decaf_dsword_t accum = 0;
    scalar_decode_short(s, ser, DECAF_448_SCALAR_BYTES);
-    for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
        accum = (accum + s->limb[i] - sc_p->limb[i]) >> WBITS;
    }
    /* Here accum == 0 or -1 */
-    curve448_scalar_mul(s,s,curve448_scalar_one); /* ham-handed reduce */
+    curve448_scalar_mul(s, s, curve448_scalar_one); /* ham-handed reduce */
    return decaf_succeed_if(~word_is_zero(accum));
 }
-void curve448_scalar_destroy (
+void curve448_scalar_destroy(curve448_scalar_t scalar)
-    curve448_scalar_t scalar
+{
 ) {
    OPENSSL_cleanse(scalar, sizeof(curve448_scalar_t));
 }
-void curve448_scalar_decode_long(
+void curve448_scalar_decode_long(curve448_scalar_t s,
-    curve448_scalar_t s,
+                                 const unsigned char *ser, size_t ser_len)
-    const unsigned char *ser,
+{
    size_t ser_len
 ) {
    size_t i;
    curve448_scalar_t t1, t2;
@ -178,23 +179,24 @@ void curve448_scalar_decode_long(
        return;
    }
-    i = ser_len - (ser_len%DECAF_448_SCALAR_BYTES);
+    i = ser_len - (ser_len % DECAF_448_SCALAR_BYTES);
-    if (i==ser_len) i -= DECAF_448_SCALAR_BYTES;
+    if (i == ser_len)
        i -= DECAF_448_SCALAR_BYTES;
-    scalar_decode_short(t1, &ser[i], ser_len-i);
+    scalar_decode_short(t1, &ser[i], ser_len - i);
    if (ser_len == sizeof(curve448_scalar_t)) {
-        assert(i==0);
+        assert(i == 0);
        /* ham-handed reduce */
-        curve448_scalar_mul(s,t1,curve448_scalar_one);
+        curve448_scalar_mul(s, t1, curve448_scalar_one);
        curve448_scalar_destroy(t1);
        return;
    }
    while (i) {
        i -= DECAF_448_SCALAR_BYTES;
-        sc_montmul(t1,t1,sc_r2);
+        sc_montmul(t1, t1, sc_r2);
-        ignore_result( curve448_scalar_decode(t2, ser+i) );
+        ignore_result(curve448_scalar_decode(t2, ser + i));
        curve448_scalar_add(t1, t1, t2);
    }
@ -203,33 +205,29 @@ void curve448_scalar_decode_long(
    curve448_scalar_destroy(t2);
 }
-void curve448_scalar_encode(
+void curve448_scalar_encode(unsigned char ser[DECAF_448_SCALAR_BYTES],
-    unsigned char ser[DECAF_448_SCALAR_BYTES],
+                            const curve448_scalar_t s)
-    const curve448_scalar_t s
+{
-) {
+    unsigned int i, j, k = 0;
-    unsigned int i,j,k=0;
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
-    for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
+        for (j = 0; j < sizeof(decaf_word_t); j++, k++) {
-        for (j=0; j<sizeof(decaf_word_t); j++,k++) {
+            ser[k] = s->limb[i] >> (8 * j);
            ser[k] = s->limb[i] >> (8*j);
        }
    }
 }
-void curve448_scalar_halve (
+void curve448_scalar_halve(curve448_scalar_t out, const curve448_scalar_t a)
-    curve448_scalar_t out,
+{
    const curve448_scalar_t a
 ) {
    decaf_word_t mask = -(a->limb[0] & 1);
    decaf_dword_t chain = 0;
    unsigned int i;
-    for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS; i++) {
        chain = (chain + a->limb[i]) + (sc_p->limb[i] & mask);
        out->limb[i] = chain;
        chain >>= DECAF_WORD_BITS;
    }
-    for (i=0; i<DECAF_448_SCALAR_LIMBS-1; i++) {
+    for (i = 0; i < DECAF_448_SCALAR_LIMBS - 1; i++) {
-        out->limb[i] = out->limb[i]>>1 | out->limb[i+1]<<(WBITS-1);
+        out->limb[i] = out->limb[i] >> 1 | out->limb[i + 1] << (WBITS - 1);
    }
-    out->limb[i] = out->limb[i]>>1 | chain<<(WBITS-1);
+    out->limb[i] = out->limb[i] >> 1 | chain << (WBITS - 1);
 }
--- a/crypto/ec/curve448/word.h
+++ b/crypto/ec/curve448/word.h
@ -11,207 +11,211 @@
 */
 #ifndef __WORD_H__
-#define __WORD_H__
+# define __WORD_H__
-#include <string.h>
+# include <string.h>
-#include <assert.h>
+# include <assert.h>
-#include <openssl/e_os2.h>
+# include <openssl/e_os2.h>
-#include "arch_intrinsics.h"
+# include "arch_intrinsics.h"
-#include "curve448utils.h"
+# include "curve448utils.h"
-#ifndef _BSD_SOURCE
+# ifndef _BSD_SOURCE
-#define _BSD_SOURCE 1
+#  define _BSD_SOURCE 1
-#endif
+# endif
-#ifndef _DEFAULT_SOURCE
+# ifndef _DEFAULT_SOURCE
-#define _DEFAULT_SOURCE 1
+#  define _DEFAULT_SOURCE 1
-#endif
+# endif
-#include <stdlib.h>
+# include <stdlib.h>
-#if defined(__ARM_NEON__)
+# if defined(__ARM_NEON__)
-#include <arm_neon.h>
+#  include <arm_neon.h>
-#elif defined(__SSE2__)
+# elif defined(__SSE2__)
-    #if !defined(__GNUC__) || defined(__clang__) || __GNUC__ >= 5 || (__GNUC__==4 && __GNUC_MINOR__ >= 4)
+#  if !defined(__GNUC__) || defined(__clang__) || __GNUC__ >= 5 || (__GNUC__==4 && __GNUC_MINOR__ >= 4)
-        #include <immintrin.h>
+#   include <immintrin.h>
-    #else
+#  else
-        #include <emmintrin.h>
+#   include <emmintrin.h>
-    #endif
+#  endif
-#endif
+# endif
-#if (ARCH_WORD_BITS == 64)
+# if (ARCH_WORD_BITS == 64)
-    typedef uint64_t word_t, mask_t;
+typedef uint64_t word_t, mask_t;
-    typedef __uint128_t dword_t;
+typedef __uint128_t dword_t;
-    typedef int32_t hsword_t;
+typedef int32_t hsword_t;
-    typedef int64_t sword_t;
+typedef int64_t sword_t;
-    typedef __int128_t dsword_t;
+typedef __int128_t dsword_t;
-#elif (ARCH_WORD_BITS == 32)
+# elif (ARCH_WORD_BITS == 32)
-    typedef uint32_t word_t, mask_t;
+typedef uint32_t word_t, mask_t;
-    typedef uint64_t dword_t;
+typedef uint64_t dword_t;
-    typedef int16_t hsword_t;
+typedef int16_t hsword_t;
-    typedef int32_t sword_t;
+typedef int32_t sword_t;
-    typedef int64_t dsword_t;
+typedef int64_t dsword_t;
-#else
+# else
-    #error "For now, libdecaf only supports 32- and 64-bit architectures."
+#  error "For now, libdecaf only supports 32- and 64-bit architectures."
-#endif
+# endif
-/* Scalar limbs are keyed off of the API word size instead of the arch word size. */
+/*
-#if DECAF_WORD_BITS == 64
+ * Scalar limbs are keyed off of the API word size instead of the arch word
-    #define SC_LIMB(x) (x)
+ * size.
-#elif DECAF_WORD_BITS == 32
+ */
-    #define SC_LIMB(x) ((uint32_t)x),(x>>32)
+# if DECAF_WORD_BITS == 64
-#else
+#  define SC_LIMB(x) (x)
-    #error "For now, libdecaf only supports 32- and 64-bit architectures."
+# elif DECAF_WORD_BITS == 32
-#endif
+#  define SC_LIMB(x) ((uint32_t)x),(x>>32)
 # else
 #  error "For now, libdecaf only supports 32- and 64-bit architectures."
 # endif
-#ifdef __ARM_NEON__
+# ifdef __ARM_NEON__
-    typedef uint32x4_t vecmask_t;
+typedef uint32x4_t vecmask_t;
-#elif defined(__clang__)
+# elif defined(__clang__)
-    typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2)));
+typedef uint64_t uint64x2_t __attribute__ ((ext_vector_type(2)));
-    typedef int64_t  int64x2_t __attribute__((ext_vector_type(2)));
+typedef int64_t int64x2_t __attribute__ ((ext_vector_type(2)));
-    typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4)));
+typedef uint64_t uint64x4_t __attribute__ ((ext_vector_type(4)));
-    typedef int64_t  int64x4_t __attribute__((ext_vector_type(4)));
+typedef int64_t int64x4_t __attribute__ ((ext_vector_type(4)));
-    typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4)));
+typedef uint32_t uint32x4_t __attribute__ ((ext_vector_type(4)));
-    typedef int32_t  int32x4_t __attribute__((ext_vector_type(4)));
+typedef int32_t int32x4_t __attribute__ ((ext_vector_type(4)));
-    typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2)));
+typedef uint32_t uint32x2_t __attribute__ ((ext_vector_type(2)));
-    typedef int32_t  int32x2_t __attribute__((ext_vector_type(2)));
+typedef int32_t int32x2_t __attribute__ ((ext_vector_type(2)));
-    typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8)));
+typedef uint32_t uint32x8_t __attribute__ ((ext_vector_type(8)));
-    typedef int32_t  int32x8_t __attribute__((ext_vector_type(8)));
+typedef int32_t int32x8_t __attribute__ ((ext_vector_type(8)));
-    typedef word_t vecmask_t __attribute__((ext_vector_type(4)));
+typedef word_t vecmask_t __attribute__ ((ext_vector_type(4)));
-#else /* GCC, hopefully? */
+# else                          /* GCC, hopefully? */
-    typedef uint64_t uint64x2_t __attribute__((vector_size(16)));
+typedef uint64_t uint64x2_t __attribute__ ((vector_size(16)));
-    typedef int64_t  int64x2_t __attribute__((vector_size(16)));
+typedef int64_t int64x2_t __attribute__ ((vector_size(16)));
-    typedef uint64_t uint64x4_t __attribute__((vector_size(32)));
+typedef uint64_t uint64x4_t __attribute__ ((vector_size(32)));
-    typedef int64_t  int64x4_t __attribute__((vector_size(32)));
+typedef int64_t int64x4_t __attribute__ ((vector_size(32)));
-    typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
+typedef uint32_t uint32x4_t __attribute__ ((vector_size(16)));
-    typedef int32_t  int32x4_t __attribute__((vector_size(16)));
+typedef int32_t int32x4_t __attribute__ ((vector_size(16)));
-    typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
+typedef uint32_t uint32x2_t __attribute__ ((vector_size(8)));
-    typedef int32_t  int32x2_t __attribute__((vector_size(8)));
+typedef int32_t int32x2_t __attribute__ ((vector_size(8)));
-    typedef uint32_t uint32x8_t __attribute__((vector_size(32)));
+typedef uint32_t uint32x8_t __attribute__ ((vector_size(32)));
-    typedef int32_t  int32x8_t __attribute__((vector_size(32)));
+typedef int32_t int32x8_t __attribute__ ((vector_size(32)));
-    typedef word_t vecmask_t __attribute__((vector_size(32)));
+typedef word_t vecmask_t __attribute__ ((vector_size(32)));
-#endif
+# endif
-#if defined(__AVX2__)
+# if defined(__AVX2__)
-    #define VECTOR_ALIGNED __attribute__((aligned(32)))
+#  define VECTOR_ALIGNED __attribute__((aligned(32)))
-    typedef uint32x8_t big_register_t;
+typedef uint32x8_t big_register_t;
-    typedef uint64x4_t uint64xn_t;
+typedef uint64x4_t uint64xn_t;
-    typedef uint32x8_t uint32xn_t;
+typedef uint32x8_t uint32xn_t;
-    static ossl_inline big_register_t
+static ossl_inline big_register_t br_set_to_mask(mask_t x)
-    br_set_to_mask(mask_t x) {
+{
    uint32_t y = (uint32_t)x;
-        big_register_t ret = {y,y,y,y,y,y,y,y};
+    big_register_t ret = { y, y, y, y, y, y, y, y };
    return ret;
-    }
+}
-#elif defined(__SSE2__)
+# elif defined(__SSE2__)
-    #define VECTOR_ALIGNED __attribute__((aligned(16)))
+#  define VECTOR_ALIGNED __attribute__((aligned(16)))
-    typedef uint32x4_t big_register_t;
+typedef uint32x4_t big_register_t;
-    typedef uint64x2_t uint64xn_t;
+typedef uint64x2_t uint64xn_t;
-    typedef uint32x4_t uint32xn_t;
+typedef uint32x4_t uint32xn_t;
-    static ossl_inline big_register_t
+static ossl_inline big_register_t br_set_to_mask(mask_t x)
-    br_set_to_mask(mask_t x) {
+{
    uint32_t y = x;
-        big_register_t ret = {y,y,y,y};
+    big_register_t ret = { y, y, y, y };
    return ret;
-    }
+}
-#elif defined(__ARM_NEON__)
+# elif defined(__ARM_NEON__)
-    #define VECTOR_ALIGNED __attribute__((aligned(16)))
+#  define VECTOR_ALIGNED __attribute__((aligned(16)))
-    typedef uint32x4_t big_register_t;
+typedef uint32x4_t big_register_t;
-    typedef uint64x2_t uint64xn_t;
+typedef uint64x2_t uint64xn_t;
-    typedef uint32x4_t uint32xn_t;
+typedef uint32x4_t uint32xn_t;
-    static ossl_inline big_register_t
+static ossl_inline big_register_t br_set_to_mask(mask_t x)
-    br_set_to_mask(mask_t x) {
+{
    return vdupq_n_u32(x);
-    }
+}
-#elif defined(_WIN64) || defined(__amd64__) || defined(__X86_64__) \
+# elif defined(_WIN64) || defined(__amd64__) || defined(__X86_64__) \
      || defined(__aarch64__)
-    #define VECTOR_ALIGNED __attribute__((aligned(8)))
+#  define VECTOR_ALIGNED __attribute__((aligned(8)))
-    typedef uint64_t big_register_t, uint64xn_t;
+typedef uint64_t big_register_t, uint64xn_t;
-    typedef uint32_t uint32xn_t;
+typedef uint32_t uint32xn_t;
-    static ossl_inline big_register_t
+static ossl_inline big_register_t br_set_to_mask(mask_t x)
-    br_set_to_mask(mask_t x) {
+{
-        return (big_register_t)x;
+    return (big_register_t) x;
-    }
+}
-#else
+# else
-    #define VECTOR_ALIGNED __attribute__((aligned(4)))
+#  define VECTOR_ALIGNED __attribute__((aligned(4)))
-    typedef uint64_t uint64xn_t;
+typedef uint64_t uint64xn_t;
-    typedef uint32_t uint32xn_t;
+typedef uint32_t uint32xn_t;
-    typedef uint32_t big_register_t;
+typedef uint32_t big_register_t;
-    static ossl_inline big_register_t
+static ossl_inline big_register_t br_set_to_mask(mask_t x)
-    br_set_to_mask(mask_t x) {
+{
-        return (big_register_t)x;
+    return (big_register_t) x;
-    }
+}
-#endif
+# endif
-#if defined(__AVX2__)
+# if defined(__AVX2__)
-    static ossl_inline big_register_t
+static ossl_inline big_register_t br_is_zero(big_register_t x)
-    br_is_zero(big_register_t x) {
+{
-        return (big_register_t)(x == br_set_to_mask(0));
+    return (big_register_t) (x == br_set_to_mask(0));
-    }
+}
-#elif defined(__SSE2__)
+# elif defined(__SSE2__)
-    static ossl_inline big_register_t
+static ossl_inline big_register_t br_is_zero(big_register_t x)
-    br_is_zero(big_register_t x) {
+{
-        return (big_register_t)_mm_cmpeq_epi32((__m128i)x, _mm_setzero_si128());
+    return (big_register_t) _mm_cmpeq_epi32((__m128i) x, _mm_setzero_si128());
-        //return (big_register_t)(x == br_set_to_mask(0));
+    // return (big_register_t)(x == br_set_to_mask(0));
-    }
+}
-#elif defined(__ARM_NEON__)
+# elif defined(__ARM_NEON__)
-    static ossl_inline big_register_t
+static ossl_inline big_register_t br_is_zero(big_register_t x)
-    br_is_zero(big_register_t x) {
+{
-        return vceqq_u32(x,x^x);
+    return vceqq_u32(x, x ^ x);
-    }
+}
-#else
+# else
-    #define br_is_zero word_is_zero
+#  define br_is_zero word_is_zero
-#endif
+# endif
 /* PERF: vectorize vs unroll */
-#ifdef __clang__
+# ifdef __clang__
-#if 100*__clang_major__ + __clang_minor__ > 305
+#  if 100*__clang_major__ + __clang_minor__ > 305
-#define UNROLL _Pragma("clang loop unroll(full)")
+#   define UNROLL _Pragma("clang loop unroll(full)")
-#endif
+#  endif
-#endif
+# endif
-#ifndef UNROLL
+# ifndef UNROLL
-#define UNROLL
+#  define UNROLL
-#endif
+# endif
-/* The plan on booleans:
+/*
- *
+ * The plan on booleans: The external interface uses decaf_bool_t, but this
- * The external interface uses decaf_bool_t, but this might be a different
+ * might be a different size than our particular arch's word_t (and thus
- * size than our particular arch's word_t (and thus mask_t).  Also, the caller
+ * mask_t).  Also, the caller isn't guaranteed to pass it as nonzero.  So
- * isn't guaranteed to pass it as nonzero.  So bool_to_mask converts word sizes
+ * bool_to_mask converts word sizes and checks nonzero. On the flip side,
- * and checks nonzero.
+ * mask_t is always -1 or 0, but it might be a different size than
- *
+ * decaf_bool_t. On the third hand, we have success vs boolean types, but
- * On the flip side, mask_t is always -1 or 0, but it might be a different size
+ * that's handled in common.h: it converts between decaf_bool_t and
- * than decaf_bool_t.
+ * decaf_error_t.
 *
 * On the third hand, we have success vs boolean types, but that's handled in
 * common.h: it converts between decaf_bool_t and decaf_error_t.
 */
-static ossl_inline decaf_bool_t mask_to_bool (mask_t m) {
+static ossl_inline decaf_bool_t mask_to_bool(mask_t m)
-    return (decaf_sword_t)(sword_t)m;
+{
    return (decaf_sword_t) (sword_t) m;
 }
-static ossl_inline mask_t bool_to_mask (decaf_bool_t m) {
+static ossl_inline mask_t bool_to_mask(decaf_bool_t m)
 {
    /* On most arches this will be optimized to a simple cast. */
    mask_t ret = 0;
    unsigned int i;
-    unsigned int limit = sizeof(decaf_bool_t)/sizeof(mask_t);
+    unsigned int limit = sizeof(decaf_bool_t) / sizeof(mask_t);
-    if (limit < 1) limit = 1;
+    if (limit < 1)
-    for (i=0; i<limit; i++) {
+        limit = 1;
-        ret |= ~ word_is_zero(m >> (i*8*sizeof(word_t)));
+    for (i = 0; i < limit; i++) {
        ret |= ~word_is_zero(m >> (i * 8 * sizeof(word_t)));
    }
    return ret;
 }
-static ossl_inline void ignore_result ( decaf_bool_t boo ) {
+static ossl_inline void ignore_result(decaf_bool_t boo)
 {
    (void)boo;
 }