Commit 38455f34 authored by Michael Hamburg's avatar Michael Hamburg

one ser/deser to rule them all (TODO test on NEON and other places with LIMBPERM)

parent a5bed6b3
......@@ -86,12 +86,45 @@ const size_t API_NS2(alignof,precomputed_s) = 32;
#define UNROLL
#endif
/*
 * Run `op` once for every limb index i = 0 .. NLIMBS-1, inside its own scope.
 * FOR_LIMB_U is the same loop prefixed with UNROLL.
 * Each macro is defined exactly once, using NLIMBS as the loop bound.
 */
#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<NLIMBS; i++) { op; }}
#define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<NLIMBS; i++) { op; }}
/**
 * Serialize a field element into SER_BYTES bytes, least-significant byte first.
 *
 * A copy of x is passed through gf_strong_reduce, then its limbs are fed,
 * in LIMBPERM order, into a bit accumulator that emits one byte per iteration.
 *
 * @param serial Output buffer of SER_BYTES bytes.
 * @param x      Field element to encode; it is not modified.
 */
void gf_serialize (uint8_t serial[SER_BYTES], const gf x) {
    gf reduced;
    gf_copy(reduced, x);
    gf_strong_reduce(reduced);

    dword_t acc = 0;             /* pending bits, lowest bit first */
    unsigned int nbits = 0;      /* how many bits of acc are pending */
    unsigned int next_limb = 0;  /* number of limbs consumed so far */

    UNROLL for (unsigned int byte_idx=0; byte_idx<SER_BYTES; byte_idx++) {
        /* Refill with one more limb once less than a full byte is pending. */
        if (nbits < 8 && next_limb < NLIMBS) {
            acc |= ((dword_t)reduced->limb[LIMBPERM(next_limb)]) << nbits;
            nbits += LIMB_PLACE_VALUE(LIMBPERM(next_limb));
            next_limb++;
        }
        serial[byte_idx] = acc;
        nbits -= 8;
        acc >>= 8;
    }
}
/** Copy the field element y into x with a single struct assignment. */
static INLINE void
gf_cpy(gf x, const gf y) {
    *x = *y;
}
/**
 * Deserialize SER_BYTES bytes, least-significant byte first, into the limbs of x.
 *
 * Input bytes are shifted into a bit buffer. Each limb, addressed through
 * LIMBPERM, takes its LIMB_PLACE_VALUE low bits from the buffer; the last
 * limb is assigned the whole remaining buffer instead of a masked slice.
 *
 * @param x      Output field element.
 * @param serial Input bytes.
 * @return word_is_zero(buffer) & ~word_is_zero(scarry): set only when nothing
 *         is left in the buffer after the last limb and the running borrow of
 *         x - MODULUS is nonzero.
 *         NOTE(review): presumably a nonzero borrow means x < MODULUS - confirm.
 */
mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES]) {
unsigned int j=0, fill=0;
dword_t buffer = 0;
dsword_t scarry = 0;
UNROLL for (unsigned int i=0; i<NLIMBS; i++) {
/* Pull in bytes until the buffer holds a full limb or the input is exhausted. */
UNROLL while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < SER_BYTES) {
buffer |= ((dword_t)serial[j]) << fill;
fill += 8;
j++;
}
/* Every limb but the last is masked to its place value. */
x->limb[LIMBPERM(i)] = (i<NLIMBS-1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer;
fill -= LIMB_PLACE_VALUE(LIMBPERM(i));
buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i));
/* Carry the limb-wise difference against MODULUS forward, shifted down by a full word width. */
/* NOTE(review): relies on right-shifting a negative dsword_t being arithmetic - confirm for the target compilers. */
scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8*sizeof(word_t));
}
return word_is_zero(buffer) & ~word_is_zero(scarry);
}
/** Constant time, x = is_z ? z : y */
static INLINE void
......@@ -120,9 +153,7 @@ cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) {
/** Compare a==b */
/* Not static because it's used in inverse square root. */
decaf_word_t gf_eq(const gf a, const gf b);
decaf_word_t
gf_eq(const gf a, const gf b) {
decaf_word_t gf_eq(const gf a, const gf b) {
gf c;
gf_sub(c,a,b);
gf_strong_reduce(c);
......@@ -153,13 +184,10 @@ gf_invert(gf y, const gf x) {
(void)ret; assert(ret);
gf_sqr(t1, t2);
gf_mul(t2, t1, x); // not direct to y in case of alias.
gf_cpy(y, t2);
gf_copy(y, t2);
}
/**
* Mul by signed int. Not constant-time WRT the sign of that int.
* Just uses a full mul (PERF)
*/
/** Mul by signed int. Not constant-time WRT the sign of that int. */
static INLINE void
gf_mulw_sgn(gf c, const gf a, int w) {
if (w>0) {
......@@ -182,7 +210,7 @@ static decaf_word_t hibit(const gf x) {
/**
 * Return a mask for the low bit of x: all ones if bit 0 of limb 0 is set
 * after x has been copied and passed through gf_strong_reduce, else zero.
 */
static decaf_word_t lobit(const gf x) {
    gf y;
    gf_copy(y,x);          /* reduce a scratch copy; x is const */
    gf_strong_reduce(y);
    return -(y->limb[0]&1);
}
......@@ -394,16 +422,9 @@ API_NS(scalar_eq) (
return word_is_zero(diff);
}
/* *** API begins here *** */
/**
 * identity = (0,1)
 * NOTE(review): the four field initializers are 0, 1, 1, 0 - presumably
 * x, y, z, t in that order; confirm against the point_t layout.
 */
const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};
/** Thin wrapper: write the serialized form of the field element a into ser. */
static void
gf_encode ( unsigned char ser[SER_BYTES], gf a ) {
    gf_s *elem = (gf_s *)a;
    gf_serialize(ser, elem);
}
static void
deisogenize (
gf_s *__restrict__ s,
......@@ -508,14 +529,7 @@ deisogenize (
/**
 * Encode the point p into ser: run deisogenize on it, then serialize the
 * resulting field element s.
 */
void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
    gf s, mtos;
    deisogenize(s,mtos,p,0,0,0);
    gf_serialize ( ser, s );
}

/**
 * Deserialize a field element, return TRUE if < p.
 * Forwards to gf_deserialize and returns its result unchanged.
 */
static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) {
    return gf_deserialize((gf_s *)s, ser);
}
decaf_error_t API_NS(point_decode) (
......@@ -524,7 +538,7 @@ decaf_error_t API_NS(point_decode) (
decaf_bool_t allow_identity
) {
gf s, a, b, c, d, e, f;
decaf_bool_t succ = gf_deser(s, ser), zero = gf_eq(s, ZERO);
decaf_bool_t succ = gf_deserialize(s, ser), zero = gf_eq(s, ZERO);
allow_identity = ~word_is_zero(allow_identity);
succ &= allow_identity | ~zero;
succ &= ~hibit(s);
......@@ -592,8 +606,6 @@ decaf_error_t API_NS(point_decode) (
#define NEG_D 0
#endif
void API_NS(point_sub) (
point_t p,
const point_t q,
......@@ -688,8 +700,8 @@ void API_NS(point_negate) (
const point_t a
) {
gf_sub(nega->x, ZERO, a->x);
gf_cpy(nega->y, a->y);
gf_cpy(nega->z, a->z);
gf_copy(nega->y, a->y);
gf_copy(nega->z, a->z);
gf_sub(nega->t, ZERO, a->t);
}
......@@ -827,7 +839,7 @@ niels_to_pt (
gf_add ( e->y, n->b, n->a );
gf_sub ( e->x, n->b, n->a );
gf_mul ( e->t, e->y, e->x );
gf_cpy ( e->z, ONE );
gf_copy ( e->z, ONE );
}
static NOINLINE void
......@@ -882,7 +894,7 @@ add_pniels_to_pt (
) {
gf L0;
gf_mul ( L0, p->z, pn->z );
gf_cpy ( p->z, L0 );
gf_copy ( p->z, L0 );
add_niels_to_pt( p, pn->n, before_double );
}
......@@ -894,7 +906,7 @@ sub_pniels_from_pt (
) {
gf L0;
gf_mul ( L0, p->z, pn->z );
gf_cpy ( p->z, L0 );
gf_copy ( p->z, L0 );
sub_niels_from_pt( p, pn->n, before_double );
}
......@@ -1203,7 +1215,7 @@ void API_NS(point_from_hash_nonuniform) (
// TODO: simplify since we don't return a hint anymore
// TODO: test pathological case ur0^2 = 1/(1-d)
gf r0,r,a,b,c,dee,D,N,rN,e;
gf_deser(r0,ser);
gf_deserialize(r0,ser);
gf_strong_reduce(r0);
gf_sqr(a,r0);
#if P_MOD_8 == 5
......@@ -1265,7 +1277,7 @@ void API_NS(point_from_hash_nonuniform) (
/* isogenize */
#if IMAGINE_TWIST
gf_mul(c,a,SQRT_MINUS_ONE);
gf_cpy(a,c);
gf_copy(a,c);
#endif
gf_sqr(c,a); /* s^2 */
......@@ -1326,7 +1338,7 @@ API_NS(invert_elligator_nonuniform) (
succ &= ~(is_identity & sgn_ed_T); /* NB: there are no preimages of rotated identity. */
#endif
gf_encode(recovered_hash, b);
gf_serialize(recovered_hash, b);
/* TODO: deal with overflow flag */
return decaf_succeed_if(succ);
}
......@@ -1380,14 +1392,14 @@ void API_NS(point_debugging_torque) (
gf tmp;
gf_mul(tmp,p->x,SQRT_MINUS_ONE);
gf_mul(q->x,p->y,SQRT_MINUS_ONE);
gf_cpy(q->y,tmp);
gf_cpy(q->z,p->z);
gf_copy(q->y,tmp);
gf_copy(q->z,p->z);
gf_sub(q->t,ZERO,p->t);
#else
gf_sub(q->x,ZERO,p->x);
gf_sub(q->y,ZERO,p->y);
gf_cpy(q->z,p->z);
gf_cpy(q->t,p->t);
gf_copy(q->z,p->z);
gf_copy(q->t,p->t);
#endif
}
......@@ -1397,16 +1409,16 @@ void API_NS(point_debugging_pscale) (
const uint8_t factor[SER_BYTES]
) {
gf gfac,tmp;
ignore_result(gf_deser(gfac,factor));
ignore_result(gf_deserialize(gfac,factor));
cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO));
gf_mul(tmp,p->x,gfac);
gf_cpy(q->x,tmp);
gf_copy(q->x,tmp);
gf_mul(tmp,p->y,gfac);
gf_cpy(q->y,tmp);
gf_copy(q->y,tmp);
gf_mul(tmp,p->z,gfac);
gf_cpy(q->z,tmp);
gf_copy(q->z,tmp);
gf_mul(tmp,p->t,gfac);
gf_cpy(q->t,tmp);
gf_copy(q->t,tmp);
}
static void gf_batch_invert (
......@@ -1417,7 +1429,7 @@ static void gf_batch_invert (
gf t1;
assert(n>1);
gf_cpy(out[1], in[0]);
gf_copy(out[1], in[0]);
int i;
for (i=1; i<(int) (n-1); i++) {
gf_mul(out[i+1], out[i], in[i]);
......@@ -1428,9 +1440,9 @@ static void gf_batch_invert (
for (i=n-1; i>0; i--) {
gf_mul(t1, out[i], out[0]);
gf_cpy(out[i], t1);
gf_copy(out[i], t1);
gf_mul(t1, out[0], in[i]);
gf_cpy(out[0], t1);
gf_copy(out[0], t1);
}
}
......@@ -1447,15 +1459,15 @@ static void batch_normalize_niels (
for (i=0; i<n; i++) {
gf_mul(product, table[i]->a, zis[i]);
gf_strong_reduce(product);
gf_cpy(table[i]->a, product);
gf_copy(table[i]->a, product);
gf_mul(product, table[i]->b, zis[i]);
gf_strong_reduce(product);
gf_cpy(table[i]->b, product);
gf_copy(table[i]->b, product);
gf_mul(product, table[i]->c, zis[i]);
gf_strong_reduce(product);
gf_cpy(table[i]->c, product);
gf_copy(table[i]->c, product);
}
decaf_bzero(product,sizeof(product));
......@@ -1500,7 +1512,7 @@ void API_NS(precompute) (
pt_to_pniels(pn_tmp, start);
memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n));
gf_cpy(zs[idx], pn_tmp->z);
gf_copy(zs[idx], pn_tmp->z);
if (j >= (1u<<(t-1)) - 1) break;
int delta = (j+1) ^ ((j+1)>>1) ^ gray;
......@@ -1733,7 +1745,7 @@ void API_NS(precompute_wnafs) (
prepare_wnaf_table(tmp,base,DECAF_WNAF_FIXED_TABLE_BITS);
for (i=0; i<1<<DECAF_WNAF_FIXED_TABLE_BITS; i++) {
memcpy(out[i], tmp[i]->n, sizeof(niels_t));
gf_cpy(zs[i], tmp[i]->z);
gf_copy(zs[i], tmp[i]->z);
}
batch_normalize_niels(out, (const gf *)zs, zis, 1<<DECAF_WNAF_FIXED_TABLE_BITS);
......
......@@ -13,8 +13,9 @@ f_field_h = gen_file(
#include "word.h"
#define __DECAF_%(gf_shortname)s_GF_DEFINED__ 1
#define NLIMBS (%(gf_impl_bits)d/sizeof(word_t)/8)
/* Field element: an array of NLIMBS words, aligned to 32 bytes. */
typedef struct gf_%(gf_shortname)s_s {
    word_t limb[NLIMBS];
} __attribute__((aligned(32))) gf_%(gf_shortname)s_s, gf_%(gf_shortname)s_t[1];
#define GF_LIT_LIMB_BITS %(gf_lit_limb_bits)d
......@@ -33,6 +34,7 @@ typedef struct gf_%(gf_shortname)s_s {
#define gf_isr gf_%(gf_shortname)s_isr
#define gf_serialize gf_%(gf_shortname)s_serialize
#define gf_deserialize gf_%(gf_shortname)s_deserialize
#define MODULUS gf_%(gf_shortname)s_MODULUS
#define SQRT_MINUS_ONE P%(gf_shortname)s_SQRT_MINUS_ONE /* might not be defined */
......@@ -42,6 +44,8 @@ typedef struct gf_%(gf_shortname)s_s {
extern "C" {
#endif
/* Field modulus. Declaration only: the initialized definition lives in one source file. */
extern const gf MODULUS;
/* Defined below in f_impl.h */
static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; }
static INLINE_UNUSED void gf_add_RAW (gf out, const gf a, const gf b);
......@@ -61,4 +65,9 @@ mask_t gf_deserialize (gf x, const uint8_t serial[(GF_BITS-1)/8+1]);
#endif
#include "f_impl.h" /* Bring in the inline implementations */
/* Fall back to the identity limb permutation when none has been defined yet. */
#ifndef LIMBPERM
#define LIMBPERM(i) (i)
#endif
/* Mask covering the LIMB_PLACE_VALUE(i) low bits of limb i. */
#define LIMB_MASK(i) (((1ull)<<LIMB_PLACE_VALUE(i))-1)
""")
......@@ -91,88 +91,37 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
}
/**
 * Fully reduce a in place.
 *
 * Steps:
 *  1. Fold the bits of limb 9 above bit 25 back into limb 0 (times 19) and
 *     mask limb 9, so the total is less than 2p.
 *  2. Subtract MODULUS limb by limb, propagating a signed carry.
 *  3. Add MODULUS back, masked by the final borrow, so the subtraction only
 *     sticks when the value was >= p.
 *
 * Both loops visit every one of the 10 limbs; the loop counter is advanced
 * only by the for-header.
 */
void gf_strong_reduce (gf a) {
    /* first, clear high */
    a->limb[0] += (a->limb[9]>>25)*19;
    a->limb[9] &= LIMB_MASK(9);

    /* now the total is less than 2p */

    /* compute total_value - p.  No need to reduce mod p. */
    dsword_t scarry = 0;
    for (unsigned int i=0; i<10; i++) {
        scarry = scarry + a->limb[i] - MODULUS->limb[i];
        a->limb[i] = scarry & LIMB_MASK(i);
        scarry >>= LIMB_PLACE_VALUE(i);
    }

    /* uncommon case: it was >= p, so now scarry = 0 and this = x
     * common case: it was < p, so now scarry = -1 and this = x - p + 2^255
     * so let's add back in p.  will carry back off the top for 2^255.
     */
    assert(word_is_zero(scarry) | word_is_zero(scarry+1));

    /* scarry is 0 or -1 here, so truncated to a word it is an all-or-nothing mask. */
    word_t scarry_0 = scarry;
    dword_t carry = 0;

    /* add it back */
    for (unsigned int i=0; i<10; i++) {
        carry = carry + a->limb[i] + (scarry_0 & MODULUS->limb[i]);
        a->limb[i] = carry & LIMB_MASK(i);
        carry >>= LIMB_PLACE_VALUE(i);
    }

    assert(word_is_zero(carry + scarry_0));
}
/**
 * Unpack 32 bytes, least-significant byte first, into the limbs of x.
 *
 * Bytes are shifted into a bit buffer. After each byte, if the buffer holds
 * at least LIMB_PLACE_VALUE(j) bits, or the last byte has just been read,
 * the next limb is written. Limbs written before the last byte are masked to
 * their place value; the limb written on the last byte takes the whole buffer.
 *
 * @return always -1: this version performs no in-field check (see FIXME below).
 */
mask_t gf_deserialize (gf x, const uint8_t serial[32]) {
unsigned int j=0, fill=0;
dword_t buffer = 0;
for (unsigned int i=0; i<32; i++) {
buffer |= ((dword_t)serial[i]) << fill;
fill += 8;
if (fill >= LIMB_PLACE_VALUE(j) || i == 31) {
assert(j < sizeof(x->limb)/sizeof(x->limb[0]));
word_t mask = ((1ull)<<LIMB_PLACE_VALUE(j))-1;
x->limb[j] = (i==31) ? buffer : (buffer & mask); // FIXME: this can in theory truncate the buffer if it's not in field.
buffer >>= LIMB_PLACE_VALUE(j);
fill -= LIMB_PLACE_VALUE(j);
j++;
}
}
return -1; // FIXME: test whether in field.
}
......@@ -3,8 +3,9 @@
*/
/* Expand one literal into two limb initializers: its low 26 bits, then the bits above them. */
#define LIMB(x) (x##ull)&((1ull<<26)-1), (x##ull)>>26
/* Build a field-element initializer from five literals, each split in two by LIMB. */
#define FIELD_LITERAL(a,b,c,d,e) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}}
#define FIELD_LITERAL(a,b,c,d,e) {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}}
/* Bit width of limb i: 26 for even indices, 25 for odd indices. */
#define LIMB_PLACE_VALUE(i) (((i)&1)?25:26)
void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<10; i++) {
......
......@@ -97,45 +97,3 @@ void gf_strong_reduce (gf a) {
assert(word_is_zero(carry + scarry));
}
/**
 * Serialize x as 32 bytes, least-significant byte first.
 *
 * A copy of x is passed through gf_strong_reduce; its five limbs, spaced
 * 51 bits apart, are packed into four 64-bit words which are then written
 * out one byte at a time.
 */
void gf_serialize (uint8_t serial[32], const gf x) {
    gf red;
    gf_copy(red, x);
    gf_strong_reduce(red);

    uint64_t *limb = red->limb;
    /* Limb k starts at bit 51*k of the packed 256-bit value. */
    const uint64_t packed[4] = {
        limb[0]     | limb[1]<<51,
        limb[1]>>13 | limb[2]<<38,
        limb[2]>>26 | limb[3]<<25,
        limb[3]>>39 | limb[4]<<12
    };

    for (unsigned int k=0; k<32; k++) {
        serial[k] = (uint8_t)(packed[k/8] >> (8*(k%8)));
    }
}
/**
 * Unpack 32 bytes, least-significant byte first, into the five limbs of x.
 *
 * The bytes are first gathered into four 64-bit words. Limbs 0..3 each take
 * 51 bits (masked); limb 4 takes the remaining upper bits of the last word
 * without masking.
 *
 * @return ~word_is_zero(~ge), where ge is built to be all ones when the
 *         encoded value is >= 2^255-19 or when the top bit of the last word
 *         is set.
 *         NOTE(review): presumably word_is_zero yields an all-ones mask for
 *         zero, making the result "set" exactly for in-range input - confirm.
 */
mask_t gf_deserialize (gf x, const uint8_t serial[32]) {
int i,j;
uint64_t ser64[4], mask = ((1ull<<51)-1);
/* Assemble four little-endian 64-bit words from the input bytes. */
for (i=0; i<4; i++) {
uint64_t out = 0;
for (j=0; j<8; j++) {
out |= ((uint64_t)serial[8*i+j])<<(8*j);
}
ser64[i] = out;
}
/* Test for >= 2^255-19 */
/* All ones iff adding 19 to the low word overflows 64 bits. */
uint64_t ge = -(((__uint128_t)ser64[0]+19)>>64);
ge &= ser64[1];
ge &= ser64[2];
/* The low 63 bits of the top word must all be set as well. */
ge &= (ser64[3]<<1) + 1;
/* Force all ones when bit 63 of the top word is set. */
ge |= -(((__uint128_t)ser64[3]+0x8000000000000000)>>64);
x->limb[0] = ser64[0] & mask;
x->limb[1] = (ser64[0]>>51 | ser64[1]<<13) & mask;
x->limb[2] = (ser64[1]>>38 | ser64[2]<<26) & mask;
x->limb[3] = (ser64[2]>>25 | ser64[3]<<39) & mask;
x->limb[4] = ser64[3]>>12;
return ~word_is_zero(~ge);
}
......@@ -4,6 +4,8 @@
/* Build a field-element initializer directly from five limb values. */
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}
/* Bit width of limb i: 51 for every index. */
#define LIMB_PLACE_VALUE(i) 51
void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<5; i++) {
out->limb[i] = a->limb[i] + b->limb[i];
......
......@@ -208,45 +208,3 @@ void gf_strong_reduce (gf a) {
assert(word_is_zero(carry + scarry));
}
/**
 * Serialize x as 32 bytes, least-significant byte first.
 *
 * A copy of x is passed through gf_strong_reduce; its five limbs, spaced
 * 51 bits apart, are packed into four 64-bit words which are then written
 * out one byte at a time.
 */
void gf_serialize (uint8_t serial[32], const gf x) {
    gf red;
    gf_copy(red, x);
    gf_strong_reduce(red);

    uint64_t *limb = red->limb;
    /* Limb k starts at bit 51*k of the packed 256-bit value. */
    const uint64_t packed[4] = {
        limb[0]     | limb[1]<<51,
        limb[1]>>13 | limb[2]<<38,
        limb[2]>>26 | limb[3]<<25,
        limb[3]>>39 | limb[4]<<12
    };

    for (unsigned int k=0; k<32; k++) {
        serial[k] = (uint8_t)(packed[k/8] >> (8*(k%8)));
    }
}
/**
 * Unpack 32 bytes, least-significant byte first, into the five limbs of x.
 *
 * The bytes are first gathered into four 64-bit words. Limbs 0..3 each take
 * 51 bits (masked); limb 4 takes the remaining upper bits of the last word
 * without masking.
 *
 * @return ~word_is_zero(~ge), where ge is built to be all ones when the
 *         encoded value is >= 2^255-19 or when the top bit of the last word
 *         is set.
 *         NOTE(review): presumably word_is_zero yields an all-ones mask for
 *         zero, making the result "set" exactly for in-range input - confirm.
 */
mask_t gf_deserialize (gf x, const uint8_t serial[32]) {
int i,j;
uint64_t ser64[4], mask = ((1ull<<51)-1);
/* Assemble four little-endian 64-bit words from the input bytes. */
for (i=0; i<4; i++) {
uint64_t out = 0;
for (j=0; j<8; j++) {
out |= ((uint64_t)serial[8*i+j])<<(8*j);
}
ser64[i] = out;
}
/* Test for >= 2^255-19 */
/* All ones iff adding 19 to the low word overflows 64 bits. */
uint64_t ge = -(((__uint128_t)ser64[0]+19)>>64);
ge &= ser64[1];
ge &= ser64[2];
/* The low 63 bits of the top word must all be set as well. */
ge &= (ser64[3]<<1) + 1;
/* Force all ones when bit 63 of the top word is set. */
ge |= -(((__uint128_t)ser64[3]+0x8000000000000000)>>64);
x->limb[0] = ser64[0] & mask;
x->limb[1] = (ser64[0]>>51 | ser64[1]<<13) & mask;
x->limb[2] = (ser64[1]>>38 | ser64[2]<<26) & mask;
x->limb[3] = (ser64[2]>>25 | ser64[3]<<39) & mask;
x->limb[4] = ser64[3]>>12;
return ~word_is_zero(~ge);
}
......@@ -4,6 +4,8 @@
/* Build a field-element initializer directly from five limb values. */
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}
/* Bit width of limb i: 51 for every index. */
#define LIMB_PLACE_VALUE(i) 51
void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<5; i++) {
out->limb[i] = a->limb[i] + b->limb[i];
......
......@@ -18,14 +18,17 @@ const gf_25519_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL(
0x78595a6804c9e,
0x2b8324804fc1d
)};
/*
 * Field modulus given as five 51-bit chunks, lowest first:
 * 0x7ffffffffffed is 2^51 - 19 and 0x7ffffffffffff is 2^51 - 1,
 * which together spell 2^255 - 19.
 */
const gf MODULUS = {FIELD_LITERAL(
0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff
)};
/* TODO put in header */
extern const gf_25519_t decaf_255_ONE;
extern mask_t decaf_255_gf_eq(const gf_25519_t a, const gf_25519_t b);
/* Guarantee: a^2 x = 0 if x = 0; else a^2 x = 1 or SQRT_MINUS_ONE; */
void
gf_isr (
void gf_isr (
gf_25519_t a,
const gf_25519_t x
) {
......
......@@ -142,53 +142,3 @@ void gf_strong_reduce (gf a) {
assert(word_is_zero(carry + scarry));
}
/**
 * Serialize x as 56 bytes, least-significant byte first.
 *
 * A copy of x is passed through gf_strong_reduce. Its 16 limbs are taken in
 * pairs; each pair is combined into one value (second limb shifted up by 28
 * bits) and written out as 7 bytes.
 */
void gf_serialize (uint8_t *serial, const gf x) {
    gf red;
    gf_copy(red, x);
    gf_strong_reduce(red);

    uint8_t *out = serial;
    for (int pair=0; pair<8; pair++) {
        uint64_t chunk = red->limb[2*pair] + (((uint64_t)red->limb[2*pair+1])<<28);
        for (int k=0; k<7; k++) {
            *out++ = chunk;
            chunk >>= 8;
        }
        /* Nothing may remain above the 7 bytes just written. */
        assert(chunk == 0);
    }
}
/**
 * Unpack 56 bytes, least-significant byte first, into the 16 limbs of x.
 *
 * Each group of 7 bytes is assembled into one value and split into two
 * limbs: the low 28 bits, then the bits above them.
 *
 * @return ~word_is_zero(ge ^ mask), where ge is built to equal mask (28 ones)
 *         when the limbs compare >= those of p.
 *         NOTE(review): presumably word_is_zero yields an all-ones mask for
 *         zero, making the result "set" exactly for in-range input - confirm.
 */
mask_t gf_deserialize (gf x, const uint8_t serial[56]) {
int i,j;
for (i=0; i<8; i++) {
uint64_t out = 0;
for (j=0; j<7; j++) {
out |= ((uint64_t)serial[7*i+j])<<(8*j);
}
x->limb[2*i] = out & ((1ull<<28)-1);
x->limb[2*i+1] = out >> 28;
}
/* Check for reduction.
*
* The idea is to create a variable ge which is all ones (rather, 28 ones,
* matching the 28-bit mask below)
* if and only if the low $i$ words of $x$ are >= those of p.
*
* Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
*/
uint32_t ge = -1, mask = (1ull<<28)-1;
for (i=0; i<8; i++) {
ge &= x->limb[i];
}
/* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */
ge = (ge & (x->limb[8] + 1)) | word_is_zero(x->limb[8] ^ mask);
/* Propagate the rest */
for (i=9; i<16; i++) {
ge &= x->limb[i];
}
return ~word_is_zero(ge ^ mask);
}
......@@ -5,6 +5,8 @@
/* Expand one literal into two limb initializers: its low 28 bits, then the bits above them. */
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
/* Build a field-element initializer from eight literals, each split in two by LIMB. */
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}
/* Bit width of limb i: 28 for every index. */
#define LIMB_PLACE_VALUE(i) 28
void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
......
......@@ -834,9 +834,7 @@ void gf_mulw (
c[1] += accum8 >> 28;
}
void gf_strong_reduce (
gf a
) {
void gf_strong_reduce (gf a) {
word_t mask = (1ull<<28)-1;
/* first, clear high */
......@@ -875,59 +873,3 @@ void gf_strong_reduce (
assert(word_is_zero(carry + scarry));
}
/**
 * Serialize x as 56 bytes, least-significant byte first.
 *
 * A copy of x is passed through gf_strong_reduce. Its 16 limbs are taken in
 * pairs; each pair is combined into one value (second limb shifted up by 28
 * bits) and written out as 7 bytes.
 */
void gf_serialize (
    uint8_t *serial,
    const gf x
) {
    gf red;
    gf_copy(red, x);
    gf_strong_reduce(red);

    uint8_t *out = serial;
    for (int pair=0; pair<8; pair++) {
        uint64_t chunk = red->limb[2*pair] + (((uint64_t)red->limb[2*pair+1])<<28);
        for (int k=0; k<7; k++) {
            *out++ = chunk;
            chunk >>= 8;
        }
        /* Nothing may remain above the 7 bytes just written. */
        assert(chunk == 0);
    }
}
mask_t
gf_deserialize (
gf x,
const uint8_t serial[56]
) {
int i,j;
for (i=0; i<8; i++) {
uint64_t out = 0;
for (j=0; j<7; j++) {
out |= ((uint64_t)serial[7*i+j])<<(8*j);
}
x->limb[2*i] = out & ((1ull<<28)-1);
x->limb[2*i+1] = out >> 28;
}
/* Check for reduction.
*
* The idea is to create a variable ge which is all ones (rather, 56 ones)
* if and only if the low $i$ words of $x$ are >= those of p.
*
* Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
*/
uint32_t ge = -1, mask = (1ull<<28)-1;
for (i=0; i<8; i++) {
ge &= x->limb[i];
}
/* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */
ge = (ge & (x->limb[8] + 1)) | word_is_zero(x->limb[8] ^ mask);
/* Propagate the rest */
for (i=9; i<16; i++) {
ge &= x->limb[i];
}
return ~word_is_zero(ge ^ mask);