Commit b2dc216b authored by Mike Hamburg

Finish porting precomputed verify to decaf_fast. Remove tables as a dependency of decaf slow.

parent 616536db
......@@ -70,8 +70,12 @@ LIBCOMPONENTS= build/goldilocks.o build/barrett_field.o build/crandom.o \
build/$(FIELD).o build/ec_point.o build/scalarmul.o build/sha512.o build/magic.o \
build/f_arithmetic.o build/arithmetic.o
DECAFCOMPONENTS= build/$(DECAF).o build/shake.o build/decaf_crypto.o build/decaf_tables.o \
DECAFCOMPONENTS= build/$(DECAF).o build/shake.o build/decaf_crypto.o \
build/$(FIELD).o build/f_arithmetic.o # TODO
ifeq ($(DECAF),decaf_fast)
DECAFCOMPONENTS += build/decaf_tables.o
endif
TESTCOMPONENTS=build/test.o build/test_scalarmul.o build/test_sha512.o \
build/test_pointops.o build/test_arithmetic.o build/test_goldilocks.o build/magic.o \
......
......@@ -89,35 +89,47 @@ const decaf_448_point_t decaf_448_point_base = {{
struct decaf_448_precomputed_s { decaf_448_point_t p[1]; };
/* FIXME: restore */
// const struct decaf_448_precomputed_s *decaf_448_precomputed_base =
// (const struct decaf_448_precomputed_s *)decaf_448_point_base;
extern const decaf_word_t decaf_448_precomputed_base_as_words[];
const decaf_448_precomputed_s *decaf_448_precomputed_base =
(const decaf_448_precomputed_s *) &decaf_448_precomputed_base_as_words;
const struct decaf_448_precomputed_s *decaf_448_precomputed_base =
(const struct decaf_448_precomputed_s *)decaf_448_point_base;
const size_t sizeof_decaf_448_precomputed_s = sizeof(struct decaf_448_precomputed_s);
const size_t alignof_decaf_448_precomputed_s = 32;
#ifdef __clang__
#if 100*__clang_major__ + __clang_minor__ > 305
#define VECTORIZE _Pragma("clang loop unroll(disable) vectorize(enable) vectorize_width(8)")
#endif
#endif
#ifndef VECTORIZE
#define VECTORIZE
#endif
#if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__)) || defined(DECAF_FORCE_UNROLL)
#if DECAF_448_LIMBS==8
#define FOR_LIMB(i,op) { unsigned int i=0; \
#define FOR_LIMB_U(i,op) { unsigned int i=0; \
op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; \
}
#elif DECAF_448_LIMBS==16
#define FOR_LIMB(i,op) { unsigned int i=0; \
#define FOR_LIMB_U(i,op) { unsigned int i=0; \
op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; \
op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; \
}
#else
#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<DECAF_448_LIMBS; i++) { op; }}
#define FOR_LIMB_U(i,op) { unsigned int i=0; for (i=0; i<DECAF_448_LIMBS; i++) { op; }}
#endif
#else
#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<DECAF_448_LIMBS; i++) { op; }}
#define FOR_LIMB_U(i,op) { unsigned int i=0; for (i=0; i<DECAF_448_LIMBS; i++) { op; }}
#endif
#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<DECAF_448_LIMBS; i++) { op; }}
/* TODO: figure out why this horribly degrades speed if you use it */
#define FOR_LIMB_V(i,op) { unsigned int i=0; VECTORIZE for (i=0; i<DECAF_448_LIMBS; i++) { op; }}
/** Copy x = y */
siv gf_cpy(gf x, const gf y) { FOR_LIMB(i, x->limb[i] = y->limb[i]); }
siv gf_cpy(gf x, const gf y) { FOR_LIMB_U(i, x->limb[i] = y->limb[i]); }
/** Mostly-unoptimized multiply (PERF), but at least it's unrolled. */
snv gf_mul (gf c, const gf a, const gf b) {
......@@ -125,19 +137,19 @@ snv gf_mul (gf c, const gf a, const gf b) {
gf_cpy(aa,a);
decaf_dword_t accum[DECAF_448_LIMBS] = {0};
FOR_LIMB(i, {
FOR_LIMB(j,{ accum[(i+j)%DECAF_448_LIMBS] += (decaf_dword_t)b->limb[i] * aa->limb[j]; });
FOR_LIMB_U(i, {
FOR_LIMB_U(j,{ accum[(i+j)%DECAF_448_LIMBS] += (decaf_dword_t)b->limb[i] * aa->limb[j]; });
aa->limb[(DECAF_448_LIMBS-1-i)^(DECAF_448_LIMBS/2)] += aa->limb[DECAF_448_LIMBS-1-i];
});
accum[DECAF_448_LIMBS-1] += accum[DECAF_448_LIMBS-2] >> LBITS;
accum[DECAF_448_LIMBS-2] &= LMASK;
accum[DECAF_448_LIMBS/2] += accum[DECAF_448_LIMBS-1] >> LBITS;
FOR_LIMB(j,{
FOR_LIMB_U(j,{
accum[j] += accum[(j-1)%DECAF_448_LIMBS] >> LBITS;
accum[(j-1)%DECAF_448_LIMBS] &= LMASK;
});
FOR_LIMB(j, c->limb[j] = accum[j] );
FOR_LIMB_U(j, c->limb[j] = accum[j] );
}
/** No dedicated square (PERF) */
......@@ -166,7 +178,7 @@ snv gf_isqrt(gf y, const gf x) {
/** Weak reduce mod p. */
siv gf_reduce(gf x) {
x->limb[DECAF_448_LIMBS/2] += x->limb[DECAF_448_LIMBS-1] >> LBITS;
FOR_LIMB(j,{
FOR_LIMB_U(j,{
x->limb[j] += x->limb[(j-1)%DECAF_448_LIMBS] >> LBITS;
x->limb[(j-1)%DECAF_448_LIMBS] &= LMASK;
});
......@@ -174,19 +186,19 @@ siv gf_reduce(gf x) {
/** Add mod p. Conservatively always weak-reduce. (PERF) */
sv gf_add ( gf x, const gf y, const gf z ) {
FOR_LIMB(i, x->limb[i] = y->limb[i] + z->limb[i] );
FOR_LIMB_U(i, x->limb[i] = y->limb[i] + z->limb[i] );
gf_reduce(x);
}
/** Subtract mod p. Conservatively always weak-reduce. (PERF) */
sv gf_sub ( gf x, const gf y, const gf z ) {
FOR_LIMB(i, x->limb[i] = y->limb[i] - z->limb[i] + 2*P->limb[i] );
FOR_LIMB_U(i, x->limb[i] = y->limb[i] - z->limb[i] + 2*P->limb[i] );
gf_reduce(x);
}
/** Constant time, x = is_z ? z : y */
sv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) {
FOR_LIMB(i, x->limb[i] = (y->limb[i] & ~is_z) | (z->limb[i] & is_z) );
FOR_LIMB_U(i, x->limb[i] = (y->limb[i] & ~is_z) | (z->limb[i] & is_z) );
}
/** Constant time, if (neg) x=-x; */
......@@ -198,7 +210,7 @@ siv cond_neg(gf x, decaf_bool_t neg) {
/** Constant time, if (swap) (x,y) = (y,x); */
sv cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) {
FOR_LIMB(i, {
FOR_LIMB_U(i, {
decaf_word_t s = (x->limb[i] ^ y->limb[i]) & swap;
x->limb[i] ^= s;
y->limb[i] ^= s;
......@@ -388,9 +400,9 @@ decaf_bool_t decaf_448_scalar_eq (
const decaf_448_scalar_t a,
const decaf_448_scalar_t b
) {
int i;
decaf_word_t diff = 0;
unsigned int i;
for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
for(i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
diff |= a->limb[i] ^ b->limb[i];
}
return (((decaf_dword_t)diff)-1)>>WBITS;
......@@ -424,14 +436,14 @@ void decaf_448_point_encode( unsigned char ser[DECAF_448_SER_BYTES], const decaf
cond_neg ( a, hibit(a) );
gf_canon(a);
int i, k=0, bits=0;
int k=0, bits=0;
decaf_dword_t buf=0;
for (i=0; i<DECAF_448_LIMBS; i++) {
FOR_LIMB(i, {
buf |= (decaf_dword_t)a->limb[i]<<bits;
for (bits += LBITS; (bits>=8 || i==DECAF_448_LIMBS-1) && k<DECAF_448_SER_BYTES; bits-=8, buf>>=8) {
ser[k++]=buf;
}
}
});
}
/**
......
......@@ -1073,6 +1073,31 @@ static void gf_batch_invert (
}
}
/**
 * Rescale the a, b and c fields of every table entry by the matching
 * element of zis[] and store each result in canonical form.
 *
 * zis[] is filled by gf_batch_invert(zis, zs, n) before the table is touched
 * (presumably zis[i] = 1/zs[i] -- confirm against gf_batch_invert).
 *
 * @param table  Entries to rescale in place; n of them are processed.
 * @param zs     Input to gf_batch_invert; n elements.
 * @param zis    Output of gf_batch_invert; n elements.
 * @param n      Number of entries. Nothing is rescaled when n <= 0.
 */
static void batch_normalize_niels (
    niels_t *table,
    gf *zs,
    gf *zis,
    int n
) {
    gf scaled;
    niels_t *entry = table;
    gf *inv = zis;
    int remaining;

    gf_batch_invert(zis, zs, n);

    /* Walk the table and the inverse array in lock step.  Each product goes
     * through a temporary so it can be canonicalized before being written
     * back over the field it was computed from. */
    for (remaining = n; remaining > 0; remaining--, entry++, inv++) {
        gf_mul(scaled, (*entry)->a, *inv);
        gf_canon(scaled);
        gf_cpy((*entry)->a, scaled);

        gf_mul(scaled, (*entry)->b, *inv);
        gf_canon(scaled);
        gf_cpy((*entry)->b, scaled);

        gf_mul(scaled, (*entry)->c, *inv);
        gf_canon(scaled);
        gf_cpy((*entry)->c, scaled);
    }
}
void
decaf_448_precompute (
decaf_448_precomputed_s *table,
......@@ -1129,22 +1154,7 @@ decaf_448_precompute (
}
}
gf_batch_invert(zis, zs, n<<(t-1));
gf product;
for (i=0; i<n<<(t-1); i++) {
gf_mul(product, table->table[i]->a, zis[i]);
gf_canon(product);
gf_cpy(table->table[i]->a, product);
gf_mul(product, table->table[i]->b, zis[i]);
gf_canon(product);
gf_cpy(table->table[i]->b, product);
gf_mul(product, table->table[i]->c, zis[i]);
gf_canon(product);
gf_cpy(table->table[i]->c, product);
}
batch_normalize_niels(table->table,zs,zis,n<<(t-1));
}
extern const decaf_448_scalar_t decaf_448_precomputed_scalarmul_adjustment;
......@@ -1396,93 +1406,115 @@ static int recode_wnaf (
sv prepare_wnaf_table(
pniels_t *output,
decaf_448_point_t working,
const decaf_448_point_t working,
unsigned int tbits
) {
decaf_448_point_t tmp;
int i;
pt_to_pniels(output[0], working);
if (tbits == 0) return;
decaf_448_point_double(working,working);
decaf_448_point_double(tmp,working);
pniels_t twop;
pt_to_pniels(twop, working);
pt_to_pniels(twop, tmp);
add_pniels_to_pt(working, output[0],0);
pt_to_pniels(output[1], working);
add_pniels_to_pt(tmp, output[0],0);
pt_to_pniels(output[1], tmp);
for (i=2; i < 1<<tbits; i++) {
add_pniels_to_pt(working, twop,0);
pt_to_pniels(output[i], working);
add_pniels_to_pt(tmp, twop,0);
pt_to_pniels(output[i], tmp);
}
}
/* Word array holding the precomputed wNAF table.  Only declared here; the
 * definition lives in another translation unit. */
extern const decaf_word_t decaf_448_precomputed_wnaf_as_words[];
/* The same storage viewed as an array of niels_t entries. */
static const niels_t *decaf_448_wnaf_base = (const niels_t *)decaf_448_precomputed_wnaf_as_words;
/* Size in bytes of the wNAF table: sizeof(niels_t)<<5, i.e. 1<<5 entries.
 * Published as an object with hidden visibility so that code which cannot
 * see the niels_t definition can still size a buffer for the table. */
const size_t sizeof_decaf_448_precomputed_wnafs __attribute((visibility("hidden"))) = sizeof(niels_t)<<5;
/* Prototype for the wNAF table builder; hidden visibility keeps it out of
 * the shared library's exported symbols. */
void decaf_448_precompute_wnafs (
niels_t out[1<<5],
const decaf_448_point_t base
) __attribute__ ((visibility ("hidden")));
/**
 * Build the 1<<5-entry wNAF table for `base`.
 *
 * prepare_wnaf_table() produces pniels entries, each carrying a niels part
 * (->n) and a z field (->z).  The niels parts are copied to `out`, the z
 * fields are collected, and batch_normalize_niels() then rescales every
 * output entry using those z values.
 *
 * @param out   Receives 1<<5 niels_t entries.
 * @param base  Point the table is built from.
 */
void decaf_448_precompute_wnafs (
    niels_t out[1<<5],
    const decaf_448_point_t base
) {
    // TODO MAGIC
    enum { WNAF_BITS = 5, WNAF_ENTRIES = 1<<WNAF_BITS };

    pniels_t projective[WNAF_ENTRIES];
    gf zs[WNAF_ENTRIES], zis[WNAF_ENTRIES];
    int idx;

    prepare_wnaf_table(projective, base, WNAF_BITS);

    /* Split each pniels entry into its niels part and its z field. */
    idx = 0;
    while (idx < WNAF_ENTRIES) {
        memcpy(out[idx], projective[idx]->n, sizeof(niels_t));
        gf_cpy(zs[idx], projective[idx]->z);
        idx++;
    }

    batch_normalize_niels(out, zs, zis, WNAF_ENTRIES);
}
void decaf_448_base_double_scalarmul_non_secret (
decaf_448_point_t combo,
const decaf_448_scalar_t scalar1,
const decaf_448_point_t base2,
const decaf_448_scalar_t scalar2
) {
int i;
unsigned j,k;
const unsigned int n = 5, t = 5;
const int s = 18; // TODO MAGIC
const int table_bits_var = 3, table_bits_pre = 5; // TODO MAGIC
struct smvt_control control_var[DECAF_448_SCALAR_BITS/(table_bits_var+1)+3];
struct smvt_control control_pre[DECAF_448_SCALAR_BITS/(table_bits_pre+1)+3];
decaf_448_scalar_t scalar1x;
decaf_448_scalar_add(scalar1x, scalar1, decaf_448_precomputed_scalarmul_adjustment);
decaf_448_halve(scalar1x,scalar1x,decaf_448_scalar_p);
decaf_448_point_copy(combo, base2);
const int table_bits = 4; // TODO MAGIC
struct smvt_control control[DECAF_448_SCALAR_BITS/(table_bits+1)+3];
int control_bits = recode_wnaf(control, scalar2, table_bits);
int ncb_pre = recode_wnaf(control_pre, scalar1, table_bits_pre);
int ncb_var = recode_wnaf(control_var, scalar2, table_bits_var);
pniels_t precmp[1<<table_bits];
prepare_wnaf_table(precmp, combo, table_bits);
decaf_448_point_copy(combo, decaf_448_point_identity);
pniels_t precmp_var[1<<table_bits_var];
prepare_wnaf_table(precmp_var, base2, table_bits_var);
int contp=0, contv=0, i = control_var[0].power;
int conti = 0;
for (i = control[0].power; i >= 0; i--) {
if (i < 0) {
decaf_448_point_copy(combo, decaf_448_point_identity);
return;
} else if (i > control_pre[0].power) {
pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
contv++;
} else if (i == control_pre[0].power && i >=0 ) {
pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]);
add_niels_to_pt(combo, decaf_448_wnaf_base[control_pre[0].addend >> 1], i);
contv++; contp++;
} else {
i = control_pre[0].power;
niels_to_pt(combo, decaf_448_wnaf_base[control_pre[0].addend >> 1]);
contp++;
}
for (i--; i >= 0; i--) {
int cv = (i==control_var[contv].power), cp = (i==control_pre[contp].power);
decaf_448_point_double_internal(combo,combo,i && !(cv||cp));
if (i == control[conti].power) {
decaf_448_point_double_internal(combo,combo,0);
assert(control[conti].addend);
if (cv) {
assert(control_var[contv].addend);
if (control[conti].addend > 0) {
add_pniels_to_pt(combo, precmp[control[conti].addend >> 1], i>=s); // TODO PERF: internal
if (control_var[contv].addend > 0) {
add_pniels_to_pt(combo, precmp_var[control_var[contv].addend >> 1], i&&!cp);
} else {
sub_pniels_from_pt(combo, precmp[(-control[conti].addend) >> 1], i>=s); // TODO PERF: internal
sub_pniels_from_pt(combo, precmp_var[(-control_var[contv].addend) >> 1], i&&!cp);
}
conti++;
assert(conti <= control_bits);
} else {
decaf_448_point_double_internal(combo,combo,i>=s);
contv++;
}
if (i < s) {
/* comb component */
for (j=0; j<n; j++) {
int tab = 0;
for (k=0; k<t; k++) {
unsigned int bit = i + s*(k + j*t);
if (bit < SCALAR_WORDS * WBITS) {
tab |= (scalar1x->limb[bit/WBITS] >> (bit%WBITS) & 1) << k;
}
}
decaf_bool_t invert = (tab>>(t-1))-1;
tab ^= invert;
tab &= (1<<(t-1)) - 1;
if (invert) {
sub_niels_from_pt(combo, decaf_448_precomputed_base->table[(j<<(t-1)) + tab], j==n-1 && i);
} else {
add_niels_to_pt(combo, decaf_448_precomputed_base->table[(j<<(t-1)) + tab], j==n-1 && i);
}
if (cp) {
assert(control_pre[contp].addend);
if (control_pre[contp].addend > 0) {
add_niels_to_pt(combo, decaf_448_wnaf_base[control_pre[contp].addend >> 1], i);
} else {
sub_niels_from_pt(combo, decaf_448_wnaf_base[(-control_pre[contp].addend) >> 1], i);
}
contp++;
}
}
assert(contv == ncb_var); (void)ncb_var;
assert(contp == ncb_pre); (void)ncb_pre;
}
......@@ -18,6 +18,15 @@ const decaf_word_t decaf_448_precomputed_base_as_words[1];
const decaf_448_scalar_t decaf_448_precomputed_scalarmul_adjustment;
const decaf_448_scalar_t decaf_448_point_scalarmul_adjustment;
/* Forward declaration only: the generator never looks inside a niels entry. */
struct niels_s;
/* Placeholder definition so the generator links against the library object
 * before the real table exists.  The library declares this symbol as an
 * array (`extern const decaf_word_t ...[]`), so the placeholder must be an
 * array as well; a pointer object here would give the same external symbol
 * two incompatible types across translation units.  It is zero-initialized
 * like the other stub definitions in this file. */
const decaf_word_t decaf_448_precomputed_wnaf_as_words[1];
/* Byte size of the wNAF table, defined by the library. */
extern const size_t sizeof_decaf_448_precomputed_wnafs;
/* Library routine that fills `out` with the wNAF table for `base`.
 * NOTE(review): the library prototype spells the first parameter as
 * `niels_t out[1<<5]`; confirm that this is compatible with
 * `struct niels_s *` for the niels_t typedef in use. */
void decaf_448_precompute_wnafs (
struct niels_s *out,
const decaf_448_point_t base
);
static void scalar_print(const char *name, const decaf_448_scalar_t sc) {
printf("const decaf_448_scalar_t %s = {{{\n", name);
unsigned i;
......@@ -36,6 +45,11 @@ int main(int argc, char **argv) {
if (ret || !pre) return 1;
decaf_448_precompute(pre, decaf_448_point_base);
struct niels_s *preWnaf;
ret = posix_memalign((void**)&preWnaf, alignof_decaf_448_precomputed_s, sizeof_decaf_448_precomputed_wnafs);
if (ret || !preWnaf) return 1;
decaf_448_precompute_wnafs(preWnaf, decaf_448_point_base);
const decaf_word_t *output = (const decaf_word_t *)pre;
unsigned i;
......@@ -43,7 +57,7 @@ int main(int argc, char **argv) {
printf("#include \"decaf.h\"\n\n");
printf("const decaf_word_t decaf_448_precomputed_base_as_words[%d]\n",
(int)(sizeof_decaf_448_precomputed_s / sizeof(decaf_word_t)));
printf("__attribute__((aligned(%d))) = {\n ", (int)alignof_decaf_448_precomputed_s);
printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)alignof_decaf_448_precomputed_s);
for (i=0; i < sizeof_decaf_448_precomputed_s; i+=sizeof(decaf_word_t)) {
if (i && (i%8==0)) printf(",\n ");
......@@ -53,6 +67,18 @@ int main(int argc, char **argv) {
}
printf("\n};\n");
output = (const decaf_word_t *)preWnaf;
printf("const decaf_word_t decaf_448_precomputed_wnaf_as_words[%d]\n",
(int)(sizeof_decaf_448_precomputed_wnafs / sizeof(decaf_word_t)));
printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)alignof_decaf_448_precomputed_s);
for (i=0; i < sizeof_decaf_448_precomputed_wnafs; i+=sizeof(decaf_word_t)) {
if (i && (i%8==0)) printf(",\n ");
else if (i) printf(", ");
printf("0x%0*llxull", (int)sizeof(decaf_word_t)*2, (unsigned long long)*output );
output++;
}
printf("\n};\n");
decaf_448_scalar_t smadj;
decaf_448_scalar_copy(smadj,decaf_448_scalar_one);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment