Commit a585d7f1 authored by Michael Hamburg's avatar Michael Hamburg

factor out elligator, decaf, scalar code. scalar_halve is now public

parent 55766b76
......@@ -165,7 +165,8 @@ endef
################################################################
define define_curve
LIBCOMPONENTS += $$(BUILD_OBJ)/$(1)/decaf.o $$(BUILD_OBJ)/$(1)/crypto.o $$(BUILD_OBJ)/$(1)/decaf_tables.o
LIBCOMPONENTS += $$(BUILD_OBJ)/$(1)/decaf.o $$(BUILD_OBJ)/$(1)/elligator.o $$(BUILD_OBJ)/$(1)/scalar.o \
$$(BUILD_OBJ)/$(1)/crypto.o $$(BUILD_OBJ)/$(1)/decaf_tables.o
PER_OBJ_DIRS += $$(BUILD_OBJ)/$(1)
GLOBAL_HEADERS_OF_$(1) = $(BUILD_INC)/decaf/decaf_$(3).h $(BUILD_INC)/decaf/decaf_$(3).hxx \
$(BUILD_INC)/decaf/crypto_$(3).h $(BUILD_INC)/decaf/crypto_$(3).hxx
......@@ -181,11 +182,17 @@ $$(BUILD_H)/$(1)/%.h: src/per_curve/%.tmpl.h src/gen_headers/* $$(HEADERS_OF_$(2
$$(BUILD_INC)/decaf/decaf_$(3).%: src/per_curve/decaf.tmpl.% src/gen_headers/* $$(HEADERS_OF_$(2))
python -B src/gen_headers/template.py --per=curve --item=$(1) --guard=$$(@:$(BUILD_INC)/%=%) -o $$@ $$<
$$(BUILD_INC)/decaf/elligator_$(3).%: src/per_curve/elligator.tmpl.% src/gen_headers/* $$(HEADERS_OF_$(2))
python -B src/gen_headers/template.py --per=curve --item=$(1) --guard=$$(@:$(BUILD_INC)/%=%) -o $$@ $$<
$$(BUILD_INC)/decaf/scalar_$(3).%: src/per_curve/scalar.tmpl.% src/gen_headers/* $$(HEADERS_OF_$(2))
python -B src/gen_headers/template.py --per=curve --item=$(1) --guard=$$(@:$(BUILD_INC)/%=%) -o $$@ $$<
$$(BUILD_INC)/decaf/crypto_$(3).%: src/per_curve/crypto.tmpl.% src/gen_headers/* $$(HEADERS_OF_$(2))
python -B src/gen_headers/template.py --per=curve --item=$(1) --guard=$$(@:$(BUILD_INC)/%=%) -o $$@ $$<
$$(BUILD_IBIN)/decaf_gen_tables_$(1): $$(BUILD_OBJ)/$(1)/decaf_gen_tables.o \
$$(BUILD_OBJ)/$(1)/decaf.o $$(BUILD_OBJ)/utils.o \
$$(BUILD_OBJ)/$(1)/decaf.o $$(BUILD_OBJ)/$(1)/scalar.o $$(BUILD_OBJ)/utils.o \
$$(COMPONENTS_OF_$(2))
$$(LD) $$(LDFLAGS) -o $$@ $$^
......
......@@ -15,8 +15,7 @@
#include <string.h>
/** Square x, n times. */
static INLINE UNUSED void
gf_sqrn (
static INLINE UNUSED void gf_sqrn (
gf_s *__restrict__ y,
const gf x,
int n
......@@ -58,5 +57,44 @@ static inline void gf_subx_nr ( gf c, const gf a, const gf b, int amt ) {
if (sizeof(word_t)==4) gf_weak_reduce(c); // HACK PERF MAGIC
}
/**
 * Multiply a field element by a small signed integer: c = a * w.
 *
 * Not constant-time with respect to the sign of w: the branch taken depends
 * on whether w is positive.  For w <= 0 the magnitude is multiplied in and
 * the product is then negated by subtracting it from ZERO.
 */
static inline void gf_mulw(gf c, const gf a, int32_t w) {
    if (w > 0) {
        gf_mulw_unsigned(c, a, (uint32_t)w);
    } else {
        /* Take the magnitude in unsigned arithmetic: evaluating -w on an
         * int32_t is undefined behaviour when w == INT32_MIN. */
        gf_mulw_unsigned(c, a, (uint32_t)0 - (uint32_t)w);
        gf_sub(c, ZERO, c);
    }
}
/**
 * Constant-time select between two field elements:
 * x receives z when is_z is set, and y otherwise.
 */
static inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z) {
    constant_time_select(x, y, z, sizeof(gf), is_z, 0);
}
/**
 * Constant-time conditional negation: x becomes -x when neg is set and is
 * left unchanged otherwise.
 */
static inline void gf_cond_neg(gf x, mask_t neg) {
    gf negated;
    gf_sub(negated, ZERO, x);          /* negated = 0 - x */
    gf_cond_sel(x, x, negated, neg);   /* keep x, or take the negation */
}
/**
 * Constant-time conditional swap: exchanges the contents of x and y when
 * swap is set, and leaves both untouched otherwise.
 */
static inline void gf_cond_swap(gf x, gf_s *__restrict__ y, mask_t swap) {
    constant_time_cond_swap(x, y, sizeof(gf_s), swap);
}
/**
 * Multiply x by a fixed constant selected by P_MOD_8 and store it in out:
 * -1 when p = 3 or 7 (mod 8), SQRT_MINUS_ONE when p = 5 (mod 8).
 * Going by the function name, that constant is presumably a quadratic
 * non-residue of the field.  Any other residue class is a compile error.
 */
static INLINE void gf_mul_qnr(gf_s *__restrict__ out, gf x) {
#if P_MOD_8 == 3 || P_MOD_8 == 7
    /* out = -x */
    gf_sub(out, ZERO, x);
#elif P_MOD_8 == 5
    /* out = x * sqrt(-1) */
    gf_mul(out, x, SQRT_MINUS_ONE);
#else
#error "Only supporting p=3,5,7 mod 8"
#endif
}
#endif // __GF_H__
......@@ -7,7 +7,10 @@
/* for posix_memalign */
#define _XOPEN_SOURCE 600
#define __STDC_WANT_LIB_EXT1__ 1 /* for memset_s */
#include <string.h>
#include <assert.h>
#include <stdint.h>
#include "arch_intrinsics.h"
......@@ -240,4 +243,36 @@ malloc_vector(size_t size) {
#define UNROLL
#endif
/* The plan on booleans:
*
* The external interface uses decaf_bool_t, but this might be a different
* size than our particular arch's word_t (and thus mask_t). Also, the caller
* isn't guaranteed to pass "true" as all-ones: any nonzero value counts as
* true. So bool_to_mask converts word sizes and checks for nonzero.
*
* On the flip side, mask_t is always -1 or 0, but it might be a different size
* than decaf_bool_t.
*
* On the third hand, we have success vs boolean types, but that's handled in
* common.h: it converts between decaf_bool_t and decaf_error_t.
*/
/**
 * Convert an internal mask_t into the external decaf_bool_t.
 * The value is routed through the signed types sword_t and decaf_sword_t,
 * presumably so that an all-ones mask stays all-ones if decaf_bool_t is wider.
 */
static INLINE decaf_bool_t mask_to_bool (mask_t m) {
    sword_t as_signed = (sword_t)m;
    return (decaf_sword_t)as_signed;
}
/**
 * Convert an external decaf_bool_t into an internal mask_t.
 * The boolean is examined one word-sized chunk at a time; each chunk
 * contributes ~word_is_zero(chunk), and the contributions are OR-ed together.
 */
static INLINE mask_t bool_to_mask (decaf_bool_t m) {
    /* On most arches this will be optimized to a simple cast. */
    unsigned int nchunks = sizeof(decaf_bool_t)/sizeof(mask_t);
    if (nchunks == 0) nchunks = 1;   /* always look at least at the low chunk */

    mask_t result = 0;
    unsigned int idx = 0;
    while (idx < nchunks) {
        result |= ~ word_is_zero(m >> (idx*8*sizeof(word_t)));
        idx++;
    }
    return result;
}
/**
 * Explicitly discard a boolean result.  Call sites use this to show that a
 * returned status is being dropped on purpose.
 */
static INLINE void ignore_result (decaf_bool_t boo) {
    (void) boo;
}
#endif /* __WORD_H__ */
......@@ -51,7 +51,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
c[1] += accum;
}
void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint32_t *a = as->limb, maske = ((1<<26)-1), masko = ((1<<25)-1);
uint32_t blo = b & maske, bhi = b>>26, bhi2 = 2*bhi;
uint32_t *c = cs->limb;
......
......@@ -34,7 +34,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
c[1] += accum;
}
void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint64_t *a = as->limb, mask = ((1ull<<51)-1);
int i;
......
......@@ -140,7 +140,7 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
c[1] = c1 + (accum1>>51);
}
void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint64_t *a = as->limb, mask = ((1ull<<51)-1);
uint64_t *c = cs->limb;
......
......@@ -60,7 +60,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
c[1] += ((uint32_t)(accum1));
}
void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
assert(b<1<<28);
const uint32_t *a = as->limb;
......
......@@ -721,7 +721,7 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
c[1] += ((uint32_t)(accum1));
}
void gf_mulw (
void gf_mulw_unsigned (
gf_s *__restrict__ cs,
const gf as,
uint32_t b
......
......@@ -549,7 +549,7 @@ void gf_sqr (gf_s *__restrict__ cs, const gf bs) {
);
}
void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
uint32x2_t vmask = {(1<<28) - 1, (1<<28)-1};
assert(b<(1<<28));
......
......@@ -165,7 +165,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
c[1] += ((uint64_t)(accum1));
}
void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;
......
......@@ -139,7 +139,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
c[0] += ((uint64_t)(accum1));
}
void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;
......
This diff is collapsed.
......@@ -167,6 +167,16 @@ void $(c_ns)_scalar_mul (
const $(c_ns)_scalar_t a,
const $(c_ns)_scalar_t b
) API_VIS NONNULL3 NOINLINE;
/**
* @brief Halve a scalar. The input and output scalars may use the same memory.
* @param [out] out a/2.
* @param [in] a A scalar.
*/
void $(c_ns)_scalar_halve (
$(c_ns)_scalar_t out,
const $(c_ns)_scalar_t a
) API_VIS NONNULL2 NOINLINE;
/**
* @brief Invert a scalar. When passed zero, return 0. The input and output may alias.
......
......@@ -193,6 +193,9 @@ public:
/** Divide by inverting q. If q == 0, return 0. */
inline Scalar &operator/=(const Scalar &q) throw(CryptoException) { return *this *= q.inverse(); }
/** Return half this scalar, computed by the C scalar_halve routine into a fresh Scalar. Much faster than /2. */
inline Scalar half() const { Scalar out; $(c_ns)_scalar_halve(out.s,s); return out; }
/** Compare in constant time */
inline bool operator!=(const Scalar &q) const NOEXCEPT { return !(*this == q); }
......
/** @brief Elligator high-level functions. */
#include "word.h"
#include "field.h"
#include <decaf.h>
/* Template stuff */
#define API_NS(_id) $(c_ns)_##_id
#define point_t API_NS(point_t)
#define IMAGINE_TWIST $(imagine_twist)
#define COFACTOR $(cofactor)
static const int EDWARDS_D = $(d);
/* End of template stuff */
/*
 * Declared here, defined in another translation unit.
 * As used by invert_elligator_nonuniform in this file: given the point p and
 * three selector masks, it fills in s and minus_t_over_s (the caller treats
 * them as s and -t/s).
 * NOTE(review): the exact meaning of each toggle mask is defined with the
 * implementation; confirm there.
 */
extern void API_NS(deisogenize) (
gf_s *__restrict__ s,
gf_s *__restrict__ minus_t_over_s,
const point_t p,
mask_t toggle_hibit_s,
mask_t toggle_hibit_t_over_s,
mask_t toggle_rotation
);
/*
 * Map SER_BYTES of input to a point p.
 *
 * The input is deserialized into a field element r0 (the deserialization
 * status is deliberately ignored), then a fixed sequence of field operations
 * produces the four coordinates p->x, p->y, p->z, p->t.  The temporaries
 * a, b, c, e are reused for several different quantities, so the statement
 * order below matters.
 */
void API_NS(point_from_hash_nonuniform) (
point_t p,
const unsigned char ser[SER_BYTES]
) {
gf r0,r,a,b,c,N,e;
/* r0 = input as a field element; failure of the decode is ignored on purpose */
ignore_result(gf_deserialize(r0,ser,0));
gf_strong_reduce(r0);
/* r = qnr * r0^2 */
gf_sqr(a,r0);
gf_mul_qnr(r,a);
/* Compute D@c := (dr+a-d)(dr-ar-d) with a=1 */
gf_sub(a,r,ONE);
gf_mulw(b,a,EDWARDS_D); /* dr-d */
gf_add(a,b,ONE);
gf_sub(b,b,r);
gf_mul(c,a,b);
/* compute N := (r+1)(a-2d) */
gf_add(a,r,ONE);
gf_mulw(N,a,1-2*EDWARDS_D);
/* e = +-sqrt(1/ND) or +-r0 * sqrt(qnr/ND) */
gf_mul(a,c,N);
/* b receives the output of gf_isr on N*D; square records its returned mask */
mask_t square = gf_isr(b,a);
gf_cond_sel(c,r0,ONE,square); /* r? = square ? 1 : r0 */
gf_mul(e,b,c);
/* s@a = +-|N.e| */
gf_mul(a,N,e);
gf_cond_neg(a,gf_hibit(a)^square); /* NB this is - what is listed in the paper */
/* t@b = -+ cN(r-1)((a-2d)e)^2 - 1 */
gf_mulw(c,e,1-2*EDWARDS_D); /* (a-2d)e */
gf_sqr(b,c);
gf_sub(e,r,ONE);
gf_mul(c,b,e);
gf_mul(b,c,N);
gf_cond_neg(b,square);
gf_sub(b,b,ONE);
/* isogenize */
#if IMAGINE_TWIST
/* on this configuration s is first multiplied by sqrt(-1) */
gf_mul(c,a,SQRT_MINUS_ONE);
gf_copy(a,c);
#endif
gf_sqr(c,a); /* s^2 */
gf_add(a,a,a); /* 2s */
gf_add(e,c,ONE);
gf_mul(p->t,a,e); /* 2s(1+s^2) */
gf_mul(p->x,a,b); /* 2st */
gf_sub(a,ONE,c);
gf_mul(p->y,e,a); /* (1+s^2)(1-s^2) */
gf_mul(p->z,a,b); /* (1-s^2)t */
/* debug-only sanity check on the produced point */
assert(API_NS(point_valid)(p));
}
/*
 * Map 2*SER_BYTES of input to a point: each SER_BYTES half is mapped
 * independently with point_from_hash_nonuniform, and the two resulting
 * points are added into pt.
 */
void API_NS(point_from_hash_uniform) (
    point_t pt,
    const unsigned char hashed_data[2*SER_BYTES]
) {
    point_t second;
    const unsigned char *upper_half = hashed_data + SER_BYTES;

    API_NS(point_from_hash_nonuniform)(pt, hashed_data);
    API_NS(point_from_hash_nonuniform)(second, upper_half);
    API_NS(point_add)(pt, pt, second);
}
/*
 * Attempt to recover a SER_BYTES input that point_from_hash_nonuniform
 * would map to p.
 *
 * The low four bits of hint_ are each expanded into an all-ones/zero mask and
 * select among candidate preimages.  recovered_hash is always written; the
 * return value (decaf_succeed_if of the accumulated succ mask) tells the
 * caller whether the written bytes are a valid preimage for this hint.
 */
decaf_error_t
API_NS(invert_elligator_nonuniform) (
unsigned char recovered_hash[SER_BYTES],
const point_t p,
uint16_t hint_
) {
mask_t hint = hint_;
/* expand hint bits 0..3 into full masks (0 or all-ones) */
mask_t sgn_s = -(hint & 1),
sgn_t_over_s = -(hint>>1 & 1),
sgn_r0 = -(hint>>2 & 1), /* FIXME: but it's SER_BYTES ... */
sgn_ed_T = -(hint>>3 & 1);
gf a, b, c, d;
API_NS(deisogenize)(a,c,p,sgn_s,sgn_t_over_s,sgn_ed_T);
/* ok, a = s; c = -t/s */
gf_mul(b,c,a);
gf_sub(b,ONE,b); /* t+1 */
gf_sqr(c,a); /* s^2 */
mask_t is_identity = gf_eq(p->t,ZERO);
/* identity adjustments */
/* in case of identity, currently c=0, t=0, b=1, will encode to 1 */
/* if hint is 0, -> 0 */
/* if hint is to neg t/s, then go to infinity, effectively set s to 1 */
gf_cond_sel(c,c,ONE,is_identity & sgn_t_over_s);
gf_cond_sel(b,b,ZERO,is_identity & ~sgn_t_over_s & ~sgn_s);
gf_mulw(d,c,2*EDWARDS_D-1); /* $d = (2d-a)s^2 */
gf_add(a,b,d); /* num? */
gf_sub(d,d,b); /* den? */
gf_mul(b,a,d); /* n*d */
gf_cond_sel(a,d,a,sgn_s);
gf_mul_qnr(d,b);
/* success requires gf_isr to report success on d, or d to be zero */
mask_t succ = gf_isr(c,d)|gf_eq(d,ZERO);
gf_mul(b,a,c);
/* pick the sign of the result according to the sgn_r0 hint bit */
gf_cond_neg(b, sgn_r0^gf_hibit(b));
/* a zero result is rejected when sgn_r0 is set */
succ &= ~(gf_eq(b,ZERO) & sgn_r0);
#if COFACTOR == 8
succ &= ~(is_identity & sgn_ed_T); /* NB: there are no preimages of rotated identity. */
#endif
gf_serialize(recovered_hash,b,1); /* FIXME: ,0 */
/* TODO: deal with overflow flag */
return decaf_succeed_if(mask_to_bool(succ));
}
/*
 * Partial inverse of point_from_hash_uniform.
 *
 * The second SER_BYTES of partial_hash are taken as given and mapped to a
 * point; that point is subtracted from p, and the difference is inverted
 * with invert_elligator_nonuniform into the first SER_BYTES of partial_hash.
 * Returns whatever that inner inversion returns.
 */
decaf_error_t
API_NS(invert_elligator_uniform) (
    unsigned char partial_hash[2*SER_BYTES],
    const point_t p,
    uint16_t hint
) {
    point_t remainder;
    const unsigned char *fixed_half = partial_hash + SER_BYTES;

    API_NS(point_from_hash_nonuniform)(remainder, fixed_half);
    API_NS(point_sub)(remainder, p, remainder);
    return API_NS(invert_elligator_nonuniform)(partial_hash, remainder, hint);
}
/** @brief Scalar arithmetic high-level functions. */
#include "word.h"
#include "constant_time.h"
#include <decaf.h>
/* Template stuff */
#define API_NS(_id) $(c_ns)_##_id
#define SCALAR_BITS $(C_NS)_SCALAR_BITS
#define SCALAR_SER_BYTES $(C_NS)_SCALAR_BYTES
#define SCALAR_LIMBS $(C_NS)_SCALAR_LIMBS
#define scalar_t API_NS(scalar_t)
/* Generated as (-q)^(2^64-1) mod 2^64 and truncated to decaf_word_t.
 * sc_montmul multiplies the low accumulator word by this to choose the
 * multiple of sc_p it adds in each reduction step. */
static const decaf_word_t MONTGOMERY_FACTOR = (decaf_word_t)0x$("%x" % pow(-q,2**64-1,2**64))ull;
/* sc_p:  the modulus q, emitted as 64-bit chunks through SC_LIMB.
 * sc_r2: (2^128)^((scalar_bits+63)/64) mod q, i.e. the square of the
 *        64-bit-word radix modulo q; used as the second operand of
 *        sc_montmul when converting into or out of Montgomery form. */
static const scalar_t sc_p = {{{
$(ser(q,64,"SC_LIMB"))
}}}, sc_r2 = {{{
$(ser(((2**128)**((scalar_bits+63)/64))%q,64,"SC_LIMB"))
}}};
/* End of template stuff */
#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */
const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}};
/**
 * out = {extra,accum} - sub, followed by a conditional add-back of p.
 *
 * The first pass subtracts limb by limb with a signed running borrow.  The
 * final borrow plus the extra high word then forms a mask (0 or all-ones),
 * and the second pass adds p under that mask.
 * Must have extra <= 1.
 */
static NOINLINE void sc_subx(
    scalar_t out,
    const decaf_word_t accum[SCALAR_LIMBS],
    const scalar_t sub,
    const scalar_t p,
    decaf_word_t extra
) {
    unsigned int k;
    decaf_dsword_t carry = 0;

    /* Pass 1: out = accum - sub, propagating the borrow upward. */
    for (k = 0; k < SCALAR_LIMBS; k++) {
        carry = (carry + accum[k]) - sub->limb[k];
        out->limb[k] = carry;
        carry >>= WBITS;
    }

    decaf_word_t add_back = carry+extra; /* = 0 or -1 */

    /* Pass 2: out += p & add_back. */
    carry = 0;
    for (k = 0; k < SCALAR_LIMBS; k++) {
        carry = (carry + out->limb[k]) + (p->limb[k] & add_back);
        out->limb[k] = carry;
        carry >>= WBITS;
    }
}
/*
 * Montgomery-style multiply of a and b modulo sc_p, result in out.
 *
 * Works one limb of a at a time: add a->limb[i] * b into the accumulator,
 * then add the multiple of sc_p selected by MONTGOMERY_FACTOR and shift the
 * accumulator down by one word.  A final sc_subx subtracts sc_p with a
 * conditional add-back.  The accumulator is a local array, and out is only
 * written by that final sc_subx call.
 */
static NOINLINE void sc_montmul (
scalar_t out,
const scalar_t a,
const scalar_t b
) {
unsigned int i,j;
/* one extra word above SCALAR_LIMBS to hold the running top carry */
decaf_word_t accum[SCALAR_LIMBS+1] = {0};
/* carry out of the top accumulator word from the previous iteration */
decaf_word_t hi_carry = 0;
for (i=0; i<SCALAR_LIMBS; i++) {
decaf_word_t mand = a->limb[i];
const decaf_word_t *mier = b->limb;
/* accum += a->limb[i] * b */
decaf_dword_t chain = 0;
for (j=0; j<SCALAR_LIMBS; j++) {
chain += ((decaf_dword_t)mand)*mier[j] + accum[j];
accum[j] = chain;
chain >>= WBITS;
}
accum[j] = chain;
/* accum = (accum + mand * sc_p) shifted down one word: the j==0 result
 * word is discarded and every other word is stored one slot lower */
mand = accum[0] * MONTGOMERY_FACTOR;
chain = 0;
mier = sc_p->limb;
for (j=0; j<SCALAR_LIMBS; j++) {
chain += (decaf_dword_t)mand*mier[j] + accum[j];
if (j) accum[j-1] = chain;
chain >>= WBITS;
}
/* fold in the old top word and the carry saved from the last iteration */
chain += accum[j];
chain += hi_carry;
accum[j-1] = chain;
hi_carry = chain >> WBITS;
}
/* out = {hi_carry,accum} - sc_p, with sc_p added back under sc_subx's borrow mask */
sc_subx(out, accum, sc_p, sc_p, hi_carry);
}
/*
 * out = a * b.
 * Implemented as two Montgomery multiplications: first a by b, then the
 * intermediate result by sc_r2.
 */
void API_NS(scalar_mul) (
    scalar_t out,
    const scalar_t a,
    const scalar_t b
) {
    sc_montmul(out, a, b);        /* Montgomery product of the inputs */
    sc_montmul(out, out, sc_r2);  /* second pass against sc_r2 */
}
/*
 * Montgomery squaring, currently just sc_montmul of a with itself.
 * PERF: could implement this
 */
static INLINE void sc_montsqr (scalar_t out, const scalar_t a) {
    sc_montmul(out, a, a);
}
/*
 * Invert a scalar by exponentiation (see the Fermat comment below).
 *
 * The exponent bits are read from sc_p with 2 subtracted from its lowest
 * limb.  out is not written until after the precomputed table has been built
 * from a.  Returns decaf_succeed_if(~scalar_eq(out, scalar_zero)), i.e. the
 * call reports failure when the computed result equals zero.
 */
decaf_error_t API_NS(scalar_invert) (
scalar_t out,
const scalar_t a
) {
/* Fermat's little theorem, sliding window.
* Sliding window is fine here because the modulus isn't secret.
*/
const int SCALAR_WINDOW_BITS = 3;
scalar_t precmp[1<<SCALAR_WINDOW_BITS];
const int LAST = (1<<SCALAR_WINDOW_BITS)-1;
/* Precompute precmp = [a^1,a^3,...] */
sc_montmul(precmp[0],a,sc_r2);
/* precmp[LAST] temporarily holds the square of precmp[0]; it is the step
 * multiplier for the loop below and is overwritten on the final iteration */
if (LAST > 0) sc_montmul(precmp[LAST],precmp[0],precmp[0]);
int i;
for (i=1; i<=LAST; i++) {
sc_montmul(precmp[i],precmp[i-1],precmp[LAST]);
}
/* Sliding window */
unsigned residue = 0, trailing = 0, started = 0;
/* runs SCALAR_WINDOW_BITS steps past bit 0 (feeding zero bits) to flush the window */
for (i=SCALAR_BITS-1; i>=-SCALAR_WINDOW_BITS; i--) {
if (started) sc_montsqr(out,out);
decaf_word_t w = (i>=0) ? sc_p->limb[i/WBITS] : 0;
/* within the lowest limb, read bits of (limb - 2) instead of the limb itself */
if (i >= 0 && i<WBITS) {
assert(w >= 2);
w-=2;
}
residue = (residue<<1) | ((w>>(i%WBITS))&1);
/* window full: hand it over to trailing and start collecting a new one */
if (residue>>SCALAR_WINDOW_BITS != 0) {
assert(trailing == 0);
trailing = residue;
residue = 0;
}
/* once the low SCALAR_WINDOW_BITS bits of trailing are clear, multiply in
 * (or, the first time, copy) the table entry indexed by its remaining bits */
if (trailing > 0 && (trailing & ((1<<SCALAR_WINDOW_BITS)-1)) == 0) {
if (started) {
sc_montmul(out,out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
} else {
API_NS(scalar_copy)(out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
started = 1;
}
trailing = 0;
}
trailing <<= 1;
}
assert(residue==0);
assert(trailing==0);
/* Demontgomerize */
sc_montmul(out,out,API_NS(scalar_one));
/* wipe the table of powers of the input */
decaf_bzero(precmp, sizeof(precmp));
return decaf_succeed_if(~API_NS(scalar_eq)(out,API_NS(scalar_zero)));
}
/*
 * out = a - b.
 * Delegates to sc_subx with no extra high word; sc_subx adds sc_p back when
 * the subtraction borrows.
 */
void API_NS(scalar_sub) (
    scalar_t out,
    const scalar_t a,
    const scalar_t b
) {
    sc_subx(out, a->limb, b, sc_p, 0);
}
/*
 * out = a + b.
 * Adds limb by limb with a double-word carry, then passes the sum and its
 * final carry to sc_subx, which subtracts sc_p and conditionally adds it back.
 */
void API_NS(scalar_add) (
    scalar_t out,
    const scalar_t a,
    const scalar_t b
) {
    decaf_dword_t carry = 0;
    unsigned int idx = 0;

    while (idx < SCALAR_LIMBS) {
        carry = (carry + a->limb[idx]) + b->limb[idx];
        out->limb[idx] = carry;
        carry >>= WBITS;
        idx++;
    }

    /* carry is now the word above the top limb of the raw sum */
    sc_subx(out, out->limb, sc_p, sc_p, carry);
}
/*
 * Set out to the 64-bit unsigned value w.
 * The scalar is zeroed first, then w is written into as many low limbs as
 * one uint64_t spans.
 */
void
API_NS(scalar_set_unsigned) (
    scalar_t out,
    uint64_t w
) {
    const unsigned int words_per_u64 = sizeof(uint64_t)/sizeof(decaf_word_t);
    unsigned int idx;

    memset(out,0,sizeof(scalar_t));

    for (idx = 0; idx < words_per_u64; idx++) {
        out->limb[idx] = w;
        /* Shift by a full word only when a word is narrower than 64 bits;
         * otherwise shift by 0 so the shift count never reaches the width. */
        w >>= (sizeof(uint64_t) > sizeof(decaf_word_t)) ? 8*sizeof(decaf_word_t) : 0;
    }
}
/*
 * Compare two scalars limb by limb without early exit.
 * The XOR of every limb pair is OR-ed into one word; the result is
 * word_is_zero of that word, converted with mask_to_bool.
 */
decaf_bool_t
API_NS(scalar_eq) (
    const scalar_t a,
    const scalar_t b
) {
    decaf_word_t mismatch = 0;
    unsigned int idx = 0;

    while (idx < SCALAR_LIMBS) {
        mismatch |= a->limb[idx] ^ b->limb[idx];
        idx++;
    }

    return mask_to_bool(word_is_zero(mismatch));
}
/*
 * Load up to nbytes little-endian bytes from ser into the limbs of s.
 * Every limb of s is written; limbs (or limb bytes) beyond nbytes are zero.
 * No modular reduction is performed here.
 */
static INLINE void scalar_decode_short (
    scalar_t s,
    const unsigned char *ser,
    unsigned int nbytes
) {
    unsigned int limb_idx, byte_idx;
    unsigned int pos = 0;   /* next input byte to consume */

    for (limb_idx = 0; limb_idx < SCALAR_LIMBS; limb_idx++) {
        decaf_word_t word = 0;
        for (byte_idx = 0; byte_idx < sizeof(decaf_word_t) && pos < nbytes; byte_idx++, pos++) {
            word |= ((decaf_word_t)ser[pos]) << (8*byte_idx);
        }
        s->limb[limb_idx] = word;
    }
}
/*
 * Decode SCALAR_SER_BYTES bytes into s.
 *
 * The raw value is compared against sc_p by running a borrow chain of
 * (s - sc_p) across the limbs.  s is then reduced by multiplying with
 * scalar_one.  The return value is decaf_succeed_if of "the final borrow
 * is nonzero".
 */
decaf_error_t API_NS(scalar_decode)(
    scalar_t s,
    const unsigned char ser[SCALAR_SER_BYTES]
) {
    unsigned int idx;
    decaf_dsword_t borrow = 0;

    scalar_decode_short(s, ser, SCALAR_SER_BYTES);

    for (idx = 0; idx < SCALAR_LIMBS; idx++) {
        borrow = (borrow + s->limb[idx] - sc_p->limb[idx]) >> WBITS;
    }
    /* Here borrow == 0 or -1 */

    API_NS(scalar_mul)(s,s,API_NS(scalar_one)); /* ham-handed reduce */

    return decaf_succeed_if(~word_is_zero(borrow));
}
/* Overwrite the whole scalar with zeros via decaf_bzero. */
void API_NS(scalar_destroy) (
    scalar_t scalar
) {
    decaf_bzero(scalar, sizeof(scalar_t));
}
/*
 * Decode a little-endian byte string of arbitrary length into a scalar.
 *
 * An empty input yields zero.  Otherwise the input is consumed in chunks of
 * SCALAR_SER_BYTES starting from the most significant (possibly short)
 * chunk: the accumulator is multiplied up with sc_montmul against sc_r2 and
 * the next lower chunk is added, until offset 0 has been processed.
 * Temporaries are wiped before returning.
 *
 * NOTE(review): the multiply-up step scales by the Montgomery radix, so the
 * multi-chunk path appears to rely on SCALAR_SER_BYTES == sizeof(scalar_t);
 * confirm for any curve where those differ.
 */
void API_NS(scalar_decode_long)(
    scalar_t s,
    const unsigned char *ser,
    size_t ser_len
) {
    if (ser_len == 0) {
        API_NS(scalar_copy)(s, API_NS(scalar_zero));
        return;
    }

    size_t i;
    scalar_t t1, t2;

    /* i = offset of the top chunk: the trailing partial chunk if there is
     * one, otherwise the last full chunk. */
    i = ser_len - (ser_len%SCALAR_SER_BYTES);
    if (i==ser_len) i -= SCALAR_SER_BYTES;

    scalar_decode_short(t1, &ser[i], ser_len-i);

    /* Exactly one full serialized chunk: t1 holds the whole input but may
     * not be reduced.  The test is against the wire size SCALAR_SER_BYTES,
     * not the in-memory sizeof(scalar_t), so that i == 0 is guaranteed here
     * even if the two sizes ever differ. */
    if (ser_len == SCALAR_SER_BYTES) {
        assert(i==0);
        /* ham-handed reduce */
        API_NS(scalar_mul)(s,t1,API_NS(scalar_one));
        API_NS(scalar_destroy)(t1);
        return;
    }

    while (i) {
        i -= SCALAR_SER_BYTES;
        sc_montmul(t1,t1,sc_r2);   /* scale the accumulator up by one chunk */
        /* the range check result is irrelevant: scalar_decode reduces t2 */
        ignore_result( API_NS(scalar_decode)(t2, ser+i) );
        API_NS(scalar_add)(t1, t1, t2);
    }

    API_NS(scalar_copy)(s, t1);
    API_NS(scalar_destroy)(t1);
    API_NS(scalar_destroy)(t2);
}
/*
 * Serialize s into ser: limbs in increasing index order, each limb written
 * least significant byte first.  Writes SCALAR_LIMBS * sizeof(decaf_word_t)
 * bytes in total.
 */
void API_NS(scalar_encode)(
    unsigned char ser[SCALAR_SER_BYTES],
    const scalar_t s
) {
    unsigned int limb_idx, byte_idx;
    unsigned int pos = 0;   /* next output byte */

    for (limb_idx = 0; limb_idx < SCALAR_LIMBS; limb_idx++) {
        for (byte_idx = 0; byte_idx < sizeof(decaf_word_t); byte_idx++) {
            ser[pos] = s->limb[limb_idx] >> (8*byte_idx);
            pos++;
        }
    }
}
/*
 * Constant-time select: out = pick_b ? b : a.
 * pick_b is first normalized to a mask with bool_to_mask, then handed to
 * constant_time_select together with the size of one limb.
 */
void API_NS(scalar_cond_sel) (
    scalar_t out,
    const scalar_t a,
    const scalar_t b,
    decaf_bool_t pick_b
) {
    mask_t choose_b = bool_to_mask(pick_b);
    constant_time_select(out, a, b, sizeof(scalar_t), choose_b, sizeof(out->limb[0]));
}
void API_NS(scalar_halve) (
scalar_t out,
const scalar_t a
) {
decaf_word_t mask = -(a->limb[0] & 1);
decaf_dword_t chain = 0;
unsigned int i;
for (i=0; i<SCALAR_LIMBS; i++) {
chain = (chain + a->limb[i]) + (sc_p->limb[i] & mask);
out->limb[i] = chain;
chain >>= DECAF_WORD_BITS;
}
for (i=0; i<SCALAR_LIMBS-1; i++) {
out->limb[i] = out->limb[i]>>1 | out->limb[i+1]<<(WBITS-1);
}
out->limb[i] = out->limb[i]>>1 | chain<<(WBITS-1