Commit 4a13ad4b authored by Michael Hamburg

unroll loops in arch_32/f_impl.c except on -Os

parent bb1eef37
......@@ -1156,13 +1156,11 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] & 0x80);
enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] &= ~0x80;
mask_t succ = DECAF_TRUE;
mask_t succ = gf_deserialize(p->y, enc2, 1);
#if 7 == 0
succ = word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]);
succ &= word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]);
#endif
succ &= gf_deserialize(p->y, enc2, 1);
gf_sqr(p->x,p->y);
gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
#if EDDSA_USE_SIGMA_ISOGENY
......@@ -1247,7 +1245,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
decaf_bzero(enc2,sizeof(enc2));
assert(API_NS(point_valid)(p) || ~succ);
return decaf_succeed_if(succ);
return decaf_succeed_if(mask_to_bool(succ));
}
decaf_error_t decaf_x25519 (
......
......@@ -1156,13 +1156,11 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] & 0x80);
enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] &= ~0x80;
mask_t succ = DECAF_TRUE;
mask_t succ = gf_deserialize(p->y, enc2, 1);
#if 0 == 0
succ = word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]);
succ &= word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]);
#endif
succ &= gf_deserialize(p->y, enc2, 1);
gf_sqr(p->x,p->y);
gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
#if EDDSA_USE_SIGMA_ISOGENY
......@@ -1247,7 +1245,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
decaf_bzero(enc2,sizeof(enc2));
assert(API_NS(point_valid)(p) || ~succ);
return decaf_succeed_if(succ);
return decaf_succeed_if(mask_to_bool(succ));
}
decaf_error_t decaf_x448 (
......
......@@ -4,6 +4,14 @@
#include "f_field.h"
/*
 * FOR_LIMB(_i,_start,_end,_x): execute statement _x once for each value of
 * the caller-supplied index variable _i in the half-open range [_start,_end).
 *
 * Two expansions exist, chosen at preprocessing time:
 *
 *  - Unrolled: used when __OPTIMIZE__ is defined, __OPTIMIZE_SIZE__ is not,
 *    and I_HATE_UNROLLED_LOOPS evaluates to zero (an undefined name counts as
 *    zero inside #if), or whenever DECAF_FORCE_UNROLL is defined.  The body
 *    is pasted eight times via REPEAT8, each copy guarded by (_i < _end), so
 *    at most 8 iterations ever run: callers must keep _end - _start <= 8.
 *
 *  - Plain for loop: used otherwise (e.g. unoptimized or size-optimized
 *    builds).
 *
 * The two expansions leave _i at different values on exit (the unrolled one
 * always ends at _start + 8), so do not read _i after the macro.
 *
 * _i, _start and _end are parenthesized in the expansion so that arguments
 * containing low-precedence operators (e.g. "c ? 8 : 3") keep their meaning.
 * _end is re-evaluated on every iteration in both variants.
 */
#if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) && !I_HATE_UNROLLED_LOOPS) \
     || defined(DECAF_FORCE_UNROLL)
/* Paste the token sequence _x eight times in a row. */
#define REPEAT8(_x) _x _x _x _x _x _x _x _x
#define FOR_LIMB(_i,_start,_end,_x) do { \
    (_i) = (_start); \
    REPEAT8( if ((_i) < (_end)) { _x; } (_i)++; ) \
} while (0)
#else
#define FOR_LIMB(_i,_start,_end,_x) do { \
    for ((_i) = (_start); (_i) < (_end); (_i)++) { _x; } \
} while (0)
#endif
void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
const uint32_t *a = as->limb, *b = bs->limb;
uint32_t *c = cs->limb;
......@@ -19,24 +27,24 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
bb[i] = b[i] + b[i+8];
}
for (j=0; j<8; j++) {
FOR_LIMB(j,0,8,{
accum2 = 0;
for (i=0; i<=j; i++) {
FOR_LIMB (i,0,j+1,{
accum2 += widemul(a[j-i],b[i]);
accum1 += widemul(aa[j-i],bb[i]);
accum0 += widemul(a[8+j-i], b[8+i]);
}
});
accum1 -= accum2;
accum0 += accum2;
accum2 = 0;
for (; i<8; i++) {
FOR_LIMB (i,j+1,8,{
accum0 -= widemul(a[8+j-i], b[i]);
accum2 += widemul(aa[8+j-i], bb[i]);
accum1 += widemul(a[16+j-i], b[8+i]);
}
});
accum1 += accum2;
accum0 += accum2;
......@@ -46,7 +54,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
accum0 >>= 28;
accum1 >>= 28;
}
});
accum0 += accum1;
accum0 += c[8];
......@@ -66,24 +74,17 @@ void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint32_t *a = as->limb;
uint32_t *c = cs->limb;
uint64_t accum0, accum8;
uint64_t accum0 = 0, accum8 = 0;
uint32_t mask = (1ull<<28)-1;
int i;
accum0 = widemul(b, a[0]);
accum8 = widemul(b, a[8]);
c[0] = accum0 & mask; accum0 >>= 28;
c[8] = accum8 & mask; accum8 >>= 28;
for (i=1; i<8; i++) {
FOR_LIMB(i,0,8,{
accum0 += widemul(b, a[i]);
accum8 += widemul(b, a[i+8]);
c[i] = accum0 & mask; accum0 >>= 28;
c[i+8] = accum8 & mask; accum8 >>= 28;
}
});
accum0 += accum8 + c[8];
c[8] = accum0 & mask;
......
......@@ -1145,13 +1145,11 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] & 0x80);
enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] &= ~0x80;
mask_t succ = DECAF_TRUE;
mask_t succ = gf_deserialize(p->y, enc2, 1);
#if $(gf_bits % 8) == 0
succ = word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1]);
succ &= word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1]);
#endif
succ &= gf_deserialize(p->y, enc2, 1);
gf_sqr(p->x,p->y);
gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
#if EDDSA_USE_SIGMA_ISOGENY
......@@ -1236,7 +1234,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
decaf_bzero(enc2,sizeof(enc2));
assert(API_NS(point_valid)(p) || ~succ);
return decaf_succeed_if(succ);
return decaf_succeed_if(mask_to_bool(succ));
}
decaf_error_t decaf_x$(gf_shortname) (
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment