Commit 24027889 authored by Michael Hamburg

further reduce the code in f_impl.h

parent 4218223d
......@@ -4,14 +4,12 @@
#ifndef __P25519_H__
#define __P25519_H__ 1
#include "f_field.h"
#include <stdint.h>
#include <assert.h>
#include <string.h>
#include "decaf/decaf_255.h"
#include "word.h"
#define LBITS 51
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}
#ifdef __cplusplus
......@@ -20,54 +18,29 @@ extern "C" {
/* -------------- Inline functions begin here -------------- */
/*
 * gf_add_RAW (5-limb variant): out = a + b, limb by limb, followed by a
 * weak reduction of the result.
 *
 * NOTE(review): this span appears to come from a diff view whose +/- markers
 * were lost, so the old gf_25519_add_RAW signature and the new gf_add_RAW
 * signature (and both weak-reduce calls) appear back to back. Confirm against
 * the commit before treating this as compilable code.
 */
void
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i;
/* Add corresponding limbs; the loop itself does no carry propagation. */
for (i=0; i<5; i++) {
out->limb[i] = a->limb[i] + b->limb[i];
}
/* Reduce the sum in place (old helper name first, renamed helper second). */
gf_25519_weak_reduce(out);
gf_weak_reduce(out);
}
/*
 * gf_sub_RAW (5-limb variant): out = a - b + constant, limb by limb, followed
 * by a weak reduction of the result.
 *
 * NOTE(review): this span appears to interleave removed and added diff lines.
 * Besides the old and new gf_sub_RAW signatures, the gf_25519_copy function
 * (a memcpy of the whole element) sits in the middle of it — confirm against
 * the commit which lines survive.
 */
void
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i;
/*
 * co1 = 2*(2^51-1) is added to limbs 1..4; co2 = co1-36 = 2*(2^51-19) is
 * added to limb 0. Presumably this is 2*p for p = 2^255-19 in 51-bit limbs,
 * added so the unsigned limb differences do not wrap — TODO confirm.
 */
uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36;
for (i=0; i<5; i++) {
out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co2 : co1);
}
gf_25519_weak_reduce(out);
}
void
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) {
/* Copies sizeof(*a) bytes from a to out. */
memcpy(out,a,sizeof(*a));
gf_weak_reduce(out);
}
/*
 * gf_bias (this variant): intentionally does nothing; both arguments are
 * cast to void to mark them as unused.
 *
 * NOTE(review): the old gf_25519_bias signature and the new gf_bias signature
 * appear back to back because the diff markers were lost.
 */
void
gf_25519_bias (
gf_25519_t a,
int amt
) {
void gf_bias (gf a, int amt) {
(void) a;
(void) amt;
}
void
gf_25519_weak_reduce (
gf_25519_t a
) {
void gf_weak_reduce (gf a) {
uint64_t mask = (1ull<<51) - 1;
uint64_t tmp = a->limb[4] >> 51;
int i;
......
......@@ -4,36 +4,24 @@
#ifndef __P25519_H__
#define __P25519_H__ 1
#include "f_field.h"
#include <stdint.h>
#include <assert.h>
#include <string.h>
#include "decaf/decaf_255.h"
#include "word.h"
#define DECAF_255_LIMB_BITS 51
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}
/* -------------- Inline functions begin here -------------- */
/*
 * gf_add_RAW (5-limb variant without reduction): out = a + b, limb by limb.
 * Unlike the variant that calls gf_weak_reduce, this one leaves the sums
 * unreduced.
 *
 * NOTE(review): the old gf_25519_add_RAW signature and the new gf_add_RAW
 * signature appear back to back because the diff markers were lost.
 */
void
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i;
/* Add corresponding limbs; no carry propagation is performed. */
for (i=0; i<5; i++) {
out->limb[i] = a->limb[i] + b->limb[i];
}
}
void
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i;
uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36;
for (i=0; i<5; i++) {
......@@ -41,11 +29,7 @@ gf_25519_sub_RAW (
}
}
void
gf_25519_bias (
gf_25519_t a,
int amt
) {
void gf_bias (gf a, int amt) {
a->limb[0] += ((uint64_t)(amt)<<52) - 38*amt;
int i;
for (i=1; i<5; i++) {
......@@ -53,10 +37,7 @@ gf_25519_bias (
}
}
void
gf_25519_weak_reduce (
gf_25519_t a
) {
void gf_weak_reduce (gf a) {
uint64_t mask = (1ull<<51) - 1;
uint64_t tmp = a->limb[4] >> 51;
int i;
......
......@@ -4,17 +4,12 @@
#ifndef __P448_H__
#define __P448_H__ 1
#include "word.h"
#include "f_field.h"
#include <stdint.h>
#include <assert.h>
/*
 * Field element stored as sixteen 32-bit limbs; the attribute requests
 * 32-byte alignment. gf_448_t is a one-element array of the struct.
 * NOTE(review): the hunk header shrinks here, so this typedef is presumably
 * removed by the commit — confirm.
 */
typedef struct gf_448_s {
uint32_t limb[16];
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];
#define LBITS 28
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}
......@@ -24,12 +19,7 @@ extern "C" {
/* -------------- Inline functions begin here -------------- */
void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
......@@ -42,12 +32,7 @@ gf_448_add_RAW (
*/
}
void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
......@@ -60,11 +45,7 @@ gf_448_sub_RAW (
*/
}
void
gf_448_bias (
gf_448_t a,
int amt
) {
void gf_bias (gf a, int amt) {
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
uint32x4_t *aa = (uint32x4_t*) a;
......@@ -74,10 +55,7 @@ gf_448_bias (
aa[3] += lo;
}
void
gf_448_weak_reduce (
gf_448_t a
) {
void gf_weak_reduce (gf a) {
uint64_t mask = (1ull<<28) - 1;
uint64_t tmp = a->limb[15] >> 28;
int i;
......
......@@ -4,17 +4,12 @@
#ifndef __P448_H__
#define __P448_H__ 1
#include "word.h"
#include "f_field.h"
#include <stdint.h>
#include <assert.h>
/*
 * Field element stored as sixteen 32-bit limbs; the attribute requests
 * 32-byte alignment. gf_448_t is a one-element array of the struct.
 * NOTE(review): the hunk header shrinks here, so this typedef is presumably
 * removed by the commit — confirm.
 */
typedef struct gf_448_s {
uint32_t limb[16];
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];
#define LBITS 28
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}
......@@ -24,12 +19,7 @@ extern "C" {
/* -------------- Inline functions begin here -------------- */
void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
......@@ -42,12 +32,7 @@ gf_448_add_RAW (
*/
}
void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
......@@ -60,11 +45,7 @@ gf_448_sub_RAW (
*/
}
void
gf_448_bias (
gf_448_t a,
int amt
) {
void gf_bias (gf a, int amt) {
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
uint32x4_t lo = {co1,co1,co1,co1}, hi = {co2,co1,co1,co1};
uint32x4_t *aa = (uint32x4_t*) a;
......@@ -74,10 +55,7 @@ gf_448_bias (
aa[3] += lo;
}
void
gf_448_weak_reduce (
gf_448_t a
) {
void gf_weak_reduce (gf a) {
uint64_t mask = (1ull<<28) - 1;
uint64_t tmp = a->limb[15] >> 28;
int i;
......
......@@ -4,20 +4,15 @@
#ifndef __P448_H__
#define __P448_H__ 1
#include "word.h"
#include "f_field.h"
#include <stdint.h>
#include <assert.h>
/*
 * Field element stored as sixteen 32-bit limbs; the attribute requests
 * 32-byte alignment. gf_448_t is a one-element array of the struct.
 * NOTE(review): the hunk header shrinks here, so this typedef is presumably
 * removed by the commit — confirm.
 */
typedef struct gf_448_s {
uint32_t limb[16];
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];
#define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15)
#define USE_NEON_PERM 1
#define LBITS 28
#define LIMBHI(x) ((x##ull)>>LBITS)
#define LIMBLO(x) ((x##ull)&((1ull<<LBITS)-1))
#define LIMBHI(x) ((x##ull)>>28)
#define LIMBLO(x) ((x##ull)&((1ull<<28)-1))
# define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMBLO(a),LIMBLO(e), LIMBHI(a),LIMBHI(e), \
LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \
......@@ -30,24 +25,14 @@ extern "C" {
/* -------------- Inline functions begin here -------------- */
/*
 * gf_add_RAW (chunked variant): out = a + b, processed in uint32xn_t-sized
 * chunks; the element is split into sizeof(*out)/sizeof(uint32xn_t) chunks.
 * No reduction is performed here.
 *
 * NOTE(review): uint32xn_t is not defined in this view — presumably a SIMD
 * vector of 32-bit lanes (making `+` lane-wise); confirm in word.h. The old
 * gf_448_add_RAW and new gf_add_RAW signatures appear back to back because
 * the diff markers were lost.
 */
void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i;
/* Reinterpret all three operands as arrays of uint32xn_t and add chunk-wise. */
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] + ((const uint32xn_t*)b)[i];
}
}
void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
((uint32xn_t*)out)[i] = ((const uint32xn_t*)a)[i] - ((const uint32xn_t*)b)[i];
......@@ -60,11 +45,7 @@ gf_448_sub_RAW (
*/
}
void
gf_448_bias (
gf_448_t a,
int amt
) {
void gf_bias (gf a, int amt) {
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
uint32x4_t lo = {co1,co2,co1,co1}, hi = {co1,co1,co1,co1};
uint32x4_t *aa = (uint32x4_t*) a;
......@@ -74,10 +55,7 @@ gf_448_bias (
aa[3] += hi;
}
void
gf_448_weak_reduce (
gf_448_t a
) {
void gf_weak_reduce (gf a) {
uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1},
tmp = vshr_n_u32(aa[7],28);
......
......@@ -4,17 +4,12 @@
#ifndef __P448_H__
#define __P448_H__ 1
#include "f_field.h"
#include <stdint.h>
#include <assert.h>
#include <string.h>
#include "word.h"
/*
 * Field element stored as eight 64-bit limbs; the attribute requests
 * 32-byte alignment. gf_448_t is a one-element array of the struct.
 * NOTE(review): the hunk header shrinks here, so this typedef is presumably
 * removed by the commit — confirm.
 */
typedef struct gf_448_s {
uint64_t limb[8];
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];
#define LBITS 56
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
#ifdef __cplusplus
......@@ -23,46 +18,29 @@ extern "C" {
/* -------------- Inline functions begin here -------------- */
/*
 * gf_add_RAW (8-limb variant): out = a + b, limb by limb, followed by a weak
 * reduction of the result.
 *
 * NOTE(review): the old gf_448_add_RAW signature and the new gf_add_RAW
 * signature (and both weak-reduce calls) appear back to back because the
 * diff markers were lost.
 */
void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i;
/* Add corresponding limbs; the loop itself does no carry propagation. */
for (i=0; i<8; i++) {
out->limb[i] = a->limb[i] + b->limb[i];
}
/* Reduce the sum in place (old helper name first, renamed helper second). */
gf_448_weak_reduce(out);
gf_weak_reduce(out);
}
/*
 * gf_sub_RAW (8-limb variant): out = a - b + constant, limb by limb, followed
 * by a weak reduction of the result.
 *
 * NOTE(review): the old gf_448_sub_RAW signature and the new gf_sub_RAW
 * signature (and both weak-reduce calls) appear back to back because the
 * diff markers were lost.
 */
void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i;
/*
 * co1 = 2*(2^56-1) is added to every limb except limb 4, which gets
 * co2 = co1-2. Presumably this is 2*p for p = 2^448 - 2^224 - 1 in 56-bit
 * limbs, added so the unsigned limb differences do not wrap — TODO confirm.
 */
uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2;
for (i=0; i<8; i++) {
out->limb[i] = a->limb[i] - b->limb[i] + ((i==4) ? co2 : co1);
}
gf_448_weak_reduce(out);
gf_weak_reduce(out);
}
/*
 * gf_bias (this variant): intentionally does nothing; both arguments are
 * cast to void to mark them as unused.
 *
 * NOTE(review): the old gf_448_bias signature and the new gf_bias signature
 * appear back to back because the diff markers were lost.
 */
void
gf_448_bias (
gf_448_t a,
int amt
) {
void gf_bias (gf a, int amt) {
(void) a;
(void) amt;
}
void
gf_448_weak_reduce (
gf_448_t a
) {
void gf_weak_reduce (gf a) {
uint64_t mask = (1ull<<56) - 1;
uint64_t tmp = a->limb[7] >> 56;
int i;
......
......@@ -4,13 +4,11 @@
#ifndef __P448_H__
#define __P448_H__ 1
#include "f_field.h"
#include <stdint.h>
#include <assert.h>
#include "decaf/decaf_448.h"
#include "word.h"
#define LBITS 56
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
#ifdef __cplusplus
......@@ -19,12 +17,7 @@ extern "C" {
/* -------------- Inline functions begin here -------------- */
void
gf_448_add_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_add_RAW (gf out, const gf a, const gf b) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
......@@ -37,12 +30,7 @@ gf_448_add_RAW (
*/
}
void
gf_448_sub_RAW (
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
void gf_sub_RAW (gf out, const gf a, const gf b) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
......@@ -55,11 +43,7 @@ gf_448_sub_RAW (
*/
}
void
gf_448_bias (
gf_448_t a,
int amt
) {
void gf_bias (gf a, int amt) {
uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
#if __AVX2__
......@@ -82,10 +66,7 @@ gf_448_bias (
#endif
}
void
gf_448_weak_reduce (
gf_448_t a
) {
void gf_weak_reduce (gf a) {
/* PERF: use pshufb/palignr if anyone cares about speed of this */
uint64_t mask = (1ull<<56) - 1;
uint64_t tmp = a->limb[7] >> 56;
......
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
#ifndef __gf_480_H__
#define __gf_480_H__ 1
#ifndef __gf_H__
#define __gf_H__ 1
#include "f_field.h"
#include <stdint.h>
#include <assert.h>
#include "word.h"
/*
 * Field element stored as eight 64-bit limbs; the attribute requests
 * 32-byte alignment.
 * NOTE(review): presumably removed by this commit (the later functions take
 * `gf *` instead) — confirm against the diff.
 */
typedef struct gf_480_t {
uint64_t limb[8];
} __attribute__((aligned(32))) gf_480_t;
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Prototypes for the gf_480 field API. Only the signatures are visible here;
 * the behaviour of each routine is defined by its implementation.
 * NOTE(review): these declarations use the old gf_480_* names and are
 * presumably removed by this commit — confirm against the diff.
 */

/* In-place weak reduction; declared static inline, always_inline. */
static __inline__ void
gf_480_weak_reduce (
gf_480_t *inout
) __attribute__((unused,always_inline));
/* In-place strong reduction; defined out of line. */
void
gf_480_strong_reduce (
gf_480_t *inout
);
/* In-place bias by an integer amount; declared static inline, always_inline. */
static __inline__ void
gf_480_bias (
gf_480_t *inout,
int amount
) __attribute__((unused,always_inline));
/* Two-operand multiply; `out` is restrict-qualified. */
void
gf_480_mul (
gf_480_t *__restrict__ out,
const gf_480_t *a,
const gf_480_t *b
);
/* Multiply by a single 64-bit word; `out` is restrict-qualified. */
void
gf_480_mulw (
gf_480_t *__restrict__ out,
const gf_480_t *a,
uint64_t b
);
/* Squaring; `out` is restrict-qualified. */
void
gf_480_sqr (
gf_480_t *__restrict__ out,
const gf_480_t *a
);
/* Writes element x into the byte buffer `serial`. */
void
gf_480_serialize (
uint8_t *serial,
const struct gf_480_t *x
);
/* Reads an element from a buffer declared as 60 bytes; returns a mask_t. */
mask_t
gf_480_deserialize (
gf_480_t *x,
const uint8_t serial[60]
);
/* -------------- Inline functions begin here -------------- */
void
gf_480_add_RAW (
gf_480_t *out,
const gf_480_t *a,
const gf_480_t *b
) {
void gf_add_RAW (gf *out, const gf *a, const gf *b) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
......@@ -85,12 +30,7 @@ gf_480_add_RAW (
*/
}
void
gf_480_sub_RAW (
gf_480_t *out,
const gf_480_t *a,
const gf_480_t *b
) {
void gf_sub_RAW (gf *out, const gf *a, const gf *b) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] - ((const uint64xn_t*)b)[i];
......@@ -103,21 +43,15 @@ gf_480_sub_RAW (
*/
}
/*
 * gf_copy: copies element a into out in big_register_t-sized chunks,
 * sizeof(*out)/sizeof(big_register_t) chunks in total.
 *
 * NOTE(review): big_register_t is not defined in this view — presumably a
 * wide register/vector type from word.h; confirm. The old gf_480_copy
 * signature and the new gf_copy signature appear back to back because the
 * diff markers were lost.
 */
void
gf_480_copy (
gf_480_t *out,
const gf_480_t *a
) {
void gf_copy (gf *out, const gf *a) {
unsigned int i;
/* Reinterpret both operands as arrays of big_register_t and copy chunk-wise. */
for (i=0; i<sizeof(*out)/sizeof(big_register_t); i++) {
((big_register_t *)out)[i] = ((const big_register_t *)a)[i];
}
}
void
gf_480_bias (
gf_480_t *a,
int amt
void gf_bias (
gf *a, int amt
) {
uint64_t co1 = ((1ull<<60)-1)*amt, co2 = co1-amt;
......@@ -141,10 +75,7 @@ gf_480_bias (
#endif
}
void
gf_480_weak_reduce (
gf_480_t *a
) {
void gf_weak_reduce (gf *a) {
/* PERF: use pshufb/palignr if anyone cares about speed of this */
uint64_t mask = (1ull<<60) - 1;
uint64_t tmp = a->limb[7] >> 60;
......@@ -160,4 +91,4 @@ gf_480_weak_reduce (
}; /* extern "C" */
#endif
#endif /* __gf_480_H__ */
#endif /* __gf_H__ */
......@@ -4,118 +4,41 @@
#ifndef __P521_H__
#define __P521_H__ 1
#include "f_field.h"
#include <stdint.h>
#include <assert.h>
#include <string.h>
#include "word.h"
/*
 * Field element stored as nine 64-bit limbs (no alignment attribute).
 * NOTE(review): presumably removed by this commit (the later functions take
 * `gf *` instead) — confirm against the diff.
 */
typedef struct gf_521_t {
uint64_t limb[9];
} gf_521_t;
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Prototypes for the gf_521 field API. Only the signatures are visible here;
 * the behaviour of each routine is defined by its implementation.
 * NOTE(review): these declarations use the old gf_521_* names and are
 * presumably removed by this commit — confirm against the diff.
 */

/* In-place weak reduction; declared static inline. */
static __inline__ void
gf_521_weak_reduce (
gf_521_t *inout
) __attribute__((unused));
/* In-place strong reduction; defined out of line. */
void
gf_521_strong_reduce (
gf_521_t *inout
);
/* In-place bias by an integer amount; declared static inline. */
static __inline__ void
gf_521_bias (
gf_521_t *inout,
int amount
) __attribute__((unused));
/* Two-operand multiply; `out` is restrict-qualified. */
void
gf_521_mul (
gf_521_t *__restrict__ out,
const gf_521_t *a,
const gf_521_t *b
);
/* Multiply by a single 64-bit word; `out` is restrict-qualified. */
void
gf_521_mulw (
gf_521_t *__restrict__ out,
const gf_521_t *a,
uint64_t b
);
/* Squaring; `out` is restrict-qualified. */
void
gf_521_sqr (
gf_521_t *__restrict__ out,
const gf_521_t *a
);
/* Writes element x into the byte buffer `serial`. */
void
gf_521_serialize (
uint8_t *serial,
const struct gf_521_t *x
);
/* Reads an element from a buffer declared as 66 bytes; returns a mask_t. */
mask_t
gf_521_deserialize (
gf_521_t *x,
const uint8_t serial[66]
);
/* -------------- Inline functions begin here -------------- */
/*
 * gf_add_RAW (9-limb variant): out = a + b, limb by limb, followed by a weak
 * reduction of the result.
 *
 * NOTE(review): the old gf_521_add_RAW signature and the new gf_add_RAW
 * signature (and both weak-reduce calls) appear back to back because the
 * diff markers were lost.
 */
void
gf_521_add_RAW (
gf_521_t *out,
const gf_521_t *a,
const gf_521_t *b
) {
void gf_add_RAW (gf *out, const gf *a, const gf *b) {
unsigned int i;
/* Add corresponding limbs; the loop itself does no carry propagation. */
for (i=0; i<9; i++) {
out->limb[i] = a->limb[i] + b->limb[i];
}
/* Reduce the sum in place (old helper name first, renamed helper second). */
gf_521_weak_reduce(out);
gf_weak_reduce(out);
}
/*
 * gf_sub_RAW (9-limb variant): out = a - b + constant, limb by limb, followed
 * by a weak reduction of the result.
 *
 * NOTE(review): this span appears to interleave removed and added diff lines.
 * Besides the old and new gf_sub_RAW signatures, the gf_521_copy function
 * (a memcpy of the whole element) sits in the middle of it — confirm against
 * the commit which lines survive.
 */
void
gf_521_sub_RAW (
gf_521_t *out,
const gf_521_t *a,
const gf_521_t *b
) {
void gf_sub_RAW (gf *out, const gf *a, const gf *b) {
unsigned int i;
/*
 * co1 = 4*(2^58-1) is added to limbs 0..7; co2 = 4*(2^57-1) is added to
 * limb 8. Presumably this is 4*p for p = 2^521-1 with eight 58-bit limbs and
 * a 57-bit top limb, added so the unsigned differences do not wrap — TODO
 * confirm.
 */
uint64_t co1 = ((1ull<<58)-1)*4, co2 = ((1ull<<57)-1)*4;
for (i=0; i<9; i++) {
out->limb[i] = a->limb[i] - b->limb[i] + ((i==8) ? co2 : co1);
}
gf_521_weak_reduce(out);
}
void
gf_521_copy (
gf_521_t *out,
const gf_521_t *a
) {
/* Copies sizeof(*a) bytes from a to out. */
memcpy(out,a,sizeof(*a));
gf_weak_reduce(out);
}