Commit 25697caf, authored by Michael Hamburg

Import the code

parent acd09505
# Copyright (c) 2014 Cryptography Research, Inc.
# Released under the MIT License. See LICENSE.txt for license information.
# Remove a target whose recipe failed so a half-written file is never
# mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Compiler and flags. -Werror makes every warning fatal; -mavx2/-mbmi2 enable
# those instruction sets and MUST_HAVE_SSSE3 is defined for the sources.
CC = clang
CFLAGS = -O3 -std=c99 -pedantic -Wall -Wextra -Werror \
         -mavx2 -DMUST_HAVE_SSSE3 -mbmi2 \
         -ffunction-sections -fdata-sections -fomit-frame-pointer -fPIC

.PHONY: clean all runbench

# Keep the generated assembly around after the objects are built.
.PRECIOUS: build/%.s

# Everything a compiled file depends on besides its own source: this Makefile,
# every header under the current directory, and the stamp that guarantees the
# build/ directory exists. Simply expanded so `find` runs exactly once.
HEADERS := Makefile $(shell find . -name "*.h") build/timestamp

LIBCOMPONENTS = build/goldilocks.o build/barrett_field.o build/crandom.o \
                build/p448.o build/ec_point.o build/scalarmul.o

all: bench

# The benchmark is compiled in one step from every C file in this directory.
bench: *.h *.c
	$(CC) $(CFLAGS) -o $@ *.c

# Stamp file: creating it also creates the build/ directory.
build/timestamp:
	mkdir -p build
	touch $@

# Objects are assembled from the intermediate .s files.
build/%.o: build/%.s
	$(CC) -c -o $@ $<

build/%.s: %.c $(HEADERS)
	$(CC) $(CFLAGS) -S -c -o $@ $<

# Shared library, linked with the macOS libtool; only the symbols listed in
# exported.sym are exported.
build/goldilocks.so: $(LIBCOMPONENTS)
	rm -f $@
	libtool -macosx_version_min 10.6 -dynamic -dead_strip -lc -x -o $@ \
		-exported_symbols_list exported.sym \
		$(LIBCOMPONENTS)

runbench: bench
	./$<

clean:
	rm -fr build bench *.o *.s
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
#include "barrett_field.h"
#include <assert.h>
/*
 * out = a + (c & mask), word by word, over nwords_a words.
 * c contributes only its low nwords_c words; each is ANDed with mask first.
 * Returns the carry left over after the last word.
 */
word_t
add_nr_ext_packed(
    word_t *out,
    const word_t *a,
    int nwords_a,
    const word_t *c,
    int nwords_c,
    word_t mask
) {
    dword_t acc = 0;
    int idx = 0;

    /* Low words: a, the masked c and the running carry all contribute. */
    while (idx < nwords_c) {
        acc = acc + a[idx] + (c[idx] & mask);
        out[idx] = acc;
        acc >>= WORD_BITS;
        idx++;
    }

    /* Remaining words of a: only the running carry is folded in. */
    while (idx < nwords_a) {
        acc = acc + a[idx];
        out[idx] = acc;
        acc >>= WORD_BITS;
        idx++;
    }

    return acc;
}
/*
 * a += c in place over nwords words.
 * Returns the carry left over after the last word.
 */
static __inline__ word_t
add_nr_packed(
    word_t *a,
    const word_t *c,
    int nwords
) {
    dword_t acc = 0;
    int idx = 0;

    while (idx < nwords) {
        /* Double-width sum of both words plus the incoming carry. */
        acc = acc + a[idx] + c[idx];
        a[idx] = acc;
        acc >>= WORD_BITS;
        idx++;
    }

    return acc;
}
/*
 * a -= c in place over nwords words.
 * The running borrow is kept in a signed double-width accumulator; the value
 * left in it after the last word is returned (converted to word_t).
 */
static __inline__ word_t
sub_nr_packed(
    word_t *a,
    const word_t *c,
    int nwords
) {
    dsword_t acc = 0;
    int idx = 0;

    while (idx < nwords) {
        acc = acc + a[idx] - c[idx];
        a[idx] = acc;
        /* Shifting the signed accumulator carries the borrow to the next word. */
        acc >>= WORD_BITS;
        idx++;
    }

    return acc;
}
/*
 * out = a - (c & mask), word by word, over nwords_a words.
 * c contributes only its low nwords_c words; each is ANDed with mask first.
 * Returns what is left in the signed running borrow after the last word
 * (converted to word_t).
 */
word_t
sub_nr_ext_packed(
    word_t *out,
    const word_t *a,
    int nwords_a,
    const word_t *c,
    int nwords_c,
    word_t mask
) {
    dsword_t acc = 0;
    int idx = 0;

    /* Low words: subtract the masked c from a, including the running borrow. */
    while (idx < nwords_c) {
        acc = acc + a[idx] - (c[idx] & mask);
        out[idx] = acc;
        acc >>= WORD_BITS;
        idx++;
    }

    /* Remaining words of a: only the running borrow is applied. */
    while (idx < nwords_a) {
        acc = acc + a[idx];
        out[idx] = acc;
        acc >>= WORD_BITS;
        idx++;
    }

    return acc;
}
/*
 * accum += mier * mand + carry, in place.
 * mier has nwords_mier words and mand is a single word; accum has
 * nwords_accum words, which must be at least nwords_mier (asserted).
 * Returns the carry out of the top word of accum.
 */
static word_t
widemac(
    word_t *accum,
    int nwords_accum,
    const word_t *mier,
    int nwords_mier,
    word_t mand,
    word_t carry
) {
    int idx = 0;

    assert(nwords_accum >= nwords_mier);

    while (idx < nwords_mier) {
        /* UMAAL chain for the wordy part of p */
        dword_t wide = ((dword_t)mand) * mier[idx] + accum[idx] + carry;
        accum[idx] = wide;
        carry = wide >> WORD_BITS;
        idx++;
    }

    /* Ripple the remaining carry through the rest of accum. */
    while (idx < nwords_accum) {
        dword_t wide = ((dword_t)carry) + accum[idx];
        accum[idx] = wide;
        carry = wide >> WORD_BITS;
        idx++;
    }

    return carry;
}
/*
 * Reduce the value held in a[0..nwords_a) in place; a_carry supplies extra
 * bits above the top word of a.
 *
 * Working from the top word down to word nwords_p-1, the bits of the current
 * top word at and above bit p_shift are stripped off, multiplied by p_lo
 * (nwords_lo words) and added back into the nwords_p-word window that ends
 * at that word. A final add/conditional-subtract of p_lo finishes the job.
 * NOTE(review): this presumably targets a modulus of the form 2^k - p_lo
 * (see the TODO below) -- confirm.
 *
 * Requires a_carry < 2^p_shift and nwords_a >= nwords_p (asserted).
 */
void
barrett_reduce(
word_t *a,
int nwords_a,
word_t a_carry,
const word_t *p_lo,
int nwords_p,
int nwords_lo,
int p_shift
) {
/* TODO: non 2^k-c primes. */
int repeat, nwords_left_in_a=nwords_a;
/* TODO: is there a point to this a_carry business? */
assert(a_carry < ((word_t)1)<<p_shift && nwords_a >= nwords_p);
/* Each pass retires one word from the top of a. */
for (; nwords_left_in_a >= nwords_p; nwords_left_in_a--) {
for (repeat=0; repeat<2; repeat++) {
/* PERF: surely a more careful implementation could
* avoid this double round
*/
/* Split the top word: bits >= p_shift become the multiplier, the rest stay. */
word_t mand = a[nwords_left_in_a-1] >> p_shift;
a[nwords_left_in_a-1] &= (((word_t)1)<<p_shift)-1;
if (p_shift && !repeat) {
/* collect high bits when there are any */
if (nwords_left_in_a < nwords_a) {
/* The word above (left over from the previous pass) is consumed and cleared. */
mand |= a[nwords_left_in_a] << (WORD_BITS-p_shift);
a[nwords_left_in_a] = 0;
} else {
/* Very first pass: the bits above a come from a_carry instead. */
mand |= a_carry << (WORD_BITS-p_shift);
}
}
/* Fold mand * p_lo back into the nwords_p-word window below. */
word_t carry = widemac(a+nwords_left_in_a-nwords_p, nwords_p, p_lo, nwords_lo, mand, 0);
assert(!carry);
/* Keeps carry "used" when assertions are compiled out. */
(void)carry;
}
}
assert(nwords_left_in_a == nwords_p-1);
/* OK, but it still isn't reduced. Add and subtract p_lo. */
word_t cout = add_nr_ext_packed(a,a,nwords_p,p_lo,nwords_lo,-1);
if (p_shift) {
/* Move the bits at and above p_shift of the top word into cout, then clear them. */
cout = (cout<<(WORD_BITS-p_shift)) + (a[nwords_p-1]>>p_shift);
a[nwords_p-1] &= (((word_t)1)<<p_shift)-1;
}
/* mask = carry-1: if no carry then do sub, otherwise don't */
sub_nr_ext_packed(a,a,nwords_p,p_lo,nwords_lo,cout-1);
}
/*
 * If doMac, accum = accum + a*b mod p.  Otherwise, accum = a*b mod p.
 *
 * a has nwords_a words, b has nwords_b words, accum has nwords_accum words
 * (at least nwords_p, asserted).  The modulus is described by p_lo,
 * nwords_p, nwords_lo and p_shift exactly as for barrett_reduce().
 *
 * All work happens in a local buffer and accum is written only at the very
 * end, so accum may alias a and/or b.
 *
 * PERF: This function is horribly slow. Enough to break 1%.
 */
void
barrett_mul_or_mac(
    word_t *accum,
    int nwords_accum,
    const word_t *a,
    int nwords_a,
    const word_t *b,
    int nwords_b,
    const word_t *p_lo,
    int nwords_p,
    int nwords_lo,
    int p_shift,
    mask_t doMac
) {
    assert(nwords_accum >= nwords_p);

    /* nwords_tmp = max(nwords_a + 1, nwords_p + 1, nwords_accum + 1 if doMac).
     * The spare word in the MAC case guarantees that adding accum to the
     * reduced product below cannot carry out of tmp. */
    int nwords_tmp = (nwords_a > nwords_p) ? nwords_a : nwords_p;
    nwords_tmp++;
    if (doMac && nwords_tmp < nwords_accum + 1)
        nwords_tmp = nwords_accum + 1;

    word_t tmp[nwords_tmp];
    int bpos, i;

    for (i=0; i<nwords_tmp; i++) {
        tmp[i] = 0;
    }

    /* Schoolbook multiply, most significant word of b first:
     * tmp = (tmp << WORD_BITS) + a*b[bpos], reduced after every step. */
    for (bpos=nwords_b-1; bpos >= 0; bpos--) {
        /* Invariant at the beginning of the loop: the high word is unused. */
        assert(tmp[nwords_tmp-1] == 0);

        /* shift up by one word; the vacated low word must be cleared, or the
         * old low word would be counted twice */
        for (i=nwords_tmp-2; i>=0; i--) {
            tmp[i+1] = tmp[i];
        }
        tmp[0] = 0;

        /* mac and reduce */
        word_t carry = widemac(tmp, nwords_tmp, a, nwords_a, b[bpos], 0);

        /* the mac can't carry, because nwords_tmp >= nwords_a+1 and its high word is clear */
        assert(!carry);
        barrett_reduce(tmp, nwords_tmp, carry, p_lo, nwords_p, nwords_lo, p_shift);

        /* at this point, the number of words used is nwords_p <= nwords_tmp-1,
         * so the high word is again clear */
    }

    if (doMac) {
        /* Add the old accumulator only after the product is complete, so it
         * is not scaled by the per-word shifts above.  widemac with a
         * multiplier of 1 is a plain multi-word add with carry propagation. */
        word_t carry = widemac(tmp, nwords_tmp, accum, nwords_accum, 1, 0);

        /* tmp uses nwords_p <= nwords_tmp-1 words and accum uses
         * nwords_accum <= nwords_tmp-1 words, so the sum fits in tmp */
        assert(!carry);
        (void)carry;
        barrett_reduce(tmp, nwords_tmp, 0, p_lo, nwords_p, nwords_lo, p_shift);
    }

    /* Copy the result out; anything in tmp beyond accum must be zero, and
     * anything in accum beyond tmp is cleared. */
    for (i=0; i<nwords_tmp && i<nwords_accum; i++) {
        accum[i] = tmp[i];
    }
    for (; i<nwords_tmp; i++) {
        assert(tmp[i] == 0);
    }
    for (; i<nwords_accum; i++) {
        accum[i] = 0;
    }
}
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
#ifndef __BARRETT_FIELD_H__
#define __BARRETT_FIELD_H__ 1
#include "word.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Reduce a (nwords_a words, with a_carry supplying bits above its top word)
 * in place.  The modulus is described by p_lo (nwords_lo words), nwords_p
 * and p_shift.
 * NOTE(review): presumably the modulus is 2^k - p_lo with its top word
 * holding p_shift bits -- confirm against the implementation.
 */
void
barrett_reduce(
word_t *a,
int nwords_a,
word_t a_carry,
const word_t *p_lo,
int nwords_p,
int nwords_lo,
int p_shift
);
/*
* out = a+(c&mask), with carry returned.
* #out must equal #a (HACK?)
*
* a and out are nwords_a words long.  c is nwords_c words long and each of
* its words is ANDed with mask before being added; the remaining words of a
* receive only the propagated carry.
*/
word_t
add_nr_ext_packed(
word_t *out,
const word_t *a,
int nwords_a,
const word_t *c,
int nwords_c,
word_t mask
);
/*
 * out = a-(c&mask), with the final borrow returned.
 * Same operand layout as add_nr_ext_packed: a and out are nwords_a words,
 * c is nwords_c words and is masked word by word.
 * NOTE(review): the borrow presumably comes back as 0 or all-ones -- confirm.
 */
word_t
sub_nr_ext_packed(
word_t *out,
const word_t *a,
int nwords_a,
const word_t *c,
int nwords_c,
word_t mask
);
/*
* If doMac, accum = accum + a*b mod p.
* Otherwise, accum = a*b mod p.
*
* This function is not __restrict__; you may pass accum,
* a, b, etc all from the same location.
*
* accum, a and b are nwords_accum, nwords_a and nwords_b words long.
* p_lo, nwords_p, nwords_lo and p_shift describe the modulus in the same
* way as for barrett_reduce.  doMac is tested only for zero/non-zero.
*/
void
barrett_mul_or_mac(
word_t *accum,
int nwords_accum,
const word_t *a,
int nwords_a,
const word_t *b,
int nwords_b,
const word_t *p_lo,
int nwords_p,
int nwords_lo,
int p_shift,
mask_t doMac
);
/*
 * out = a*b mod p.
 * Convenience wrapper: forwards everything to barrett_mul_or_mac() with the
 * doMac argument set to 0.
 */
static inline void
barrett_mul(
    word_t *out,
    int nwords_out,
    const word_t *a,
    int nwords_a,
    const word_t *b,
    int nwords_b,
    const word_t *p_lo,
    int nwords_p,
    int nwords_lo,
    int p_shift
) {
    barrett_mul_or_mac(
        out, nwords_out,
        a, nwords_a,
        b, nwords_b,
        p_lo, nwords_p, nwords_lo, p_shift,
        0
    );
}
/*
 * out = out + a*b mod p.
 * Convenience wrapper: forwards everything to barrett_mul_or_mac() with the
 * doMac argument set to -1.
 */
static inline void
barrett_mac(
    word_t *out,
    int nwords_out,
    const word_t *a,
    int nwords_a,
    const word_t *b,
    int nwords_b,
    const word_t *p_lo,
    int nwords_p,
    int nwords_lo,
    int p_shift
) {
    barrett_mul_or_mac(
        out, nwords_out,
        a, nwords_a,
        b, nwords_b,
        p_lo, nwords_p, nwords_lo, p_shift,
        -1
    );
}
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* __BARRETT_FIELD_H__ */
This diff is collapsed.
This diff is collapsed.
/* Copyright (c) 2011 Stanford University.
* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
/* A miniature version of the (as of yet incomplete) crandom project. */
#ifndef __GOLDI_CRANDOM_H__
#define __GOLDI_CRANDOM_H__ 1
#include <stdint.h> /* for uint64_t */
#include <fcntl.h> /* for open */
#include <errno.h> /* for returning errors after open */
#include <stdlib.h> /* for abort */
#include <string.h> /* for memcpy */
#include <strings.h> /* for bzero */
#include <unistd.h> /* for read */
/*
 * Generator state passed to every crandom_* function declared below.
 * The struct is aligned to 16 bytes.
 * NOTE(review): the field descriptions below are inferred from names only;
 * the implementation is not visible here -- confirm before relying on them.
 */
struct crandom_state_t {
/* presumably the current 32-byte seed/key */
unsigned char seed[32];
/* presumably buffered output not yet handed to the caller */
unsigned char buffer[96];
uint64_t ctr;
uint64_t magic;
/* presumably the number of valid bytes in buffer */
unsigned int fill;
int reseed_countdown;
/* mirror the reseed_interval / reseeds_mandatory arguments of crandom_init_from_file */
int reseed_interval;
int reseeds_mandatory;
/* presumably the descriptor of the file opened by crandom_init_from_file */
int randomfd;
} __attribute__((aligned(16))) ;
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Initialize state using the file named by filename.
 * The return value must be checked (warn_unused_result).
 * NOTE(review): presumably returns 0 on success and an errno-style code on
 * failure, and reseed_interval / reseeds_mandatory control periodic
 * reseeding from that file -- confirm against the implementation.
 */
int
crandom_init_from_file(
struct crandom_state_t *state,
const char *filename,
int reseed_interval,
int reseeds_mandatory
) __attribute__((warn_unused_result));
/*
 * Initialize state from a caller-supplied 32-byte seed.
 * NOTE(review): presumably no file is involved and no reseeding takes place
 * in this mode -- confirm against the implementation.
 */
void
crandom_init_from_buffer(
struct crandom_state_t *state,
const char initial_seed[32]
);
/*
 * Produce length bytes into output using state.
 * NOTE(review): presumably returns 0 on success and non-zero on failure
 * (e.g. a failed reseed) -- confirm against the implementation.
 */
/* TODO: add a warn_unused_result attribute so callers must check the return value? */
int
crandom_generate(
struct crandom_state_t *state,
unsigned char *output,
unsigned long long length
);
/*
 * Tear down state once it is no longer needed.
 * NOTE(review): presumably wipes the secret material and closes any open
 * file descriptor -- confirm against the implementation.
 */
void
crandom_destroy(
struct crandom_state_t *state
);
#ifdef __cplusplus
}; /* extern "C" */
#endif
#endif /* __GOLDI_CRANDOM_H__ */
This diff is collapsed.
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
/* This file was generated with the assistance of a tool written in SAGE. */
#ifndef __CC_INCLUDED_P448_EDWARDS_H__
#define __CC_INCLUDED_P448_EDWARDS_H__
#include "p448.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * A point on an Edwards curve in affine form: just the pair (x, y).
 */
struct affine_t {
    struct p448_t x;
    struct p448_t y;
};
/*
 * A point on a twisted Edwards curve in affine form: just the pair (x, y).
 */
struct tw_affine_t {
    struct p448_t x;
    struct p448_t y;
};
/*
 * Working buffer for the Montgomery functions declared in this header
 * (p448_montgomery_step, p448_montgomery_serialize).
 */
struct montgomery_t {
    struct p448_t z0;
    struct p448_t xd;
    struct p448_t zd;
    struct p448_t xa;
    struct p448_t za;
};
/*
 * "Extensible" coordinates for Edwards curves, meant for accumulators.
 *
 * The point represented is (x/z, y/z).  The two extra coordinates t and u
 * satisfy xy = tuz, so Extended form can be recovered by multiplying t
 * and u together.
 *
 * Keeping t and u separate means that multiplication can be skipped when
 * the accumulator is doubled, since doubling does not use the
 * t-coordinate.  And as long as only one of the points involved is in
 * extensible form, additions cost nothing extra.
 *
 * This is essentially a lazier variant of the lookahead trick of Hisil et
 * al.; that trick might be worth considering instead.
 */
struct extensible_t {
    struct p448_t x;
    struct p448_t y;
    struct p448_t z;
    struct p448_t t;
    struct p448_t u;
};
/*
 * "Extensible" coordinates for twisted Edwards curves, meant for
 * accumulators.  Same five members as struct extensible_t.
 */
struct tw_extensible_t {
    struct p448_t x;
    struct p448_t y;
    struct p448_t z;
    struct p448_t t;
    struct p448_t u;
};
/*
 * Niels coordinates for twisted Edwards curves.  Well suited to mixed
 * readdition, and therefore to fixed (precomputed) tables.
 */
struct tw_niels_t {
    struct p448_t a;
    struct p448_t b;
    struct p448_t c;
};
/*
* Projective niels coordinates for twisted Edwards curves.
* Good for readdition; suitable for temporary tables.
*
* Consists of a Niels-form point n plus one extra field element z.
* NOTE(review): z is presumably the projective denominator -- confirm.
*/
struct tw_pniels_t {
struct tw_niels_t n;
struct p448_t z;
};
/*
* Auto-generated copy method for struct affine_t.
* NOTE(review): presumably copies *ds into *a; the definition is not shown here.
*/
static __inline__ void
copy_affine (
struct affine_t* a,
const struct affine_t* ds
) __attribute__((unused,always_inline));
/*
* Auto-generated copy method for struct tw_affine_t.
* NOTE(review): presumably copies *ds into *a; the definition is not shown here.
*/
static __inline__ void
copy_tw_affine (
struct tw_affine_t* a,
const struct tw_affine_t* ds
) __attribute__((unused,always_inline));
/*
* Auto-generated copy method for struct montgomery_t.
* NOTE(review): presumably copies *ds into *a; the definition is not shown here.
*/
static __inline__ void
copy_montgomery (
struct montgomery_t* a,
const struct montgomery_t* ds
) __attribute__((unused,always_inline));
/*
* Auto-generated copy method for struct extensible_t.
* NOTE(review): presumably copies *ds into *a; the definition is not shown here.
*/
static __inline__ void
copy_extensible (
struct extensible_t* a,
const struct extensible_t* ds
) __attribute__((unused,always_inline));
/*
* Auto-generated copy method for struct tw_extensible_t.
* NOTE(review): presumably copies *ds into *a; the definition is not shown here.
*/
static __inline__ void
copy_tw_extensible (
struct tw_extensible_t* a,
const struct tw_extensible_t* ds
) __attribute__((unused,always_inline));
/*
* Auto-generated copy method for struct tw_niels_t.
* NOTE(review): presumably copies *ds into *a; the definition is not shown here.
*/
static __inline__ void
copy_tw_niels (
struct tw_niels_t* a,
const struct tw_niels_t* ds
) __attribute__((unused,always_inline));
/*
* Auto-generated copy method for struct tw_pniels_t.
* NOTE(review): presumably copies *ds into *a; the definition is not shown here.
*/
static __inline__ void
copy_tw_pniels (
struct tw_pniels_t* a,
const struct tw_pniels_t* ds
) __attribute__((unused,always_inline));
/*
* Returns 1/sqrt(+- x).
*
* The Legendre symbol of the result is the same as that of the
* input.
*
* If x=0, returns 0.
*
* The function itself returns void; the result is presumably written to a
* (the only non-const argument).
*/
void
p448_isr (
struct p448_t* a,
const struct p448_t* x
);
/*
* Returns 1/x.
*
* If x=0, returns 0.
*
* The function itself returns void; the result is presumably written to a
* (the only non-const argument).
*/
void
p448_inverse (
struct p448_t* a,
const struct p448_t* x
);
/*
* Add two points on a twisted Edwards curve, one in Extensible form
* and the other in half-Niels form.
*
* d is the only non-const argument, so the sum is presumably accumulated
* into d in place (d = d + e).
*/
void
p448_tw_extensible_add_niels (
struct tw_extensible_t* d,
const struct tw_niels_t* e
);
/*
* Add two points on a twisted Edwards curve, one in Extensible form
* and the other in projective Niels form.
*
* e is the only non-const argument, so the sum is presumably accumulated
* into e in place (e = e + a).
*/
void
p448_tw_extensible_add_pniels (
struct tw_extensible_t* e,
const struct tw_pniels_t* a
);
/*
* Double a point on a twisted Edwards curve, in "extensible" coordinates.
* The single argument is both input and output.
*/
void
p448_tw_extensible_double (
struct tw_extensible_t* a
);
/*
* Double a point on an Edwards curve, in "extensible" coordinates.
* The single argument is both input and output.
*/
void
p448_extensible_double (
struct extensible_t* a
);
/*
* 4-isogeny from untwisted to twisted.
* Takes the untwisted point a; the twisted image is presumably written to b.
*/
void
p448_isogeny_un_to_tw (
struct tw_extensible_t* b,
const struct extensible_t* a
);
/*
* Dual 4-isogeny from twisted to untwisted.
* Takes the twisted point a; the untwisted image is presumably written to b.
*/
void
p448_isogeny_tw_to_un (
struct extensible_t* b,
const struct tw_extensible_t* a
);
/*
 * Coordinate-system conversions.  Each function takes a const source point
 * and a non-const destination of the type named after "_to_" in its name.
 * NOTE(review): undocumented in the original; presumably each writes the
 * converted point to its first argument -- confirm against the definitions.
 */
/* Twisted affine -> twisted projective Niels. */
void
convert_tw_affine_to_tw_pniels (
struct tw_pniels_t* b,
const struct tw_affine_t* a
);
/* Twisted affine -> twisted extensible. */
void
convert_tw_affine_to_tw_extensible (
struct tw_extensible_t* b,
const struct tw_affine_t* a
);
/* Affine -> extensible. */
void
convert_affine_to_extensible (
struct extensible_t* b,
const struct affine_t* a
);
/* Twisted extensible -> twisted projective Niels. */
void
convert_tw_extensible_to_tw_pniels (
struct tw_pniels_t* b,
const struct tw_extensible_t* a
);
/* Twisted projective Niels -> twisted extensible. */
void
convert_tw_pniels_to_tw_extensible (
struct tw_extensible_t* e,
const struct tw_pniels_t* d
);
/* Twisted Niels -> twisted extensible. */
void
convert_tw_niels_to_tw_extensible (
struct tw_extensible_t* e,
const struct tw_niels_t* d
);
/*
 * NOTE(review): undocumented in the original.  Presumably advances the
 * Montgomery buffer a by one ladder step, in place -- confirm.
 */
void
p448_montgomery_step (
struct montgomery_t* a
);
/*
 * NOTE(review): undocumented in the original.  Takes the Montgomery buffer a
 * and the field element sbz as inputs; sign and ser are the only non-const
 * arguments, so they are presumably the outputs -- confirm.
 */
void
p448_montgomery_serialize (
struct p448_t* sign,
struct p448_t* ser,
const struct montgomery_t* a,
const struct p448_t* sbz
);
/*
* Serialize a point on an Edwards curve
* The serialized form would be sqrt((z-y)/(z+y)) with sign of xz
* It would be on 4y^2/(1-d) = x^3 + 2(1+d)/(1-d) * x^2 + x.
* But 4/(1-d) isn't square, so we need to twist it:
* -x is on 4y^2/(d-1) = x^3 + 2(d+1)/(d-1) * x^2 + x
*
* a is the point to serialize; the serialized field element is presumably
* written to b (the only non-const argument).
*/
void
extensible_serialize (
struct p448_t* b,
const struct extensible_t* a
);
/*
* NOTE(review): left undocumented in the original.  Judging by the name and
* signature, presumably applies the twisted-to-untwisted isogeny to a and
* writes the serialized result to b -- confirm against the definition.
*/
void
isogeny_and_serialize (
struct p448_t* b,
const struct tw_extensible_t* a
);
/*