Commit d4085b96 authored by Michael Hamburg's avatar Michael Hamburg

Internal changes which break compatibility with previous versions

(you knew this would happen).

Added ARM NEON support.

Added support for precomputation on public keys, which speeds up
later signatures and ECDH calls.  See history.txt or the doc for
details.

Reworked internals so that private keys can be derived from any
32-byte secret random value.  This also means that secret keys
can be "compressed" for cold storage.

Added more tests.  Running the tests now requires GMP, though
Goldilocks itself does not.

Linking now uses visibility instead of exported.sym.
parent d7f64fd8
May 3, 2014:
Minor changes to internal routines mean that this version is not
compatible with the previous one.
Added ARM NEON code.
Added the ability to precompute multiples of a partner's public key. This
takes slightly longer than a signature verification, but reduces future
verifications with the precomputed key by ~63% and ECDH by ~70%.
goldilocks_precompute_public_key
goldilocks_destroy_precomputed_public_key
goldilocks_verify_precomputed
goldilocks_shared_secret_precomputed
The precomputation feature is protected by a macro
GOLDI_IMPLEMENT_PRECOMPUTED_KEYS
which can be #defined to 0 to compile these functions out. Unlike most
of Goldilocks' functions, goldilocks_precompute_public_key uses malloc()
(and goldilocks_destroy_precomputed_public_key uses free()).
Changed private keys to be derived from just the symmetric part. This
means that you can compress them to 32 bytes for cold storage, or derive
keypairs from crypto secrets from other systems.
goldilocks_derive_private_key
goldilocks_underive_private_key
goldilocks_private_to_public
Fixed a number of bugs related to vector alignment on Sandy Bridge, which
has AVX but uses SSE2 alignment (because it doesn't have AVX2). Maybe I
should just switch it to use AVX2 alignment?
Beginning to factor out curve-specific magic, so as to build other curves
with the Goldilocks framework. That would enable fair tests against eg
E-521, Ed25519 etc. Still would be a lot of work.
More thorough testing of arithmetic. Now uses GMP for testing framework,
but not in the actual library.
Added some high-level tests for the whole library, including some (bs)
negative testing. Obviously, effective negative testing is a very difficult
proposition in a crypto library.
March 29, 2014:
Added a test directory with various tests. Currently testing SHA512 Monte
Carlo, compatibility of the different scalarmul functions, and some
......
# Copyright (c) 2014 Cryptography Research, Inc.
# Released under the MIT License. See LICENSE.txt for license information.
UNAME := $(shell uname)
MACHINE := $(shell uname -m)
ifeq ($(UNAME),Darwin)
CC = clang
LD = clang
else
CC = gcc
endif
LD = $(CC)
ifneq (,$(findstring x86_64,$(MACHINE)))
ARCH ?= arch_x86_64
else
# no i386 port yet
ARCH ?= arch_arm_32
endif
ARCH = arch_x86_64
WARNFLAGS = -pedantic -Wall -Wextra -Werror -Wunreachable-code \
-Wgcc-compat -Wmissing-declarations
-Wmissing-declarations -Wunused-function $(EXWARN)
INCFLAGS = -Isrc/include -Iinclude -Isrc/$(ARCH)
LANGFLAGS = -std=c99
GENFLAGS = -ffunction-sections -fdata-sections -fomit-frame-pointer -fPIC
GENFLAGS = -ffunction-sections -fdata-sections -fvisibility=hidden -fomit-frame-pointer -fPIC
OFLAGS = -O3
#XFLAGS = -DN_TESTS_BASE=1000
ARCHFLAGS = -mssse3 -maes -mavx2 -DMUST_HAVE_AVX2 -mbmi2
#ARCHFLAGS = -m32 -mcpu=cortex-a9 -mfpu=vfpv3-d16
CFLAGS = $(LANGFLAGS) $(WARNFLAGS) $(INCFLAGS) $(OFLAGS) $(ARCHFLAGS) $(GENFLAGS) $(XFLAGS)
LDFLAGS = $(ARCHFLAGS)
ifneq (,$(findstring arm,$(MACHINE)))
ifneq (,$(findstring neon,$(ARCH)))
ARCHFLAGS += -mfpu=neon
else
ARCHFLAGS += -mfpu=vfpv3-d16
endif
ARCHFLAGS += -mcpu=cortex-a9 # FIXME
GENFLAGS = -DN_TESTS_BASE=1000 # sooooo sloooooow
else
ARCHFLAGS += -mssse3 -maes -mavx -mavx2 -DMUST_HAVE_AVX2 -mbmi2 #TODO
endif
ifeq ($(CC),clang)
WARNFLAGS += -Wgcc-compat
endif
ifeq (,$(findstring 64,$(ARCH))$(findstring gcc,$(CC)))
# ARCHFLAGS += -m32
ARCHFLAGS += -DGOLDI_FORCE_32_BIT=1
endif
CFLAGS = $(LANGFLAGS) $(WARNFLAGS) $(INCFLAGS) $(OFLAGS) $(ARCHFLAGS) $(GENFLAGS) $(XCFLAGS)
LDFLAGS = $(ARCHFLAGS) $(XLDFLAGS)
ASFLAGS = $(ARCHFLAGS)
.PHONY: clean all test bench todo doc lib
......@@ -29,7 +63,7 @@ LIBCOMPONENTS= build/goldilocks.o build/barrett_field.o build/crandom.o \
build/p448.o build/ec_point.o build/scalarmul.o build/sha512.o
TESTCOMPONENTS=build/test.o build/test_scalarmul.o build/test_sha512.o \
build/test_pointops.o
build/test_pointops.o build/test_arithmetic.o build/test_goldilocks.o
BENCHCOMPONENTS=build/bench.o
......@@ -45,15 +79,20 @@ build/bench: $(LIBCOMPONENTS) $(BENCHCOMPONENTS)
$(LD) $(LDFLAGS) -o $@ $^
build/test: $(LIBCOMPONENTS) $(TESTCOMPONENTS)
$(LD) $(LDFLAGS) -o $@ $^
$(LD) $(LDFLAGS) -o $@ $^ -lgmp
lib: build/goldilocks.so
build/goldilocks.so: $(LIBCOMPONENTS)
rm -f $@
ifeq ($(UNAME),Darwin)
libtool -macosx_version_min 10.6 -dynamic -dead_strip -lc -x -o $@ \
-exported_symbols_list src/exported.sym \
$(LIBCOMPONENTS)
else
$(LD) -shared -Wl,-soname,goldilocks.so.1 -Wl,--gc-sections -o $@ $(LIBCOMPONENTS)
strip --discard-all $@
ln -sf $@ build/goldilocks.so.1
endif
build/timestamp:
mkdir -p build
......@@ -80,9 +119,9 @@ doc: Doxyfile doc/timestamp src/*.c src/include/*.h src/$(ARCH)/*.c src/$(ARCH)/
todo::
@(find * -name '*.h'; find * -name '*.c') | xargs egrep --color=auto -w \
'HACK|TODO|FIXME|BUG|XXX|PERF|FUTURE|REMOVE'
'HACK|TODO|FIXME|BUG|XXX|PERF|FUTURE|REMOVE|MAGIC'
@echo '============================='
@(for i in FIXME BUG XXX TODO HACK PERF FUTURE REMOVE; do \
@(for i in FIXME BUG XXX TODO HACK PERF FUTURE REMOVE MAGIC; do \
(find * -name '*.h'; find * -name '*.c') | xargs egrep -w $$i > /dev/null || continue; \
/bin/echo -n $$i' ' | head -c 10; \
(find * -name '*.h'; find * -name '*.c') | xargs egrep -w $$i| wc -l; \
......@@ -90,7 +129,7 @@ todo::
@echo '============================='
@echo -n 'Total '
@(find * -name '*.h'; find * -name '*.c') | xargs egrep -w \
'HACK|TODO|FIXME|BUG|XXX|PERF|FUTURE|REMOVE' | wc -l
'HACK|TODO|FIXME|BUG|XXX|PERF|FUTURE|REMOVE|MAGIC' | wc -l
bench: build/bench
./$<
......
......@@ -12,13 +12,42 @@
#include <stdint.h>
#ifndef GOLDI_IMPLEMENT_PRECOMPUTED_KEYS
/** If nonzero, implement precomputation for verify and ECDH. */
#define GOLDI_IMPLEMENT_PRECOMPUTED_KEYS 1
#endif
/** The size of the Goldilocks field, in bits. */
#define GOLDI_FIELD_BITS 448
/** The size of the Goldilocks scalars, in bits. */
#define GOLDI_SCALAR_BITS 446
/** The size of the Goldilocks field, in bytes. */
#define GOLDI_FIELD_BYTES (GOLDI_FIELD_BITS/8)
/** The size of a Goldilocks public key, in bytes. */
#define GOLDI_PUBLIC_KEY_BYTES GOLDI_FIELD_BYTES
/** The extra bytes in a Goldilocks private key for the symmetric key. */
#define GOLDI_SYMKEY_BYTES 32
/** The size of a shared secret. */
#define GOLDI_SHARED_SECRET_BYTES 64
/** The size of a Goldilocks private key, in bytes. */
#define GOLDI_PRIVATE_KEY_BYTES (2*GOLDI_FIELD_BYTES + GOLDI_SYMKEY_BYTES)
/** The size of a Goldilocks signature, in bytes. */
#define GOLDI_SIGNATURE_BYTES (2*GOLDI_FIELD_BYTES)
/**
* @brief Serialized form of a Goldilocks public key.
*
* @warning This isn't even my final form!
*/
struct goldilocks_public_key_t {
uint8_t opaque[56]; /**< Serialized data. */
uint8_t opaque[GOLDI_PUBLIC_KEY_BYTES]; /**< Serialized data. */
};
/**
......@@ -30,7 +59,7 @@ struct goldilocks_public_key_t {
* @warning This isn't even my final form!
*/
struct goldilocks_private_key_t {
uint8_t opaque[144]; /**< Serialized data. */
uint8_t opaque[GOLDI_PRIVATE_KEY_BYTES]; /**< Serialized data. */
};
#ifdef __cplusplus
......@@ -72,7 +101,7 @@ static const int GOLDI_EALREADYINIT = 44805;
*/
int
goldilocks_init ()
__attribute__((warn_unused_result));
__attribute__((warn_unused_result,visibility ("default")));
/**
......@@ -90,7 +119,40 @@ int
goldilocks_keygen (
struct goldilocks_private_key_t *privkey,
struct goldilocks_public_key_t *pubkey
) __attribute__((warn_unused_result,nonnull(1,2)));
) __attribute__((warn_unused_result,nonnull(1,2),visibility ("default")));
/**
* @brief Derive a key from its compressed form.
* @param [out] privkey The derived private key.
* @param [in] proto The compressed or proto-key, which must be 32 random bytes.
*
* @warning This isn't even my final form!
*
* @retval GOLDI_EOK Success.
* @retval GOLDI_EUNINIT You must call goldilocks_init() first.
*/
int
goldilocks_derive_private_key (
struct goldilocks_private_key_t *privkey,
const unsigned char proto[GOLDI_SYMKEY_BYTES]
) __attribute__((nonnull(1,2),visibility ("default")));
/**
* @brief Compress a private key (by copying out the proto-key)
* @param [out] proto The proto-key; the buffer is GOLDI_SYMKEY_BYTES long.
* @param [in] privkey The private key.
*
* This function is declared void, so it reports no status code to the
* caller. Neither pointer may be NULL (see the nonnull attribute).
*
* @warning This isn't even my final form!
* @todo test.
*/
void
goldilocks_underive_private_key (
unsigned char proto[GOLDI_SYMKEY_BYTES],
const struct goldilocks_private_key_t *privkey
) __attribute__((nonnull(1,2),visibility ("default")));
/**
* @brief Extract the public key from a private key.
......@@ -107,7 +169,7 @@ int
goldilocks_private_to_public (
struct goldilocks_public_key_t *pubkey,
const struct goldilocks_private_key_t *privkey
) __attribute__((nonnull(1,2)));
) __attribute__((nonnull(1,2),visibility ("default")));
/**
* @brief Generate a Diffie-Hellman shared secret in constant time.
......@@ -140,10 +202,10 @@ goldilocks_private_to_public (
*/
int
goldilocks_shared_secret (
uint8_t shared[64],
uint8_t shared[GOLDI_SHARED_SECRET_BYTES],
const struct goldilocks_private_key_t *my_privkey,
const struct goldilocks_public_key_t *your_pubkey
) __attribute__((warn_unused_result,nonnull(1,2,3)));
) __attribute__((warn_unused_result,nonnull(1,2,3),visibility ("default")));
/**
* @brief Sign a message.
......@@ -166,11 +228,11 @@ goldilocks_shared_secret (
*/
int
goldilocks_sign (
uint8_t signature_out[56*2],
uint8_t signature_out[GOLDI_SIGNATURE_BYTES],
const uint8_t *message,
uint64_t message_len,
const struct goldilocks_private_key_t *privkey
) __attribute__((nonnull(1,2,4)));
) __attribute__((nonnull(1,2,4),visibility ("default")));
/**
* @brief Verify a signature.
......@@ -197,11 +259,108 @@ goldilocks_sign (
*/
int
goldilocks_verify (
const uint8_t signature[56*2],
const uint8_t signature[GOLDI_SIGNATURE_BYTES],
const uint8_t *message,
uint64_t message_len,
const struct goldilocks_public_key_t *pubkey
) __attribute__((warn_unused_result,nonnull(1,2,4)));
) __attribute__((warn_unused_result,nonnull(1,2,4),visibility ("default")));
#if GOLDI_IMPLEMENT_PRECOMPUTED_KEYS
/** A public key which has been expanded by precomputation for higher speed. */
struct goldilocks_precomputed_public_key_t;
/**
* @brief Expand a public key by precomputation.
*
* @todo Give actual error returns, instead of ambiguous NULL.
*
* @warning This isn't even my final form!
*
* @param [in] pub The public key.
* @retval NULL We ran out of memory, or the public key could not be decoded.
*/
struct goldilocks_precomputed_public_key_t *
goldilocks_precompute_public_key (
const struct goldilocks_public_key_t *pub
) __attribute__((warn_unused_result,nonnull(1),visibility ("default")));
/**
* @brief Overwrite an expanded public key with zeros, then destroy it.
*
* If the input is NULL, this function does nothing.
*
* @param [in] precom The public key.
*/
void
goldilocks_destroy_precomputed_public_key (
struct goldilocks_precomputed_public_key_t *precom
) __attribute__((visibility ("default")));
/**
* @brief Verify a signature.
*
* This function is fairly strict. It will correctly detect when
* the signature has the wrong cofactor component, or when the sig
* values aren't less than p or q.
*
* @warning This isn't even my final form!
*
* @param [in] signature The signature.
* @param [in] message The message to be verified.
* @param [in] message_len The length of the message to be verified.
* @param [in] pubkey The signer's public key, expanded by precomputation.
*
* @retval GOLDI_EOK Success.
* @retval GOLDI_EINVAL The public key or signature is corrupt.
* @retval GOLDI_EUNINIT You must call goldilocks_init() first.
*/
int
goldilocks_verify_precomputed (
const uint8_t signature[GOLDI_SIGNATURE_BYTES],
const uint8_t *message,
uint64_t message_len,
const struct goldilocks_precomputed_public_key_t *pubkey
) __attribute__((warn_unused_result,nonnull(1,2,4),visibility ("default")));
/**
* @brief Generate a Diffie-Hellman shared secret in constant time.
* Uses a precomputation on the other party's public key for efficiency.
*
* This function uses some compile-time flags whose merit remains to
* be decided.
*
* If the flag EXPERIMENT_ECDH_OBLITERATE_CT is set, prepend 40 bytes
* of zeros to the secret before hashing. In the case that the other
* party's key is detectably corrupt, instead the symmetric part
* of the secret key is used to produce a pseudorandom value.
*
* If EXPERIMENT_ECDH_STIR_IN_PUBKEYS is set, the sum and product of
* the two parties' public keys is prepended to the hash.
*
* In the current version, this function can safely be run even without
* goldilocks_init(). But this property is not guaranteed for future
* versions, so call it anyway.
*
* @warning This isn't even my final form!
*
* @param [out] shared The shared secret established with the other party.
* @param [in] my_privkey My private key.
* @param [in] your_pubkey The other party's precomputed public key.
*
* @retval GOLDI_EOK Success.
* @retval GOLDI_ECORRUPT My key is corrupt.
* @retval GOLDI_EINVAL The other party's key is corrupt.
* @retval GOLDI_EUNINIT You must call goldilocks_init() first.
*/
int
goldilocks_shared_secret_precomputed (
uint8_t shared[GOLDI_SHARED_SECRET_BYTES],
const struct goldilocks_private_key_t *my_privkey,
const struct goldilocks_precomputed_public_key_t *your_pubkey
) __attribute__((warn_unused_result,nonnull(1,2,3),visibility ("default")));
#endif /* GOLDI_IMPLEMENT_PRECOMPUTED_KEYS */
#ifdef __cplusplus
}; /* extern "C" */
......
......@@ -28,6 +28,8 @@ smlal (
const uint32_t a,
const uint32_t b
) {
#ifdef __ARMEL__
uint32_t lo = *acc, hi = (*acc)>>32;
__asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
......@@ -35,6 +37,9 @@ smlal (
: [a]"r"(a), [b]"r"(b));
*acc = lo + (((uint64_t)hi)<<32);
#else
*acc += (int64_t)(int32_t)a * (int64_t)(int32_t)b;
#endif
}
static inline void __attribute__((gnu_inline,always_inline))
......@@ -43,6 +48,7 @@ smlal2 (
const uint32_t a,
const uint32_t b
) {
#ifdef __ARMEL__
uint32_t lo = *acc, hi = (*acc)>>32;
__asm__ __volatile__ ("smlal %[lo], %[hi], %[a], %[b]"
......@@ -50,6 +56,9 @@ smlal2 (
: [a]"r"(a), [b]"r"(2*b));
*acc = lo + (((uint64_t)hi)<<32);
#else
*acc += (int64_t)(int32_t)a * (int64_t)(int32_t)(b * 2);
#endif
}
static inline void __attribute__((gnu_inline,always_inline))
......@@ -58,6 +67,7 @@ smull (
const uint32_t a,
const uint32_t b
) {
#ifdef __ARMEL__
uint32_t lo, hi;
__asm__ __volatile__ ("smull %[lo], %[hi], %[a], %[b]"
......@@ -65,6 +75,9 @@ smull (
: [a]"r"(a), [b]"r"(b));
*acc = lo + (((uint64_t)hi)<<32);
#else
*acc = (int64_t)(int32_t)a * (int64_t)(int32_t)b;
#endif
}
static inline void __attribute__((gnu_inline,always_inline))
......@@ -73,6 +86,7 @@ smull2 (
const uint32_t a,
const uint32_t b
) {
#ifdef __ARMEL__
uint32_t lo, hi;
__asm__ /*__volatile__*/ ("smull %[lo], %[hi], %[a], %[b]"
......@@ -80,6 +94,9 @@ smull2 (
: [a]"r"(a), [b]"r"(2*b));
*acc = lo + (((uint64_t)hi)<<32);
#else
*acc = (int64_t)(int32_t)a * (int64_t)(int32_t)(b * 2);
#endif
}
void
......@@ -760,13 +777,13 @@ p448_mulw (
const p448_t *as,
uint64_t b
) {
const uint32_t bhi = b>>28, blo = b & (1<<28)-1;
uint32_t mask = (1ull<<28)-1;
const uint32_t bhi = b>>28, blo = b & mask;
const uint32_t *a = as->limb;
uint32_t *c = cs->limb;
uint64_t accum0, accum8;
uint32_t mask = (1ull<<28)-1;
int i;
......@@ -957,7 +974,7 @@ p448_deserialize (
for (j=0; j<7; j++) {
out |= ((uint64_t)serial[7*i+j])<<(8*j);
}
x->limb[2*i] = out & (1ull<<28)-1;
x->limb[2*i] = out & ((1ull<<28)-1);
x->limb[2*i+1] = out >> 28;
}
......
......@@ -173,7 +173,7 @@ p448_set_ui (
uint64_t x
) {
int i;
out->limb[0] = x & (1<<28)-1;
out->limb[0] = x & ((1<<28)-1);
out->limb[1] = x>>28;
for (i=2; i<16; i++) {
out->limb[i] = 0;
......@@ -188,7 +188,11 @@ p448_cond_swap (
) {
big_register_t *aa = (big_register_t*)a;
big_register_t *bb = (big_register_t*)b;
#if __ARM_NEON__
big_register_t m = vdupq_n_u32(doswap);
#else
big_register_t m = doswap;
#endif
unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
......@@ -260,8 +264,12 @@ p448_cond_neg(
struct p448_t negated;
big_register_t *aa = (big_register_t *)a;
big_register_t *nn = (big_register_t*)&negated;
#if __ARM_NEON__
big_register_t m = vdupq_n_u32(doNegate);
#else
big_register_t m = doNegate;
#endif
p448_neg(&negated, a);
p448_bias(&negated, 2);
......
This diff is collapsed.
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
/**
* @file "neon_emulation.h"
* @brief NEON intrinsic emulation using clang's vector extensions.
*
* This lets you test and debug NEON code on x86.
*/
#ifndef __NEON_EMULATION_H__
#define __NEON_EMULATION_H__ 1
#include "word.h"
#include <stdint.h>
#include <assert.h>
/**
 * @brief Emulates NEON vaddw_s32: widen each 32-bit lane of b to 64 bits
 * and add it to the matching lane of a.
 *
 * @param a 64-bit accumulator lanes.
 * @param b 32-bit lanes to widen and add.
 * @return The lane-wise sum.
 */
static __inline__ int64x2_t vaddw_s32 (int64x2_t a, int32x2_t b) {
    int64x2_t widened = { b.x, b.y };
    return a + widened;
}
/**
 * @brief Add the low lane of x into its high lane.
 *
 * @param x Input vector.
 * @return A vector whose low lane is x.x and whose high lane is x.x + x.y.
 */
static __inline__ int64x2_t __attribute__((gnu_inline,always_inline))
xx_vaddup_s64(int64x2_t x) {
    int64x2_t folded = { x.x, x.y + x.x };
    return folded;
}
/** Pair of 32-bit two-lane vectors, as returned by vtrn_s32. */
typedef struct { int32x2_t val[2]; } int32x2x2_t;

/**
 * @brief Emulates NEON vtrn_s32: transpose the 2x2 matrix formed by x and y.
 *
 * @param x First row.
 * @param y Second row.
 * @return val[0] holds the low lanes {x.x, y.x}; val[1] holds the high
 *         lanes {x.y, y.y}.
 */
static inline int32x2x2_t vtrn_s32 (int32x2_t x, int32x2_t y) {
    int32x2_t low_lanes = { x.x, y.x };
    int32x2_t high_lanes = { x.y, y.y };
    int32x2x2_t transposed;
    transposed.val[0] = low_lanes;
    transposed.val[1] = high_lanes;
    return transposed;
}
/**
 * @brief In-place 64-bit transpose: exchange the high lane of *x with the
 * low lane of *y.
 *
 * @param x Vector whose high lane is replaced by the low lane of *y.
 * @param y Vector whose low lane is replaced by the old high lane of *x.
 */
static __inline__ void __attribute__((gnu_inline,always_inline))
xx_vtrnq_s64 (
    int64x2_t *x,
    int64x2_t *y
) {
    /* Read both lanes before writing either, then store in the same order. */
    const int64_t x_high = (*x).y;
    const int64_t y_low = (*y).x;
    (*x).y = y_low;
    (*y).x = x_high;
}
int64x2_t vsraq_n_s64 (
int64x2_t a,
int64x2_t v,
const int x
) {
return a + (v >> x);
}
int64x2_t vshrq_n_s64 (
int64x2_t v,
const int x
) {
return v >> x;
}
/**
 * @brief Emulates NEON vgetq_lane_s64: extract one 64-bit lane.
 *
 * @param acc  Source vector.
 * @param lane Zero selects the low lane; any nonzero value selects the high lane.
 * @return The selected lane.
 */
static inline int64_t vgetq_lane_s64 (
    int64x2_t acc,
    const int lane
) {
    if (lane) {
        return acc.y;
    }
    return acc.x;
}
/**
 * @brief Emulates NEON vget_lane_s32: extract one 32-bit lane.
 *
 * @param acc  Source vector.
 * @param lane Zero selects the low lane; any nonzero value selects the high lane.
 * @return The selected lane.
 */
static inline int32_t vget_lane_s32 (
    int32x2_t acc,
    const int lane
) {
    if (lane) {
        return acc.y;
    }
    return acc.x;
}
/**
 * @brief Emulates NEON vmlal_lane_s32: widening multiply-accumulate by one
 * lane of y.
 *
 * @param acc  64-bit accumulator lanes.
 * @param x    32-bit multiplicands, widened to 64 bits before multiplying.
 * @param y    Vector supplying the scalar multiplier.
 * @param lane Zero picks y.x as the multiplier; nonzero picks y.y.
 * @return acc + x * y[lane], lane by lane.
 */
static inline int64x2_t vmlal_lane_s32 (
    int64x2_t acc,
    int32x2_t x,
    int32x2_t y,
    int lane
) {
    const int64_t multiplier = lane ? y.y : y.x;
    int64x2_t wide_x = { x.x, x.y };
    int64x2_t splat = { multiplier, multiplier };
    return acc + wide_x * splat;
}
/**
 * @brief Emulates NEON vmlsl_lane_s32: widening multiply-subtract by one
 * lane of y.
 *
 * @param acc  64-bit accumulator lanes.
 * @param x    32-bit multiplicands, widened to 64 bits before multiplying.
 * @param y    Vector supplying the scalar multiplier.
 * @param lane Zero picks y.x as the multiplier; nonzero picks y.y.
 * @return acc - x * y[lane], lane by lane.
 */
static inline int64x2_t vmlsl_lane_s32 (
    int64x2_t acc,
    int32x2_t x,
    int32x2_t y,
    int lane
) {
    const int64_t multiplier = lane ? y.y : y.x;
    int64x2_t wide_x = { x.x, x.y };
    int64x2_t splat = { multiplier, multiplier };
    return acc - wide_x * splat;
}
/**
 * @brief Emulates NEON vqdmlsl_lane_s32: doubling widening multiply-subtract
 * by one lane of y.
 *
 * This emulation does not saturate. Instead it asserts that the top two bits
 * of each product agree, i.e. that doubling the product cannot overflow.
 *
 * @param acc  64-bit accumulator lanes.
 * @param x    32-bit multiplicands, widened to 64 bits before multiplying.
 * @param y    Vector supplying the scalar multiplier.
 * @param lane Zero picks y.x as the multiplier; nonzero picks y.y.
 * @return acc - 2 * x * y[lane], lane by lane.
 */
static inline int64x2_t vqdmlsl_lane_s32 (
    int64x2_t acc,
    int32x2_t x,
    int32x2_t y,
    int lane
) {
    const int64_t multiplier = lane ? y.y : y.x;
    int64x2_t wide_x = { x.x, x.y };
    int64x2_t splat = { multiplier, multiplier };
    int64x2_t tmp = wide_x * splat;
    assert(tmp.x >> 63 == tmp.x>>62);
    assert(tmp.y >> 63 == tmp.y>>62);
    return acc - 2*tmp;
}
/**
 * @brief Emulates NEON vqdmlal_lane_s32: doubling widening
 * multiply-accumulate by one lane of y.
 *
 * This emulation does not saturate. Instead it asserts that the top two bits
 * of each product agree, i.e. that doubling the product cannot overflow.
 *
 * @param acc  64-bit accumulator lanes.
 * @param x    32-bit multiplicands, widened to 64 bits before multiplying.
 * @param y    Vector supplying the scalar multiplier.
 * @param lane Zero picks y.x as the multiplier; nonzero picks y.y.
 * @return acc + 2 * x * y[lane], lane by lane.
 */
static inline int64x2_t vqdmlal_lane_s32 (
    int64x2_t acc,
    int32x2_t x,
    int32x2_t y,
    int lane
) {
    const int64_t multiplier = lane ? y.y : y.x;
    int64x2_t wide_x = { x.x, x.y };
    int64x2_t splat = { multiplier, multiplier };
    int64x2_t tmp = wide_x * splat;
    assert(tmp.x >> 63 == tmp.x>>62);
    assert(tmp.y >> 63 == tmp.y>>62);
    return acc + 2*tmp;
}
/**
 * @brief Emulates NEON vqdmull_lane_s32: doubling widening multiply by one
 * lane of y.
 *
 * This emulation does not saturate. Instead it asserts that the top two bits
 * of each product agree, i.e. that doubling the product cannot overflow.
 *
 * @param x    32-bit multiplicands, widened to 64 bits before multiplying.
 * @param y    Vector supplying the scalar multiplier.
 * @param lane Zero picks y.x as the multiplier; nonzero picks y.y.
 * @return 2 * x * y[lane], lane by lane.
 */
static inline int64x2_t vqdmull_lane_s32 (
    int32x2_t x,
    int32x2_t y,
    int lane
) {
    const int64_t multiplier = lane ? y.y : y.x;
    int64x2_t wide_x = { x.x, x.y };
    int64x2_t splat = { multiplier, multiplier };
    int64x2_t tmp = wide_x * splat;
    assert(tmp.x >> 63 == tmp.x>>62);
    assert(tmp.y >> 63 == tmp.y>>62);
    return 2*tmp;
}
/**
 * @brief Emulates NEON vmovn_s64: narrow each 64-bit lane to 32 bits.
 *
 * @param x Vector of 64-bit lanes.
 * @return A vector whose lanes are x.x and x.y converted to the 32-bit lane type.
 */
static inline int32x2_t vmovn_s64(
    int64x2_t x
) {
    int32x2_t narrowed;
    narrowed.x = x.x;
    narrowed.y = x.y;
    return narrowed;
}
/**
 * @brief Emulates NEON vmull_lane_s32: widening multiply by one lane of y.
 *
 * @param x    32-bit multiplicands, widened to 64 bits before multiplying.
 * @param y    Vector supplying the scalar multiplier.
 * @param lane Zero picks y.x as the multiplier; nonzero picks y.y.
 * @return x * y[lane], lane by lane, as 64-bit values.
 */
static inline int64x2_t vmull_lane_s32 (
    int32x2_t x,
    int32x2_t y,
    int lane
) {
    const int64_t multiplier = lane ? y.y : y.x;
    int64x2_t wide_x = { x.x, x.y };
    int64x2_t splat = { multiplier, multiplier };
    return wide_x * splat;
}
#endif /* __NEON_EMULATION_H__ */
This diff is collapsed.
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
#ifndef __P448_H__
#define __P448_H__ 1
#include "word.h"
#include <stdint.h>
#include <assert.h>
/**
* @brief A p448 field element, stored as sixteen 32-bit limbs.
*
* The structure is aligned to 32 bytes.
* NOTE(review): presumably each limb carries 28 bits (16 * 28 = 448) --
* confirm against the arithmetic code.
*/
typedef struct p448_t {
uint32_t limb[16];
} __attribute__((aligned(32))) p448_t;
#ifdef __cplusplus
extern "C" {
#endif
/*
* Forward declarations of the inline field helpers. Each carries the
* `unused` attribute (no warning when a translation unit does not call it)
* and `always_inline`. The definitions are not visible from here, so the
* one-line summaries below are inferred from the names and signatures --
* confirm them against the definitions.
*/

/* Presumably sets *out to the field element with integer value x. */
static __inline__ void
p448_set_ui (
p448_t *out,
uint64_t x
) __attribute__((unused,always_inline));
/* Presumably exchanges *a and *b when do_swap is set, and leaves them alone otherwise. */
static __inline__ void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t do_swap
) __attribute__((unused,always_inline));
/* Presumably computes *out = *a + *b; whether the result is reduced is not visible here. */
static __inline__ void
p448_add (
p448_t *out,
const p448_t *a,
const p448_t *b
) __attribute__((unused,always_inline));
/* Presumably computes *out = *a - *b; whether the result is reduced is not visible here. */
static __inline__ void
p448_sub (
p448_t *out,
const p448_t *a,
const p448_t *b
) __attribute__((unused,always_inline));
/* Presumably computes *out = -(*a); whether the result is reduced is not visible here. */
static __inline__ void
p448_neg (
p448_t *out,
const p448_t *a
) __attribute__((unused,always_inline));