Commit 8ebdfaee authored by Michael Hamburg

gcc-clean, though the code is slow when compiled by gcc

parent d4085b96
......@@ -11,6 +11,7 @@ else
CC = gcc
endif
LD = $(CC)
ASM ?= $(CC)
ifneq (,$(findstring x86_64,$(MACHINE)))
ARCH ?= arch_x86_64
......@@ -99,7 +100,7 @@ build/timestamp:
touch $@
build/%.o: build/%.s
$(CC) $(ASFLAGS) -c -o $@ $<
$(ASM) $(ASFLAGS) -c -o $@ $<
build/%.s: src/%.c $(HEADERS)
$(CC) $(CFLAGS) -S -c -o $@ $<
......
......@@ -187,7 +187,7 @@ p448_cond_swap (
) {
big_register_t *aa = (big_register_t*)a;
big_register_t *bb = (big_register_t*)b;
big_register_t m = doswap;
big_register_t m = br_set_to_mask(doswap);
unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
......@@ -259,7 +259,7 @@ p448_cond_neg(
struct p448_t negated;
big_register_t *aa = (big_register_t *)a;
big_register_t *nn = (big_register_t*)&negated;
big_register_t m = doNegate;
big_register_t m = br_set_to_mask(doNegate);
p448_neg(&negated, a);
p448_bias(&negated, 2);
......
......@@ -71,18 +71,18 @@ static const mask_t MASK_FAILURE = 0, MASK_SUCCESS = -1;
typedef uint32x4_t vecmask_t;
#else
/* FIXME this only works on clang */
typedef uint64_t uint64x2_t __attribute__((ext_vector_type(2)));
typedef int64_t int64x2_t __attribute__((ext_vector_type(2)));
typedef uint64_t uint64x4_t __attribute__((ext_vector_type(4)));
typedef int64_t int64x4_t __attribute__((ext_vector_type(4)));
typedef uint32_t uint32x4_t __attribute__((ext_vector_type(4)));
typedef int32_t int32x4_t __attribute__((ext_vector_type(4)));
typedef uint32_t uint32x2_t __attribute__((ext_vector_type(2)));
typedef int32_t int32x2_t __attribute__((ext_vector_type(2)));
typedef uint32_t uint32x8_t __attribute__((ext_vector_type(8)));
typedef int32_t int32x8_t __attribute__((ext_vector_type(8)));
typedef uint64_t uint64x2_t __attribute__((vector_size(16)));
typedef int64_t int64x2_t __attribute__((vector_size(16)));
typedef uint64_t uint64x4_t __attribute__((vector_size(32)));
typedef int64_t int64x4_t __attribute__((vector_size(32)));
typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
typedef int32_t int32x2_t __attribute__((vector_size(8)));
typedef uint32_t uint32x4_t __attribute__((vector_size(16)));
typedef int32_t int32x4_t __attribute__((vector_size(16)));
typedef uint32_t uint32x8_t __attribute__((vector_size(32)));
typedef int32_t int32x8_t __attribute__((vector_size(32)));
/* TODO: vector width for procs like ARM; gcc support */
typedef word_t vecmask_t __attribute__((ext_vector_type(4)));
typedef word_t vecmask_t __attribute__((vector_size(32)));
#endif
#if __AVX2__
......@@ -111,14 +111,15 @@ br_set_to_mask(mask_t x) {
#else
static __inline__ big_register_t
br_set_to_mask(mask_t x) {
return (big_register_t)x;
big_register_t out = {x,x,x,x,x,x,x,x};
return out;
}
#endif
#if __AVX2__ || __SSE2__
static __inline__ big_register_t
br_is_zero(big_register_t x) {
return (big_register_t)(x == (big_register_t)0);
return (big_register_t)(x == br_set_to_mask(0));
}
#elif __ARM_NEON__
static __inline__ big_register_t
......
......@@ -163,9 +163,9 @@ sha512_final (
sha512_process_block(ctx);
fill = 0;
}
memset(ctx->block + fill, 0, 112-fill);
*((uint64_t *)&ctx->block[112]) = 0;
*((uint64_t *)&ctx->block[120]) = htobe64((ctx->nbytes * 8));
memset(ctx->block + fill, 0, 120-fill);
uint64_t size = htobe64((ctx->nbytes * 8));
memcpy(&ctx->block[120], &size, sizeof(size));
sha512_process_block(ctx);
for (i=0; i<8; i++) {
ctx->chain[i] = htobe64(ctx->chain[i]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment