First version of Threefish512-CTR with BLAKE3-MAC & a custom shamirs secret sharing port alongside C bridges with test vectors & Compilation instructions
This commit is contained in:
332
crypto/threefish512_ctr/exts/blake3_dispatch.c
Normal file
332
crypto/threefish512_ctr/exts/blake3_dispatch.c
Normal file
@@ -0,0 +1,332 @@
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "blake3_impl.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <Windows.h>
|
||||
#endif
|
||||
|
||||
#if defined(IS_X86)
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h>
|
||||
#elif defined(__GNUC__)
|
||||
#include <immintrin.h>
|
||||
#else
|
||||
#undef IS_X86 /* Unimplemented! */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(BLAKE3_ATOMICS)
|
||||
#if defined(__has_include)
|
||||
#if __has_include(<stdatomic.h>) && !defined(_MSC_VER)
|
||||
#define BLAKE3_ATOMICS 1
|
||||
#else
|
||||
#define BLAKE3_ATOMICS 0
|
||||
#endif /* __has_include(<stdatomic.h>) && !defined(_MSC_VER) */
|
||||
#else
|
||||
#define BLAKE3_ATOMICS 0
|
||||
#endif /* defined(__has_include) */
|
||||
#endif /* BLAKE3_ATOMICS */
|
||||
|
||||
#if BLAKE3_ATOMICS
|
||||
#define ATOMIC_INT _Atomic int
|
||||
#define ATOMIC_LOAD(x) x
|
||||
#define ATOMIC_STORE(x, y) x = y
|
||||
#elif defined(_MSC_VER)
|
||||
#define ATOMIC_INT LONG
|
||||
#define ATOMIC_LOAD(x) InterlockedOr(&x, 0)
|
||||
#define ATOMIC_STORE(x, y) InterlockedExchange(&x, y)
|
||||
#else
|
||||
#define ATOMIC_INT int
|
||||
#define ATOMIC_LOAD(x) x
|
||||
#define ATOMIC_STORE(x, y) x = y
|
||||
#endif
|
||||
|
||||
#define MAYBE_UNUSED(x) (void)((x))
|
||||
|
||||
#if defined(IS_X86)
|
||||
static uint64_t xgetbv(void) {
|
||||
#if defined(_MSC_VER)
|
||||
return _xgetbv(0);
|
||||
#else
|
||||
uint32_t eax = 0, edx = 0;
|
||||
__asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
return ((uint64_t)edx << 32) | eax;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void cpuid(uint32_t out[4], uint32_t id) {
|
||||
#if defined(_MSC_VER)
|
||||
__cpuid((int *)out, id);
|
||||
#elif defined(__i386__) || defined(_M_IX86)
|
||||
__asm__ __volatile__("movl %%ebx, %1\n"
|
||||
"cpuid\n"
|
||||
"xchgl %1, %%ebx\n"
|
||||
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "a"(id));
|
||||
#else
|
||||
__asm__ __volatile__("cpuid\n"
|
||||
: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "a"(id));
|
||||
#endif
|
||||
}
|
||||
|
||||
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
|
||||
#if defined(_MSC_VER)
|
||||
__cpuidex((int *)out, id, sid);
|
||||
#elif defined(__i386__) || defined(_M_IX86)
|
||||
__asm__ __volatile__("movl %%ebx, %1\n"
|
||||
"cpuid\n"
|
||||
"xchgl %1, %%ebx\n"
|
||||
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "a"(id), "c"(sid));
|
||||
#else
|
||||
__asm__ __volatile__("cpuid\n"
|
||||
: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "a"(id), "c"(sid));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
enum cpu_feature {
|
||||
SSE2 = 1 << 0,
|
||||
SSSE3 = 1 << 1,
|
||||
SSE41 = 1 << 2,
|
||||
AVX = 1 << 3,
|
||||
AVX2 = 1 << 4,
|
||||
AVX512F = 1 << 5,
|
||||
AVX512VL = 1 << 6,
|
||||
/* ... */
|
||||
UNDEFINED = 1 << 30
|
||||
};
|
||||
|
||||
#if !defined(BLAKE3_TESTING)
|
||||
static /* Allow the variable to be controlled manually for testing */
|
||||
#endif
|
||||
ATOMIC_INT g_cpu_features = UNDEFINED;
|
||||
|
||||
#if !defined(BLAKE3_TESTING)
|
||||
static
|
||||
#endif
|
||||
enum cpu_feature
|
||||
get_cpu_features(void) {
|
||||
|
||||
/* If TSAN detects a data race here, try compiling with -DBLAKE3_ATOMICS=1 */
|
||||
enum cpu_feature features = ATOMIC_LOAD(g_cpu_features);
|
||||
if (features != UNDEFINED) {
|
||||
return features;
|
||||
} else {
|
||||
#if defined(IS_X86)
|
||||
uint32_t regs[4] = {0};
|
||||
uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3];
|
||||
(void)edx;
|
||||
features = 0;
|
||||
cpuid(regs, 0);
|
||||
const int max_id = *eax;
|
||||
cpuid(regs, 1);
|
||||
#if defined(__amd64__) || defined(_M_X64)
|
||||
features |= SSE2;
|
||||
#else
|
||||
if (*edx & (1UL << 26))
|
||||
features |= SSE2;
|
||||
#endif
|
||||
if (*ecx & (1UL << 9))
|
||||
features |= SSSE3;
|
||||
if (*ecx & (1UL << 19))
|
||||
features |= SSE41;
|
||||
|
||||
if (*ecx & (1UL << 27)) { // OSXSAVE
|
||||
const uint64_t mask = xgetbv();
|
||||
if ((mask & 6) == 6) { // SSE and AVX states
|
||||
if (*ecx & (1UL << 28))
|
||||
features |= AVX;
|
||||
if (max_id >= 7) {
|
||||
cpuidex(regs, 7, 0);
|
||||
if (*ebx & (1UL << 5))
|
||||
features |= AVX2;
|
||||
if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
|
||||
if (*ebx & (1UL << 31))
|
||||
features |= AVX512VL;
|
||||
if (*ebx & (1UL << 16))
|
||||
features |= AVX512F;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
ATOMIC_STORE(g_cpu_features, features);
|
||||
return features;
|
||||
#else
|
||||
/* How to detect NEON? */
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void blake3_compress_in_place(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags) {
|
||||
#if defined(IS_X86)
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
MAYBE_UNUSED(features);
|
||||
#if !defined(BLAKE3_NO_AVX512)
|
||||
if (features & AVX512VL) {
|
||||
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE41)
|
||||
if (features & SSE41) {
|
||||
blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE2)
|
||||
if (features & SSE2) {
|
||||
blake3_compress_in_place_sse2(cv, block, block_len, counter, flags);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
|
||||
}
|
||||
|
||||
void blake3_compress_xof(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter, uint8_t flags,
|
||||
uint8_t out[64]) {
|
||||
#if defined(IS_X86)
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
MAYBE_UNUSED(features);
|
||||
#if !defined(BLAKE3_NO_AVX512)
|
||||
if (features & AVX512VL) {
|
||||
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE41)
|
||||
if (features & SSE41) {
|
||||
blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE2)
|
||||
if (features & SSE2) {
|
||||
blake3_compress_xof_sse2(cv, block, block_len, counter, flags, out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
|
||||
}
|
||||
|
||||
|
||||
void blake3_xof_many(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter, uint8_t flags,
|
||||
uint8_t out[64], size_t outblocks) {
|
||||
if (outblocks == 0) {
|
||||
// The current assembly implementation always outputs at least 1 block.
|
||||
return;
|
||||
}
|
||||
#if defined(IS_X86)
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
MAYBE_UNUSED(features);
|
||||
#if !defined(_WIN32) && !defined(__CYGWIN__) && !defined(BLAKE3_NO_AVX512)
|
||||
if (features & AVX512VL) {
|
||||
blake3_xof_many_avx512(cv, block, block_len, counter, flags, out, outblocks);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
for(size_t i = 0; i < outblocks; ++i) {
|
||||
blake3_compress_xof(cv, block, block_len, counter + i, flags, out + 64*i);
|
||||
}
|
||||
}
|
||||
|
||||
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8], uint64_t counter,
|
||||
bool increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
||||
#if defined(IS_X86)
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
MAYBE_UNUSED(features);
|
||||
#if !defined(BLAKE3_NO_AVX512)
|
||||
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
|
||||
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end,
|
||||
out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_AVX2)
|
||||
if (features & AVX2) {
|
||||
blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end,
|
||||
out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE41)
|
||||
if (features & SSE41) {
|
||||
blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end,
|
||||
out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE2)
|
||||
if (features & SSE2) {
|
||||
blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end,
|
||||
out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if BLAKE3_USE_NEON == 1
|
||||
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
return;
|
||||
#endif
|
||||
|
||||
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end,
|
||||
out);
|
||||
}
|
||||
|
||||
// The dynamically detected SIMD degree of the current platform.
|
||||
size_t blake3_simd_degree(void) {
|
||||
#if defined(IS_X86)
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
MAYBE_UNUSED(features);
|
||||
#if !defined(BLAKE3_NO_AVX512)
|
||||
if ((features & (AVX512F|AVX512VL)) == (AVX512F|AVX512VL)) {
|
||||
return 16;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_AVX2)
|
||||
if (features & AVX2) {
|
||||
return 8;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE41)
|
||||
if (features & SSE41) {
|
||||
return 4;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE2)
|
||||
if (features & SSE2) {
|
||||
return 4;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
#if BLAKE3_USE_NEON == 1
|
||||
return 4;
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
Reference in New Issue
Block a user