upstream: https://github.com/mirage/mirage-crypto
1#include "crypto.h"
2
3#ifdef __mc_ACCELERATE__
4
/*
 * XOR `n` bytes of `src` into `dst` in place: dst[i] ^= src[i] for 0 <= i < n.
 *
 * Neither pointer is assumed to be aligned. See issue #70 #81 for alignment
 * considerations: every multi-byte scalar access, on `src` AND on `dst`, goes
 * through memcpy into a local, so no misaligned or type-punned pointer is ever
 * dereferenced. The 16-byte path reads and writes `dst` with the unaligned
 * load/store intrinsics.
 *
 * The 16- and 8-byte strides are only compiled when ARCH_64BIT is defined;
 * otherwise the work is done 4 bytes at a time, then byte by byte.
 */
static inline void xor_into (const uint8_t *src, uint8_t *dst, size_t n) {
#ifdef ARCH_64BIT
  for (; n >= 16; n -= 16, src += 16, dst += 16) {
    __m128i s;
    memcpy (&s, src, 16);
    _mm_storeu_si128 (
        (__m128i *) dst,
        _mm_xor_si128 (s, _mm_loadu_si128 ((const __m128i *) dst)));
  }

  for (; n >= 8; n -= 8, src += 8, dst += 8) {
    uint64_t s, d;
    memcpy (&s, src, 8);
    memcpy (&d, dst, 8);
    d ^= s;
    memcpy (dst, &d, 8);
  }
#endif

  for (; n >= 4; n -= 4, src += 4, dst += 4) {
    uint32_t s, d;
    memcpy (&s, src, 4);
    memcpy (&d, dst, 4);
    d ^= s;
    memcpy (dst, &d, 4);
  }

  /* Remaining 0..3 bytes. */
  for (; n --; ++ src, ++ dst) *dst ^= *src;
}
27
28/* The GCM counter. Counts on the last 32 bits, ignoring carry. */
29static inline void _mc_count_16_be_4 (uint64_t *init, uint64_t *dst, size_t blocks) {
30
31 __m128i ctr, c1 = _mm_set_epi32 (1, 0, 0, 0),
32 mask = _mm_set_epi64x (0x0c0d0e0f0b0a0908, 0x0706050403020100);
33 ctr = _mm_shuffle_epi8 (_mm_loadu_si128 ((__m128i *) init), mask);
34 for (; blocks --; dst += 2) {
35 _mm_storeu_si128 ((__m128i *) dst, _mm_shuffle_epi8 (ctr, mask));
36 ctr = _mm_add_epi32 (ctr, c1);
37 }
38}
39
40#endif /* __mc_ACCELERATE__ */
41
42CAMLprim value
43mc_xor_into_bytes (value b1, value off1, value b2, value off2, value n) {
44 _mc_switch_accel(ssse3,
45 mc_xor_into_bytes_generic(b1, off1, b2, off2, n),
46 xor_into (_st_uint8_off (b1, off1), _bp_uint8_off (b2, off2), Int_val (n)))
47 return Val_unit;
48}
49
50#define __export_counter(name, f) \
51 CAMLprim value name (value ctr, value dst, value off, value blocks) { \
52 _mc_switch_accel(ssse3, \
53 name##_generic (ctr, dst, off, blocks), \
54 f ( (uint64_t*) Bp_val (ctr), \
55 (uint64_t*) _bp_uint8_off (dst, off), Long_val (blocks) )) \
56 return Val_unit; \
57 }
58
59__export_counter(mc_count_16_be_4, _mc_count_16_be_4)
60
61CAMLprim value mc_misc_mode (__unit ()) {
62 value enabled = 0;
63 _mc_switch_accel(ssse3,
64 enabled = 0,
65 enabled = 1)
66 return Val_int (enabled);
67}