upstream: https://github.com/mirage/mirage-crypto
1#include "crypto.h"
2
/* XOR the first n bytes of src into dst, in place: dst[i] ^= src[i].
 *
 * Neither pointer is required to have any particular alignment. Whole words
 * are moved through local temporaries with memcpy on both the load and the
 * store side, so the buffers are never dereferenced through a wider pointer
 * type (see issues #70 and #81 for the alignment considerations).
 *
 * The 8-byte loop is only compiled when ARCH_64BIT is defined; the 4-byte
 * loop and the trailing byte loop handle whatever remains.
 */
static inline void xor_into (const uint8_t *src, uint8_t *dst, size_t n) {
#ifdef ARCH_64BIT
  for (; n >= 8; n -= 8, src += 8, dst += 8) {
    uint64_t s, d;
    memcpy (&s, src, 8);
    memcpy (&d, dst, 8);
    d ^= s;
    memcpy (dst, &d, 8);
  }
#endif

  for (; n >= 4; n -= 4, src += 4, dst += 4) {
    uint32_t s, d;
    memcpy (&s, src, 4);
    memcpy (&d, dst, 4);
    d ^= s;
    memcpy (dst, &d, 4);
  }

  /* Fewer than four bytes left: finish one byte at a time. */
  for (; n --; ++ src, ++ dst) *dst ^= *src;
}
17
/* Fill dst with `blocks` consecutive 64-bit counter values.
 *
 * The starting value is read from *init through be64_to_cpu; each output
 * word is the current value passed through cpu_to_be64, after which the
 * value is incremented (wrapping modulo 2^64). *init itself is only read.
 */
static inline void _mc_count_8_be (uint64_t *init, uint64_t *dst, size_t blocks) {
  uint64_t counter = be64_to_cpu (*init);
  for (size_t i = 0; i < blocks; i ++) {
    dst[i] = cpu_to_be64 (counter);
    counter ++;
  }
}
22
/* XXX
 *
 * Counters are garbage. ;_;
 * Calling this incurs about a 15% hit in AES-CTR.
 *
 * What slows things down:
 * - Naive __uint128_t.
 * - Loop unrolling.
 * - SSE carry bit handling.
 *
 * Writes `blocks` 16-byte counter blocks (two 64-bit words each) to dst,
 * starting from the block at init. The second word is the one that is
 * incremented per block; when it wraps around to zero, a carry of one is
 * added to the first word. The first word is kept in its stored byte order
 * and only converted (be64_to_cpu / cpu_to_be64) when that carry happens.
 */
static inline void _mc_count_16_be (uint64_t *init, uint64_t *dst, size_t blocks) {
  uint64_t hi = init[0];
  uint64_t lo = be64_to_cpu (init[1]);
  while (blocks --) {
    dst[0] = hi;
    dst[1] = cpu_to_be64 (lo);
    dst += 2;
    lo ++;
    /* Low word wrapped: propagate the carry into the high word. */
    if (lo == 0) hi = cpu_to_be64 (be64_to_cpu (hi) + 1);
  }
}
42
/* The GCM counter. Counts on the last 32 bits, ignoring carry.
 *
 * Writes `blocks` 16-byte blocks to dst. The first 12 bytes of every block
 * are copied unchanged from init; the final 32-bit word starts at the value
 * read from init through be32_to_cpu, is written through cpu_to_be32, and is
 * incremented per block, wrapping modulo 2^32 without touching the rest.
 */
static inline void _mc_count_16_be_4 (uint64_t *init, uint64_t *dst, size_t blocks) {
  const uint32_t *in32 = (const uint32_t *) init;
  uint64_t head = init[0];
  uint32_t fixed = in32[2];
  uint32_t counter = be32_to_cpu (in32[3]);

  for (size_t i = 0; i < blocks; i ++, dst += 2) {
    uint32_t *out32 = (uint32_t *) dst;
    dst[0] = head;
    out32[2] = fixed;
    out32[3] = cpu_to_be32 (counter);
    counter ++;
  }
}
55
56CAMLprim value
57mc_xor_into_bytes_generic (value b1, value off1, value b2, value off2, value n) {
58 xor_into (_st_uint8_off (b1, off1), _bp_uint8_off (b2, off2), Int_val (n));
59 return Val_unit;
60}
61
62#define __export_counter(name, f) \
63 CAMLprim value name (value ctr, value dst, value off, value blocks) { \
64 f ( (uint64_t*) Bp_val (ctr), \
65 (uint64_t*) _bp_uint8_off (dst, off), Long_val (blocks) ); \
66 return Val_unit; \
67 }
68
69__export_counter (mc_count_8_be, _mc_count_8_be)
70__export_counter (mc_count_16_be, _mc_count_16_be)
71__export_counter (mc_count_16_be_4_generic, _mc_count_16_be_4)