Pure OCaml xxhash implementation
1(** xxHash-64 - Pure OCaml implementation.
2
3 This implements the xxHash64 algorithm designed by Yann Collet.
4 xxHash is an extremely fast non-cryptographic hash algorithm with
5 excellent distribution properties. *)
6
7(* Constants *)
8let prime64_1 = 0x9E3779B185EBCA87L
9let prime64_2 = 0xC2B2AE3D27D4EB4FL
10let prime64_3 = 0x165667B19E3779F9L
11let prime64_4 = 0x85EBCA77C2B2AE63L
12let prime64_5 = 0x27D4EB2F165667C5L
13
14(* Helper functions *)
15let[@inline] rotl64 x r =
16 Int64.(logor (shift_left x r) (shift_right_logical x (64 - r)))
17
18let[@inline] mix1 acc v =
19 let open Int64 in
20 let acc = add acc (mul v prime64_2) in
21 let acc = rotl64 acc 31 in
22 mul acc prime64_1
23
24let[@inline] mix2 acc v =
25 let open Int64 in
26 let v = mul v prime64_2 in
27 let v = rotl64 v 31 in
28 let v = mul v prime64_1 in
29 let acc = logxor acc v in
30 add (mul acc prime64_1) prime64_4
31
32let[@inline] avalanche h =
33 let open Int64 in
34 let h = logxor h (shift_right_logical h 33) in
35 let h = mul h prime64_2 in
36 let h = logxor h (shift_right_logical h 29) in
37 let h = mul h prime64_3 in
38 logxor h (shift_right_logical h 32)
39
40(** Compute xxHash-64 of bytes with given seed *)
41let hash64 ?(seed=0L) src ~pos ~len =
42 let open Int64 in
43 let end_pos = pos + len in
44
45 let h = ref (
46 if len >= 32 then begin
47 (* Initialize accumulators *)
48 let v1 = ref (add (add seed prime64_1) prime64_2) in
49 let v2 = ref (add seed prime64_2) in
50 let v3 = ref seed in
51 let v4 = ref (sub seed prime64_1) in
52
53 (* Process 32-byte blocks *)
54 let p = ref pos in
55 while !p + 32 <= end_pos do
56 v1 := mix1 !v1 (Bytes.get_int64_le src !p);
57 v2 := mix1 !v2 (Bytes.get_int64_le src (!p + 8));
58 v3 := mix1 !v3 (Bytes.get_int64_le src (!p + 16));
59 v4 := mix1 !v4 (Bytes.get_int64_le src (!p + 24));
60 p := !p + 32
61 done;
62
63 (* Merge accumulators *)
64 let h = add
65 (add (rotl64 !v1 1) (rotl64 !v2 7))
66 (add (rotl64 !v3 12) (rotl64 !v4 18)) in
67 let h = mix2 h !v1 in
68 let h = mix2 h !v2 in
69 let h = mix2 h !v3 in
70 mix2 h !v4
71 end else
72 add seed prime64_5
73 ) in
74
75 h := add !h (of_int len);
76
77 (* Process remaining 8-byte chunks *)
78 let p = ref (if len >= 32 then pos + (len / 32) * 32 else pos) in
79 while !p + 8 <= end_pos do
80 let k = Bytes.get_int64_le src !p in
81 let k = mul k prime64_2 in
82 let k = rotl64 k 31 in
83 let k = mul k prime64_1 in
84 h := logxor !h k;
85 h := rotl64 !h 27;
86 h := add (mul !h prime64_1) prime64_4;
87 p := !p + 8
88 done;
89
90 (* Process remaining 4-byte chunk *)
91 if !p + 4 <= end_pos then begin
92 let k = of_int (Bytes.get_int32_le src !p |> Int32.to_int) in
93 let k = logand k 0xFFFFFFFFL in (* Make unsigned *)
94 h := logxor !h (mul k prime64_1);
95 h := rotl64 !h 23;
96 h := add (mul !h prime64_2) prime64_3;
97 p := !p + 4
98 end;
99
100 (* Process remaining bytes *)
101 while !p < end_pos do
102 let k = of_int (Bytes.get_uint8 src !p) in
103 h := logxor !h (mul k prime64_5);
104 h := rotl64 !h 11;
105 h := mul !h prime64_1;
106 incr p
107 done;
108
109 avalanche !h
110
111let hash64_string ?seed s =
112 let src = Bytes.unsafe_of_string s in
113 hash64 ?seed src ~pos:0 ~len:(String.length s)
114
115(** Compute xxHash-64 and return lower 32 bits (for zstd checksum) *)
116let hash32 ?seed src ~pos ~len =
117 let h = hash64 ?seed src ~pos ~len in
118 Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)
119
120let hash32_string ?seed s =
121 let src = Bytes.unsafe_of_string s in
122 hash32 ?seed src ~pos:0 ~len:(String.length s)
123
124(** Streaming hasher state *)
125type state = {
126 mutable v1 : int64;
127 mutable v2 : int64;
128 mutable v3 : int64;
129 mutable v4 : int64;
130 mutable total_len : int;
131 buffer : bytes;
132 mutable buf_len : int;
133 seed : int64;
134}
135
136let create_state ?(seed=0L) () =
137 let open Int64 in
138 {
139 v1 = add (add seed prime64_1) prime64_2;
140 v2 = add seed prime64_2;
141 v3 = seed;
142 v4 = sub seed prime64_1;
143 total_len = 0;
144 buffer = Bytes.create 32;
145 buf_len = 0;
146 seed;
147 }
148
149let reset ?(seed=0L) state =
150 let open Int64 in
151 state.v1 <- add (add seed prime64_1) prime64_2;
152 state.v2 <- add seed prime64_2;
153 state.v3 <- seed;
154 state.v4 <- sub seed prime64_1;
155 state.total_len <- 0;
156 state.buf_len <- 0
157
158let copy_state state =
159 {
160 v1 = state.v1;
161 v2 = state.v2;
162 v3 = state.v3;
163 v4 = state.v4;
164 total_len = state.total_len;
165 buffer = Bytes.copy state.buffer;
166 buf_len = state.buf_len;
167 seed = state.seed;
168 }
169
170let update state src ~pos ~len =
171 let end_pos = pos + len in
172 state.total_len <- state.total_len + len;
173
174 let p = ref pos in
175
176 (* Fill buffer if we have partial data *)
177 if state.buf_len > 0 then begin
178 let to_copy = min (32 - state.buf_len) len in
179 Bytes.blit src !p state.buffer state.buf_len to_copy;
180 state.buf_len <- state.buf_len + to_copy;
181 p := !p + to_copy;
182
183 if state.buf_len = 32 then begin
184 state.v1 <- mix1 state.v1 (Bytes.get_int64_le state.buffer 0);
185 state.v2 <- mix1 state.v2 (Bytes.get_int64_le state.buffer 8);
186 state.v3 <- mix1 state.v3 (Bytes.get_int64_le state.buffer 16);
187 state.v4 <- mix1 state.v4 (Bytes.get_int64_le state.buffer 24);
188 state.buf_len <- 0
189 end
190 end;
191
192 (* Process 32-byte blocks *)
193 while !p + 32 <= end_pos do
194 state.v1 <- mix1 state.v1 (Bytes.get_int64_le src !p);
195 state.v2 <- mix1 state.v2 (Bytes.get_int64_le src (!p + 8));
196 state.v3 <- mix1 state.v3 (Bytes.get_int64_le src (!p + 16));
197 state.v4 <- mix1 state.v4 (Bytes.get_int64_le src (!p + 24));
198 p := !p + 32
199 done;
200
201 (* Buffer remaining *)
202 if !p < end_pos then begin
203 let remaining = end_pos - !p in
204 Bytes.blit src !p state.buffer state.buf_len remaining;
205 state.buf_len <- state.buf_len + remaining
206 end
207
208let update_string state s =
209 let src = Bytes.unsafe_of_string s in
210 update state src ~pos:0 ~len:(String.length s)
211
212let finalize state =
213 let open Int64 in
214
215 let h = ref (
216 if state.total_len >= 32 then begin
217 let h = add
218 (add (rotl64 state.v1 1) (rotl64 state.v2 7))
219 (add (rotl64 state.v3 12) (rotl64 state.v4 18)) in
220 let h = mix2 h state.v1 in
221 let h = mix2 h state.v2 in
222 let h = mix2 h state.v3 in
223 mix2 h state.v4
224 end else
225 add state.v3 prime64_5 (* v3 holds seed *)
226 ) in
227
228 h := add !h (of_int state.total_len);
229
230 (* Process buffered data *)
231 let p = ref 0 in
232 while !p + 8 <= state.buf_len do
233 let k = Bytes.get_int64_le state.buffer !p in
234 let k = mul k prime64_2 in
235 let k = rotl64 k 31 in
236 let k = mul k prime64_1 in
237 h := logxor !h k;
238 h := rotl64 !h 27;
239 h := add (mul !h prime64_1) prime64_4;
240 p := !p + 8
241 done;
242
243 if !p + 4 <= state.buf_len then begin
244 let k = of_int (Bytes.get_int32_le state.buffer !p |> Int32.to_int) in
245 let k = logand k 0xFFFFFFFFL in
246 h := logxor !h (mul k prime64_1);
247 h := rotl64 !h 23;
248 h := add (mul !h prime64_2) prime64_3;
249 p := !p + 4
250 end;
251
252 while !p < state.buf_len do
253 let k = of_int (Bytes.get_uint8 state.buffer !p) in
254 h := logxor !h (mul k prime64_5);
255 h := rotl64 !h 11;
256 h := mul !h prime64_1;
257 incr p
258 done;
259
260 avalanche !h
261
262let finalize32 state =
263 let h = finalize state in
264 Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)