···1-(** xxHash-64 - Pure OCaml implementation.
2-3- This implements the xxHash64 algorithm designed by Yann Collet.
4- xxHash is an extremely fast non-cryptographic hash algorithm with
5- excellent distribution properties. *)
6-7-(* Constants *)
8-let prime64_1 = 0x9E3779B185EBCA87L
9-let prime64_2 = 0xC2B2AE3D27D4EB4FL
10-let prime64_3 = 0x165667B19E3779F9L
11-let prime64_4 = 0x85EBCA77C2B2AE63L
12-let prime64_5 = 0x27D4EB2F165667C5L
13-14-(* Helper functions *)
15-let[@inline] rotl64 x r =
16- Int64.(logor (shift_left x r) (shift_right_logical x (64 - r)))
17-18-let[@inline] mix1 acc v =
19- let open Int64 in
20- let acc = add acc (mul v prime64_2) in
21- let acc = rotl64 acc 31 in
22- mul acc prime64_1
23-24-let[@inline] mix2 acc v =
25- let open Int64 in
26- let v = mul v prime64_2 in
27- let v = rotl64 v 31 in
28- let v = mul v prime64_1 in
29- let acc = logxor acc v in
30- add (mul acc prime64_1) prime64_4
31-32-let[@inline] avalanche h =
33- let open Int64 in
34- let h = logxor h (shift_right_logical h 33) in
35- let h = mul h prime64_2 in
36- let h = logxor h (shift_right_logical h 29) in
37- let h = mul h prime64_3 in
38- logxor h (shift_right_logical h 32)
39-40-(** Compute xxHash-64 of bytes with given seed *)
41-let hash64 ?(seed=0L) src ~pos ~len =
42- let open Int64 in
43- let end_pos = pos + len in
44-45- let h = ref (
46- if len >= 32 then begin
47- (* Initialize accumulators *)
48- let v1 = ref (add (add seed prime64_1) prime64_2) in
49- let v2 = ref (add seed prime64_2) in
50- let v3 = ref seed in
51- let v4 = ref (sub seed prime64_1) in
52-53- (* Process 32-byte blocks *)
54- let p = ref pos in
55- while !p + 32 <= end_pos do
56- v1 := mix1 !v1 (Bytes.get_int64_le src !p);
57- v2 := mix1 !v2 (Bytes.get_int64_le src (!p + 8));
58- v3 := mix1 !v3 (Bytes.get_int64_le src (!p + 16));
59- v4 := mix1 !v4 (Bytes.get_int64_le src (!p + 24));
60- p := !p + 32
61- done;
62-63- (* Merge accumulators *)
64- let h = add
65- (add (rotl64 !v1 1) (rotl64 !v2 7))
66- (add (rotl64 !v3 12) (rotl64 !v4 18)) in
67- let h = mix2 h !v1 in
68- let h = mix2 h !v2 in
69- let h = mix2 h !v3 in
70- mix2 h !v4
71- end else
72- add seed prime64_5
73- ) in
74-75- h := add !h (of_int len);
76-77- (* Process remaining 8-byte chunks *)
78- let p = ref (if len >= 32 then pos + (len / 32) * 32 else pos) in
79- while !p + 8 <= end_pos do
80- let k = Bytes.get_int64_le src !p in
81- let k = mul k prime64_2 in
82- let k = rotl64 k 31 in
83- let k = mul k prime64_1 in
84- h := logxor !h k;
85- h := rotl64 !h 27;
86- h := add (mul !h prime64_1) prime64_4;
87- p := !p + 8
88- done;
89-90- (* Process remaining 4-byte chunk *)
91- if !p + 4 <= end_pos then begin
92- let k = of_int (Bytes.get_int32_le src !p |> Int32.to_int) in
93- let k = logand k 0xFFFFFFFFL in (* Make unsigned *)
94- h := logxor !h (mul k prime64_1);
95- h := rotl64 !h 23;
96- h := add (mul !h prime64_2) prime64_3;
97- p := !p + 4
98- end;
99-100- (* Process remaining bytes *)
101- while !p < end_pos do
102- let k = of_int (Bytes.get_uint8 src !p) in
103- h := logxor !h (mul k prime64_5);
104- h := rotl64 !h 11;
105- h := mul !h prime64_1;
106- incr p
107- done;
108-109- avalanche !h
110-111-let hash64_string ?seed s =
112- let src = Bytes.unsafe_of_string s in
113- hash64 ?seed src ~pos:0 ~len:(String.length s)
114-115-(** Compute xxHash-64 and return lower 32 bits (for zstd checksum) *)
116-let hash32 ?seed src ~pos ~len =
117- let h = hash64 ?seed src ~pos ~len in
118- Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)
119-120-let hash32_string ?seed s =
121- let src = Bytes.unsafe_of_string s in
122- hash32 ?seed src ~pos:0 ~len:(String.length s)
123-124-(** Streaming hasher state *)
125-type state = {
126- mutable v1 : int64;
127- mutable v2 : int64;
128- mutable v3 : int64;
129- mutable v4 : int64;
130- mutable total_len : int;
131- buffer : bytes;
132- mutable buf_len : int;
133- seed : int64;
134-}
135-136-let create_state ?(seed=0L) () =
137- let open Int64 in
138- {
139- v1 = add (add seed prime64_1) prime64_2;
140- v2 = add seed prime64_2;
141- v3 = seed;
142- v4 = sub seed prime64_1;
143- total_len = 0;
144- buffer = Bytes.create 32;
145- buf_len = 0;
146- seed;
147- }
148-149-let reset ?(seed=0L) state =
150- let open Int64 in
151- state.v1 <- add (add seed prime64_1) prime64_2;
152- state.v2 <- add seed prime64_2;
153- state.v3 <- seed;
154- state.v4 <- sub seed prime64_1;
155- state.total_len <- 0;
156- state.buf_len <- 0
157-158-let copy_state state =
159- {
160- v1 = state.v1;
161- v2 = state.v2;
162- v3 = state.v3;
163- v4 = state.v4;
164- total_len = state.total_len;
165- buffer = Bytes.copy state.buffer;
166- buf_len = state.buf_len;
167- seed = state.seed;
168- }
169-170-let update state src ~pos ~len =
171- let end_pos = pos + len in
172- state.total_len <- state.total_len + len;
173-174- let p = ref pos in
175-176- (* Fill buffer if we have partial data *)
177- if state.buf_len > 0 then begin
178- let to_copy = min (32 - state.buf_len) len in
179- Bytes.blit src !p state.buffer state.buf_len to_copy;
180- state.buf_len <- state.buf_len + to_copy;
181- p := !p + to_copy;
182-183- if state.buf_len = 32 then begin
184- state.v1 <- mix1 state.v1 (Bytes.get_int64_le state.buffer 0);
185- state.v2 <- mix1 state.v2 (Bytes.get_int64_le state.buffer 8);
186- state.v3 <- mix1 state.v3 (Bytes.get_int64_le state.buffer 16);
187- state.v4 <- mix1 state.v4 (Bytes.get_int64_le state.buffer 24);
188- state.buf_len <- 0
189- end
190- end;
191-192- (* Process 32-byte blocks *)
193- while !p + 32 <= end_pos do
194- state.v1 <- mix1 state.v1 (Bytes.get_int64_le src !p);
195- state.v2 <- mix1 state.v2 (Bytes.get_int64_le src (!p + 8));
196- state.v3 <- mix1 state.v3 (Bytes.get_int64_le src (!p + 16));
197- state.v4 <- mix1 state.v4 (Bytes.get_int64_le src (!p + 24));
198- p := !p + 32
199- done;
200-201- (* Buffer remaining *)
202- if !p < end_pos then begin
203- let remaining = end_pos - !p in
204- Bytes.blit src !p state.buffer state.buf_len remaining;
205- state.buf_len <- state.buf_len + remaining
206- end
207-208-let update_string state s =
209- let src = Bytes.unsafe_of_string s in
210- update state src ~pos:0 ~len:(String.length s)
211-212-let finalize state =
213- let open Int64 in
214-215- let h = ref (
216- if state.total_len >= 32 then begin
217- let h = add
218- (add (rotl64 state.v1 1) (rotl64 state.v2 7))
219- (add (rotl64 state.v3 12) (rotl64 state.v4 18)) in
220- let h = mix2 h state.v1 in
221- let h = mix2 h state.v2 in
222- let h = mix2 h state.v3 in
223- mix2 h state.v4
224- end else
225- add state.v3 prime64_5 (* v3 holds seed *)
226- ) in
227-228- h := add !h (of_int state.total_len);
229-230- (* Process buffered data *)
231- let p = ref 0 in
232- while !p + 8 <= state.buf_len do
233- let k = Bytes.get_int64_le state.buffer !p in
234- let k = mul k prime64_2 in
235- let k = rotl64 k 31 in
236- let k = mul k prime64_1 in
237- h := logxor !h k;
238- h := rotl64 !h 27;
239- h := add (mul !h prime64_1) prime64_4;
240- p := !p + 8
241- done;
242-243- if !p + 4 <= state.buf_len then begin
244- let k = of_int (Bytes.get_int32_le state.buffer !p |> Int32.to_int) in
245- let k = logand k 0xFFFFFFFFL in
246- h := logxor !h (mul k prime64_1);
247- h := rotl64 !h 23;
248- h := add (mul !h prime64_2) prime64_3;
249- p := !p + 4
250- end;
251-252- while !p < state.buf_len do
253- let k = of_int (Bytes.get_uint8 state.buffer !p) in
254- h := logxor !h (mul k prime64_5);
255- h := rotl64 !h 11;
256- h := mul !h prime64_1;
257- incr p
258- done;
259-260- avalanche !h
261-262-let finalize32 state =
263- let h = finalize state in
264- Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)
···1-(** xxHash - Fast non-cryptographic hash functions.
2-3- This is a pure OCaml implementation of the xxHash family of hash functions,
4- originally designed by Yann Collet. xxHash provides extremely fast hashing
5- with excellent distribution properties.
6-7- {1 Quick Start}
8-9- {[
10- (* Hash a string *)
11- let hash = Xxhash.hash64_string "Hello, World!"
12-13- (* Hash bytes with explicit range *)
14- let bytes = Bytes.of_string "Hello, World!"
15- let hash = Xxhash.hash64 bytes ~pos:0 ~len:13
16-17- (* Use streaming API for large data *)
18- let state = Xxhash.create_state () in
19- Xxhash.update state chunk1 ~pos:0 ~len:(Bytes.length chunk1);
20- Xxhash.update state chunk2 ~pos:0 ~len:(Bytes.length chunk2);
21- let hash = Xxhash.finalize state
22- ]}
23-24- {1 Hash Variants}
25-26- - {!hash64}: 64-bit hash, best for general use
27- - {!hash32}: Lower 32 bits of 64-bit hash (used by zstd)
28-29- {1 Streaming API}
30-31- For hashing data that doesn't fit in memory or arrives incrementally:
32- - {!create_state}: Create a new streaming state
33- - {!update}: Feed data into the state
34- - {!finalize}: Get the final hash value *)
35-36-(** {1 One-shot Hashing} *)
37-38-val hash64 : ?seed:int64 -> bytes -> pos:int -> len:int -> int64
39-(** [hash64 ?seed bytes ~pos ~len] computes the xxHash-64 of [len] bytes
40- from [bytes] starting at [pos].
41-42- @param seed Optional seed value (default: 0) *)
43-44-val hash64_string : ?seed:int64 -> string -> int64
45-(** [hash64_string ?seed s] computes the xxHash-64 of string [s]. *)
46-47-val hash32 : ?seed:int64 -> bytes -> pos:int -> len:int -> int32
48-(** [hash32 ?seed bytes ~pos ~len] computes xxHash-64 and returns the
49- lower 32 bits. This is the variant used by zstd for content checksums. *)
50-51-val hash32_string : ?seed:int64 -> string -> int32
52-(** [hash32_string ?seed s] computes the lower 32 bits of xxHash-64. *)
53-54-(** {1 Streaming API} *)
55-56-(** Streaming hasher state. *)
57-type state
58-59-val create_state : ?seed:int64 -> unit -> state
60-(** [create_state ?seed ()] creates a new streaming hash state.
61-62- @param seed Optional seed value (default: 0) *)
63-64-val reset : ?seed:int64 -> state -> unit
65-(** [reset ?seed state] resets the state for reuse with a new hash.
66-67- @param seed Optional new seed value (default: 0) *)
68-69-val update : state -> bytes -> pos:int -> len:int -> unit
70-(** [update state bytes ~pos ~len] feeds [len] bytes from [bytes]
71- starting at [pos] into the hash state.
72-73- Can be called multiple times to hash data incrementally. *)
74-75-val update_string : state -> string -> unit
76-(** [update_string state s] feeds string [s] into the hash state. *)
77-78-val finalize : state -> int64
79-(** [finalize state] returns the 64-bit hash value.
80-81- The state can still be used after finalization - subsequent calls
82- to {!finalize} return the same value until {!update} is called. *)
83-84-val finalize32 : state -> int32
85-(** [finalize32 state] returns the lower 32 bits of the hash. *)
86-87-(** {1 Utilities} *)
88-89-val copy_state : state -> state
90-(** [copy_state state] creates an independent copy of the hash state.
91- Useful for computing hashes of data with common prefixes. *)