···1+(** xxHash-64 - Pure OCaml implementation.
2+3+ This implements the xxHash64 algorithm designed by Yann Collet.
4+ xxHash is an extremely fast non-cryptographic hash algorithm with
5+ excellent distribution properties. *)
6+7+(* Constants *)
8+let prime64_1 = 0x9E3779B185EBCA87L
9+let prime64_2 = 0xC2B2AE3D27D4EB4FL
10+let prime64_3 = 0x165667B19E3779F9L
11+let prime64_4 = 0x85EBCA77C2B2AE63L
12+let prime64_5 = 0x27D4EB2F165667C5L
13+14+(* Helper functions *)
15+let[@inline] rotl64 x r =
16+ Int64.(logor (shift_left x r) (shift_right_logical x (64 - r)))
17+18+let[@inline] mix1 acc v =
19+ let open Int64 in
20+ let acc = add acc (mul v prime64_2) in
21+ let acc = rotl64 acc 31 in
22+ mul acc prime64_1
23+24+let[@inline] mix2 acc v =
25+ let open Int64 in
26+ let v = mul v prime64_2 in
27+ let v = rotl64 v 31 in
28+ let v = mul v prime64_1 in
29+ let acc = logxor acc v in
30+ add (mul acc prime64_1) prime64_4
31+32+let[@inline] avalanche h =
33+ let open Int64 in
34+ let h = logxor h (shift_right_logical h 33) in
35+ let h = mul h prime64_2 in
36+ let h = logxor h (shift_right_logical h 29) in
37+ let h = mul h prime64_3 in
38+ logxor h (shift_right_logical h 32)
39+40+(** Compute xxHash-64 of bytes with given seed *)
41+let hash64 ?(seed=0L) src ~pos ~len =
42+ let open Int64 in
43+ let end_pos = pos + len in
44+45+ let h = ref (
46+ if len >= 32 then begin
47+ (* Initialize accumulators *)
48+ let v1 = ref (add (add seed prime64_1) prime64_2) in
49+ let v2 = ref (add seed prime64_2) in
50+ let v3 = ref seed in
51+ let v4 = ref (sub seed prime64_1) in
52+53+ (* Process 32-byte blocks *)
54+ let p = ref pos in
55+ while !p + 32 <= end_pos do
56+ v1 := mix1 !v1 (Bytes.get_int64_le src !p);
57+ v2 := mix1 !v2 (Bytes.get_int64_le src (!p + 8));
58+ v3 := mix1 !v3 (Bytes.get_int64_le src (!p + 16));
59+ v4 := mix1 !v4 (Bytes.get_int64_le src (!p + 24));
60+ p := !p + 32
61+ done;
62+63+ (* Merge accumulators *)
64+ let h = add
65+ (add (rotl64 !v1 1) (rotl64 !v2 7))
66+ (add (rotl64 !v3 12) (rotl64 !v4 18)) in
67+ let h = mix2 h !v1 in
68+ let h = mix2 h !v2 in
69+ let h = mix2 h !v3 in
70+ mix2 h !v4
71+ end else
72+ add seed prime64_5
73+ ) in
74+75+ h := add !h (of_int len);
76+77+ (* Process remaining 8-byte chunks *)
78+ let p = ref (if len >= 32 then pos + (len / 32) * 32 else pos) in
79+ while !p + 8 <= end_pos do
80+ let k = Bytes.get_int64_le src !p in
81+ let k = mul k prime64_2 in
82+ let k = rotl64 k 31 in
83+ let k = mul k prime64_1 in
84+ h := logxor !h k;
85+ h := rotl64 !h 27;
86+ h := add (mul !h prime64_1) prime64_4;
87+ p := !p + 8
88+ done;
89+90+ (* Process remaining 4-byte chunk *)
91+ if !p + 4 <= end_pos then begin
92+ let k = of_int (Bytes.get_int32_le src !p |> Int32.to_int) in
93+ let k = logand k 0xFFFFFFFFL in (* Make unsigned *)
94+ h := logxor !h (mul k prime64_1);
95+ h := rotl64 !h 23;
96+ h := add (mul !h prime64_2) prime64_3;
97+ p := !p + 4
98+ end;
99+100+ (* Process remaining bytes *)
101+ while !p < end_pos do
102+ let k = of_int (Bytes.get_uint8 src !p) in
103+ h := logxor !h (mul k prime64_5);
104+ h := rotl64 !h 11;
105+ h := mul !h prime64_1;
106+ incr p
107+ done;
108+109+ avalanche !h
110+111+let hash64_string ?seed s =
112+ let src = Bytes.unsafe_of_string s in
113+ hash64 ?seed src ~pos:0 ~len:(String.length s)
114+115+(** Compute xxHash-64 and return lower 32 bits (for zstd checksum) *)
116+let hash32 ?seed src ~pos ~len =
117+ let h = hash64 ?seed src ~pos ~len in
118+ Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)
119+120+let hash32_string ?seed s =
121+ let src = Bytes.unsafe_of_string s in
122+ hash32 ?seed src ~pos:0 ~len:(String.length s)
123+124+(** Streaming hasher state *)
125+type state = {
126+ mutable v1 : int64;
127+ mutable v2 : int64;
128+ mutable v3 : int64;
129+ mutable v4 : int64;
130+ mutable total_len : int;
131+ buffer : bytes;
132+ mutable buf_len : int;
133+ seed : int64;
134+}
135+136+let create_state ?(seed=0L) () =
137+ let open Int64 in
138+ {
139+ v1 = add (add seed prime64_1) prime64_2;
140+ v2 = add seed prime64_2;
141+ v3 = seed;
142+ v4 = sub seed prime64_1;
143+ total_len = 0;
144+ buffer = Bytes.create 32;
145+ buf_len = 0;
146+ seed;
147+ }
148+149+let reset ?(seed=0L) state =
150+ let open Int64 in
151+ state.v1 <- add (add seed prime64_1) prime64_2;
152+ state.v2 <- add seed prime64_2;
153+ state.v3 <- seed;
154+ state.v4 <- sub seed prime64_1;
155+ state.total_len <- 0;
156+ state.buf_len <- 0
157+158+let copy_state state =
159+ {
160+ v1 = state.v1;
161+ v2 = state.v2;
162+ v3 = state.v3;
163+ v4 = state.v4;
164+ total_len = state.total_len;
165+ buffer = Bytes.copy state.buffer;
166+ buf_len = state.buf_len;
167+ seed = state.seed;
168+ }
169+170+let update state src ~pos ~len =
171+ let end_pos = pos + len in
172+ state.total_len <- state.total_len + len;
173+174+ let p = ref pos in
175+176+ (* Fill buffer if we have partial data *)
177+ if state.buf_len > 0 then begin
178+ let to_copy = min (32 - state.buf_len) len in
179+ Bytes.blit src !p state.buffer state.buf_len to_copy;
180+ state.buf_len <- state.buf_len + to_copy;
181+ p := !p + to_copy;
182+183+ if state.buf_len = 32 then begin
184+ state.v1 <- mix1 state.v1 (Bytes.get_int64_le state.buffer 0);
185+ state.v2 <- mix1 state.v2 (Bytes.get_int64_le state.buffer 8);
186+ state.v3 <- mix1 state.v3 (Bytes.get_int64_le state.buffer 16);
187+ state.v4 <- mix1 state.v4 (Bytes.get_int64_le state.buffer 24);
188+ state.buf_len <- 0
189+ end
190+ end;
191+192+ (* Process 32-byte blocks *)
193+ while !p + 32 <= end_pos do
194+ state.v1 <- mix1 state.v1 (Bytes.get_int64_le src !p);
195+ state.v2 <- mix1 state.v2 (Bytes.get_int64_le src (!p + 8));
196+ state.v3 <- mix1 state.v3 (Bytes.get_int64_le src (!p + 16));
197+ state.v4 <- mix1 state.v4 (Bytes.get_int64_le src (!p + 24));
198+ p := !p + 32
199+ done;
200+201+ (* Buffer remaining *)
202+ if !p < end_pos then begin
203+ let remaining = end_pos - !p in
204+ Bytes.blit src !p state.buffer state.buf_len remaining;
205+ state.buf_len <- state.buf_len + remaining
206+ end
207+208+let update_string state s =
209+ let src = Bytes.unsafe_of_string s in
210+ update state src ~pos:0 ~len:(String.length s)
211+212+let finalize state =
213+ let open Int64 in
214+215+ let h = ref (
216+ if state.total_len >= 32 then begin
217+ let h = add
218+ (add (rotl64 state.v1 1) (rotl64 state.v2 7))
219+ (add (rotl64 state.v3 12) (rotl64 state.v4 18)) in
220+ let h = mix2 h state.v1 in
221+ let h = mix2 h state.v2 in
222+ let h = mix2 h state.v3 in
223+ mix2 h state.v4
224+ end else
225+ add state.v3 prime64_5 (* v3 holds seed *)
226+ ) in
227+228+ h := add !h (of_int state.total_len);
229+230+ (* Process buffered data *)
231+ let p = ref 0 in
232+ while !p + 8 <= state.buf_len do
233+ let k = Bytes.get_int64_le state.buffer !p in
234+ let k = mul k prime64_2 in
235+ let k = rotl64 k 31 in
236+ let k = mul k prime64_1 in
237+ h := logxor !h k;
238+ h := rotl64 !h 27;
239+ h := add (mul !h prime64_1) prime64_4;
240+ p := !p + 8
241+ done;
242+243+ if !p + 4 <= state.buf_len then begin
244+ let k = of_int (Bytes.get_int32_le state.buffer !p |> Int32.to_int) in
245+ let k = logand k 0xFFFFFFFFL in
246+ h := logxor !h (mul k prime64_1);
247+ h := rotl64 !h 23;
248+ h := add (mul !h prime64_2) prime64_3;
249+ p := !p + 4
250+ end;
251+252+ while !p < state.buf_len do
253+ let k = of_int (Bytes.get_uint8 state.buffer !p) in
254+ h := logxor !h (mul k prime64_5);
255+ h := rotl64 !h 11;
256+ h := mul !h prime64_1;
257+ incr p
258+ done;
259+260+ avalanche !h
261+262+let finalize32 state =
263+ let h = finalize state in
264+ Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)
···1+(** xxHash - Fast non-cryptographic hash functions.
2+3+ This is a pure OCaml implementation of the xxHash family of hash functions,
4+ originally designed by Yann Collet. xxHash provides extremely fast hashing
5+ with excellent distribution properties.
6+7+ {1 Quick Start}
8+9+ {[
10+ (* Hash a string *)
11+ let hash = Xxhash.hash64_string "Hello, World!"
12+13+ (* Hash bytes with explicit range *)
14+ let bytes = Bytes.of_string "Hello, World!"
15+ let hash = Xxhash.hash64 bytes ~pos:0 ~len:13
16+17+ (* Use streaming API for large data *)
18+ let state = Xxhash.create_state () in
19+ Xxhash.update state chunk1 ~pos:0 ~len:(Bytes.length chunk1);
20+ Xxhash.update state chunk2 ~pos:0 ~len:(Bytes.length chunk2);
21+ let hash = Xxhash.finalize state
22+ ]}
23+24+ {1 Hash Variants}
25+26+ - {!hash64}: 64-bit hash, best for general use
27+ - {!hash32}: Lower 32 bits of 64-bit hash (used by zstd)
28+29+ {1 Streaming API}
30+31+ For hashing data that doesn't fit in memory or arrives incrementally:
32+ - {!create_state}: Create a new streaming state
33+ - {!update}: Feed data into the state
34+ - {!finalize}: Get the final hash value *)
35+36+(** {1 One-shot Hashing} *)
37+38+val hash64 : ?seed:int64 -> bytes -> pos:int -> len:int -> int64
39+(** [hash64 ?seed bytes ~pos ~len] computes the xxHash-64 of [len] bytes
40+ from [bytes] starting at [pos].
41+42+ @param seed Optional seed value (default: 0) *)
43+44+val hash64_string : ?seed:int64 -> string -> int64
45+(** [hash64_string ?seed s] computes the xxHash-64 of string [s]. *)
46+47+val hash32 : ?seed:int64 -> bytes -> pos:int -> len:int -> int32
48+(** [hash32 ?seed bytes ~pos ~len] computes xxHash-64 and returns the
49+ lower 32 bits. This is the variant used by zstd for content checksums. *)
50+51+val hash32_string : ?seed:int64 -> string -> int32
52+(** [hash32_string ?seed s] computes the lower 32 bits of xxHash-64. *)
53+54+(** {1 Streaming API} *)
55+56+(** Streaming hasher state. *)
57+type state
58+59+val create_state : ?seed:int64 -> unit -> state
60+(** [create_state ?seed ()] creates a new streaming hash state.
61+62+ @param seed Optional seed value (default: 0) *)
63+64+val reset : ?seed:int64 -> state -> unit
65+(** [reset ?seed state] resets the state for reuse with a new hash.
66+67+ @param seed Optional new seed value (default: 0) *)
68+69+val update : state -> bytes -> pos:int -> len:int -> unit
70+(** [update state bytes ~pos ~len] feeds [len] bytes from [bytes]
71+ starting at [pos] into the hash state.
72+73+ Can be called multiple times to hash data incrementally. *)
74+75+val update_string : state -> string -> unit
76+(** [update_string state s] feeds string [s] into the hash state. *)
77+78+val finalize : state -> int64
79+(** [finalize state] returns the 64-bit hash value.
80+81+ The state can still be used after finalization - subsequent calls
82+ to {!finalize} return the same value until {!update} is called. *)
83+84+val finalize32 : state -> int32
85+(** [finalize32 state] returns the lower 32 bits of the hash. *)
86+87+(** {1 Utilities} *)
88+89+val copy_state : state -> state
90+(** [copy_state state] creates an independent copy of the hash state.
91+ Useful for computing hashes of data with common prefixes. *)