···11-(** xxHash-64 - Pure OCaml implementation.
22-33- This implements the xxHash64 algorithm designed by Yann Collet.
44- xxHash is an extremely fast non-cryptographic hash algorithm with
55- excellent distribution properties. *)
66-77-(* Constants *)
88-let prime64_1 = 0x9E3779B185EBCA87L
99-let prime64_2 = 0xC2B2AE3D27D4EB4FL
1010-let prime64_3 = 0x165667B19E3779F9L
1111-let prime64_4 = 0x85EBCA77C2B2AE63L
1212-let prime64_5 = 0x27D4EB2F165667C5L
1313-1414-(* Helper functions *)
1515-let[@inline] rotl64 x r =
1616- Int64.(logor (shift_left x r) (shift_right_logical x (64 - r)))
1717-1818-let[@inline] mix1 acc v =
1919- let open Int64 in
2020- let acc = add acc (mul v prime64_2) in
2121- let acc = rotl64 acc 31 in
2222- mul acc prime64_1
2323-2424-let[@inline] mix2 acc v =
2525- let open Int64 in
2626- let v = mul v prime64_2 in
2727- let v = rotl64 v 31 in
2828- let v = mul v prime64_1 in
2929- let acc = logxor acc v in
3030- add (mul acc prime64_1) prime64_4
3131-3232-let[@inline] avalanche h =
3333- let open Int64 in
3434- let h = logxor h (shift_right_logical h 33) in
3535- let h = mul h prime64_2 in
3636- let h = logxor h (shift_right_logical h 29) in
3737- let h = mul h prime64_3 in
3838- logxor h (shift_right_logical h 32)
3939-4040-(** Compute xxHash-64 of bytes with given seed *)
4141-let hash64 ?(seed=0L) src ~pos ~len =
4242- let open Int64 in
4343- let end_pos = pos + len in
4444-4545- let h = ref (
4646- if len >= 32 then begin
4747- (* Initialize accumulators *)
4848- let v1 = ref (add (add seed prime64_1) prime64_2) in
4949- let v2 = ref (add seed prime64_2) in
5050- let v3 = ref seed in
5151- let v4 = ref (sub seed prime64_1) in
5252-5353- (* Process 32-byte blocks *)
5454- let p = ref pos in
5555- while !p + 32 <= end_pos do
5656- v1 := mix1 !v1 (Bytes.get_int64_le src !p);
5757- v2 := mix1 !v2 (Bytes.get_int64_le src (!p + 8));
5858- v3 := mix1 !v3 (Bytes.get_int64_le src (!p + 16));
5959- v4 := mix1 !v4 (Bytes.get_int64_le src (!p + 24));
6060- p := !p + 32
6161- done;
6262-6363- (* Merge accumulators *)
6464- let h = add
6565- (add (rotl64 !v1 1) (rotl64 !v2 7))
6666- (add (rotl64 !v3 12) (rotl64 !v4 18)) in
6767- let h = mix2 h !v1 in
6868- let h = mix2 h !v2 in
6969- let h = mix2 h !v3 in
7070- mix2 h !v4
7171- end else
7272- add seed prime64_5
7373- ) in
7474-7575- h := add !h (of_int len);
7676-7777- (* Process remaining 8-byte chunks *)
7878- let p = ref (if len >= 32 then pos + (len / 32) * 32 else pos) in
7979- while !p + 8 <= end_pos do
8080- let k = Bytes.get_int64_le src !p in
8181- let k = mul k prime64_2 in
8282- let k = rotl64 k 31 in
8383- let k = mul k prime64_1 in
8484- h := logxor !h k;
8585- h := rotl64 !h 27;
8686- h := add (mul !h prime64_1) prime64_4;
8787- p := !p + 8
8888- done;
8989-9090- (* Process remaining 4-byte chunk *)
9191- if !p + 4 <= end_pos then begin
9292- let k = of_int (Bytes.get_int32_le src !p |> Int32.to_int) in
9393- let k = logand k 0xFFFFFFFFL in (* Make unsigned *)
9494- h := logxor !h (mul k prime64_1);
9595- h := rotl64 !h 23;
9696- h := add (mul !h prime64_2) prime64_3;
9797- p := !p + 4
9898- end;
9999-100100- (* Process remaining bytes *)
101101- while !p < end_pos do
102102- let k = of_int (Bytes.get_uint8 src !p) in
103103- h := logxor !h (mul k prime64_5);
104104- h := rotl64 !h 11;
105105- h := mul !h prime64_1;
106106- incr p
107107- done;
108108-109109- avalanche !h
110110-111111-let hash64_string ?seed s =
112112- let src = Bytes.unsafe_of_string s in
113113- hash64 ?seed src ~pos:0 ~len:(String.length s)
114114-115115-(** Compute xxHash-64 and return lower 32 bits (for zstd checksum) *)
116116-let hash32 ?seed src ~pos ~len =
117117- let h = hash64 ?seed src ~pos ~len in
118118- Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)
119119-120120-let hash32_string ?seed s =
121121- let src = Bytes.unsafe_of_string s in
122122- hash32 ?seed src ~pos:0 ~len:(String.length s)
123123-124124-(** Streaming hasher state *)
125125-type state = {
126126- mutable v1 : int64;
127127- mutable v2 : int64;
128128- mutable v3 : int64;
129129- mutable v4 : int64;
130130- mutable total_len : int;
131131- buffer : bytes;
132132- mutable buf_len : int;
133133- seed : int64;
134134-}
135135-136136-let create_state ?(seed=0L) () =
137137- let open Int64 in
138138- {
139139- v1 = add (add seed prime64_1) prime64_2;
140140- v2 = add seed prime64_2;
141141- v3 = seed;
142142- v4 = sub seed prime64_1;
143143- total_len = 0;
144144- buffer = Bytes.create 32;
145145- buf_len = 0;
146146- seed;
147147- }
148148-149149-let reset ?(seed=0L) state =
150150- let open Int64 in
151151- state.v1 <- add (add seed prime64_1) prime64_2;
152152- state.v2 <- add seed prime64_2;
153153- state.v3 <- seed;
154154- state.v4 <- sub seed prime64_1;
155155- state.total_len <- 0;
156156- state.buf_len <- 0
157157-158158-let copy_state state =
159159- {
160160- v1 = state.v1;
161161- v2 = state.v2;
162162- v3 = state.v3;
163163- v4 = state.v4;
164164- total_len = state.total_len;
165165- buffer = Bytes.copy state.buffer;
166166- buf_len = state.buf_len;
167167- seed = state.seed;
168168- }
169169-170170-let update state src ~pos ~len =
171171- let end_pos = pos + len in
172172- state.total_len <- state.total_len + len;
173173-174174- let p = ref pos in
175175-176176- (* Fill buffer if we have partial data *)
177177- if state.buf_len > 0 then begin
178178- let to_copy = min (32 - state.buf_len) len in
179179- Bytes.blit src !p state.buffer state.buf_len to_copy;
180180- state.buf_len <- state.buf_len + to_copy;
181181- p := !p + to_copy;
182182-183183- if state.buf_len = 32 then begin
184184- state.v1 <- mix1 state.v1 (Bytes.get_int64_le state.buffer 0);
185185- state.v2 <- mix1 state.v2 (Bytes.get_int64_le state.buffer 8);
186186- state.v3 <- mix1 state.v3 (Bytes.get_int64_le state.buffer 16);
187187- state.v4 <- mix1 state.v4 (Bytes.get_int64_le state.buffer 24);
188188- state.buf_len <- 0
189189- end
190190- end;
191191-192192- (* Process 32-byte blocks *)
193193- while !p + 32 <= end_pos do
194194- state.v1 <- mix1 state.v1 (Bytes.get_int64_le src !p);
195195- state.v2 <- mix1 state.v2 (Bytes.get_int64_le src (!p + 8));
196196- state.v3 <- mix1 state.v3 (Bytes.get_int64_le src (!p + 16));
197197- state.v4 <- mix1 state.v4 (Bytes.get_int64_le src (!p + 24));
198198- p := !p + 32
199199- done;
200200-201201- (* Buffer remaining *)
202202- if !p < end_pos then begin
203203- let remaining = end_pos - !p in
204204- Bytes.blit src !p state.buffer state.buf_len remaining;
205205- state.buf_len <- state.buf_len + remaining
206206- end
207207-208208-let update_string state s =
209209- let src = Bytes.unsafe_of_string s in
210210- update state src ~pos:0 ~len:(String.length s)
211211-212212-let finalize state =
213213- let open Int64 in
214214-215215- let h = ref (
216216- if state.total_len >= 32 then begin
217217- let h = add
218218- (add (rotl64 state.v1 1) (rotl64 state.v2 7))
219219- (add (rotl64 state.v3 12) (rotl64 state.v4 18)) in
220220- let h = mix2 h state.v1 in
221221- let h = mix2 h state.v2 in
222222- let h = mix2 h state.v3 in
223223- mix2 h state.v4
224224- end else
225225- add state.v3 prime64_5 (* v3 holds seed *)
226226- ) in
227227-228228- h := add !h (of_int state.total_len);
229229-230230- (* Process buffered data *)
231231- let p = ref 0 in
232232- while !p + 8 <= state.buf_len do
233233- let k = Bytes.get_int64_le state.buffer !p in
234234- let k = mul k prime64_2 in
235235- let k = rotl64 k 31 in
236236- let k = mul k prime64_1 in
237237- h := logxor !h k;
238238- h := rotl64 !h 27;
239239- h := add (mul !h prime64_1) prime64_4;
240240- p := !p + 8
241241- done;
242242-243243- if !p + 4 <= state.buf_len then begin
244244- let k = of_int (Bytes.get_int32_le state.buffer !p |> Int32.to_int) in
245245- let k = logand k 0xFFFFFFFFL in
246246- h := logxor !h (mul k prime64_1);
247247- h := rotl64 !h 23;
248248- h := add (mul !h prime64_2) prime64_3;
249249- p := !p + 4
250250- end;
251251-252252- while !p < state.buf_len do
253253- let k = of_int (Bytes.get_uint8 state.buffer !p) in
254254- h := logxor !h (mul k prime64_5);
255255- h := rotl64 !h 11;
256256- h := mul !h prime64_1;
257257- incr p
258258- done;
259259-260260- avalanche !h
261261-262262-let finalize32 state =
263263- let h = finalize state in
264264- Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)
-91
ocaml-mlxxhash/src/xxhash.mli
···11-(** xxHash - Fast non-cryptographic hash functions.
22-33- This is a pure OCaml implementation of the xxHash family of hash functions,
44- originally designed by Yann Collet. xxHash provides extremely fast hashing
55- with excellent distribution properties.
66-77- {1 Quick Start}
88-99- {[
1010- (* Hash a string *)
1111- let hash = Xxhash.hash64_string "Hello, World!"
1212-1313- (* Hash bytes with explicit range *)
1414- let bytes = Bytes.of_string "Hello, World!"
1515- let hash = Xxhash.hash64 bytes ~pos:0 ~len:13
1616-1717- (* Use streaming API for large data *)
1818- let state = Xxhash.create_state () in
1919- Xxhash.update state chunk1 ~pos:0 ~len:(Bytes.length chunk1);
2020- Xxhash.update state chunk2 ~pos:0 ~len:(Bytes.length chunk2);
2121- let hash = Xxhash.finalize state
2222- ]}
2323-2424- {1 Hash Variants}
2525-2626- - {!hash64}: 64-bit hash, best for general use
2727- - {!hash32}: Lower 32 bits of 64-bit hash (used by zstd)
2828-2929- {1 Streaming API}
3030-3131- For hashing data that doesn't fit in memory or arrives incrementally:
3232- - {!create_state}: Create a new streaming state
3333- - {!update}: Feed data into the state
3434- - {!finalize}: Get the final hash value *)
3535-3636-(** {1 One-shot Hashing} *)
3737-3838-val hash64 : ?seed:int64 -> bytes -> pos:int -> len:int -> int64
3939-(** [hash64 ?seed bytes ~pos ~len] computes the xxHash-64 of [len] bytes
4040- from [bytes] starting at [pos].
4141-4242- @param seed Optional seed value (default: 0) *)
4343-4444-val hash64_string : ?seed:int64 -> string -> int64
4545-(** [hash64_string ?seed s] computes the xxHash-64 of string [s]. *)
4646-4747-val hash32 : ?seed:int64 -> bytes -> pos:int -> len:int -> int32
4848-(** [hash32 ?seed bytes ~pos ~len] computes xxHash-64 and returns the
4949- lower 32 bits. This is the variant used by zstd for content checksums. *)
5050-5151-val hash32_string : ?seed:int64 -> string -> int32
5252-(** [hash32_string ?seed s] computes the lower 32 bits of xxHash-64. *)
5353-5454-(** {1 Streaming API} *)
5555-5656-(** Streaming hasher state. *)
5757-type state
5858-5959-val create_state : ?seed:int64 -> unit -> state
6060-(** [create_state ?seed ()] creates a new streaming hash state.
6161-6262- @param seed Optional seed value (default: 0) *)
6363-6464-val reset : ?seed:int64 -> state -> unit
6565-(** [reset ?seed state] resets the state for reuse with a new hash.
6666-6767- @param seed Optional new seed value (default: 0) *)
6868-6969-val update : state -> bytes -> pos:int -> len:int -> unit
7070-(** [update state bytes ~pos ~len] feeds [len] bytes from [bytes]
7171- starting at [pos] into the hash state.
7272-7373- Can be called multiple times to hash data incrementally. *)
7474-7575-val update_string : state -> string -> unit
7676-(** [update_string state s] feeds string [s] into the hash state. *)
7777-7878-val finalize : state -> int64
7979-(** [finalize state] returns the 64-bit hash value.
8080-8181- The state can still be used after finalization - subsequent calls
8282- to {!finalize} return the same value until {!update} is called. *)
8383-8484-val finalize32 : state -> int32
8585-(** [finalize32 state] returns the lower 32 bits of the hash. *)
8686-8787-(** {1 Utilities} *)
8888-8989-val copy_state : state -> state
9090-(** [copy_state state] creates an independent copy of the hash state.
9191- Useful for computing hashes of data with common prefixes. *)