(** xxHash-64 - Pure OCaml implementation. This implements the xxHash64 algorithm designed by Yann Collet. xxHash is an extremely fast non-cryptographic hash algorithm with excellent distribution properties. *) (* Constants *) let prime64_1 = 0x9E3779B185EBCA87L let prime64_2 = 0xC2B2AE3D27D4EB4FL let prime64_3 = 0x165667B19E3779F9L let prime64_4 = 0x85EBCA77C2B2AE63L let prime64_5 = 0x27D4EB2F165667C5L (* Helper functions *) let[@inline] rotl64 x r = Int64.(logor (shift_left x r) (shift_right_logical x (64 - r))) let[@inline] mix1 acc v = let open Int64 in let acc = add acc (mul v prime64_2) in let acc = rotl64 acc 31 in mul acc prime64_1 let[@inline] mix2 acc v = let open Int64 in let v = mul v prime64_2 in let v = rotl64 v 31 in let v = mul v prime64_1 in let acc = logxor acc v in add (mul acc prime64_1) prime64_4 let[@inline] avalanche h = let open Int64 in let h = logxor h (shift_right_logical h 33) in let h = mul h prime64_2 in let h = logxor h (shift_right_logical h 29) in let h = mul h prime64_3 in logxor h (shift_right_logical h 32) (** Compute xxHash-64 of bytes with given seed *) let hash64 ?(seed=0L) src ~pos ~len = let open Int64 in let end_pos = pos + len in let h = ref ( if len >= 32 then begin (* Initialize accumulators *) let v1 = ref (add (add seed prime64_1) prime64_2) in let v2 = ref (add seed prime64_2) in let v3 = ref seed in let v4 = ref (sub seed prime64_1) in (* Process 32-byte blocks *) let p = ref pos in while !p + 32 <= end_pos do v1 := mix1 !v1 (Bytes.get_int64_le src !p); v2 := mix1 !v2 (Bytes.get_int64_le src (!p + 8)); v3 := mix1 !v3 (Bytes.get_int64_le src (!p + 16)); v4 := mix1 !v4 (Bytes.get_int64_le src (!p + 24)); p := !p + 32 done; (* Merge accumulators *) let h = add (add (rotl64 !v1 1) (rotl64 !v2 7)) (add (rotl64 !v3 12) (rotl64 !v4 18)) in let h = mix2 h !v1 in let h = mix2 h !v2 in let h = mix2 h !v3 in mix2 h !v4 end else add seed prime64_5 ) in h := add !h (of_int len); (* Process remaining 8-byte chunks *) let p = ref (if len >= 32 then pos + (len / 32) * 32 else pos) in while !p + 8 <= end_pos do let k = Bytes.get_int64_le src !p in let k = mul k prime64_2 in let k = rotl64 k 31 in let k = mul k prime64_1 in h := logxor !h k; h := rotl64 !h 27; h := add (mul !h prime64_1) prime64_4; p := !p + 8 done; (* Process remaining 4-byte chunk *) if !p + 4 <= end_pos then begin let k = of_int (Bytes.get_int32_le src !p |> Int32.to_int) in let k = logand k 0xFFFFFFFFL in (* Make unsigned *) h := logxor !h (mul k prime64_1); h := rotl64 !h 23; h := add (mul !h prime64_2) prime64_3; p := !p + 4 end; (* Process remaining bytes *) while !p < end_pos do let k = of_int (Bytes.get_uint8 src !p) in h := logxor !h (mul k prime64_5); h := rotl64 !h 11; h := mul !h prime64_1; incr p done; avalanche !h let hash64_string ?seed s = let src = Bytes.unsafe_of_string s in hash64 ?seed src ~pos:0 ~len:(String.length s) (** Compute xxHash-64 and return lower 32 bits (for zstd checksum) *) let hash32 ?seed src ~pos ~len = let h = hash64 ?seed src ~pos ~len in Int64.to_int32 (Int64.logand h 0xFFFFFFFFL) let hash32_string ?seed s = let src = Bytes.unsafe_of_string s in hash32 ?seed src ~pos:0 ~len:(String.length s) (** Streaming hasher state *) type state = { mutable v1 : int64; mutable v2 : int64; mutable v3 : int64; mutable v4 : int64; mutable total_len : int; buffer : bytes; mutable buf_len : int; seed : int64; } let create_state ?(seed=0L) () = let open Int64 in { v1 = add (add seed prime64_1) prime64_2; v2 = add seed prime64_2; v3 = seed; v4 = sub seed prime64_1; total_len = 0; buffer = Bytes.create 32; buf_len = 0; seed; } let reset ?(seed=0L) state = let open Int64 in state.v1 <- add (add seed prime64_1) prime64_2; state.v2 <- add seed prime64_2; state.v3 <- seed; state.v4 <- sub seed prime64_1; state.total_len <- 0; state.buf_len <- 0 let copy_state state = { v1 = state.v1; v2 = state.v2; v3 = state.v3; v4 = state.v4; total_len = state.total_len; buffer = Bytes.copy state.buffer; buf_len = state.buf_len; seed = state.seed; } let update state src ~pos ~len = let end_pos = pos + len in state.total_len <- state.total_len + len; let p = ref pos in (* Fill buffer if we have partial data *) if state.buf_len > 0 then begin let to_copy = min (32 - state.buf_len) len in Bytes.blit src !p state.buffer state.buf_len to_copy; state.buf_len <- state.buf_len + to_copy; p := !p + to_copy; if state.buf_len = 32 then begin state.v1 <- mix1 state.v1 (Bytes.get_int64_le state.buffer 0); state.v2 <- mix1 state.v2 (Bytes.get_int64_le state.buffer 8); state.v3 <- mix1 state.v3 (Bytes.get_int64_le state.buffer 16); state.v4 <- mix1 state.v4 (Bytes.get_int64_le state.buffer 24); state.buf_len <- 0 end end; (* Process 32-byte blocks *) while !p + 32 <= end_pos do state.v1 <- mix1 state.v1 (Bytes.get_int64_le src !p); state.v2 <- mix1 state.v2 (Bytes.get_int64_le src (!p + 8)); state.v3 <- mix1 state.v3 (Bytes.get_int64_le src (!p + 16)); state.v4 <- mix1 state.v4 (Bytes.get_int64_le src (!p + 24)); p := !p + 32 done; (* Buffer remaining *) if !p < end_pos then begin let remaining = end_pos - !p in Bytes.blit src !p state.buffer state.buf_len remaining; state.buf_len <- state.buf_len + remaining end let update_string state s = let src = Bytes.unsafe_of_string s in update state src ~pos:0 ~len:(String.length s) let finalize state = let open Int64 in let h = ref ( if state.total_len >= 32 then begin let h = add (add (rotl64 state.v1 1) (rotl64 state.v2 7)) (add (rotl64 state.v3 12) (rotl64 state.v4 18)) in let h = mix2 h state.v1 in let h = mix2 h state.v2 in let h = mix2 h state.v3 in mix2 h state.v4 end else add state.v3 prime64_5 (* v3 holds seed *) ) in h := add !h (of_int state.total_len); (* Process buffered data *) let p = ref 0 in while !p + 8 <= state.buf_len do let k = Bytes.get_int64_le state.buffer !p in let k = mul k prime64_2 in let k = rotl64 k 31 in let k = mul k prime64_1 in h := logxor !h k; h := rotl64 !h 27; h := add (mul !h prime64_1) prime64_4; p := !p + 8 done; if !p + 4 <= state.buf_len then begin let k = of_int (Bytes.get_int32_le state.buffer !p |> Int32.to_int) in let k = logand k 0xFFFFFFFFL in h := logxor !h (mul k prime64_1); h := rotl64 !h 23; h := add (mul !h prime64_2) prime64_3; p := !p + 4 end; while !p < state.buf_len do let k = of_int (Bytes.get_uint8 state.buffer !p) in h := logxor !h (mul k prime64_5); h := rotl64 !h 11; h := mul !h prime64_1; incr p done; avalanche !h let finalize32 state = let h = finalize state in Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)