Pure OCaml xxhash implementation

Remove ocaml-mlxxhash for fork

-845
-1
ocaml-mlxxhash/.gitignore
··· 1 - _build
-1
ocaml-mlxxhash/dune
··· 1 - (vendored_dirs vendor)
-19
ocaml-mlxxhash/dune-project
··· 1 - (lang dune 3.21) 2 - (name mlxxhash) 3 - 4 - (generate_opam_files true) 5 - 6 - (license ISC) 7 - (authors "Anil Madhavapeddy <anil@recoil.org>") 8 - (maintainers "Anil Madhavapeddy <anil@recoil.org>") 9 - (source (tangled anil.recoil.org/ocaml-xxhash)) 10 - 11 - (package 12 - (name mlxxhash) 13 - (synopsis "Pure OCaml implementation of xxHash-64") 14 - (description 15 - "A pure OCaml implementation of the xxHash-64 non-cryptographic hash algorithm. 16 - Provides both one-shot and streaming APIs for hashing bytes and strings.") 17 - (depends 18 - (ocaml (>= 5.2.0)) 19 - (alcotest (and :with-test (>= 1.7.0)))))
-33
ocaml-mlxxhash/mlxxhash.opam
··· 1 - # This file is generated by dune, edit dune-project instead 2 - opam-version: "2.0" 3 - synopsis: "Pure OCaml implementation of xxHash-64" 4 - description: """ 5 - A pure OCaml implementation of the xxHash-64 non-cryptographic hash algorithm. 6 - Provides both one-shot and streaming APIs for hashing bytes and strings.""" 7 - maintainer: ["Anil Madhavapeddy <anil@recoil.org>"] 8 - authors: ["Anil Madhavapeddy <anil@recoil.org>"] 9 - license: "ISC" 10 - homepage: "https://tangled.org/anil.recoil.org/ocaml-xxhash" 11 - bug-reports: "https://tangled.org/anil.recoil.org/ocaml-xxhash/issues" 12 - depends: [ 13 - "dune" {>= "3.21"} 14 - "ocaml" {>= "5.2.0"} 15 - "alcotest" {with-test & >= "1.7.0"} 16 - "odoc" {with-doc} 17 - ] 18 - build: [ 19 - ["dune" "subst"] {dev} 20 - [ 21 - "dune" 22 - "build" 23 - "-p" 24 - name 25 - "-j" 26 - jobs 27 - "@install" 28 - "@runtest" {with-test} 29 - "@doc" {with-doc} 30 - ] 31 - ] 32 - dev-repo: "git+https://tangled.org/anil.recoil.org/ocaml-xxhash" 33 - x-maintenance-intent: ["(latest)"]
-3
ocaml-mlxxhash/src/dune
··· 1 - (library 2 - (name xxhash) 3 - (public_name mlxxhash))
-264
ocaml-mlxxhash/src/xxhash.ml
(** xxHash-64 - Pure OCaml implementation.

    Implements the 64-bit xxHash (XXH64) algorithm designed by Yann Collet,
    a fast non-cryptographic hash. A one-shot API ([hash64]) and a streaming
    API ([create_state] / [update] / [finalize]) are provided; both share the
    same round, merge and tail code, so they produce the same value for the
    same bytes and seed. *)

(* The five 64-bit primes used by XXH64. *)
let prime64_1 = 0x9E3779B185EBCA87L
let prime64_2 = 0xC2B2AE3D27D4EB4FL
let prime64_3 = 0x165667B19E3779F9L
let prime64_4 = 0x85EBCA77C2B2AE63L
let prime64_5 = 0x27D4EB2F165667C5L

(* Bytes consumed per stripe: four 8-byte lanes. *)
let block_size = 32

(* Rotate [x] left by [r] bits (0 < r < 64). *)
let[@inline] rotl64 x r =
  Int64.(logor (shift_left x r) (shift_right_logical x (64 - r)))

(* One accumulator round: fold the 8-byte lane [v] into [acc]. *)
let[@inline] mix1 acc v =
  let open Int64 in
  let acc = add acc (mul v prime64_2) in
  let acc = rotl64 acc 31 in
  mul acc prime64_1

(* Merge one finished accumulator [v] into the converging hash [acc]. *)
let[@inline] mix2 acc v =
  let open Int64 in
  let v = mul v prime64_2 in
  let v = rotl64 v 31 in
  let v = mul v prime64_1 in
  let acc = logxor acc v in
  add (mul acc prime64_1) prime64_4

(* Final bit mixing applied to the hash before it is returned. *)
let[@inline] avalanche h =
  let open Int64 in
  let h = logxor h (shift_right_logical h 33) in
  let h = mul h prime64_2 in
  let h = logxor h (shift_right_logical h 29) in
  let h = mul h prime64_3 in
  logxor h (shift_right_logical h 32)

(* Reject ranges that do not lie inside [src]. Done up front so that a bad
   call can neither yield a hash of the wrong data nor leave a streaming
   state half-updated. The comparison is written so that it cannot overflow
   once [pos] and [len] are known to be non-negative. *)
let check_range fn src ~pos ~len =
  if pos < 0 || len < 0 || pos > Bytes.length src - len then
    invalid_arg
      (Printf.sprintf "Xxhash.%s: invalid range (pos=%d, len=%d)" fn pos len)

(* Read 4 little-endian bytes as an unsigned value held in an int64.
   Widening with [Int64.of_int32] and masking avoids [Int32.to_int], which
   loses the top bit on platforms where [int] is 31 bits wide. *)
let[@inline] get_uint32_le src p =
  Int64.(logand (of_int32 (Bytes.get_int32_le src p)) 0xFFFFFFFFL)

(* Converge the four lane accumulators into a single 64-bit value. *)
let[@inline] merge_lanes v1 v2 v3 v4 =
  let open Int64 in
  let h =
    add
      (add (rotl64 v1 1) (rotl64 v2 7))
      (add (rotl64 v3 12) (rotl64 v4 18))
  in
  mix2 (mix2 (mix2 (mix2 h v1) v2) v3) v4

(* Consume the trailing bytes [pos, end_pos) of [src] (fewer than one stripe
   in practice): whole 8-byte words, then at most one 4-byte word, then single
   bytes; finish with the avalanche. Shared by [hash64] and [finalize]. *)
let finish_tail h0 src ~pos ~end_pos =
  let open Int64 in
  let h = ref h0 in
  let p = ref pos in
  while !p + 8 <= end_pos do
    (* [mix1 0L k] is exactly the per-word scrambling: rotl (k * P2) 31 * P1 *)
    let k = mix1 0L (Bytes.get_int64_le src !p) in
    h := add (mul (rotl64 (logxor !h k) 27) prime64_1) prime64_4;
    p := !p + 8
  done;
  if !p + 4 <= end_pos then begin
    let k = get_uint32_le src !p in
    h := add (mul (rotl64 (logxor !h (mul k prime64_1)) 23) prime64_2) prime64_3;
    p := !p + 4
  end;
  while !p < end_pos do
    let k = of_int (Bytes.get_uint8 src !p) in
    h := mul (rotl64 (logxor !h (mul k prime64_5)) 11) prime64_1;
    incr p
  done;
  avalanche !h

(** [hash64 ?seed src ~pos ~len] is the xxHash-64 of the [len] bytes of [src]
    starting at [pos], using [seed] (default [0L]).

    @raise Invalid_argument if [pos] and [len] do not designate a valid
    range of [src]. *)
let hash64 ?(seed = 0L) src ~pos ~len =
  check_range "hash64" src ~pos ~len;
  let end_pos = pos + len in
  let p = ref pos in
  let h =
    if len < block_size then Int64.add seed prime64_5
    else begin
      let v1 = ref Int64.(add (add seed prime64_1) prime64_2) in
      let v2 = ref (Int64.add seed prime64_2) in
      let v3 = ref seed in
      let v4 = ref (Int64.sub seed prime64_1) in
      (* Full 32-byte stripes; [p] is left pointing at the first tail byte. *)
      while !p + block_size <= end_pos do
        v1 := mix1 !v1 (Bytes.get_int64_le src !p);
        v2 := mix1 !v2 (Bytes.get_int64_le src (!p + 8));
        v3 := mix1 !v3 (Bytes.get_int64_le src (!p + 16));
        v4 := mix1 !v4 (Bytes.get_int64_le src (!p + 24));
        p := !p + block_size
      done;
      merge_lanes !v1 !v2 !v3 !v4
    end
  in
  finish_tail (Int64.add h (Int64.of_int len)) src ~pos:!p ~end_pos

(** [hash64_string ?seed s] is [hash64] applied to the whole of [s]. *)
let hash64_string ?seed s =
  (* Safe: the bytes are only read, never written or retained. *)
  let src = Bytes.unsafe_of_string s in
  hash64 ?seed src ~pos:0 ~len:(String.length s)

(** Compute xxHash-64 and return lower 32 bits (for zstd checksum).

    @raise Invalid_argument on an invalid range, as [hash64]. *)
let hash32 ?seed src ~pos ~len =
  (* [Int64.to_int32] keeps exactly the low 32 bits. *)
  Int64.to_int32 (hash64 ?seed src ~pos ~len)

(** [hash32_string ?seed s] is [hash32] applied to the whole of [s]. *)
let hash32_string ?seed s =
  let src = Bytes.unsafe_of_string s in
  hash32 ?seed src ~pos:0 ~len:(String.length s)

(** Streaming hasher state *)
type state = {
  mutable v1 : int64;         (* lane accumulators *)
  mutable v2 : int64;
  mutable v3 : int64;
  mutable v4 : int64;
  mutable total_len : int;    (* bytes fed since creation or last reset *)
  buffer : bytes;             (* pending bytes of an incomplete stripe *)
  mutable buf_len : int;      (* number of valid bytes in [buffer] *)
  mutable seed : int64;       (* seed of the hash currently in progress *)
}

(** [reset ?seed state] restarts [state] for a new hash with [seed]
    (default [0L]), discarding any buffered input. *)
let reset ?(seed = 0L) state =
  let open Int64 in
  state.v1 <- add (add seed prime64_1) prime64_2;
  state.v2 <- add seed prime64_2;
  state.v3 <- seed;
  state.v4 <- sub seed prime64_1;
  state.total_len <- 0;
  state.buf_len <- 0;
  (* Keep the recorded seed in sync; [finalize] reads it for short inputs. *)
  state.seed <- seed

(** [create_state ?seed ()] is a fresh streaming state for [seed]
    (default [0L]). *)
let create_state ?seed () =
  let state = {
    v1 = 0L; v2 = 0L; v3 = 0L; v4 = 0L;
    total_len = 0;
    buffer = Bytes.create block_size;
    buf_len = 0;
    seed = 0L;
  } in
  reset ?seed state;
  state

(** [copy_state state] is an independent copy of [state]; feeding one does
    not affect the other. *)
let copy_state state =
  { state with buffer = Bytes.copy state.buffer }

(* Fold the 32-byte stripe of [src] starting at [p] into the accumulators. *)
let[@inline] consume_block state src p =
  state.v1 <- mix1 state.v1 (Bytes.get_int64_le src p);
  state.v2 <- mix1 state.v2 (Bytes.get_int64_le src (p + 8));
  state.v3 <- mix1 state.v3 (Bytes.get_int64_le src (p + 16));
  state.v4 <- mix1 state.v4 (Bytes.get_int64_le src (p + 24))

(** [update state src ~pos ~len] feeds the [len] bytes of [src] starting at
    [pos] into [state].

    @raise Invalid_argument if [pos] and [len] do not designate a valid
    range of [src]; in that case [state] is left unchanged. *)
let update state src ~pos ~len =
  (* Validate before touching [state] so a bad call cannot corrupt it. *)
  check_range "update" src ~pos ~len;
  let end_pos = pos + len in
  state.total_len <- state.total_len + len;
  let p = ref pos in
  (* Top up a partially filled buffer first. *)
  if state.buf_len > 0 then begin
    let to_copy = min (block_size - state.buf_len) len in
    Bytes.blit src !p state.buffer state.buf_len to_copy;
    state.buf_len <- state.buf_len + to_copy;
    p := !p + to_copy;
    if state.buf_len = block_size then begin
      consume_block state state.buffer 0;
      state.buf_len <- 0
    end
  end;
  (* Whole stripes are hashed straight from [src], without copying. *)
  while !p + block_size <= end_pos do
    consume_block state src !p;
    p := !p + block_size
  done;
  (* Stash whatever is left (fewer than 32 bytes) for the next call. *)
  let remaining = end_pos - !p in
  if remaining > 0 then begin
    Bytes.blit src !p state.buffer state.buf_len remaining;
    state.buf_len <- state.buf_len + remaining
  end

(** [update_string state s] feeds the whole of [s] into [state]. *)
let update_string state s =
  let src = Bytes.unsafe_of_string s in
  update state src ~pos:0 ~len:(String.length s)

(** [finalize state] is the 64-bit hash of everything fed so far. [state] is
    not modified, so more data may be fed afterwards. *)
let finalize state =
  let h =
    if state.total_len >= block_size then
      merge_lanes state.v1 state.v2 state.v3 state.v4
    else
      (* No stripe was ever consumed: the hash starts from the seed alone. *)
      Int64.add state.seed prime64_5
  in
  finish_tail
    (Int64.add h (Int64.of_int state.total_len))
    state.buffer ~pos:0 ~end_pos:state.buf_len

(** [finalize32 state] is the lower 32 bits of [finalize state]. *)
let finalize32 state =
  Int64.to_int32 (finalize state)
-91
ocaml-mlxxhash/src/xxhash.mli
(** xxHash - Fast non-cryptographic hash functions.

    A pure OCaml implementation of the 64-bit xxHash function originally
    designed by Yann Collet. The hash is fast and well distributed, but it is
    {b not} cryptographic.

    {1 Quick Start}

    {[
      (* Hash a string *)
      let hash = Xxhash.hash64_string "Hello, World!"

      (* Hash bytes with explicit range *)
      let bytes = Bytes.of_string "Hello, World!"
      let hash = Xxhash.hash64 bytes ~pos:0 ~len:13

      (* Use streaming API for large data *)
      let state = Xxhash.create_state () in
      Xxhash.update state chunk1 ~pos:0 ~len:(Bytes.length chunk1);
      Xxhash.update state chunk2 ~pos:0 ~len:(Bytes.length chunk2);
      let hash = Xxhash.finalize state
    ]}

    {1 Hash Variants}

    - {!hash64}: the full 64-bit hash, best for general use
    - {!hash32}: the lower 32 bits of the 64-bit hash (used by zstd)

    {1 Streaming API}

    For input that does not fit in memory or arrives piece by piece:
    - {!create_state}: make a new streaming state
    - {!update}: feed data into the state
    - {!finalize}: read out the hash value *)

(** {1 One-shot Hashing} *)

val hash64 : ?seed:int64 -> bytes -> pos:int -> len:int -> int64
(** [hash64 ?seed bytes ~pos ~len] is the xxHash-64 of the [len] bytes of
    [bytes] that start at offset [pos].

    @param seed Optional seed value (default: 0) *)

val hash64_string : ?seed:int64 -> string -> int64
(** [hash64_string ?seed s] is the xxHash-64 of the whole string [s]. *)

val hash32 : ?seed:int64 -> bytes -> pos:int -> len:int -> int32
(** [hash32 ?seed bytes ~pos ~len] computes the same xxHash-64 as {!hash64}
    and returns only its lower 32 bits. This is the variant zstd uses for
    content checksums. *)

val hash32_string : ?seed:int64 -> string -> int32
(** [hash32_string ?seed s] is the lower 32 bits of [hash64_string ?seed s]. *)

(** {1 Streaming API} *)

(** Streaming hasher state. *)
type state

val create_state : ?seed:int64 -> unit -> state
(** [create_state ?seed ()] returns a fresh streaming hash state.

    @param seed Optional seed value (default: 0) *)

val reset : ?seed:int64 -> state -> unit
(** [reset ?seed state] clears [state] so it can be reused for a new hash.

    @param seed Optional new seed value (default: 0) *)

val update : state -> bytes -> pos:int -> len:int -> unit
(** [update state bytes ~pos ~len] feeds the [len] bytes of [bytes] that
    start at offset [pos] into [state].

    May be called any number of times to hash data incrementally. *)

val update_string : state -> string -> unit
(** [update_string state s] feeds the whole string [s] into [state]. *)

val finalize : state -> int64
(** [finalize state] is the 64-bit hash of everything fed so far.

    The state remains usable afterwards: repeated calls to {!finalize}
    return the same value until {!update} is called again. *)

val finalize32 : state -> int32
(** [finalize32 state] is the lower 32 bits of [finalize state]. *)

(** {1 Utilities} *)

val copy_state : state -> state
(** [copy_state state] returns an independent copy of [state].
    Handy for hashing several inputs that share a common prefix. *)
-3
ocaml-mlxxhash/test/dune
··· 1 - (test 2 - (name test_xxhash) 3 - (libraries xxhash alcotest))
-430
ocaml-mlxxhash/test/test_xxhash.ml
(** Tests for the xxHash-64 implementation.

    The suite checks:
    1. Internal consistency (streaming and one-shot agree)
    2. Known reference values taken from the official xxHash test vectors
    3. Boundary conditions (32-byte blocks, assorted lengths)
    4. The streaming API against the same reference vectors *)

(* ===== Reference Test Buffer Generation =====

   The official xxHash sanity tests hash prefixes of a deterministic
   pseudorandom buffer, generated as:

     PRIME32 = 2654435761
     PRIME64 = 11400714785074694797

     byteGen   = PRIME32
     buffer[i] = (byteGen >> 56) & 0xFF
     byteGen  *= PRIME64 *)

(* 0x9E3779B1 read as an unsigned 32-bit value. *)
let prime32 = 2654435761L

(* 11400714785074694797 = 0x9e3779b185ebca8d. Note that this generator
   constant is NOT one of the primes used inside the hash itself. *)
let prime64_gen = 0x9e3779b185ebca8dL

(** Build the first [len] bytes of the reference test buffer. *)
let fill_test_buffer len =
  let buf = Bytes.create len in
  let gen = ref prime32 in
  for i = 0 to len - 1 do
    Bytes.set_uint8 buf i (Int64.to_int (Int64.shift_right_logical !gen 56));
    gen := Int64.mul !gen prime64_gen
  done;
  buf

(* ===== Official XXH64 Test Vectors =====

   Each entry is (length, seed, expected_hash), where the input is the first
   [length] bytes of the reference buffer. Values are reported to come from
   the sanity_test_vectors.h file of the upstream xxHash test suite.
   Covered lengths: every length from 0 to 33, selected lengths up to 128,
   then 256, 512, 1024 and 4096, each with seeds 0 and 0x9E3779B1. *)
let xxh64_test_vectors = [
  (* Lengths 0-31: inputs shorter than one block *)
  (0, 0x0000000000000000L, 0xEF46DB3751D8E999L);
  (0, 0x000000009E3779B1L, 0xAC75FDA2929B17EFL);
  (1, 0x0000000000000000L, 0xE934A84ADB052768L);
  (1, 0x000000009E3779B1L, 0x5014607643A9B4C3L);
  (2, 0x0000000000000000L, 0x5D48CD60A77E23FFL);
  (2, 0x000000009E3779B1L, 0x9E93152232D54A39L);
  (3, 0x0000000000000000L, 0xFF7E1959CB50794AL);
  (3, 0x000000009E3779B1L, 0xAA8584E83660F7D1L);
  (4, 0x0000000000000000L, 0x9136A0DCA57457EEL);
  (4, 0x000000009E3779B1L, 0xCAAB286BD8E9FDB5L);
  (5, 0x0000000000000000L, 0x9B046FB1397F09A5L);
  (5, 0x000000009E3779B1L, 0x2AF5249930F984ECL);
  (6, 0x0000000000000000L, 0xC72565B7154268A8L);
  (6, 0x000000009E3779B1L, 0xCA4C6723580E8EF6L);
  (7, 0x0000000000000000L, 0x6C83909A9F01ED25L);
  (7, 0x000000009E3779B1L, 0xF98D03B1AD6F9293L);
  (8, 0x0000000000000000L, 0xCDBCF538E71D1348L);
  (8, 0x000000009E3779B1L, 0xFE0C047A5353CDACL);
  (9, 0x0000000000000000L, 0x554B1AE991EDA6B6L);
  (9, 0x000000009E3779B1L, 0x7908265248F6D73FL);
  (10, 0x0000000000000000L, 0x5D00E7351392EA84L);
  (10, 0x000000009E3779B1L, 0x2A8AE16B86CD2F12L);
  (11, 0x0000000000000000L, 0x6345D5746F35DA70L);
  (11, 0x000000009E3779B1L, 0xEAA08A8C8BE3CCCFL);
  (12, 0x0000000000000000L, 0x0723BF50086EAD9AL);
  (12, 0x000000009E3779B1L, 0x8252819F4E506951L);
  (13, 0x0000000000000000L, 0xC2E5013E3C40BCF7L);
  (13, 0x000000009E3779B1L, 0x4DF437A291CB1039L);
  (14, 0x0000000000000000L, 0x8282DCC4994E35C8L);
  (14, 0x000000009E3779B1L, 0xC3BD6BF63DEB6DF0L);
  (15, 0x0000000000000000L, 0x180719316D622D84L);
  (15, 0x000000009E3779B1L, 0xD61105C20E91F99FL);
  (16, 0x0000000000000000L, 0x98C90B57FDFCB55CL);
  (16, 0x000000009E3779B1L, 0xC900AD2D536B607EL);
  (17, 0x0000000000000000L, 0x0D39A2D051A30C2CL);
  (17, 0x000000009E3779B1L, 0x495CD68A647C7A22L);
  (18, 0x0000000000000000L, 0x33E84A4333B2B2EBL);
  (18, 0x000000009E3779B1L, 0x2325A30CCA1A66DDL);
  (19, 0x0000000000000000L, 0xE91C6EF31FC08F82L);
  (19, 0x000000009E3779B1L, 0x06809662799B7D6FL);
  (20, 0x0000000000000000L, 0x5F8C68355769439EL);
  (20, 0x000000009E3779B1L, 0x97218696C2D29602L);
  (21, 0x0000000000000000L, 0x42B0B8EE353AC461L);
  (21, 0x000000009E3779B1L, 0x7FC0BB451B83A633L);
  (22, 0x0000000000000000L, 0x65C935C6978098B1L);
  (22, 0x000000009E3779B1L, 0xC4A0DD14BF835C13L);
  (23, 0x0000000000000000L, 0xD2460ECC840B74DDL);
  (23, 0x000000009E3779B1L, 0x4B44E8DE7A396773L);
  (24, 0x0000000000000000L, 0xF75A6DEA42DC5BF4L);
  (24, 0x000000009E3779B1L, 0x8B7C67EB59778E22L);
  (25, 0x0000000000000000L, 0x52FAA43C3F20B994L);
  (25, 0x000000009E3779B1L, 0xC4FEC92EAC2C3B8AL);
  (26, 0x0000000000000000L, 0x8DB7831EC345F9A3L);
  (26, 0x000000009E3779B1L, 0x2C2A80BCAD321466L);
  (27, 0x0000000000000000L, 0x88945AA08051FC2DL);
  (27, 0x000000009E3779B1L, 0x3401AF8EF28FD410L);
  (28, 0x0000000000000000L, 0x64CD9E8C96A9E2DDL);
  (28, 0x000000009E3779B1L, 0x8160FB8C20B48287L);
  (29, 0x0000000000000000L, 0x8C8F345B634AC2B9L);
  (29, 0x000000009E3779B1L, 0x5A327C78E4AD6678L);
  (30, 0x0000000000000000L, 0xE2677241D4C46CAFL);
  (30, 0x000000009E3779B1L, 0xB1B2B51C93AF4866L);
  (31, 0x0000000000000000L, 0x299B39A290E6D783L);
  (31, 0x000000009E3779B1L, 0xDA673D5FEB5C1D79L);
  (* Lengths 32-64: one to two blocks *)
  (32, 0x0000000000000000L, 0x18B216492BB44B70L);
  (32, 0x000000009E3779B1L, 0xB3F33BDF93ADE409L);
  (33, 0x0000000000000000L, 0x55C8DC3E578F5B59L);
  (33, 0x000000009E3779B1L, 0xE92C292F64BC3071L);
  (48, 0x0000000000000000L, 0xFD0FEEAC7A939933L);
  (48, 0x000000009E3779B1L, 0x6FFE2F43A24C2302L);
  (63, 0x0000000000000000L, 0xA9EFBE0FA0F3F4E7L);
  (63, 0x000000009E3779B1L, 0x6C911FADB05B6FC2L);
  (64, 0x0000000000000000L, 0xEF558F8ACAC2B5CDL);
  (64, 0x000000009E3779B1L, 0xB5EEBA99264CC44FL);
  (* Lengths 65-128: two to four blocks *)
  (65, 0x0000000000000000L, 0xDE0F20DC2631AF7AL);
  (65, 0x000000009E3779B1L, 0xD3F6FF3941E310CAL);
  (96, 0x0000000000000000L, 0x105064E743EDD1D9L);
  (96, 0x000000009E3779B1L, 0x8FF0B4ABEE6F03CCL);
  (100, 0x0000000000000000L, 0x4BFE019CD91D9EA4L);
  (100, 0x000000009E3779B1L, 0x4853706DC9625CAEL);
  (127, 0x0000000000000000L, 0x3C7A21119AA662B0L);
  (127, 0x000000009E3779B1L, 0xB0D6DC189C06CEEDL);
  (128, 0x0000000000000000L, 0x90CA021457D96DC5L);
  (128, 0x000000009E3779B1L, 0xED9340A202BCD1CFL);
  (* Larger sizes: many blocks *)
  (256, 0x0000000000000000L, 0x5E3F5BF94D574981L);
  (256, 0x000000009E3779B1L, 0x34733CBD9CC1B0D5L);
  (512, 0x0000000000000000L, 0x4358D2FDD62B58A7L);
  (512, 0x000000009E3779B1L, 0x0DED69C4804C47BAL);
  (1024, 0x0000000000000000L, 0x4775BF7CACE4D177L);
  (1024, 0x000000009E3779B1L, 0x238CF9296898B465L);
  (4096, 0x0000000000000000L, 0xAB77F4AF85F4E70BL);
  (4096, 0x000000009E3779B1L, 0xCB8B60CBA513125DL);
]

(* One shared buffer, built once and long enough for every vector. *)
let test_buffer = fill_test_buffer 4200

(* ===== Helpers ===== *)

(* Hash [parts] by feeding them, in order, into one streaming state. *)
let stream_hash ?seed parts =
  let st = Xxhash.create_state ?seed () in
  List.iter (Xxhash.update_string st) parts;
  Xxhash.finalize st

(* Check that streaming [s] in a single update matches the one-shot hash. *)
let expect_stream_matches ?seed label s =
  let direct = Xxhash.hash64_string ?seed s in
  let streaming = stream_hash ?seed [ s ] in
  Alcotest.(check int64) label direct streaming

(* Low 32 bits of a 64-bit hash, as an int32. *)
let low32 h = Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)

(* Known reference value: xxhash64 of the empty string with seed 0. *)
let test_empty_string () =
  Alcotest.(check int64) "empty string"
    0xef46db3751d8e999L (Xxhash.hash64_string "")

(* Consistency: streaming and one-shot must agree. *)
let test_consistency_short () =
  expect_stream_matches "short string consistency" "Hello"

let test_consistency_medium () =
  expect_stream_matches "medium string consistency" "Hello, World!"

let test_consistency_with_seed () =
  expect_stream_matches ~seed:12345L "consistency with seed" "test data"

(* Boundary: exactly 32 bytes (one block). *)
let test_32_bytes () =
  expect_stream_matches "32 bytes consistency" (String.make 32 'x')

(* Boundary: 33 bytes (one block plus one byte). *)
let test_33_bytes () =
  expect_stream_matches "33 bytes consistency" (String.make 33 'y')

(* Boundary: 64 bytes (two blocks). *)
let test_64_bytes () =
  expect_stream_matches "64 bytes consistency" (String.make 64 'a')

(* hash32 must be the low half of hash64. *)
let test_hash32_consistency () =
  let s = "Hello, World!" in
  let expected32 = low32 (Xxhash.hash64_string s) in
  Alcotest.(check int32) "hash32 is lower 32 bits"
    expected32 (Xxhash.hash32_string s)

(* Several chunks must hash like their concatenation. *)
let test_streaming_chunks () =
  let hash = stream_hash [ "Hello"; ", "; "World!" ] in
  let direct = Xxhash.hash64_string "Hello, World!" in
  Alcotest.(check int64) "chunked streaming" direct hash

let test_streaming_byte_at_a_time () =
  let s = "Hello, World!" in
  let st = Xxhash.create_state () in
  String.iter (fun c -> Xxhash.update_string st (String.make 1 c)) s;
  let hash = Xxhash.finalize st in
  Alcotest.(check int64) "byte-at-a-time" (Xxhash.hash64_string s) hash

(* Ten 100-byte chunks versus one 1000-byte string. *)
let test_streaming_large () =
  let chunk = String.make 100 'z' in
  let hash = stream_hash (List.init 10 (fun _ -> chunk)) in
  let direct = Xxhash.hash64_string (String.make 1000 'z') in
  Alcotest.(check int64) "large streaming" direct hash

(* 20-byte chunks straddle the 32-byte block boundaries. *)
let test_streaming_across_boundary () =
  let parts = [ String.make 20 'a'; String.make 20 'b'; String.make 20 'c' ] in
  let hash = stream_hash parts in
  let direct = Xxhash.hash64_string (String.concat "" parts) in
  Alcotest.(check int64) "across boundary" direct hash

let test_reset () =
  let st = Xxhash.create_state () in
  Xxhash.update_string st "first data";
  ignore (Xxhash.finalize st);
  Xxhash.reset st;
  Xxhash.update_string st "second data";
  let hash = Xxhash.finalize st in
  Alcotest.(check int64) "after reset"
    (Xxhash.hash64_string "second data") hash

let test_reset_with_new_seed () =
  let st = Xxhash.create_state ~seed:111L () in
  Xxhash.update_string st "first";
  ignore (Xxhash.finalize st);
  Xxhash.reset ~seed:222L st;
  Xxhash.update_string st "second";
  let hash = Xxhash.finalize st in
  Alcotest.(check int64) "reset with new seed"
    (Xxhash.hash64_string ~seed:222L "second") hash

(* A copy must evolve independently of the state it was taken from. *)
let test_copy_state () =
  let original = Xxhash.create_state () in
  Xxhash.update_string original "Hello";
  let clone = Xxhash.copy_state original in
  Xxhash.update_string original ", World!";
  Xxhash.update_string clone " there";
  let hash1 = Xxhash.finalize original in
  let hash2 = Xxhash.finalize clone in
  Alcotest.(check int64) "original state"
    (Xxhash.hash64_string "Hello, World!") hash1;
  Alcotest.(check int64) "copied state"
    (Xxhash.hash64_string "Hello there") hash2

let test_finalize32 () =
  let st = Xxhash.create_state () in
  Xxhash.update_string st "test";
  let hash32 = Xxhash.finalize32 st in
  let expected32 = low32 (Xxhash.finalize st) in
  Alcotest.(check int32) "finalize32" expected32 hash32

let test_bytes_api () =
  let s = "Hello, World!" in
  let bytes = Bytes.of_string s in
  let from_string = Xxhash.hash64_string s in
  let from_bytes = Xxhash.hash64 bytes ~pos:0 ~len:(Bytes.length bytes) in
  Alcotest.(check int64) "bytes API consistency" from_string from_bytes

(* Hashing a sub-range must ignore the surrounding bytes. *)
let test_bytes_partial () =
  let bytes = Bytes.of_string "XXXHello, World!YYY" in
  let hash = Xxhash.hash64 bytes ~pos:3 ~len:13 in
  Alcotest.(check int64) "partial bytes"
    (Xxhash.hash64_string "Hello, World!") hash

(* Same input, same output. *)
let test_deterministic () =
  let s = "The quick brown fox jumps over the lazy dog" in
  let hash1 = Xxhash.hash64_string s in
  let hash2 = Xxhash.hash64_string s in
  let hash3 = Xxhash.hash64_string s in
  Alcotest.(check int64) "hash1 = hash2" hash1 hash2;
  Alcotest.(check int64) "hash2 = hash3" hash2 hash3

(* Different inputs give different hashes. *)
let test_different_inputs () =
  let h1 = Xxhash.hash64_string "hello" in
  let h2 = Xxhash.hash64_string "Hello" in
  let h3 = Xxhash.hash64_string "hello " in
  Alcotest.(check bool) "hello != Hello" true (h1 <> h2);
  Alcotest.(check bool) "hello != 'hello '" true (h1 <> h3)

(* Different seeds give different hashes. *)
let test_different_seeds () =
  let s = "test" in
  let h1 = Xxhash.hash64_string ~seed:0L s in
  let h2 = Xxhash.hash64_string ~seed:1L s in
  let h3 = Xxhash.hash64_string ~seed:42L s in
  Alcotest.(check bool) "seed 0 != seed 1" true (h1 <> h2);
  Alcotest.(check bool) "seed 1 != seed 42" true (h2 <> h3)

(* ===== Reference Validation Tests ===== *)

(* Run [compute] over every reference vector. Each mismatch is printed to
   stderr with [line_fmt], in vector order, and the test then fails with a
   count formatted by [summary_fmt]. *)
let run_vectors ~line_fmt ~summary_fmt
    (compute : seed:int64 -> len:int -> int64) =
  let mismatches =
    xxh64_test_vectors
    |> List.mapi (fun i (len, seed, expected) ->
           (i, len, seed, expected, compute ~seed ~len))
    |> List.filter (fun (_, _, _, expected, actual) -> actual <> expected)
  in
  match mismatches with
  | [] -> ()
  | _ ->
    List.iter
      (fun (i, len, seed, expected, actual) ->
        Printf.eprintf line_fmt i len seed expected actual)
      mismatches;
    Alcotest.fail (Printf.sprintf summary_fmt (List.length mismatches))

(** One-shot hashing against the official xxHash test vectors. *)
let test_reference_vectors () =
  run_vectors
    ~line_fmt:"FAIL test %d: len=%d seed=%016Lx expected=%016Lx got=%016Lx\n"
    ~summary_fmt:"%d reference tests failed"
    (fun ~seed ~len -> Xxhash.hash64 ~seed test_buffer ~pos:0 ~len)

(** Streaming, with the whole input fed in a single update. *)
let test_reference_streaming () =
  run_vectors
    ~line_fmt:"FAIL streaming test %d: len=%d seed=%016Lx expected=%016Lx got=%016Lx\n"
    ~summary_fmt:"%d streaming reference tests failed"
    (fun ~seed ~len ->
      let st = Xxhash.create_state ~seed () in
      Xxhash.update st test_buffer ~pos:0 ~len;
      Xxhash.finalize st)

(** Streaming, with the input fed one byte per update. *)
let test_reference_streaming_bytewise () =
  run_vectors
    ~line_fmt:"FAIL bytewise test %d: len=%d seed=%016Lx expected=%016Lx got=%016Lx\n"
    ~summary_fmt:"%d bytewise reference tests failed"
    (fun ~seed ~len ->
      let st = Xxhash.create_state ~seed () in
      for j = 0 to len - 1 do
        Xxhash.update st test_buffer ~pos:j ~len:1
      done;
      Xxhash.finalize st)

let () =
  let quick name fn = Alcotest.test_case name `Quick fn in
  Alcotest.run "xxhash" [
    "reference", [
      quick "empty string" test_empty_string;
      quick "official vectors" test_reference_vectors;
      quick "streaming vectors" test_reference_streaming;
      quick "bytewise streaming" test_reference_streaming_bytewise;
    ];
    "consistency", [
      quick "short" test_consistency_short;
      quick "medium" test_consistency_medium;
      quick "with seed" test_consistency_with_seed;
      quick "hash32" test_hash32_consistency;
      quick "bytes API" test_bytes_api;
      quick "partial bytes" test_bytes_partial;
    ];
    "boundaries", [
      quick "32 bytes" test_32_bytes;
      quick "33 bytes" test_33_bytes;
      quick "64 bytes" test_64_bytes;
    ];
    "streaming", [
      quick "chunks" test_streaming_chunks;
      quick "byte at a time" test_streaming_byte_at_a_time;
      quick "large" test_streaming_large;
      quick "across boundary" test_streaming_across_boundary;
    ];
    "state", [
      quick "reset" test_reset;
      quick "reset with seed" test_reset_with_new_seed;
      quick "copy" test_copy_state;
      quick "finalize32" test_finalize32;
    ];
    "properties", [
      quick "deterministic" test_deterministic;
      quick "different inputs" test_different_inputs;
      quick "different seeds" test_different_seeds;
    ];
  ]