Pure OCaml xxhash implementation
at main 264 lines 7.3 kB view raw
1(** xxHash-64 - Pure OCaml implementation. 2 3 This implements the xxHash64 algorithm designed by Yann Collet. 4 xxHash is an extremely fast non-cryptographic hash algorithm with 5 excellent distribution properties. *) 6 7(* Constants *) 8let prime64_1 = 0x9E3779B185EBCA87L 9let prime64_2 = 0xC2B2AE3D27D4EB4FL 10let prime64_3 = 0x165667B19E3779F9L 11let prime64_4 = 0x85EBCA77C2B2AE63L 12let prime64_5 = 0x27D4EB2F165667C5L 13 14(* Helper functions *) 15let[@inline] rotl64 x r = 16 Int64.(logor (shift_left x r) (shift_right_logical x (64 - r))) 17 18let[@inline] mix1 acc v = 19 let open Int64 in 20 let acc = add acc (mul v prime64_2) in 21 let acc = rotl64 acc 31 in 22 mul acc prime64_1 23 24let[@inline] mix2 acc v = 25 let open Int64 in 26 let v = mul v prime64_2 in 27 let v = rotl64 v 31 in 28 let v = mul v prime64_1 in 29 let acc = logxor acc v in 30 add (mul acc prime64_1) prime64_4 31 32let[@inline] avalanche h = 33 let open Int64 in 34 let h = logxor h (shift_right_logical h 33) in 35 let h = mul h prime64_2 in 36 let h = logxor h (shift_right_logical h 29) in 37 let h = mul h prime64_3 in 38 logxor h (shift_right_logical h 32) 39 40(** Compute xxHash-64 of bytes with given seed *) 41let hash64 ?(seed=0L) src ~pos ~len = 42 let open Int64 in 43 let end_pos = pos + len in 44 45 let h = ref ( 46 if len >= 32 then begin 47 (* Initialize accumulators *) 48 let v1 = ref (add (add seed prime64_1) prime64_2) in 49 let v2 = ref (add seed prime64_2) in 50 let v3 = ref seed in 51 let v4 = ref (sub seed prime64_1) in 52 53 (* Process 32-byte blocks *) 54 let p = ref pos in 55 while !p + 32 <= end_pos do 56 v1 := mix1 !v1 (Bytes.get_int64_le src !p); 57 v2 := mix1 !v2 (Bytes.get_int64_le src (!p + 8)); 58 v3 := mix1 !v3 (Bytes.get_int64_le src (!p + 16)); 59 v4 := mix1 !v4 (Bytes.get_int64_le src (!p + 24)); 60 p := !p + 32 61 done; 62 63 (* Merge accumulators *) 64 let h = add 65 (add (rotl64 !v1 1) (rotl64 !v2 7)) 66 (add (rotl64 !v3 12) (rotl64 !v4 18)) in 67 let h = mix2 h !v1 in 68 let h = mix2 h !v2 in 69 let h = mix2 h !v3 in 70 mix2 h !v4 71 end else 72 add seed prime64_5 73 ) in 74 75 h := add !h (of_int len); 76 77 (* Process remaining 8-byte chunks *) 78 let p = ref (if len >= 32 then pos + (len / 32) * 32 else pos) in 79 while !p + 8 <= end_pos do 80 let k = Bytes.get_int64_le src !p in 81 let k = mul k prime64_2 in 82 let k = rotl64 k 31 in 83 let k = mul k prime64_1 in 84 h := logxor !h k; 85 h := rotl64 !h 27; 86 h := add (mul !h prime64_1) prime64_4; 87 p := !p + 8 88 done; 89 90 (* Process remaining 4-byte chunk *) 91 if !p + 4 <= end_pos then begin 92 let k = of_int (Bytes.get_int32_le src !p |> Int32.to_int) in 93 let k = logand k 0xFFFFFFFFL in (* Make unsigned *) 94 h := logxor !h (mul k prime64_1); 95 h := rotl64 !h 23; 96 h := add (mul !h prime64_2) prime64_3; 97 p := !p + 4 98 end; 99 100 (* Process remaining bytes *) 101 while !p < end_pos do 102 let k = of_int (Bytes.get_uint8 src !p) in 103 h := logxor !h (mul k prime64_5); 104 h := rotl64 !h 11; 105 h := mul !h prime64_1; 106 incr p 107 done; 108 109 avalanche !h 110 111let hash64_string ?seed s = 112 let src = Bytes.unsafe_of_string s in 113 hash64 ?seed src ~pos:0 ~len:(String.length s) 114 115(** Compute xxHash-64 and return lower 32 bits (for zstd checksum) *) 116let hash32 ?seed src ~pos ~len = 117 let h = hash64 ?seed src ~pos ~len in 118 Int64.to_int32 (Int64.logand h 0xFFFFFFFFL) 119 120let hash32_string ?seed s = 121 let src = Bytes.unsafe_of_string s in 122 hash32 ?seed src ~pos:0 ~len:(String.length s) 123 124(** Streaming hasher state *) 125type state = { 126 mutable v1 : int64; 127 mutable v2 : int64; 128 mutable v3 : int64; 129 mutable v4 : int64; 130 mutable total_len : int; 131 buffer : bytes; 132 mutable buf_len : int; 133 seed : int64; 134} 135 136let create_state ?(seed=0L) () = 137 let open Int64 in 138 { 139 v1 = add (add seed prime64_1) prime64_2; 140 v2 = add seed prime64_2; 141 v3 = seed; 142 v4 = sub seed prime64_1; 143 total_len = 0; 144 buffer = Bytes.create 32; 145 buf_len = 0; 146 seed; 147 } 148 149let reset ?(seed=0L) state = 150 let open Int64 in 151 state.v1 <- add (add seed prime64_1) prime64_2; 152 state.v2 <- add seed prime64_2; 153 state.v3 <- seed; 154 state.v4 <- sub seed prime64_1; 155 state.total_len <- 0; 156 state.buf_len <- 0 157 158let copy_state state = 159 { 160 v1 = state.v1; 161 v2 = state.v2; 162 v3 = state.v3; 163 v4 = state.v4; 164 total_len = state.total_len; 165 buffer = Bytes.copy state.buffer; 166 buf_len = state.buf_len; 167 seed = state.seed; 168 } 169 170let update state src ~pos ~len = 171 let end_pos = pos + len in 172 state.total_len <- state.total_len + len; 173 174 let p = ref pos in 175 176 (* Fill buffer if we have partial data *) 177 if state.buf_len > 0 then begin 178 let to_copy = min (32 - state.buf_len) len in 179 Bytes.blit src !p state.buffer state.buf_len to_copy; 180 state.buf_len <- state.buf_len + to_copy; 181 p := !p + to_copy; 182 183 if state.buf_len = 32 then begin 184 state.v1 <- mix1 state.v1 (Bytes.get_int64_le state.buffer 0); 185 state.v2 <- mix1 state.v2 (Bytes.get_int64_le state.buffer 8); 186 state.v3 <- mix1 state.v3 (Bytes.get_int64_le state.buffer 16); 187 state.v4 <- mix1 state.v4 (Bytes.get_int64_le state.buffer 24); 188 state.buf_len <- 0 189 end 190 end; 191 192 (* Process 32-byte blocks *) 193 while !p + 32 <= end_pos do 194 state.v1 <- mix1 state.v1 (Bytes.get_int64_le src !p); 195 state.v2 <- mix1 state.v2 (Bytes.get_int64_le src (!p + 8)); 196 state.v3 <- mix1 state.v3 (Bytes.get_int64_le src (!p + 16)); 197 state.v4 <- mix1 state.v4 (Bytes.get_int64_le src (!p + 24)); 198 p := !p + 32 199 done; 200 201 (* Buffer remaining *) 202 if !p < end_pos then begin 203 let remaining = end_pos - !p in 204 Bytes.blit src !p state.buffer state.buf_len remaining; 205 state.buf_len <- state.buf_len + remaining 206 end 207 208let update_string state s = 209 let src = Bytes.unsafe_of_string s in 210 update state src ~pos:0 ~len:(String.length s) 211 212let finalize state = 213 let open Int64 in 214 215 let h = ref ( 216 if state.total_len >= 32 then begin 217 let h = add 218 (add (rotl64 state.v1 1) (rotl64 state.v2 7)) 219 (add (rotl64 state.v3 12) (rotl64 state.v4 18)) in 220 let h = mix2 h state.v1 in 221 let h = mix2 h state.v2 in 222 let h = mix2 h state.v3 in 223 mix2 h state.v4 224 end else 225 add state.v3 prime64_5 (* v3 holds seed *) 226 ) in 227 228 h := add !h (of_int state.total_len); 229 230 (* Process buffered data *) 231 let p = ref 0 in 232 while !p + 8 <= state.buf_len do 233 let k = Bytes.get_int64_le state.buffer !p in 234 let k = mul k prime64_2 in 235 let k = rotl64 k 31 in 236 let k = mul k prime64_1 in 237 h := logxor !h k; 238 h := rotl64 !h 27; 239 h := add (mul !h prime64_1) prime64_4; 240 p := !p + 8 241 done; 242 243 if !p + 4 <= state.buf_len then begin 244 let k = of_int (Bytes.get_int32_le state.buffer !p |> Int32.to_int) in 245 let k = logand k 0xFFFFFFFFL in 246 h := logxor !h (mul k prime64_1); 247 h := rotl64 !h 23; 248 h := add (mul !h prime64_2) prime64_3; 249 p := !p + 4 250 end; 251 252 while !p < state.buf_len do 253 let k = of_int (Bytes.get_uint8 state.buffer !p) in 254 h := logxor !h (mul k prime64_5); 255 h := rotl64 !h 11; 256 h := mul !h prime64_1; 257 incr p 258 done; 259 260 avalanche !h 261 262let finalize32 state = 263 let h = finalize state in 264 Int64.to_int32 (Int64.logand h 0xFFFFFFFFL)