Pure OCaml implementation of the Brotli compression algorithm
at main 984 lines 36 kB view raw
1(* Optimal parsing for Brotli compression (quality 10-11) 2 This implements Zopfli-like optimal matching using dynamic programming, 3 matching the brotli-c reference implementation in backward_references_hq.c *) 4 5(* Configuration constants from brotli-c quality.h *) 6let max_zopfli_len_quality_10 = 150 7let max_zopfli_len_quality_11 = 325 8let max_zopfli_candidates_q10 = 1 (* MaxZopfliCandidates for Q10 *) 9let max_zopfli_candidates_q11 = 5 (* MaxZopfliCandidates for Q11 *) 10let brotli_long_copy_quick_step = 16384 11 12(* Match parameters *) 13let min_match = 4 14let max_match = 258 15let max_distance = (1 lsl 22) - 16 16let hash_bits = 17 17let hash_size = 1 lsl hash_bits 18let max_tree_search_depth = 64 (* For H10 binary tree hasher *) 19 20(* Distance cache index and offset from brotli-c backward_references_hq.c *) 21let distance_cache_index = [| 0; 1; 2; 3; 0; 0; 0; 0; 0; 0; 1; 1; 1; 1; 1; 1 |] 22let distance_cache_offset = [| 0; 0; 0; 0; -1; 1; -2; 2; -3; 3; -1; 1; -2; 2; -3; 3 |] 23 24(* Infinity for cost comparison *) 25let infinity = max_float 26 27(* Fast log2 approximation matching brotli-c FastLog2 *) 28let[@inline always] fast_log2 v = 29 if v <= 0 then 0.0 30 else 31 let rec log2_floor v acc = if v <= 1 then acc else log2_floor (v lsr 1) (acc + 1) in 32 float_of_int (log2_floor v 0) 33 34(* ============================================================ 35 Cost Model (ZopfliCostModel from brotli-c) 36 ============================================================ *) 37 38type cost_model = { 39 (* Cost arrays *) 40 cost_cmd : float array; (* Command code costs *) 41 cost_dist : float array; (* Distance code costs *) 42 literal_costs : float array; (* Cumulative literal costs *) 43 min_cost_cmd : float; (* Minimum command cost *) 44 num_bytes : int; 45} 46 47(* SetCost from brotli-c: calculate Shannon entropy costs from histogram *) 48let set_cost histogram histogram_size is_literal = 49 let cost = Array.make histogram_size 0.0 in 50 let sum = Array.fold_left (+) 0 histogram in 51 if sum = 0 then cost 52 else begin 53 let log2sum = fast_log2 sum in 54 let missing_symbol_sum = 55 if is_literal then sum 56 else sum + (Array.fold_left (fun acc h -> if h = 0 then acc + 1 else acc) 0 histogram) 57 in 58 let missing_symbol_cost = (fast_log2 missing_symbol_sum) +. 2.0 in 59 for i = 0 to histogram_size - 1 do 60 if histogram.(i) = 0 then 61 cost.(i) <- missing_symbol_cost 62 else begin 63 (* Shannon bits: log2(sum) - log2(count) *) 64 cost.(i) <- max 1.0 (log2sum -. fast_log2 histogram.(i)) 65 end 66 done; 67 cost 68 end 69 70(* UTF-8 position detection from brotli-c literal_cost.c: 71 Returns the expected position within a UTF-8 multi-byte sequence. 72 0 = single byte or first byte, 1 = second byte, 2 = third byte *) 73let utf8_position last_byte current_byte max_utf8 = 74 if current_byte < 128 then 75 0 (* ASCII - next one is byte 1 again *) 76 else if current_byte >= 192 then 77 (* Start of multi-byte sequence *) 78 min 1 max_utf8 79 else begin 80 (* Continuation byte - check last byte to determine position *) 81 if last_byte < 0xE0 then 82 0 (* Completed two-byte sequence *) 83 else 84 (* Third byte of three-byte sequence *) 85 min 2 max_utf8 86 end 87 88(* Detect if data is mostly UTF-8 and determine histogram level 89 Returns 0 for ASCII, 1 for 2-byte UTF-8, 2 for 3-byte UTF-8 *) 90let decide_utf8_level src src_pos len = 91 let counts = Array.make 3 0 in 92 let last_c = ref 0 in 93 for i = 0 to min 2000 len - 1 do 94 let c = Char.code (Bytes.get src (src_pos + i)) in 95 let utf8_pos = utf8_position !last_c c 2 in 96 counts.(utf8_pos) <- counts.(utf8_pos) + 1; 97 last_c := c 98 done; 99 (* Use 3-byte histograms if >500 third-position bytes, 100 2-byte if >25 second/third position bytes combined, 101 otherwise single histogram *) 102 if counts.(2) < 500 then begin 103 if counts.(1) + counts.(2) < 25 then 0 104 else 1 105 end else 2 106 107(* Sliding window literal cost estimation matching brotli-c literal_cost.c 108 Uses a sliding window to estimate per-position literal costs based on 109 local byte frequency distribution. For UTF-8 text, uses position-aware 110 histograms for better cost estimation. *) 111let estimate_literal_costs_sliding_window src src_pos num_bytes = 112 let costs = Array.make (num_bytes + 2) 0.0 in 113 if num_bytes = 0 then costs 114 else begin 115 let max_utf8 = decide_utf8_level src src_pos num_bytes in 116 117 if max_utf8 > 0 then begin 118 (* UTF-8 mode: use position-aware histograms *) 119 let window_half = 495 in (* Smaller window for UTF-8 from brotli-c *) 120 let num_histograms = max_utf8 + 1 in 121 let histograms = Array.init num_histograms (fun _ -> Array.make 256 0) in 122 let in_window_utf8 = Array.make num_histograms 0 in 123 124 (* Bootstrap histograms *) 125 let initial_window = min window_half num_bytes in 126 let last_c = ref 0 in 127 let utf8_pos = ref 0 in 128 for i = 0 to initial_window - 1 do 129 let c = Char.code (Bytes.get src (src_pos + i)) in 130 histograms.(!utf8_pos).(c) <- histograms.(!utf8_pos).(c) + 1; 131 in_window_utf8.(!utf8_pos) <- in_window_utf8.(!utf8_pos) + 1; 132 utf8_pos := utf8_position !last_c c max_utf8; 133 last_c := c 134 done; 135 136 costs.(0) <- 0.0; 137 let prev1 = ref 0 in 138 let prev2 = ref 0 in 139 for i = 0 to num_bytes - 1 do 140 (* Slide window: remove byte from past *) 141 if i >= window_half then begin 142 let past_c = if i < window_half + 1 then 0 143 else Char.code (Bytes.get src (src_pos + i - window_half - 1)) in 144 let past_last = if i < window_half + 2 then 0 145 else Char.code (Bytes.get src (src_pos + i - window_half - 2)) in 146 let utf8_pos2 = utf8_position past_last past_c max_utf8 in 147 let remove_c = Char.code (Bytes.get src (src_pos + i - window_half)) in 148 histograms.(utf8_pos2).(remove_c) <- histograms.(utf8_pos2).(remove_c) - 1; 149 in_window_utf8.(utf8_pos2) <- in_window_utf8.(utf8_pos2) - 1 150 end; 151 (* Slide window: add byte from future *) 152 if i + window_half < num_bytes then begin 153 let fut_c = Char.code (Bytes.get src (src_pos + i + window_half - 1)) in 154 let fut_last = Char.code (Bytes.get src (src_pos + i + window_half - 2)) in 155 let utf8_pos2 = utf8_position fut_last fut_c max_utf8 in 156 let add_c = Char.code (Bytes.get src (src_pos + i + window_half)) in 157 histograms.(utf8_pos2).(add_c) <- histograms.(utf8_pos2).(add_c) + 1; 158 in_window_utf8.(utf8_pos2) <- in_window_utf8.(utf8_pos2) + 1 159 end; 160 161 (* Calculate cost for current byte using UTF-8 position *) 162 let c = Char.code (Bytes.get src (src_pos + i)) in 163 let utf8_pos = utf8_position !prev2 !prev1 max_utf8 in 164 let histo = max 1 histograms.(utf8_pos).(c) in 165 let in_win = max 1 in_window_utf8.(utf8_pos) in 166 let lit_cost = fast_log2 in_win -. fast_log2 histo +. 0.02905 in 167 let lit_cost = if lit_cost < 1.0 then lit_cost *. 0.5 +. 0.5 else lit_cost in 168 let prologue_length = 2000 in 169 let lit_cost = 170 if i < prologue_length then 171 lit_cost +. 0.35 +. 0.35 /. float_of_int prologue_length *. float_of_int i 172 else lit_cost 173 in 174 costs.(i + 1) <- costs.(i) +. lit_cost; 175 prev2 := !prev1; 176 prev1 := c 177 done; 178 costs 179 end else begin 180 (* Binary/ASCII mode: single histogram *) 181 let window_half = 2000 in (* Larger window for non-UTF-8 *) 182 let histogram = Array.make 256 0 in 183 184 (* Bootstrap histogram for first window_half bytes *) 185 let initial_window = min window_half num_bytes in 186 for i = 0 to initial_window - 1 do 187 let c = Char.code (Bytes.get src (src_pos + i)) in 188 histogram.(c) <- histogram.(c) + 1 189 done; 190 let in_window = ref initial_window in 191 192 costs.(0) <- 0.0; 193 for i = 0 to num_bytes - 1 do 194 (* Slide window: remove byte from past *) 195 if i >= window_half then begin 196 let old_c = Char.code (Bytes.get src (src_pos + i - window_half)) in 197 histogram.(old_c) <- histogram.(old_c) - 1; 198 decr in_window 199 end; 200 (* Slide window: add byte from future *) 201 if i + window_half < num_bytes then begin 202 let new_c = Char.code (Bytes.get src (src_pos + i + window_half)) in 203 histogram.(new_c) <- histogram.(new_c) + 1; 204 incr in_window 205 end; 206 207 (* Calculate cost for current byte *) 208 let c = Char.code (Bytes.get src (src_pos + i)) in 209 let histo = max 1 histogram.(c) in 210 let lit_cost = fast_log2 !in_window -. fast_log2 histo +. 0.029 in 211 let lit_cost = if lit_cost < 1.0 then lit_cost *. 0.5 +. 0.5 else lit_cost in 212 let prologue_length = 2000 in 213 let lit_cost = 214 if i < prologue_length then 215 lit_cost +. 0.35 +. 0.35 /. float_of_int prologue_length *. float_of_int i 216 else lit_cost 217 in 218 costs.(i + 1) <- costs.(i) +. lit_cost 219 done; 220 costs 221 end 222 end 223 224(* Initialize cost model from literal costs (first pass) *) 225let init_cost_model_from_literals src src_pos num_bytes = 226 (* Use sliding window for accurate per-position literal cost estimation *) 227 let literal_costs = estimate_literal_costs_sliding_window src src_pos num_bytes in 228 229 (* Command costs: FastLog2(11 + cmd_code) *) 230 let cost_cmd = Array.init 704 (fun i -> fast_log2 (11 + i)) in 231 let min_cost_cmd = fast_log2 11 in 232 233 (* Distance costs: FastLog2(20 + dist_code) *) 234 let cost_dist = Array.init 544 (fun i -> fast_log2 (20 + i)) in 235 236 { cost_cmd; cost_dist; literal_costs; min_cost_cmd; num_bytes } 237 238(* Initialize cost model from command histograms (second pass for Q11) *) 239let init_cost_model_from_histograms src src_pos num_bytes 240 ~lit_histogram ~cmd_histogram ~dist_histogram = 241 (* Literal costs from histogram *) 242 let lit_costs = set_cost lit_histogram 256 true in 243 let literal_costs = Array.make (num_bytes + 2) 0.0 in 244 literal_costs.(0) <- 0.0; 245 for i = 0 to num_bytes - 1 do 246 let c = Char.code (Bytes.get src (src_pos + i)) in 247 literal_costs.(i + 1) <- literal_costs.(i) +. lit_costs.(c) 248 done; 249 250 (* Command costs from histogram *) 251 let cost_cmd = set_cost cmd_histogram 704 false in 252 let min_cost_cmd = Array.fold_left min infinity cost_cmd in 253 254 (* Distance costs from histogram *) 255 let cost_dist = set_cost dist_histogram 544 false in 256 257 { cost_cmd; cost_dist; literal_costs; min_cost_cmd; num_bytes } 258 259let get_literal_cost model from_pos to_pos = 260 model.literal_costs.(to_pos) -. model.literal_costs.(from_pos) 261 262let get_command_cost model cmd_code = 263 if cmd_code < 704 then model.cost_cmd.(cmd_code) else 20.0 264 265let get_distance_cost model dist_code = 266 if dist_code < 544 then model.cost_dist.(dist_code) else 20.0 267 268(* ============================================================ 269 StartPosQueue - maintains 8 best starting positions 270 ============================================================ *) 271 272type pos_data = { 273 pos : int; 274 distance_cache : int array; 275 costdiff : float; 276 cost : float; 277} 278 279type start_pos_queue = { 280 mutable q : pos_data array; 281 mutable idx : int; 282} 283 284let create_start_pos_queue () = 285 let empty = { pos = 0; distance_cache = [|16;15;11;4|]; costdiff = infinity; cost = infinity } in 286 { q = Array.make 8 empty; idx = 0 } 287 288let start_pos_queue_size queue = 289 min queue.idx 8 290 291let start_pos_queue_push queue posdata = 292 let offset = (lnot queue.idx) land 7 in 293 queue.idx <- queue.idx + 1; 294 let len = start_pos_queue_size queue in 295 queue.q.(offset) <- posdata; 296 (* Restore sorted order by costdiff *) 297 let q = queue.q in 298 for i = 1 to len - 1 do 299 let idx1 = (offset + i - 1) land 7 in 300 let idx2 = (offset + i) land 7 in 301 if q.(idx1).costdiff > q.(idx2).costdiff then begin 302 let tmp = q.(idx1) in 303 q.(idx1) <- q.(idx2); 304 q.(idx2) <- tmp 305 end 306 done 307 308let start_pos_queue_at queue k = 309 queue.q.((k - queue.idx) land 7) 310 311(* ============================================================ 312 Zopfli Node - DP state at each position 313 ============================================================ *) 314 315type zopfli_node = { 316 mutable length : int; (* Copy length (lower 25 bits) + len_code modifier *) 317 mutable distance : int; (* Copy distance *) 318 mutable dcode_insert_length : int; (* Short code (upper 5 bits) + insert length *) 319 mutable cost : float; (* Cost or next pointer *) 320 mutable shortcut : int; (* Shortcut for distance cache computation *) 321} 322 323let create_zopfli_node () = 324 { length = 1; distance = 0; dcode_insert_length = 0; cost = infinity; shortcut = 0 } 325 326let zopfli_node_copy_length node = node.length land 0x1FFFFFF 327let zopfli_node_copy_distance node = node.distance 328let zopfli_node_insert_length node = node.dcode_insert_length land 0x7FFFFFF 329let zopfli_node_distance_code node = 330 let short_code = node.dcode_insert_length lsr 27 in 331 if short_code = 0 then zopfli_node_copy_distance node + 16 - 1 332 else short_code - 1 333 334let zopfli_node_command_length node = 335 zopfli_node_copy_length node + zopfli_node_insert_length node 336 337(* ============================================================ 338 Hash functions and match finding 339 ============================================================ *) 340 341let[@inline always] hash4 src pos = 342 let b0 = Char.code (Bytes.unsafe_get src pos) in 343 let b1 = Char.code (Bytes.unsafe_get src (pos + 1)) in 344 let b2 = Char.code (Bytes.unsafe_get src (pos + 2)) in 345 let b3 = Char.code (Bytes.unsafe_get src (pos + 3)) in 346 let v = b0 lor (b1 lsl 8) lor (b2 lsl 16) lor (b3 lsl 24) in 347 ((v * 0x1e35a7bd) land 0xFFFFFFFF) lsr (32 - hash_bits) 348 349let[@inline always] find_match_length src a b limit = 350 let len = ref 0 in 351 let max_len = min max_match (limit - b) in 352 while !len < max_len && Bytes.get src (a + !len) = Bytes.get src (b + !len) do 353 incr len 354 done; 355 !len 356 357(* Backward match structure *) 358type backward_match = { 359 bm_distance : int; 360 bm_length : int; 361 bm_len_code : int; 362} 363 364(* Find all matches at a position, sorted by length *) 365let find_all_matches src pos src_end hash_table chain_table chain_base max_distance = 366 if pos + min_match > src_end then [] 367 else begin 368 let matches = ref [] in 369 let best_len = ref (min_match - 1) in 370 371 (* Search hash chain *) 372 let h = hash4 src pos in 373 let chain_pos = ref hash_table.(h) in 374 let chain_count = ref 0 in 375 376 while !chain_pos >= 0 && !chain_count < max_tree_search_depth do 377 let distance = pos - !chain_pos in 378 if distance > 0 && distance <= max_distance then begin 379 let match_len = find_match_length src !chain_pos pos src_end in 380 if match_len > !best_len then begin 381 best_len := match_len; 382 matches := { bm_distance = distance; bm_length = match_len; bm_len_code = match_len } :: !matches 383 end 384 end; 385 let chain_idx = !chain_pos - chain_base in 386 if chain_idx >= 0 && chain_idx < Array.length chain_table then 387 chain_pos := chain_table.(chain_idx) 388 else 389 chain_pos := -1; 390 incr chain_count 391 done; 392 393 (* Sort by length ascending *) 394 List.sort (fun a b -> compare a.bm_length b.bm_length) !matches 395 end 396 397(* ============================================================ 398 Insert/Copy length encoding (from brotli-c prefix.h) 399 ============================================================ *) 400 401let get_insert_length_code insert_len = 402 if insert_len < 6 then insert_len 403 else if insert_len < 130 then 404 let nbits = Lz77.log2_floor_nonzero (insert_len - 2) - 1 in 405 (nbits lsl 1) + ((insert_len - 2) lsr nbits) + 2 406 else if insert_len < 2114 then 407 Lz77.log2_floor_nonzero (insert_len - 66) + 10 408 else if insert_len < 6210 then 21 409 else if insert_len < 22594 then 22 410 else 23 411 412let get_copy_length_code copy_len = 413 if copy_len < 10 then copy_len - 2 414 else if copy_len < 134 then 415 let nbits = Lz77.log2_floor_nonzero (copy_len - 6) - 1 in 416 (nbits lsl 1) + ((copy_len - 6) lsr nbits) + 4 417 else if copy_len < 2118 then 418 Lz77.log2_floor_nonzero (copy_len - 70) + 12 419 else 23 420 421let get_insert_extra insert_code = 422 let kInsertExtraBits = [| 0;0;0;0;0;0;1;1;2;2;3;3;4;4;5;5;6;7;8;9;10;12;14;24 |] in 423 if insert_code < 24 then kInsertExtraBits.(insert_code) else 24 424 425let get_copy_extra copy_code = 426 let kCopyExtraBits = [| 0;0;0;0;0;0;0;0;1;1;2;2;3;3;4;4;5;5;6;7;8;9;10;24 |] in 427 if copy_code < 24 then kCopyExtraBits.(copy_code) else 24 428 429let combine_length_codes inscode copycode use_last_distance = 430 let inscode64 = (inscode land 0x7) lor ((inscode land 0x18) lsl 2) in 431 let copycode64 = (copycode land 0x7) lor ((copycode land 0x18) lsl 3) in 432 let c = (copycode64 land 0x38) lor inscode64 in 433 if use_last_distance && inscode < 8 && copycode < 16 then c 434 else if inscode < 8 && copycode < 16 then c lor 64 435 else c lor (128 + (if copycode >= 16 then 64 else 0)) 436 437(* ============================================================ 438 Distance encoding 439 ============================================================ *) 440 441let prefix_encode_copy_distance dist_code = 442 if dist_code < 16 then (dist_code, 0, 0) 443 else begin 444 let dist = dist_code - 15 in 445 let nbits = Lz77.log2_floor_nonzero dist in 446 let prefix = (nbits lsl 1) + ((dist lsr (nbits - 1)) land 1) + 12 in 447 let extra_bits = nbits - 1 in 448 let extra = dist land ((1 lsl extra_bits) - 1) in 449 (prefix, extra_bits, extra) 450 end 451 452(* ============================================================ 453 Main Zopfli DP Algorithm 454 ============================================================ *) 455 456(* Compute distance cache at a position from the DP path *) 457let compute_distance_cache pos starting_dist_cache nodes = 458 let dist_cache = Array.make 4 0 in 459 let idx = ref 0 in 460 let p = ref nodes.(pos).shortcut in 461 while !idx < 4 && !p > 0 do 462 let node = nodes.(!p) in 463 let c_len = zopfli_node_copy_length node in 464 let i_len = zopfli_node_insert_length node in 465 let dist = zopfli_node_copy_distance node in 466 dist_cache.(!idx) <- dist; 467 incr idx; 468 p := nodes.(!p - c_len - i_len).shortcut 469 done; 470 for i = !idx to 3 do 471 dist_cache.(i) <- starting_dist_cache.(i - !idx) 472 done; 473 dist_cache 474 475(* Compute distance shortcut *) 476let compute_distance_shortcut block_start pos max_backward_limit nodes = 477 if pos = 0 then 0 478 else begin 479 let node = nodes.(pos) in 480 let c_len = zopfli_node_copy_length node in 481 let i_len = zopfli_node_insert_length node in 482 let dist = zopfli_node_copy_distance node in 483 if dist + c_len <= block_start + pos && 484 dist <= max_backward_limit && 485 zopfli_node_distance_code node > 0 then 486 pos 487 else 488 nodes.(pos - c_len - i_len).shortcut 489 end 490 491(* Update Zopfli node with new values *) 492let update_zopfli_node nodes pos start len len_code dist short_code cost = 493 let node = nodes.(pos + len) in 494 node.length <- len lor ((len + 9 - len_code) lsl 25); 495 node.distance <- dist; 496 node.dcode_insert_length <- (short_code lsl 27) lor (pos - start); 497 node.cost <- cost 498 499(* Compute minimum copy length that can improve cost *) 500let compute_minimum_copy_length start_cost nodes num_bytes pos = 501 let min_cost = ref start_cost in 502 let len = ref 2 in 503 let next_len_bucket = ref 4 in 504 let next_len_offset = ref 10 in 505 while pos + !len <= num_bytes && nodes.(pos + !len).cost <= !min_cost do 506 incr len; 507 if !len = !next_len_offset then begin 508 min_cost := !min_cost +. 1.0; 509 next_len_offset := !next_len_offset + !next_len_bucket; 510 next_len_bucket := !next_len_bucket * 2 511 end 512 done; 513 !len 514 515(* Evaluate node and push to queue if eligible *) 516let evaluate_node block_start pos max_backward_limit starting_dist_cache model queue nodes = 517 let node_cost = nodes.(pos).cost in 518 nodes.(pos).shortcut <- compute_distance_shortcut block_start pos max_backward_limit nodes; 519 if node_cost <= get_literal_cost model 0 pos then begin 520 let dist_cache = compute_distance_cache pos starting_dist_cache nodes in 521 let posdata = { 522 pos; 523 distance_cache = dist_cache; 524 costdiff = node_cost -. get_literal_cost model 0 pos; 525 cost = node_cost; 526 } in 527 start_pos_queue_push queue posdata 528 end 529 530(* Update nodes at a position - core Zopfli DP step *) 531let update_nodes num_bytes block_start pos src src_pos model 532 max_backward_limit starting_dist_cache 533 num_matches matches queue nodes max_zopfli_len max_iters = 534 let cur_ix = block_start + pos in 535 let max_distance_here = min cur_ix max_backward_limit in 536 let max_len = num_bytes - pos in 537 let result = ref 0 in 538 539 evaluate_node block_start pos max_backward_limit starting_dist_cache model queue nodes; 540 541 (* Compute minimum copy length based on best queue entry *) 542 let posdata0 = start_pos_queue_at queue 0 in 543 let min_cost = posdata0.cost +. model.min_cost_cmd +. get_literal_cost model posdata0.pos pos in 544 let min_len = compute_minimum_copy_length min_cost nodes num_bytes pos in 545 546 (* Go over starting positions in order of increasing cost difference *) 547 let queue_size = start_pos_queue_size queue in 548 for k = 0 to min (max_iters - 1) (queue_size - 1) do 549 let posdata = start_pos_queue_at queue k in 550 let start = posdata.pos in 551 let inscode = get_insert_length_code (pos - start) in 552 let start_costdiff = posdata.costdiff in 553 let base_cost = start_costdiff +. float_of_int (get_insert_extra inscode) +. 554 get_literal_cost model 0 pos in 555 556 (* Check distance cache matches first *) 557 let best_len = ref (min_len - 1) in 558 for j = 0 to 15 do 559 if !best_len < max_len then begin 560 let idx = distance_cache_index.(j) in 561 let backward = posdata.distance_cache.(idx) + distance_cache_offset.(j) in 562 if backward > 0 && backward <= max_distance_here then begin 563 let prev_ix = cur_ix - backward in 564 let match_len = find_match_length src prev_ix (src_pos + pos) (src_pos + num_bytes) in 565 if match_len >= 2 then begin 566 let dist_cost = base_cost +. get_distance_cost model j in 567 for l = !best_len + 1 to match_len do 568 let copycode = get_copy_length_code l in 569 let cmdcode = combine_length_codes inscode copycode (j = 0) in 570 let cost = (if cmdcode < 128 then base_cost else dist_cost) +. 571 float_of_int (get_copy_extra copycode) +. 572 get_command_cost model cmdcode in 573 if cost < nodes.(pos + l).cost then begin 574 update_zopfli_node nodes pos start l l backward (j + 1) cost; 575 result := max !result l 576 end; 577 best_len := l 578 done 579 end 580 end 581 end 582 done; 583 584 (* For iterations >= 2, only look at distance cache matches *) 585 if k < 2 then begin 586 (* Loop through all matches *) 587 let len = ref min_len in 588 for j = 0 to num_matches - 1 do 589 let m = matches.(j) in 590 let dist = m.bm_distance in 591 let dist_code = dist + 16 - 1 in (* Add 16 short codes *) 592 let (dist_symbol, distnumextra, _) = prefix_encode_copy_distance dist_code in 593 let dist_cost = base_cost +. float_of_int distnumextra +. 594 get_distance_cost model dist_symbol in 595 let max_match_len = m.bm_length in 596 597 (* For long matches or dictionary, try only max length *) 598 if !len < max_match_len && max_match_len > max_zopfli_len then 599 len := max_match_len; 600 601 while !len <= max_match_len do 602 let len_code = m.bm_len_code in 603 let copycode = get_copy_length_code len_code in 604 let cmdcode = combine_length_codes inscode copycode false in 605 let cost = dist_cost +. float_of_int (get_copy_extra copycode) +. 606 get_command_cost model cmdcode in 607 if cost < nodes.(pos + !len).cost then begin 608 update_zopfli_node nodes pos start !len len_code dist 0 cost; 609 result := max !result !len 610 end; 611 incr len 612 done 613 done 614 end 615 done; 616 !result 617 618(* Compute shortest path from nodes *) 619let compute_shortest_path_from_nodes num_bytes nodes = 620 let index = ref num_bytes in 621 let num_commands = ref 0 in 622 (* Find the actual end position *) 623 while zopfli_node_insert_length nodes.(!index) = 0 && 624 nodes.(!index).length = 1 && !index > 0 do 625 decr index 626 done; 627 nodes.(!index).shortcut <- max_int; (* Mark as end *) 628 while !index > 0 do 629 let len = zopfli_node_command_length nodes.(!index) in 630 index := !index - len; 631 nodes.(!index).shortcut <- len; (* Use shortcut to store next length *) 632 incr num_commands 633 done; 634 !num_commands 635 636(* ============================================================ 637 Main Zopfli function for Q10 638 ============================================================ *) 639 640let zopfli_compute_shortest_path src src_pos num_bytes starting_dist_cache = 641 let max_backward_limit = max_distance in 642 let max_zopfli_len = max_zopfli_len_quality_10 in 643 let max_iters = max_zopfli_candidates_q10 in 644 645 (* Initialize nodes *) 646 let nodes = Array.init (num_bytes + 1) (fun _ -> create_zopfli_node ()) in 647 nodes.(0).length <- 0; 648 nodes.(0).cost <- 0.0; 649 650 (* Initialize cost model from literal costs (first pass) *) 651 let model = init_cost_model_from_literals src src_pos num_bytes in 652 653 (* Hash table and chain *) 654 let hash_table = Array.make hash_size (-1) in 655 let chain_table = Array.make num_bytes (-1) in 656 let chain_base = src_pos in 657 658 (* Initialize queue *) 659 let queue = create_start_pos_queue () in 660 661 (* Main DP loop *) 662 let i = ref 0 in 663 while !i + min_match - 1 < num_bytes do 664 let pos = src_pos + !i in 665 let max_distance_here = min pos max_backward_limit in 666 667 (* Update hash table *) 668 if pos + min_match <= src_pos + num_bytes then begin 669 let h = hash4 src pos in 670 let chain_idx = !i in 671 if chain_idx < Array.length chain_table then 672 chain_table.(chain_idx) <- hash_table.(h); 673 hash_table.(h) <- pos 674 end; 675 676 (* Find all matches *) 677 let matches = find_all_matches src pos (src_pos + num_bytes) 678 hash_table chain_table chain_base max_distance_here in 679 let matches_arr = Array.of_list matches in 680 let num_matches = Array.length matches_arr in 681 682 (* Check for long match to skip *) 683 let skip = 684 if num_matches > 0 then begin 685 let last_match = matches_arr.(num_matches - 1) in 686 if last_match.bm_length > max_zopfli_len then begin 687 (* Use only longest match *) 688 matches_arr.(0) <- last_match; 689 last_match.bm_length 690 end else 0 691 end else 0 692 in 693 694 let update_skip = update_nodes num_bytes src_pos !i src src_pos model 695 max_backward_limit starting_dist_cache 696 (if skip > 0 then 1 else num_matches) matches_arr queue nodes 697 max_zopfli_len max_iters in 698 699 let actual_skip = if update_skip < brotli_long_copy_quick_step then 0 else update_skip in 700 let skip = max skip actual_skip in 701 702 if skip > 1 then begin 703 let skip_remaining = ref (skip - 1) in 704 while !skip_remaining > 0 && !i + min_match - 1 < num_bytes do 705 incr i; 706 evaluate_node src_pos !i max_backward_limit starting_dist_cache model queue nodes; 707 decr skip_remaining 708 done 709 end; 710 incr i 711 done; 712 713 (nodes, compute_shortest_path_from_nodes num_bytes nodes) 714 715(* ============================================================ 716 HQ Zopfli function for Q11 (two passes with histogram refinement) 717 ============================================================ *) 718 719(* Build histograms from completed DP nodes for second pass cost refinement. 720 This matches brotli-c ZopfliCostModelSetFromCommands in backward_references_hq.c *) 721let build_histograms_from_nodes src src_pos num_bytes nodes = 722 let lit_histogram = Array.make 256 0 in 723 let cmd_histogram = Array.make 704 0 in 724 let dist_histogram = Array.make 544 0 in 725 726 (* Reconstruct path from nodes *) 727 let idx = ref num_bytes in 728 (* Find the actual end position *) 729 while zopfli_node_insert_length nodes.(!idx) = 0 && 730 nodes.(!idx).length = 1 && !idx > 0 do 731 decr idx 732 done; 733 734 let pending_lit_start = ref 0 in 735 let end_pos = !idx in 736 737 (* Walk backwards through the path *) 738 idx := end_pos; 739 let path = ref [] in 740 while !idx > 0 do 741 let node = nodes.(!idx) in 742 let cmd_len = zopfli_node_command_length node in 743 if cmd_len > 0 then begin 744 path := !idx :: !path; 745 idx := !idx - cmd_len 746 end else 747 idx := 0 748 done; 749 750 (* Process path forward to count symbols *) 751 pending_lit_start := 0; 752 List.iter (fun end_pos -> 753 let node = nodes.(end_pos) in 754 let copy_len = zopfli_node_copy_length node in 755 let _insert_len = zopfli_node_insert_length node in 756 let dist_code = zopfli_node_distance_code node in 757 758 let copy_start = end_pos - copy_len in 759 let lit_len = copy_start - !pending_lit_start in 760 761 (* Count literals *) 762 for i = !pending_lit_start to copy_start - 1 do 763 let c = Char.code (Bytes.get src (src_pos + i)) in 764 lit_histogram.(c) <- lit_histogram.(c) + 1 765 done; 766 767 (* Count command code *) 768 let inscode = get_insert_length_code lit_len in 769 let copycode = get_copy_length_code copy_len in 770 let use_last = dist_code = 0 in 771 let cmdcode = combine_length_codes inscode copycode use_last in 772 if cmdcode < 704 then 773 cmd_histogram.(cmdcode) <- cmd_histogram.(cmdcode) + 1; 774 775 (* Count distance code if explicit *) 776 if cmdcode >= 128 then begin 777 let dc = if dist_code < 16 then dist_code 778 else begin 779 let (symbol, _, _) = prefix_encode_copy_distance (node.distance + 16 - 1) in 780 symbol 781 end 782 in 783 if dc < 544 then 784 dist_histogram.(dc) <- dist_histogram.(dc) + 1 785 end; 786 787 pending_lit_start := end_pos 788 ) !path; 789 790 (* Count remaining literals *) 791 for i = !pending_lit_start to num_bytes - 1 do 792 let c = Char.code (Bytes.get src (src_pos + i)) in 793 lit_histogram.(c) <- lit_histogram.(c) + 1 794 done; 795 796 (lit_histogram, cmd_histogram, dist_histogram) 797 798let hq_zopfli_compute_shortest_path src src_pos num_bytes starting_dist_cache = 799 let max_backward_limit = max_distance in 800 let max_zopfli_len = max_zopfli_len_quality_11 in 801 let max_iters = max_zopfli_candidates_q11 in 802 803 (* Pre-compute all matches *) 804 let hash_table = Array.make hash_size (-1) in 805 let chain_table = Array.make num_bytes (-1) in 806 let chain_base = src_pos in 807 let all_matches = Array.make num_bytes [||] in 808 let num_matches_arr = Array.make num_bytes 0 in 809 810 for i = 0 to num_bytes - min_match do 811 let pos = src_pos + i in 812 let max_distance_here = min pos max_backward_limit in 813 814 (* Update hash *) 815 if pos + min_match <= src_pos + num_bytes then begin 816 let h = hash4 src pos in 817 chain_table.(i) <- hash_table.(h); 818 hash_table.(h) <- pos 819 end; 820 821 let matches = find_all_matches src pos (src_pos + num_bytes) 822 hash_table chain_table chain_base max_distance_here in 823 let matches_arr = Array.of_list matches in 824 all_matches.(i) <- matches_arr; 825 num_matches_arr.(i) <- Array.length matches_arr; 826 827 (* Skip after very long match *) 828 if Array.length matches_arr > 0 then begin 829 let last = matches_arr.(Array.length matches_arr - 1) in 830 if last.bm_length > max_zopfli_len then begin 831 let skip = last.bm_length - 1 in 832 for j = 1 to min skip (num_bytes - min_match - i) do 833 all_matches.(i + j) <- [||]; 834 num_matches_arr.(i + j) <- 0 835 done 836 end 837 end 838 done; 839 840 (* Do two iterations with histogram refinement *) 841 let final_nodes = ref (Array.init (num_bytes + 1) (fun _ -> create_zopfli_node ())) in 842 let final_count = ref 0 in 843 let first_pass_nodes = ref None in 844 845 for iteration = 0 to 1 do 846 let nodes = Array.init (num_bytes + 1) (fun _ -> create_zopfli_node ()) in 847 nodes.(0).length <- 0; 848 nodes.(0).cost <- 0.0; 849 850 let model = 851 if iteration = 0 then 852 (* First pass: use sliding window literal cost estimation *) 853 init_cost_model_from_literals src src_pos num_bytes 854 else begin 855 (* Second pass: build histograms from first pass for refined estimation *) 856 match !first_pass_nodes with 857 | None -> init_cost_model_from_literals src src_pos num_bytes 858 | Some prev_nodes -> 859 let (lit_hist, cmd_hist, dist_hist) = 860 build_histograms_from_nodes src src_pos num_bytes prev_nodes 861 in 862 init_cost_model_from_histograms src src_pos num_bytes 863 ~lit_histogram:lit_hist ~cmd_histogram:cmd_hist ~dist_histogram:dist_hist 864 end 865 in 866 867 let queue = create_start_pos_queue () in 868 869 (* Main DP loop *) 870 let i = ref 0 in 871 while !i + min_match - 1 < num_bytes do 872 let skip = update_nodes num_bytes src_pos !i src src_pos model 873 max_backward_limit starting_dist_cache 874 num_matches_arr.(!i) all_matches.(!i) queue nodes 875 max_zopfli_len max_iters in 876 877 let skip = if skip < brotli_long_copy_quick_step then 0 else skip in 878 879 if skip > 1 then begin 880 let skip_remaining = ref (skip - 1) in 881 while !skip_remaining > 0 && !i + min_match - 1 < num_bytes do 882 incr i; 883 evaluate_node src_pos !i max_backward_limit starting_dist_cache model queue nodes; 884 decr skip_remaining 885 done 886 end; 887 incr i 888 done; 889 890 (* Save first pass nodes for histogram building *) 891 if iteration = 0 then begin 892 let _ = compute_shortest_path_from_nodes num_bytes nodes in 893 first_pass_nodes := Some nodes 894 end; 895 896 final_nodes := nodes; 897 final_count := compute_shortest_path_from_nodes num_bytes nodes 898 done; 899 900 (!final_nodes, !final_count) 901 902(* ============================================================ 903 Create commands from Zopfli nodes 904 ============================================================ *) 905 906let zopfli_create_commands num_bytes src_pos nodes = 907 let commands = ref [] in 908 let ring = Lz77.create_dist_ring () in 909 910 (* First, reconstruct the path using shortcut field *) 911 let path = ref [] in 912 let idx = ref num_bytes in 913 while !idx > 0 && nodes.(!idx).shortcut <> max_int do 914 path := !idx :: !path; 915 let len = nodes.(!idx).shortcut in 916 if len > 0 && len <= !idx then 917 idx := !idx - len 918 else 919 idx := 0 920 done; 921 922 (* Now process each command in the path *) 923 let pending_lit_start = ref 0 in 924 List.iter (fun end_pos -> 925 let node = nodes.(end_pos) in 926 let copy_len = zopfli_node_copy_length node in 927 let _insert_len = zopfli_node_insert_length node in 928 let distance = zopfli_node_copy_distance node in 929 let dist_code = zopfli_node_distance_code node in 930 931 let copy_start = end_pos - copy_len in 932 let lit_len = copy_start - !pending_lit_start in 933 934 (* Determine short code *) 935 let short_code = 936 if dist_code < 16 then Some dist_code 937 else None 938 in 939 940 commands := Lz77.InsertCopy { 941 lit_start = src_pos + !pending_lit_start; 942 lit_len; 943 copy_len; 944 distance; 945 dist_code = short_code; 946 } :: !commands; 947 948 (* Update ring buffer *) 949 (match short_code with 950 | Some 0 -> () 951 | _ -> Lz77.push_distance ring distance); 952 953 pending_lit_start := end_pos 954 ) !path; 955 956 (* Handle remaining literals *) 957 if !pending_lit_start < num_bytes then 958 commands := Lz77.Literals { 959 start = src_pos + !pending_lit_start; 960 len = num_bytes - !pending_lit_start 961 } :: !commands; 962 963 List.rev !commands 964 965(* ============================================================ 966 Public API 967 ============================================================ *) 968 969let generate_commands ?(quality=11) src src_pos src_len = 970 if src_len = 0 then [] 971 else if src_len < min_match then 972 [Lz77.Literals { start = src_pos; len = src_len }] 973 else begin 974 let starting_dist_cache = [| 16; 15; 11; 4 |] in 975 976 let (nodes, _num_commands) = 977 if quality >= 11 then 978 hq_zopfli_compute_shortest_path src src_pos src_len starting_dist_cache 979 else 980 zopfli_compute_shortest_path src src_pos src_len starting_dist_cache 981 in 982 983 zopfli_create_commands src_len src_pos nodes 984 end