Pure OCaml implementation of the Brotli compression algorithm
1(* Optimal parsing for Brotli compression (quality 10-11)
2 This implements Zopfli-like optimal matching using dynamic programming,
3 matching the brotli-c reference implementation in backward_references_hq.c *)
4
5(* Configuration constants from brotli-c quality.h *)
6let max_zopfli_len_quality_10 = 150
7let max_zopfli_len_quality_11 = 325
8let max_zopfli_candidates_q10 = 1 (* MaxZopfliCandidates for Q10 *)
9let max_zopfli_candidates_q11 = 5 (* MaxZopfliCandidates for Q11 *)
10let brotli_long_copy_quick_step = 16384
11
12(* Match parameters *)
13let min_match = 4
14let max_match = 258
15let max_distance = (1 lsl 22) - 16
16let hash_bits = 17
17let hash_size = 1 lsl hash_bits
18let max_tree_search_depth = 64 (* For H10 binary tree hasher *)
19
20(* Distance cache index and offset from brotli-c backward_references_hq.c *)
21let distance_cache_index = [| 0; 1; 2; 3; 0; 0; 0; 0; 0; 0; 1; 1; 1; 1; 1; 1 |]
22let distance_cache_offset = [| 0; 0; 0; 0; -1; 1; -2; 2; -3; 3; -1; 1; -2; 2; -3; 3 |]
23
24(* Infinity for cost comparison *)
25let infinity = max_float
26
27(* Fast log2 approximation matching brotli-c FastLog2 *)
28let[@inline always] fast_log2 v =
29 if v <= 0 then 0.0
30 else
31 let rec log2_floor v acc = if v <= 1 then acc else log2_floor (v lsr 1) (acc + 1) in
32 float_of_int (log2_floor v 0)
33
34(* ============================================================
35 Cost Model (ZopfliCostModel from brotli-c)
36 ============================================================ *)
37
38type cost_model = {
39 (* Cost arrays *)
40 cost_cmd : float array; (* Command code costs *)
41 cost_dist : float array; (* Distance code costs *)
42 literal_costs : float array; (* Cumulative literal costs *)
43 min_cost_cmd : float; (* Minimum command cost *)
44 num_bytes : int;
45}
46
47(* SetCost from brotli-c: calculate Shannon entropy costs from histogram *)
48let set_cost histogram histogram_size is_literal =
49 let cost = Array.make histogram_size 0.0 in
50 let sum = Array.fold_left (+) 0 histogram in
51 if sum = 0 then cost
52 else begin
53 let log2sum = fast_log2 sum in
54 let missing_symbol_sum =
55 if is_literal then sum
56 else sum + (Array.fold_left (fun acc h -> if h = 0 then acc + 1 else acc) 0 histogram)
57 in
58 let missing_symbol_cost = (fast_log2 missing_symbol_sum) +. 2.0 in
59 for i = 0 to histogram_size - 1 do
60 if histogram.(i) = 0 then
61 cost.(i) <- missing_symbol_cost
62 else begin
63 (* Shannon bits: log2(sum) - log2(count) *)
64 cost.(i) <- max 1.0 (log2sum -. fast_log2 histogram.(i))
65 end
66 done;
67 cost
68 end
69
70(* UTF-8 position detection from brotli-c literal_cost.c:
71 Returns the expected position within a UTF-8 multi-byte sequence.
72 0 = single byte or first byte, 1 = second byte, 2 = third byte *)
73let utf8_position last_byte current_byte max_utf8 =
74 if current_byte < 128 then
75 0 (* ASCII - next one is byte 1 again *)
76 else if current_byte >= 192 then
77 (* Start of multi-byte sequence *)
78 min 1 max_utf8
79 else begin
80 (* Continuation byte - check last byte to determine position *)
81 if last_byte < 0xE0 then
82 0 (* Completed two-byte sequence *)
83 else
84 (* Third byte of three-byte sequence *)
85 min 2 max_utf8
86 end
87
88(* Detect if data is mostly UTF-8 and determine histogram level
89 Returns 0 for ASCII, 1 for 2-byte UTF-8, 2 for 3-byte UTF-8 *)
90let decide_utf8_level src src_pos len =
91 let counts = Array.make 3 0 in
92 let last_c = ref 0 in
93 for i = 0 to min 2000 len - 1 do
94 let c = Char.code (Bytes.get src (src_pos + i)) in
95 let utf8_pos = utf8_position !last_c c 2 in
96 counts.(utf8_pos) <- counts.(utf8_pos) + 1;
97 last_c := c
98 done;
99 (* Use 3-byte histograms if >500 third-position bytes,
100 2-byte if >25 second/third position bytes combined,
101 otherwise single histogram *)
102 if counts.(2) < 500 then begin
103 if counts.(1) + counts.(2) < 25 then 0
104 else 1
105 end else 2
106
107(* Sliding window literal cost estimation matching brotli-c literal_cost.c
108 Uses a sliding window to estimate per-position literal costs based on
109 local byte frequency distribution. For UTF-8 text, uses position-aware
110 histograms for better cost estimation. *)
111let estimate_literal_costs_sliding_window src src_pos num_bytes =
112 let costs = Array.make (num_bytes + 2) 0.0 in
113 if num_bytes = 0 then costs
114 else begin
115 let max_utf8 = decide_utf8_level src src_pos num_bytes in
116
117 if max_utf8 > 0 then begin
118 (* UTF-8 mode: use position-aware histograms *)
119 let window_half = 495 in (* Smaller window for UTF-8 from brotli-c *)
120 let num_histograms = max_utf8 + 1 in
121 let histograms = Array.init num_histograms (fun _ -> Array.make 256 0) in
122 let in_window_utf8 = Array.make num_histograms 0 in
123
124 (* Bootstrap histograms *)
125 let initial_window = min window_half num_bytes in
126 let last_c = ref 0 in
127 let utf8_pos = ref 0 in
128 for i = 0 to initial_window - 1 do
129 let c = Char.code (Bytes.get src (src_pos + i)) in
130 histograms.(!utf8_pos).(c) <- histograms.(!utf8_pos).(c) + 1;
131 in_window_utf8.(!utf8_pos) <- in_window_utf8.(!utf8_pos) + 1;
132 utf8_pos := utf8_position !last_c c max_utf8;
133 last_c := c
134 done;
135
136 costs.(0) <- 0.0;
137 let prev1 = ref 0 in
138 let prev2 = ref 0 in
139 for i = 0 to num_bytes - 1 do
140 (* Slide window: remove byte from past *)
141 if i >= window_half then begin
142 let past_c = if i < window_half + 1 then 0
143 else Char.code (Bytes.get src (src_pos + i - window_half - 1)) in
144 let past_last = if i < window_half + 2 then 0
145 else Char.code (Bytes.get src (src_pos + i - window_half - 2)) in
146 let utf8_pos2 = utf8_position past_last past_c max_utf8 in
147 let remove_c = Char.code (Bytes.get src (src_pos + i - window_half)) in
148 histograms.(utf8_pos2).(remove_c) <- histograms.(utf8_pos2).(remove_c) - 1;
149 in_window_utf8.(utf8_pos2) <- in_window_utf8.(utf8_pos2) - 1
150 end;
151 (* Slide window: add byte from future *)
152 if i + window_half < num_bytes then begin
153 let fut_c = Char.code (Bytes.get src (src_pos + i + window_half - 1)) in
154 let fut_last = Char.code (Bytes.get src (src_pos + i + window_half - 2)) in
155 let utf8_pos2 = utf8_position fut_last fut_c max_utf8 in
156 let add_c = Char.code (Bytes.get src (src_pos + i + window_half)) in
157 histograms.(utf8_pos2).(add_c) <- histograms.(utf8_pos2).(add_c) + 1;
158 in_window_utf8.(utf8_pos2) <- in_window_utf8.(utf8_pos2) + 1
159 end;
160
161 (* Calculate cost for current byte using UTF-8 position *)
162 let c = Char.code (Bytes.get src (src_pos + i)) in
163 let utf8_pos = utf8_position !prev2 !prev1 max_utf8 in
164 let histo = max 1 histograms.(utf8_pos).(c) in
165 let in_win = max 1 in_window_utf8.(utf8_pos) in
166 let lit_cost = fast_log2 in_win -. fast_log2 histo +. 0.02905 in
167 let lit_cost = if lit_cost < 1.0 then lit_cost *. 0.5 +. 0.5 else lit_cost in
168 let prologue_length = 2000 in
169 let lit_cost =
170 if i < prologue_length then
171 lit_cost +. 0.35 +. 0.35 /. float_of_int prologue_length *. float_of_int i
172 else lit_cost
173 in
174 costs.(i + 1) <- costs.(i) +. lit_cost;
175 prev2 := !prev1;
176 prev1 := c
177 done;
178 costs
179 end else begin
180 (* Binary/ASCII mode: single histogram *)
181 let window_half = 2000 in (* Larger window for non-UTF-8 *)
182 let histogram = Array.make 256 0 in
183
184 (* Bootstrap histogram for first window_half bytes *)
185 let initial_window = min window_half num_bytes in
186 for i = 0 to initial_window - 1 do
187 let c = Char.code (Bytes.get src (src_pos + i)) in
188 histogram.(c) <- histogram.(c) + 1
189 done;
190 let in_window = ref initial_window in
191
192 costs.(0) <- 0.0;
193 for i = 0 to num_bytes - 1 do
194 (* Slide window: remove byte from past *)
195 if i >= window_half then begin
196 let old_c = Char.code (Bytes.get src (src_pos + i - window_half)) in
197 histogram.(old_c) <- histogram.(old_c) - 1;
198 decr in_window
199 end;
200 (* Slide window: add byte from future *)
201 if i + window_half < num_bytes then begin
202 let new_c = Char.code (Bytes.get src (src_pos + i + window_half)) in
203 histogram.(new_c) <- histogram.(new_c) + 1;
204 incr in_window
205 end;
206
207 (* Calculate cost for current byte *)
208 let c = Char.code (Bytes.get src (src_pos + i)) in
209 let histo = max 1 histogram.(c) in
210 let lit_cost = fast_log2 !in_window -. fast_log2 histo +. 0.029 in
211 let lit_cost = if lit_cost < 1.0 then lit_cost *. 0.5 +. 0.5 else lit_cost in
212 let prologue_length = 2000 in
213 let lit_cost =
214 if i < prologue_length then
215 lit_cost +. 0.35 +. 0.35 /. float_of_int prologue_length *. float_of_int i
216 else lit_cost
217 in
218 costs.(i + 1) <- costs.(i) +. lit_cost
219 done;
220 costs
221 end
222 end
223
224(* Initialize cost model from literal costs (first pass) *)
225let init_cost_model_from_literals src src_pos num_bytes =
226 (* Use sliding window for accurate per-position literal cost estimation *)
227 let literal_costs = estimate_literal_costs_sliding_window src src_pos num_bytes in
228
229 (* Command costs: FastLog2(11 + cmd_code) *)
230 let cost_cmd = Array.init 704 (fun i -> fast_log2 (11 + i)) in
231 let min_cost_cmd = fast_log2 11 in
232
233 (* Distance costs: FastLog2(20 + dist_code) *)
234 let cost_dist = Array.init 544 (fun i -> fast_log2 (20 + i)) in
235
236 { cost_cmd; cost_dist; literal_costs; min_cost_cmd; num_bytes }
237
238(* Initialize cost model from command histograms (second pass for Q11) *)
239let init_cost_model_from_histograms src src_pos num_bytes
240 ~lit_histogram ~cmd_histogram ~dist_histogram =
241 (* Literal costs from histogram *)
242 let lit_costs = set_cost lit_histogram 256 true in
243 let literal_costs = Array.make (num_bytes + 2) 0.0 in
244 literal_costs.(0) <- 0.0;
245 for i = 0 to num_bytes - 1 do
246 let c = Char.code (Bytes.get src (src_pos + i)) in
247 literal_costs.(i + 1) <- literal_costs.(i) +. lit_costs.(c)
248 done;
249
250 (* Command costs from histogram *)
251 let cost_cmd = set_cost cmd_histogram 704 false in
252 let min_cost_cmd = Array.fold_left min infinity cost_cmd in
253
254 (* Distance costs from histogram *)
255 let cost_dist = set_cost dist_histogram 544 false in
256
257 { cost_cmd; cost_dist; literal_costs; min_cost_cmd; num_bytes }
258
259let get_literal_cost model from_pos to_pos =
260 model.literal_costs.(to_pos) -. model.literal_costs.(from_pos)
261
262let get_command_cost model cmd_code =
263 if cmd_code < 704 then model.cost_cmd.(cmd_code) else 20.0
264
265let get_distance_cost model dist_code =
266 if dist_code < 544 then model.cost_dist.(dist_code) else 20.0
267
268(* ============================================================
269 StartPosQueue - maintains 8 best starting positions
270 ============================================================ *)
271
272type pos_data = {
273 pos : int;
274 distance_cache : int array;
275 costdiff : float;
276 cost : float;
277}
278
279type start_pos_queue = {
280 mutable q : pos_data array;
281 mutable idx : int;
282}
283
284let create_start_pos_queue () =
285 let empty = { pos = 0; distance_cache = [|16;15;11;4|]; costdiff = infinity; cost = infinity } in
286 { q = Array.make 8 empty; idx = 0 }
287
288let start_pos_queue_size queue =
289 min queue.idx 8
290
291let start_pos_queue_push queue posdata =
292 let offset = (lnot queue.idx) land 7 in
293 queue.idx <- queue.idx + 1;
294 let len = start_pos_queue_size queue in
295 queue.q.(offset) <- posdata;
296 (* Restore sorted order by costdiff *)
297 let q = queue.q in
298 for i = 1 to len - 1 do
299 let idx1 = (offset + i - 1) land 7 in
300 let idx2 = (offset + i) land 7 in
301 if q.(idx1).costdiff > q.(idx2).costdiff then begin
302 let tmp = q.(idx1) in
303 q.(idx1) <- q.(idx2);
304 q.(idx2) <- tmp
305 end
306 done
307
308let start_pos_queue_at queue k =
309 queue.q.((k - queue.idx) land 7)
310
311(* ============================================================
312 Zopfli Node - DP state at each position
313 ============================================================ *)
314
315type zopfli_node = {
316 mutable length : int; (* Copy length (lower 25 bits) + len_code modifier *)
317 mutable distance : int; (* Copy distance *)
318 mutable dcode_insert_length : int; (* Short code (upper 5 bits) + insert length *)
319 mutable cost : float; (* Cost or next pointer *)
320 mutable shortcut : int; (* Shortcut for distance cache computation *)
321}
322
323let create_zopfli_node () =
324 { length = 1; distance = 0; dcode_insert_length = 0; cost = infinity; shortcut = 0 }
325
326let zopfli_node_copy_length node = node.length land 0x1FFFFFF
327let zopfli_node_copy_distance node = node.distance
328let zopfli_node_insert_length node = node.dcode_insert_length land 0x7FFFFFF
329let zopfli_node_distance_code node =
330 let short_code = node.dcode_insert_length lsr 27 in
331 if short_code = 0 then zopfli_node_copy_distance node + 16 - 1
332 else short_code - 1
333
334let zopfli_node_command_length node =
335 zopfli_node_copy_length node + zopfli_node_insert_length node
336
337(* ============================================================
338 Hash functions and match finding
339 ============================================================ *)
340
341let[@inline always] hash4 src pos =
342 let b0 = Char.code (Bytes.unsafe_get src pos) in
343 let b1 = Char.code (Bytes.unsafe_get src (pos + 1)) in
344 let b2 = Char.code (Bytes.unsafe_get src (pos + 2)) in
345 let b3 = Char.code (Bytes.unsafe_get src (pos + 3)) in
346 let v = b0 lor (b1 lsl 8) lor (b2 lsl 16) lor (b3 lsl 24) in
347 ((v * 0x1e35a7bd) land 0xFFFFFFFF) lsr (32 - hash_bits)
348
349let[@inline always] find_match_length src a b limit =
350 let len = ref 0 in
351 let max_len = min max_match (limit - b) in
352 while !len < max_len && Bytes.get src (a + !len) = Bytes.get src (b + !len) do
353 incr len
354 done;
355 !len
356
357(* Backward match structure *)
358type backward_match = {
359 bm_distance : int;
360 bm_length : int;
361 bm_len_code : int;
362}
363
364(* Find all matches at a position, sorted by length *)
365let find_all_matches src pos src_end hash_table chain_table chain_base max_distance =
366 if pos + min_match > src_end then []
367 else begin
368 let matches = ref [] in
369 let best_len = ref (min_match - 1) in
370
371 (* Search hash chain *)
372 let h = hash4 src pos in
373 let chain_pos = ref hash_table.(h) in
374 let chain_count = ref 0 in
375
376 while !chain_pos >= 0 && !chain_count < max_tree_search_depth do
377 let distance = pos - !chain_pos in
378 if distance > 0 && distance <= max_distance then begin
379 let match_len = find_match_length src !chain_pos pos src_end in
380 if match_len > !best_len then begin
381 best_len := match_len;
382 matches := { bm_distance = distance; bm_length = match_len; bm_len_code = match_len } :: !matches
383 end
384 end;
385 let chain_idx = !chain_pos - chain_base in
386 if chain_idx >= 0 && chain_idx < Array.length chain_table then
387 chain_pos := chain_table.(chain_idx)
388 else
389 chain_pos := -1;
390 incr chain_count
391 done;
392
393 (* Sort by length ascending *)
394 List.sort (fun a b -> compare a.bm_length b.bm_length) !matches
395 end
396
397(* ============================================================
398 Insert/Copy length encoding (from brotli-c prefix.h)
399 ============================================================ *)
400
401let get_insert_length_code insert_len =
402 if insert_len < 6 then insert_len
403 else if insert_len < 130 then
404 let nbits = Lz77.log2_floor_nonzero (insert_len - 2) - 1 in
405 (nbits lsl 1) + ((insert_len - 2) lsr nbits) + 2
406 else if insert_len < 2114 then
407 Lz77.log2_floor_nonzero (insert_len - 66) + 10
408 else if insert_len < 6210 then 21
409 else if insert_len < 22594 then 22
410 else 23
411
412let get_copy_length_code copy_len =
413 if copy_len < 10 then copy_len - 2
414 else if copy_len < 134 then
415 let nbits = Lz77.log2_floor_nonzero (copy_len - 6) - 1 in
416 (nbits lsl 1) + ((copy_len - 6) lsr nbits) + 4
417 else if copy_len < 2118 then
418 Lz77.log2_floor_nonzero (copy_len - 70) + 12
419 else 23
420
421let get_insert_extra insert_code =
422 let kInsertExtraBits = [| 0;0;0;0;0;0;1;1;2;2;3;3;4;4;5;5;6;7;8;9;10;12;14;24 |] in
423 if insert_code < 24 then kInsertExtraBits.(insert_code) else 24
424
425let get_copy_extra copy_code =
426 let kCopyExtraBits = [| 0;0;0;0;0;0;0;0;1;1;2;2;3;3;4;4;5;5;6;7;8;9;10;24 |] in
427 if copy_code < 24 then kCopyExtraBits.(copy_code) else 24
428
429let combine_length_codes inscode copycode use_last_distance =
430 let inscode64 = (inscode land 0x7) lor ((inscode land 0x18) lsl 2) in
431 let copycode64 = (copycode land 0x7) lor ((copycode land 0x18) lsl 3) in
432 let c = (copycode64 land 0x38) lor inscode64 in
433 if use_last_distance && inscode < 8 && copycode < 16 then c
434 else if inscode < 8 && copycode < 16 then c lor 64
435 else c lor (128 + (if copycode >= 16 then 64 else 0))
436
437(* ============================================================
438 Distance encoding
439 ============================================================ *)
440
441let prefix_encode_copy_distance dist_code =
442 if dist_code < 16 then (dist_code, 0, 0)
443 else begin
444 let dist = dist_code - 15 in
445 let nbits = Lz77.log2_floor_nonzero dist in
446 let prefix = (nbits lsl 1) + ((dist lsr (nbits - 1)) land 1) + 12 in
447 let extra_bits = nbits - 1 in
448 let extra = dist land ((1 lsl extra_bits) - 1) in
449 (prefix, extra_bits, extra)
450 end
451
452(* ============================================================
453 Main Zopfli DP Algorithm
454 ============================================================ *)
455
456(* Compute distance cache at a position from the DP path *)
457let compute_distance_cache pos starting_dist_cache nodes =
458 let dist_cache = Array.make 4 0 in
459 let idx = ref 0 in
460 let p = ref nodes.(pos).shortcut in
461 while !idx < 4 && !p > 0 do
462 let node = nodes.(!p) in
463 let c_len = zopfli_node_copy_length node in
464 let i_len = zopfli_node_insert_length node in
465 let dist = zopfli_node_copy_distance node in
466 dist_cache.(!idx) <- dist;
467 incr idx;
468 p := nodes.(!p - c_len - i_len).shortcut
469 done;
470 for i = !idx to 3 do
471 dist_cache.(i) <- starting_dist_cache.(i - !idx)
472 done;
473 dist_cache
474
475(* Compute distance shortcut *)
476let compute_distance_shortcut block_start pos max_backward_limit nodes =
477 if pos = 0 then 0
478 else begin
479 let node = nodes.(pos) in
480 let c_len = zopfli_node_copy_length node in
481 let i_len = zopfli_node_insert_length node in
482 let dist = zopfli_node_copy_distance node in
483 if dist + c_len <= block_start + pos &&
484 dist <= max_backward_limit &&
485 zopfli_node_distance_code node > 0 then
486 pos
487 else
488 nodes.(pos - c_len - i_len).shortcut
489 end
490
491(* Update Zopfli node with new values *)
492let update_zopfli_node nodes pos start len len_code dist short_code cost =
493 let node = nodes.(pos + len) in
494 node.length <- len lor ((len + 9 - len_code) lsl 25);
495 node.distance <- dist;
496 node.dcode_insert_length <- (short_code lsl 27) lor (pos - start);
497 node.cost <- cost
498
499(* Compute minimum copy length that can improve cost *)
500let compute_minimum_copy_length start_cost nodes num_bytes pos =
501 let min_cost = ref start_cost in
502 let len = ref 2 in
503 let next_len_bucket = ref 4 in
504 let next_len_offset = ref 10 in
505 while pos + !len <= num_bytes && nodes.(pos + !len).cost <= !min_cost do
506 incr len;
507 if !len = !next_len_offset then begin
508 min_cost := !min_cost +. 1.0;
509 next_len_offset := !next_len_offset + !next_len_bucket;
510 next_len_bucket := !next_len_bucket * 2
511 end
512 done;
513 !len
514
515(* Evaluate node and push to queue if eligible *)
516let evaluate_node block_start pos max_backward_limit starting_dist_cache model queue nodes =
517 let node_cost = nodes.(pos).cost in
518 nodes.(pos).shortcut <- compute_distance_shortcut block_start pos max_backward_limit nodes;
519 if node_cost <= get_literal_cost model 0 pos then begin
520 let dist_cache = compute_distance_cache pos starting_dist_cache nodes in
521 let posdata = {
522 pos;
523 distance_cache = dist_cache;
524 costdiff = node_cost -. get_literal_cost model 0 pos;
525 cost = node_cost;
526 } in
527 start_pos_queue_push queue posdata
528 end
529
530(* Update nodes at a position - core Zopfli DP step *)
531let update_nodes num_bytes block_start pos src src_pos model
532 max_backward_limit starting_dist_cache
533 num_matches matches queue nodes max_zopfli_len max_iters =
534 let cur_ix = block_start + pos in
535 let max_distance_here = min cur_ix max_backward_limit in
536 let max_len = num_bytes - pos in
537 let result = ref 0 in
538
539 evaluate_node block_start pos max_backward_limit starting_dist_cache model queue nodes;
540
541 (* Compute minimum copy length based on best queue entry *)
542 let posdata0 = start_pos_queue_at queue 0 in
543 let min_cost = posdata0.cost +. model.min_cost_cmd +. get_literal_cost model posdata0.pos pos in
544 let min_len = compute_minimum_copy_length min_cost nodes num_bytes pos in
545
546 (* Go over starting positions in order of increasing cost difference *)
547 let queue_size = start_pos_queue_size queue in
548 for k = 0 to min (max_iters - 1) (queue_size - 1) do
549 let posdata = start_pos_queue_at queue k in
550 let start = posdata.pos in
551 let inscode = get_insert_length_code (pos - start) in
552 let start_costdiff = posdata.costdiff in
553 let base_cost = start_costdiff +. float_of_int (get_insert_extra inscode) +.
554 get_literal_cost model 0 pos in
555
556 (* Check distance cache matches first *)
557 let best_len = ref (min_len - 1) in
558 for j = 0 to 15 do
559 if !best_len < max_len then begin
560 let idx = distance_cache_index.(j) in
561 let backward = posdata.distance_cache.(idx) + distance_cache_offset.(j) in
562 if backward > 0 && backward <= max_distance_here then begin
563 let prev_ix = cur_ix - backward in
564 let match_len = find_match_length src prev_ix (src_pos + pos) (src_pos + num_bytes) in
565 if match_len >= 2 then begin
566 let dist_cost = base_cost +. get_distance_cost model j in
567 for l = !best_len + 1 to match_len do
568 let copycode = get_copy_length_code l in
569 let cmdcode = combine_length_codes inscode copycode (j = 0) in
570 let cost = (if cmdcode < 128 then base_cost else dist_cost) +.
571 float_of_int (get_copy_extra copycode) +.
572 get_command_cost model cmdcode in
573 if cost < nodes.(pos + l).cost then begin
574 update_zopfli_node nodes pos start l l backward (j + 1) cost;
575 result := max !result l
576 end;
577 best_len := l
578 done
579 end
580 end
581 end
582 done;
583
584 (* For iterations >= 2, only look at distance cache matches *)
585 if k < 2 then begin
586 (* Loop through all matches *)
587 let len = ref min_len in
588 for j = 0 to num_matches - 1 do
589 let m = matches.(j) in
590 let dist = m.bm_distance in
591 let dist_code = dist + 16 - 1 in (* Add 16 short codes *)
592 let (dist_symbol, distnumextra, _) = prefix_encode_copy_distance dist_code in
593 let dist_cost = base_cost +. float_of_int distnumextra +.
594 get_distance_cost model dist_symbol in
595 let max_match_len = m.bm_length in
596
597 (* For long matches or dictionary, try only max length *)
598 if !len < max_match_len && max_match_len > max_zopfli_len then
599 len := max_match_len;
600
601 while !len <= max_match_len do
602 let len_code = m.bm_len_code in
603 let copycode = get_copy_length_code len_code in
604 let cmdcode = combine_length_codes inscode copycode false in
605 let cost = dist_cost +. float_of_int (get_copy_extra copycode) +.
606 get_command_cost model cmdcode in
607 if cost < nodes.(pos + !len).cost then begin
608 update_zopfli_node nodes pos start !len len_code dist 0 cost;
609 result := max !result !len
610 end;
611 incr len
612 done
613 done
614 end
615 done;
616 !result
617
618(* Compute shortest path from nodes *)
619let compute_shortest_path_from_nodes num_bytes nodes =
620 let index = ref num_bytes in
621 let num_commands = ref 0 in
622 (* Find the actual end position *)
623 while zopfli_node_insert_length nodes.(!index) = 0 &&
624 nodes.(!index).length = 1 && !index > 0 do
625 decr index
626 done;
627 nodes.(!index).shortcut <- max_int; (* Mark as end *)
628 while !index > 0 do
629 let len = zopfli_node_command_length nodes.(!index) in
630 index := !index - len;
631 nodes.(!index).shortcut <- len; (* Use shortcut to store next length *)
632 incr num_commands
633 done;
634 !num_commands
635
636(* ============================================================
637 Main Zopfli function for Q10
638 ============================================================ *)
639
640let zopfli_compute_shortest_path src src_pos num_bytes starting_dist_cache =
641 let max_backward_limit = max_distance in
642 let max_zopfli_len = max_zopfli_len_quality_10 in
643 let max_iters = max_zopfli_candidates_q10 in
644
645 (* Initialize nodes *)
646 let nodes = Array.init (num_bytes + 1) (fun _ -> create_zopfli_node ()) in
647 nodes.(0).length <- 0;
648 nodes.(0).cost <- 0.0;
649
650 (* Initialize cost model from literal costs (first pass) *)
651 let model = init_cost_model_from_literals src src_pos num_bytes in
652
653 (* Hash table and chain *)
654 let hash_table = Array.make hash_size (-1) in
655 let chain_table = Array.make num_bytes (-1) in
656 let chain_base = src_pos in
657
658 (* Initialize queue *)
659 let queue = create_start_pos_queue () in
660
661 (* Main DP loop *)
662 let i = ref 0 in
663 while !i + min_match - 1 < num_bytes do
664 let pos = src_pos + !i in
665 let max_distance_here = min pos max_backward_limit in
666
667 (* Update hash table *)
668 if pos + min_match <= src_pos + num_bytes then begin
669 let h = hash4 src pos in
670 let chain_idx = !i in
671 if chain_idx < Array.length chain_table then
672 chain_table.(chain_idx) <- hash_table.(h);
673 hash_table.(h) <- pos
674 end;
675
676 (* Find all matches *)
677 let matches = find_all_matches src pos (src_pos + num_bytes)
678 hash_table chain_table chain_base max_distance_here in
679 let matches_arr = Array.of_list matches in
680 let num_matches = Array.length matches_arr in
681
682 (* Check for long match to skip *)
683 let skip =
684 if num_matches > 0 then begin
685 let last_match = matches_arr.(num_matches - 1) in
686 if last_match.bm_length > max_zopfli_len then begin
687 (* Use only longest match *)
688 matches_arr.(0) <- last_match;
689 last_match.bm_length
690 end else 0
691 end else 0
692 in
693
694 let update_skip = update_nodes num_bytes src_pos !i src src_pos model
695 max_backward_limit starting_dist_cache
696 (if skip > 0 then 1 else num_matches) matches_arr queue nodes
697 max_zopfli_len max_iters in
698
699 let actual_skip = if update_skip < brotli_long_copy_quick_step then 0 else update_skip in
700 let skip = max skip actual_skip in
701
702 if skip > 1 then begin
703 let skip_remaining = ref (skip - 1) in
704 while !skip_remaining > 0 && !i + min_match - 1 < num_bytes do
705 incr i;
706 evaluate_node src_pos !i max_backward_limit starting_dist_cache model queue nodes;
707 decr skip_remaining
708 done
709 end;
710 incr i
711 done;
712
713 (nodes, compute_shortest_path_from_nodes num_bytes nodes)
714
715(* ============================================================
716 HQ Zopfli function for Q11 (two passes with histogram refinement)
717 ============================================================ *)
718
719(* Build histograms from completed DP nodes for second pass cost refinement.
720 This matches brotli-c ZopfliCostModelSetFromCommands in backward_references_hq.c *)
721let build_histograms_from_nodes src src_pos num_bytes nodes =
722 let lit_histogram = Array.make 256 0 in
723 let cmd_histogram = Array.make 704 0 in
724 let dist_histogram = Array.make 544 0 in
725
726 (* Reconstruct path from nodes *)
727 let idx = ref num_bytes in
728 (* Find the actual end position *)
729 while zopfli_node_insert_length nodes.(!idx) = 0 &&
730 nodes.(!idx).length = 1 && !idx > 0 do
731 decr idx
732 done;
733
734 let pending_lit_start = ref 0 in
735 let end_pos = !idx in
736
737 (* Walk backwards through the path *)
738 idx := end_pos;
739 let path = ref [] in
740 while !idx > 0 do
741 let node = nodes.(!idx) in
742 let cmd_len = zopfli_node_command_length node in
743 if cmd_len > 0 then begin
744 path := !idx :: !path;
745 idx := !idx - cmd_len
746 end else
747 idx := 0
748 done;
749
750 (* Process path forward to count symbols *)
751 pending_lit_start := 0;
752 List.iter (fun end_pos ->
753 let node = nodes.(end_pos) in
754 let copy_len = zopfli_node_copy_length node in
755 let _insert_len = zopfli_node_insert_length node in
756 let dist_code = zopfli_node_distance_code node in
757
758 let copy_start = end_pos - copy_len in
759 let lit_len = copy_start - !pending_lit_start in
760
761 (* Count literals *)
762 for i = !pending_lit_start to copy_start - 1 do
763 let c = Char.code (Bytes.get src (src_pos + i)) in
764 lit_histogram.(c) <- lit_histogram.(c) + 1
765 done;
766
767 (* Count command code *)
768 let inscode = get_insert_length_code lit_len in
769 let copycode = get_copy_length_code copy_len in
770 let use_last = dist_code = 0 in
771 let cmdcode = combine_length_codes inscode copycode use_last in
772 if cmdcode < 704 then
773 cmd_histogram.(cmdcode) <- cmd_histogram.(cmdcode) + 1;
774
775 (* Count distance code if explicit *)
776 if cmdcode >= 128 then begin
777 let dc = if dist_code < 16 then dist_code
778 else begin
779 let (symbol, _, _) = prefix_encode_copy_distance (node.distance + 16 - 1) in
780 symbol
781 end
782 in
783 if dc < 544 then
784 dist_histogram.(dc) <- dist_histogram.(dc) + 1
785 end;
786
787 pending_lit_start := end_pos
788 ) !path;
789
790 (* Count remaining literals *)
791 for i = !pending_lit_start to num_bytes - 1 do
792 let c = Char.code (Bytes.get src (src_pos + i)) in
793 lit_histogram.(c) <- lit_histogram.(c) + 1
794 done;
795
796 (lit_histogram, cmd_histogram, dist_histogram)
797
798let hq_zopfli_compute_shortest_path src src_pos num_bytes starting_dist_cache =
799 let max_backward_limit = max_distance in
800 let max_zopfli_len = max_zopfli_len_quality_11 in
801 let max_iters = max_zopfli_candidates_q11 in
802
803 (* Pre-compute all matches *)
804 let hash_table = Array.make hash_size (-1) in
805 let chain_table = Array.make num_bytes (-1) in
806 let chain_base = src_pos in
807 let all_matches = Array.make num_bytes [||] in
808 let num_matches_arr = Array.make num_bytes 0 in
809
810 for i = 0 to num_bytes - min_match do
811 let pos = src_pos + i in
812 let max_distance_here = min pos max_backward_limit in
813
814 (* Update hash *)
815 if pos + min_match <= src_pos + num_bytes then begin
816 let h = hash4 src pos in
817 chain_table.(i) <- hash_table.(h);
818 hash_table.(h) <- pos
819 end;
820
821 let matches = find_all_matches src pos (src_pos + num_bytes)
822 hash_table chain_table chain_base max_distance_here in
823 let matches_arr = Array.of_list matches in
824 all_matches.(i) <- matches_arr;
825 num_matches_arr.(i) <- Array.length matches_arr;
826
827 (* Skip after very long match *)
828 if Array.length matches_arr > 0 then begin
829 let last = matches_arr.(Array.length matches_arr - 1) in
830 if last.bm_length > max_zopfli_len then begin
831 let skip = last.bm_length - 1 in
832 for j = 1 to min skip (num_bytes - min_match - i) do
833 all_matches.(i + j) <- [||];
834 num_matches_arr.(i + j) <- 0
835 done
836 end
837 end
838 done;
839
840 (* Do two iterations with histogram refinement *)
841 let final_nodes = ref (Array.init (num_bytes + 1) (fun _ -> create_zopfli_node ())) in
842 let final_count = ref 0 in
843 let first_pass_nodes = ref None in
844
845 for iteration = 0 to 1 do
846 let nodes = Array.init (num_bytes + 1) (fun _ -> create_zopfli_node ()) in
847 nodes.(0).length <- 0;
848 nodes.(0).cost <- 0.0;
849
850 let model =
851 if iteration = 0 then
852 (* First pass: use sliding window literal cost estimation *)
853 init_cost_model_from_literals src src_pos num_bytes
854 else begin
855 (* Second pass: build histograms from first pass for refined estimation *)
856 match !first_pass_nodes with
857 | None -> init_cost_model_from_literals src src_pos num_bytes
858 | Some prev_nodes ->
859 let (lit_hist, cmd_hist, dist_hist) =
860 build_histograms_from_nodes src src_pos num_bytes prev_nodes
861 in
862 init_cost_model_from_histograms src src_pos num_bytes
863 ~lit_histogram:lit_hist ~cmd_histogram:cmd_hist ~dist_histogram:dist_hist
864 end
865 in
866
867 let queue = create_start_pos_queue () in
868
869 (* Main DP loop *)
870 let i = ref 0 in
871 while !i + min_match - 1 < num_bytes do
872 let skip = update_nodes num_bytes src_pos !i src src_pos model
873 max_backward_limit starting_dist_cache
874 num_matches_arr.(!i) all_matches.(!i) queue nodes
875 max_zopfli_len max_iters in
876
877 let skip = if skip < brotli_long_copy_quick_step then 0 else skip in
878
879 if skip > 1 then begin
880 let skip_remaining = ref (skip - 1) in
881 while !skip_remaining > 0 && !i + min_match - 1 < num_bytes do
882 incr i;
883 evaluate_node src_pos !i max_backward_limit starting_dist_cache model queue nodes;
884 decr skip_remaining
885 done
886 end;
887 incr i
888 done;
889
890 (* Save first pass nodes for histogram building *)
891 if iteration = 0 then begin
892 let _ = compute_shortest_path_from_nodes num_bytes nodes in
893 first_pass_nodes := Some nodes
894 end;
895
896 final_nodes := nodes;
897 final_count := compute_shortest_path_from_nodes num_bytes nodes
898 done;
899
900 (!final_nodes, !final_count)
901
902(* ============================================================
903 Create commands from Zopfli nodes
904 ============================================================ *)
905
906let zopfli_create_commands num_bytes src_pos nodes =
907 let commands = ref [] in
908 let ring = Lz77.create_dist_ring () in
909
910 (* First, reconstruct the path using shortcut field *)
911 let path = ref [] in
912 let idx = ref num_bytes in
913 while !idx > 0 && nodes.(!idx).shortcut <> max_int do
914 path := !idx :: !path;
915 let len = nodes.(!idx).shortcut in
916 if len > 0 && len <= !idx then
917 idx := !idx - len
918 else
919 idx := 0
920 done;
921
922 (* Now process each command in the path *)
923 let pending_lit_start = ref 0 in
924 List.iter (fun end_pos ->
925 let node = nodes.(end_pos) in
926 let copy_len = zopfli_node_copy_length node in
927 let _insert_len = zopfli_node_insert_length node in
928 let distance = zopfli_node_copy_distance node in
929 let dist_code = zopfli_node_distance_code node in
930
931 let copy_start = end_pos - copy_len in
932 let lit_len = copy_start - !pending_lit_start in
933
934 (* Determine short code *)
935 let short_code =
936 if dist_code < 16 then Some dist_code
937 else None
938 in
939
940 commands := Lz77.InsertCopy {
941 lit_start = src_pos + !pending_lit_start;
942 lit_len;
943 copy_len;
944 distance;
945 dist_code = short_code;
946 } :: !commands;
947
948 (* Update ring buffer *)
949 (match short_code with
950 | Some 0 -> ()
951 | _ -> Lz77.push_distance ring distance);
952
953 pending_lit_start := end_pos
954 ) !path;
955
956 (* Handle remaining literals *)
957 if !pending_lit_start < num_bytes then
958 commands := Lz77.Literals {
959 start = src_pos + !pending_lit_start;
960 len = num_bytes - !pending_lit_start
961 } :: !commands;
962
963 List.rev !commands
964
965(* ============================================================
966 Public API
967 ============================================================ *)
968
969let generate_commands ?(quality=11) src src_pos src_len =
970 if src_len = 0 then []
971 else if src_len < min_match then
972 [Lz77.Literals { start = src_pos; len = src_len }]
973 else begin
974 let starting_dist_cache = [| 16; 15; 11; 4 |] in
975
976 let (nodes, _num_commands) =
977 if quality >= 11 then
978 hq_zopfli_compute_shortest_path src src_pos src_len starting_dist_cache
979 else
980 zopfli_compute_shortest_path src src_pos src_len starting_dist_cache
981 in
982
983 zopfli_create_commands src_len src_pos nodes
984 end