Pure OCaml implementation of the Brotli compression algorithm
1(* Variable-width bit reading with little-endian semantics for Brotli *)
2
3type t = {
4 src : bytes;
5 src_len : int;
6 mutable byte_pos : int;
7 mutable bit_pos : int; (* 0-7: bits already read from current byte *)
8}
9
10exception End_of_input
11
12(* Bit masks for extracting n bits *)
13let[@inline always] bit_mask n =
14 (1 lsl n) - 1
15
16(* Get byte at position, returns 0 if past end (zero-padding) *)
17let[@inline always] get_byte t pos =
18 if pos < t.src_len then
19 Char.code (Bytes.unsafe_get t.src pos)
20 else
21 0
22
23let create ~src ~pos ~len =
24 { src; src_len = pos + len; byte_pos = pos; bit_pos = 0 }
25
26let create_from_string s =
27 create ~src:(Bytes.unsafe_of_string s) ~pos:0 ~len:(String.length s)
28
29let reset t =
30 t.byte_pos <- 0;
31 t.bit_pos <- 0
32
33let position t =
34 t.byte_pos * 8 + t.bit_pos
35
36let bytes_remaining t =
37 let total_bits = (t.src_len - t.byte_pos) * 8 - t.bit_pos in
38 (total_bits + 7) / 8
39
40let has_more t =
41 t.byte_pos < t.src_len || t.bit_pos > 0
42
43(* Read n bits (1-24) without advancing the position - optimized for common cases *)
44let[@inline] peek_bits t n_bits =
45 if n_bits = 0 then 0
46 else begin
47 let bit_offset = t.bit_pos in
48 let byte_pos = t.byte_pos in
49 let bits_needed = n_bits + bit_offset in
50 (* Optimized path for reading up to 24 bits (most common) *)
51 if bits_needed <= 24 && byte_pos + 2 < t.src_len then begin
52 (* Read 3 bytes at once *)
53 let b0 = Char.code (Bytes.unsafe_get t.src byte_pos) in
54 let b1 = Char.code (Bytes.unsafe_get t.src (byte_pos + 1)) in
55 let b2 = Char.code (Bytes.unsafe_get t.src (byte_pos + 2)) in
56 let combined = b0 lor (b1 lsl 8) lor (b2 lsl 16) in
57 (combined lsr bit_offset) land bit_mask n_bits
58 end
59 else begin
60 (* Fallback for edge cases and larger reads *)
61 let result = ref 0 in
62 let bytes_shift = ref 0 in
63 let buf_pos = ref byte_pos in
64 while !bytes_shift < bits_needed do
65 result := !result lor (get_byte t !buf_pos lsl !bytes_shift);
66 bytes_shift := !bytes_shift + 8;
67 incr buf_pos
68 done;
69 (!result lsr bit_offset) land bit_mask n_bits
70 end
71 end
72
73(* Advance by n bits without reading *)
74let skip_bits t n_bits =
75 if n_bits > 0 then begin
76 let next_in_bits = t.bit_pos + n_bits in
77 t.bit_pos <- next_in_bits land 7;
78 t.byte_pos <- t.byte_pos + (next_in_bits lsr 3)
79 end
80
81(* Read n bits (1-24) and advance position *)
82let[@inline] read_bits t n_bits =
83 let value = peek_bits t n_bits in
84 skip_bits t n_bits;
85 value
86
87(* Read a single bit *)
88let[@inline] read_bit t =
89 read_bits t 1
90
91(* Advance to next byte boundary *)
92let align_to_byte t =
93 if t.bit_pos <> 0 then begin
94 t.bit_pos <- 0;
95 t.byte_pos <- t.byte_pos + 1
96 end
97
98(* Copy n bytes to destination buffer, first aligning to byte boundary *)
99let copy_bytes t ~dst ~dst_pos ~len =
100 align_to_byte t;
101 if len > 0 then begin
102 let src_pos = t.byte_pos in
103 if src_pos + len > t.src_len then
104 raise End_of_input;
105 Bytes.blit t.src src_pos dst dst_pos len;
106 t.byte_pos <- src_pos + len
107 end
108
109(* Check if we have enough bits remaining *)
110let check_bits t n_bits =
111 let total_bits = (t.src_len - t.byte_pos) * 8 - t.bit_pos in
112 total_bits >= n_bits