Pure OCaml implementation of the Brotli compression algorithm
1(* Brotli format constants from RFC 7932 *)
2
3(* Specification: 2. Compressed representation overview *)
4let max_number_of_block_types = 256
5
6(* Specification: 3.3. Alphabet sizes *)
7let num_literal_symbols = 256
8let num_command_symbols = 704
9let num_block_len_symbols = 26
10let num_ins_copy_codes = 24
11
12(* Specification: 3.5. Complex prefix codes *)
13let repeat_previous_code_length = 16
14let repeat_zero_code_length = 17
15let code_length_codes = 18 (* repeat_zero_code_length + 1 *)
16let initial_repeated_code_length = 8
17
18(* Specification: 7.3. Encoding of the context map *)
19let context_map_max_rle = 16
20let max_context_map_symbols = max_number_of_block_types + context_map_max_rle
21let max_block_type_symbols = max_number_of_block_types + 2
22
23(* Specification: 7.1. Context modes and context ID lookup for literals *)
24let literal_context_bits = 6
25let num_literal_contexts = 1 lsl literal_context_bits (* 64 *)
26
27(* Specification: 7.2. Context ID for distances *)
28let distance_context_bits = 2
29let num_distance_contexts = 1 lsl distance_context_bits (* 4 *)
30
31(* Specification: 4. Encoding of distances *)
32let num_distance_short_codes = 16
33let max_npostfix = 3
34let max_ndirect = 120
35let max_distance_bits = 24
36
37(* Large window brotli *)
38let large_max_distance_bits = 62
39let large_min_wbits = 10
40let large_max_wbits = 30
41
42(* Calculate distance alphabet size *)
43let distance_alphabet_size ~npostfix ~ndirect ~max_nbits =
44 num_distance_short_codes + ndirect + (max_nbits lsl (npostfix + 1))
45
46(* Standard distance alphabet size *)
47let num_distance_symbols =
48 distance_alphabet_size ~npostfix:max_npostfix ~ndirect:max_ndirect
49 ~max_nbits:large_max_distance_bits
50
51(* Maximum expressible distance with NPOSTFIX=0, NDIRECT=0 *)
52let max_distance = 0x3FFFFFC (* (1 lsl 26) - 4 *)
53
54(* Specification: 9.1. Format of the Stream Header *)
55let window_gap = 16
56let min_window_bits = 10
57let max_window_bits = 24
58
59let max_backward_limit wbits = (1 lsl wbits) - window_gap
60
61(* Huffman coding constants *)
62let huffman_max_code_length = 15
63let huffman_max_code_length_code_length = 5
64let huffman_max_table_bits = 8 (* Root table size for literals *)
65let huffman_max_command_table_bits = 10 (* Root table size for commands *)
66
67(* Code length code order (RFC 7932 section 3.5) *)
68let code_length_code_order = [|
69 1; 2; 3; 4; 0; 5; 17; 6; 16; 7; 8; 9; 10; 11; 12; 13; 14; 15
70|]
71
72(* Minimum dictionary word length *)
73let min_dictionary_word_length = 4
74let max_dictionary_word_length = 24
75
76(* Number of transforms *)
77let num_transforms = 121
78
79(* ============================================================
80 Shared utility functions
81 ============================================================ *)
82
83(* Hash multiplier for 4-byte hash functions (from brotli-c) *)
84let hash_multiplier = 0x1e35a7bd
85
86(* Fast log2 approximation matching brotli-c FastLog2.
87 Returns floor(log2(v)) as a float, or 0.0 for v <= 0. *)
88let[@inline always] fast_log2 v =
89 if v <= 0 then 0.0
90 else
91 let rec log2_floor v acc = if v <= 1 then acc else log2_floor (v lsr 1) (acc + 1) in
92 float_of_int (log2_floor v 0)
93
94(* Hash a 4-byte sequence from a bytes buffer.
95 Returns a hash value with the specified number of bits. *)
96let[@inline always] hash4_bytes src pos bits =
97 let b0 = Char.code (Bytes.unsafe_get src pos) in
98 let b1 = Char.code (Bytes.unsafe_get src (pos + 1)) in
99 let b2 = Char.code (Bytes.unsafe_get src (pos + 2)) in
100 let b3 = Char.code (Bytes.unsafe_get src (pos + 3)) in
101 let v = b0 lor (b1 lsl 8) lor (b2 lsl 16) lor (b3 lsl 24) in
102 ((v * hash_multiplier) land 0xFFFFFFFF) lsr (32 - bits)
103
104(* Hash a 4-byte sequence from a string.
105 Returns a hash value with the specified number of bits. *)
106let[@inline always] hash4_string s pos bits =
107 let b0 = Char.code (String.unsafe_get s pos) in
108 let b1 = Char.code (String.unsafe_get s (pos + 1)) in
109 let b2 = Char.code (String.unsafe_get s (pos + 2)) in
110 let b3 = Char.code (String.unsafe_get s (pos + 3)) in
111 let v = b0 lor (b1 lsl 8) lor (b2 lsl 16) lor (b3 lsl 24) in
112 ((v * hash_multiplier) land 0xFFFFFFFF) lsr (32 - bits)