Pure OCaml implementation of the Brotli compression algorithm
1(* Prefix code tables for Brotli variable-length values *)
2
3(* A prefix code range: [offset, offset + 2^nbits) *)
4type prefix = {
5 offset : int;
6 nbits : int;
7}
8
9(* Lookup tables for command code -> insert/copy code indices *)
10let insert_range_lut = [| 0; 0; 8; 8; 0; 16; 8; 16; 16 |]
11let copy_range_lut = [| 0; 8; 0; 8; 16; 0; 16; 8; 16 |]
12
13(* Block length prefix codes (26 entries) *)
14let block_length_prefix = [|
15 { offset = 1; nbits = 2 }; { offset = 5; nbits = 2 };
16 { offset = 9; nbits = 2 }; { offset = 13; nbits = 2 };
17 { offset = 17; nbits = 3 }; { offset = 25; nbits = 3 };
18 { offset = 33; nbits = 3 }; { offset = 41; nbits = 3 };
19 { offset = 49; nbits = 4 }; { offset = 65; nbits = 4 };
20 { offset = 81; nbits = 4 }; { offset = 97; nbits = 4 };
21 { offset = 113; nbits = 5 }; { offset = 145; nbits = 5 };
22 { offset = 177; nbits = 5 }; { offset = 209; nbits = 5 };
23 { offset = 241; nbits = 6 }; { offset = 305; nbits = 6 };
24 { offset = 369; nbits = 7 }; { offset = 497; nbits = 8 };
25 { offset = 753; nbits = 9 }; { offset = 1265; nbits = 10 };
26 { offset = 2289; nbits = 11 }; { offset = 4337; nbits = 12 };
27 { offset = 8433; nbits = 13 }; { offset = 16625; nbits = 24 };
28|]
29
30(* Insert length prefix codes (24 entries) *)
31let insert_length_prefix = [|
32 { offset = 0; nbits = 0 }; { offset = 1; nbits = 0 };
33 { offset = 2; nbits = 0 }; { offset = 3; nbits = 0 };
34 { offset = 4; nbits = 0 }; { offset = 5; nbits = 0 };
35 { offset = 6; nbits = 1 }; { offset = 8; nbits = 1 };
36 { offset = 10; nbits = 2 }; { offset = 14; nbits = 2 };
37 { offset = 18; nbits = 3 }; { offset = 26; nbits = 3 };
38 { offset = 34; nbits = 4 }; { offset = 50; nbits = 4 };
39 { offset = 66; nbits = 5 }; { offset = 98; nbits = 5 };
40 { offset = 130; nbits = 6 }; { offset = 194; nbits = 7 };
41 { offset = 322; nbits = 8 }; { offset = 578; nbits = 9 };
42 { offset = 1090; nbits = 10 }; { offset = 2114; nbits = 12 };
43 { offset = 6210; nbits = 14 }; { offset = 22594; nbits = 24 };
44|]
45
46(* Copy length prefix codes (24 entries) *)
47let copy_length_prefix = [|
48 { offset = 2; nbits = 0 }; { offset = 3; nbits = 0 };
49 { offset = 4; nbits = 0 }; { offset = 5; nbits = 0 };
50 { offset = 6; nbits = 0 }; { offset = 7; nbits = 0 };
51 { offset = 8; nbits = 0 }; { offset = 9; nbits = 0 };
52 { offset = 10; nbits = 1 }; { offset = 12; nbits = 1 };
53 { offset = 14; nbits = 2 }; { offset = 18; nbits = 2 };
54 { offset = 22; nbits = 3 }; { offset = 30; nbits = 3 };
55 { offset = 38; nbits = 4 }; { offset = 54; nbits = 4 };
56 { offset = 70; nbits = 5 }; { offset = 102; nbits = 5 };
57 { offset = 134; nbits = 6 }; { offset = 198; nbits = 7 };
58 { offset = 326; nbits = 8 }; { offset = 582; nbits = 9 };
59 { offset = 1094; nbits = 10 }; { offset = 2118; nbits = 24 };
60|]
61
62(* Decode a block length from prefix code *)
63let[@inline] decode_block_length br code =
64 let p = block_length_prefix.(code) in
65 if p.nbits = 0 then p.offset
66 else p.offset + Bit_reader.read_bits br p.nbits
67
68(* Decode insert length from prefix code *)
69let[@inline] decode_insert_length br code =
70 let p = insert_length_prefix.(code) in
71 if p.nbits = 0 then p.offset
72 else p.offset + Bit_reader.read_bits br p.nbits
73
74(* Decode copy length from prefix code *)
75let[@inline] decode_copy_length br code =
76 let p = copy_length_prefix.(code) in
77 if p.nbits = 0 then p.offset
78 else p.offset + Bit_reader.read_bits br p.nbits
79
80(* Get insert and copy length codes from command code *)
81let[@inline] get_insert_length_code cmd_code =
82 let range_idx = cmd_code lsr 6 in
83 let insert_code_base = insert_range_lut.(range_idx) in
84 insert_code_base + ((cmd_code lsr 3) land 7)
85
86let[@inline] get_copy_length_code cmd_code =
87 let range_idx = cmd_code lsr 6 in
88 let copy_code_base = copy_range_lut.(range_idx) in
89 copy_code_base + (cmd_code land 7)
90
91(* Command code to distance context mapping *)
92let[@inline] command_code_to_distance_context cmd_code =
93 if cmd_code < 128 then 0
94 else if cmd_code < 256 then 1
95 else if cmd_code < 384 then 2
96 else 3