Pure OCaml implementation of the Brotli compression algorithm
1(* Context modeling lookup tables for Brotli (RFC 7932 Section 7) *)
2
3(* Context modes *)
4type mode = LSB6 | MSB6 | UTF8 | SIGNED
5
6let mode_of_int = function
7 | 0 -> LSB6
8 | 1 -> MSB6
9 | 2 -> UTF8
10 | 3 -> SIGNED
11 | _ -> invalid_arg "Invalid context mode"
12
13let int_of_mode = function
14 | LSB6 -> 0
15 | MSB6 -> 1
16 | UTF8 -> 2
17 | SIGNED -> 3
18
19(* The master lookup table - all context modes combined *)
20let lookup = [|
21 (* CONTEXT_UTF8, last byte (offset 0) *)
22 (* ASCII range *)
23 0; 0; 0; 0; 0; 0; 0; 0; 0; 4; 4; 0; 0; 4; 0; 0;
24 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
25 8; 12; 16; 12; 12; 20; 12; 16; 24; 28; 12; 12; 32; 12; 36; 12;
26 44; 44; 44; 44; 44; 44; 44; 44; 44; 44; 32; 32; 24; 40; 28; 12;
27 12; 48; 52; 52; 52; 48; 52; 52; 52; 48; 52; 52; 52; 52; 52; 48;
28 52; 52; 52; 52; 52; 48; 52; 52; 52; 52; 52; 24; 12; 28; 12; 12;
29 12; 56; 60; 60; 60; 56; 60; 60; 60; 56; 60; 60; 60; 60; 60; 56;
30 60; 60; 60; 60; 60; 56; 60; 60; 60; 60; 60; 24; 12; 28; 12; 0;
31 (* UTF8 continuation byte range *)
32 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1;
33 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1;
34 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1;
35 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1;
36 (* UTF8 lead byte range *)
37 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3;
38 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3;
39 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3;
40 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3;
41
42 (* CONTEXT_UTF8 second last byte (offset 256) *)
43 (* ASCII range *)
44 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
45 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
46 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
47 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 1; 1; 1; 1; 1; 1;
48 1; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2;
49 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 1; 1; 1; 1; 1;
50 1; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3;
51 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 1; 1; 1; 1; 0;
52 (* UTF8 continuation byte range *)
53 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
54 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
55 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
56 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
57 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
58 (* UTF8 lead byte range *)
59 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
60 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2;
61 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2;
62
63 (* CONTEXT_SIGNED, second last byte (offset 512) *)
64 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
65 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2;
66 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2;
67 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2;
68 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3;
69 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3;
70 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3;
71 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3;
72 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4;
73 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4;
74 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4;
75 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4;
76 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5;
77 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5;
78 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5;
79 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 7;
80
81 (* CONTEXT_SIGNED, last byte (offset 768) - same as above shifted by 3 bits *)
82 0; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8;
83 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16;
84 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16;
85 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16;
86 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24;
87 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24;
88 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24;
89 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24;
90 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32;
91 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32;
92 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32;
93 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32;
94 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40;
95 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40;
96 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40;
97 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 56;
98
99 (* CONTEXT_LSB6, last byte (offset 1024) *)
100 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15;
101 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31;
102 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47;
103 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63;
104 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15;
105 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31;
106 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47;
107 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63;
108 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15;
109 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31;
110 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47;
111 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63;
112 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15;
113 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31;
114 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47;
115 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63;
116
117 (* CONTEXT_MSB6, last byte (offset 1280) *)
118 0; 0; 0; 0; 1; 1; 1; 1; 2; 2; 2; 2; 3; 3; 3; 3;
119 4; 4; 4; 4; 5; 5; 5; 5; 6; 6; 6; 6; 7; 7; 7; 7;
120 8; 8; 8; 8; 9; 9; 9; 9; 10; 10; 10; 10; 11; 11; 11; 11;
121 12; 12; 12; 12; 13; 13; 13; 13; 14; 14; 14; 14; 15; 15; 15; 15;
122 16; 16; 16; 16; 17; 17; 17; 17; 18; 18; 18; 18; 19; 19; 19; 19;
123 20; 20; 20; 20; 21; 21; 21; 21; 22; 22; 22; 22; 23; 23; 23; 23;
124 24; 24; 24; 24; 25; 25; 25; 25; 26; 26; 26; 26; 27; 27; 27; 27;
125 28; 28; 28; 28; 29; 29; 29; 29; 30; 30; 30; 30; 31; 31; 31; 31;
126 32; 32; 32; 32; 33; 33; 33; 33; 34; 34; 34; 34; 35; 35; 35; 35;
127 36; 36; 36; 36; 37; 37; 37; 37; 38; 38; 38; 38; 39; 39; 39; 39;
128 40; 40; 40; 40; 41; 41; 41; 41; 42; 42; 42; 42; 43; 43; 43; 43;
129 44; 44; 44; 44; 45; 45; 45; 45; 46; 46; 46; 46; 47; 47; 47; 47;
130 48; 48; 48; 48; 49; 49; 49; 49; 50; 50; 50; 50; 51; 51; 51; 51;
131 52; 52; 52; 52; 53; 53; 53; 53; 54; 54; 54; 54; 55; 55; 55; 55;
132 56; 56; 56; 56; 57; 57; 57; 57; 58; 58; 58; 58; 59; 59; 59; 59;
133 60; 60; 60; 60; 61; 61; 61; 61; 62; 62; 62; 62; 63; 63; 63; 63;
134
135 (* CONTEXT_{M,L}SB6, second last byte (offset 1536) - all zeros *)
136 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
137 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
138 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
139 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
140 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
141 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
142 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
143 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
144 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
145 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
146 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
147 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
148 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
149 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
150 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
151 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
152|]
153
154(* Offsets into lookup table for each mode *)
155let lookup_offsets = [|
156 (* LSB6 *) 1024; 1536;
157 (* MSB6 *) 1280; 1536;
158 (* UTF8 *) 0; 256;
159 (* SIGNED *) 768; 512;
160|]
161
162(* Get context ID from previous two bytes *)
163let[@inline] get_context mode ~prev_byte1 ~prev_byte2 =
164 let mode_idx = int_of_mode mode in
165 let offset1 = lookup_offsets.(mode_idx * 2) in
166 let offset2 = lookup_offsets.(mode_idx * 2 + 1) in
167 lookup.(offset1 + prev_byte1) lor lookup.(offset2 + prev_byte2)
168
169(* Distance context based on copy length *)
170let[@inline] distance_context copy_length =
171 if copy_length > 4 then 3 else copy_length - 2