Pure OCaml implementation of the Brotli compression algorithm
at main 171 lines 8.1 kB view raw
1(* Context modeling lookup tables for Brotli (RFC 7932 Section 7) *) 2 3(* Context modes *) 4type mode = LSB6 | MSB6 | UTF8 | SIGNED 5 6let mode_of_int = function 7 | 0 -> LSB6 8 | 1 -> MSB6 9 | 2 -> UTF8 10 | 3 -> SIGNED 11 | _ -> invalid_arg "Invalid context mode" 12 13let int_of_mode = function 14 | LSB6 -> 0 15 | MSB6 -> 1 16 | UTF8 -> 2 17 | SIGNED -> 3 18 19(* The master lookup table - all context modes combined *) 20let lookup = [| 21 (* CONTEXT_UTF8, last byte (offset 0) *) 22 (* ASCII range *) 23 0; 0; 0; 0; 0; 0; 0; 0; 0; 4; 4; 0; 0; 4; 0; 0; 24 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 25 8; 12; 16; 12; 12; 20; 12; 16; 24; 28; 12; 12; 32; 12; 36; 12; 26 44; 44; 44; 44; 44; 44; 44; 44; 44; 44; 32; 32; 24; 40; 28; 12; 27 12; 48; 52; 52; 52; 48; 52; 52; 52; 48; 52; 52; 52; 52; 52; 48; 28 52; 52; 52; 52; 52; 48; 52; 52; 52; 52; 52; 24; 12; 28; 12; 12; 29 12; 56; 60; 60; 60; 56; 60; 60; 60; 56; 60; 60; 60; 60; 60; 56; 30 60; 60; 60; 60; 60; 56; 60; 60; 60; 60; 60; 24; 12; 28; 12; 0; 31 (* UTF8 continuation byte range *) 32 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 33 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 34 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 35 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 0; 1; 36 (* UTF8 lead byte range *) 37 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 38 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 39 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 40 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 2; 3; 41 42 (* CONTEXT_UTF8 second last byte (offset 256) *) 43 (* ASCII range *) 44 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 45 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 46 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 47 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 1; 1; 1; 1; 1; 1; 48 1; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 49 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 1; 1; 1; 1; 1; 50 1; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 51 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 1; 1; 1; 1; 0; 52 (* UTF8 continuation byte range *) 53 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 54 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 55 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 56 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 57 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 58 (* UTF8 lead byte range *) 59 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 60 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 61 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 62 63 (* CONTEXT_SIGNED, second last byte (offset 512) *) 64 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 65 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 66 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 67 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 68 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 69 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 70 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 71 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 3; 72 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 73 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 74 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 75 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 4; 76 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 77 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 78 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 5; 79 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 6; 7; 80 81 (* CONTEXT_SIGNED, last byte (offset 768) - same as above shifted by 3 bits *) 82 0; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 8; 83 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 84 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 85 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 16; 86 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 87 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 88 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 89 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 24; 90 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 91 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 92 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 93 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 32; 94 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 95 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 96 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 40; 97 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 48; 56; 98 99 (* CONTEXT_LSB6, last byte (offset 1024) *) 100 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 101 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 102 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47; 103 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63; 104 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 105 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 106 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47; 107 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63; 108 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 109 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 110 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47; 111 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63; 112 0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 113 16; 17; 18; 19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 114 32; 33; 34; 35; 36; 37; 38; 39; 40; 41; 42; 43; 44; 45; 46; 47; 115 48; 49; 50; 51; 52; 53; 54; 55; 56; 57; 58; 59; 60; 61; 62; 63; 116 117 (* CONTEXT_MSB6, last byte (offset 1280) *) 118 0; 0; 0; 0; 1; 1; 1; 1; 2; 2; 2; 2; 3; 3; 3; 3; 119 4; 4; 4; 4; 5; 5; 5; 5; 6; 6; 6; 6; 7; 7; 7; 7; 120 8; 8; 8; 8; 9; 9; 9; 9; 10; 10; 10; 10; 11; 11; 11; 11; 121 12; 12; 12; 12; 13; 13; 13; 13; 14; 14; 14; 14; 15; 15; 15; 15; 122 16; 16; 16; 16; 17; 17; 17; 17; 18; 18; 18; 18; 19; 19; 19; 19; 123 20; 20; 20; 20; 21; 21; 21; 21; 22; 22; 22; 22; 23; 23; 23; 23; 124 24; 24; 24; 24; 25; 25; 25; 25; 26; 26; 26; 26; 27; 27; 27; 27; 125 28; 28; 28; 28; 29; 29; 29; 29; 30; 30; 30; 30; 31; 31; 31; 31; 126 32; 32; 32; 32; 33; 33; 33; 33; 34; 34; 34; 34; 35; 35; 35; 35; 127 36; 36; 36; 36; 37; 37; 37; 37; 38; 38; 38; 38; 39; 39; 39; 39; 128 40; 40; 40; 40; 41; 41; 41; 41; 42; 42; 42; 42; 43; 43; 43; 43; 129 44; 44; 44; 44; 45; 45; 45; 45; 46; 46; 46; 46; 47; 47; 47; 47; 130 48; 48; 48; 48; 49; 49; 49; 49; 50; 50; 50; 50; 51; 51; 51; 51; 131 52; 52; 52; 52; 53; 53; 53; 53; 54; 54; 54; 54; 55; 55; 55; 55; 132 56; 56; 56; 56; 57; 57; 57; 57; 58; 58; 58; 58; 59; 59; 59; 59; 133 60; 60; 60; 60; 61; 61; 61; 61; 62; 62; 62; 62; 63; 63; 63; 63; 134 135 (* CONTEXT_{M,L}SB6, second last byte (offset 1536) - all zeros *) 136 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 137 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 138 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 139 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 140 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 141 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 142 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 143 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 144 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 145 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 146 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 147 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 148 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 149 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 150 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 151 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 152|] 153 154(* Offsets into lookup table for each mode *) 155let lookup_offsets = [| 156 (* LSB6 *) 1024; 1536; 157 (* MSB6 *) 1280; 1536; 158 (* UTF8 *) 0; 256; 159 (* SIGNED *) 768; 512; 160|] 161 162(* Get context ID from previous two bytes *) 163let[@inline] get_context mode ~prev_byte1 ~prev_byte2 = 164 let mode_idx = int_of_mode mode in 165 let offset1 = lookup_offsets.(mode_idx * 2) in 166 let offset2 = lookup_offsets.(mode_idx * 2 + 1) in 167 lookup.(offset1 + prev_byte1) lor lookup.(offset2 + prev_byte2) 168 169(* Distance context based on copy length *) 170let[@inline] distance_context copy_length = 171 if copy_length > 4 then 3 else copy_length - 2