Pure OCaml implementation of the Brotli compression algorithm
at main 112 lines 4.1 kB view raw
1(* Brotli format constants from RFC 7932 *) 2 3(* Specification: 2. Compressed representation overview *) 4let max_number_of_block_types = 256 5 6(* Specification: 3.3. Alphabet sizes *) 7let num_literal_symbols = 256 8let num_command_symbols = 704 9let num_block_len_symbols = 26 10let num_ins_copy_codes = 24 11 12(* Specification: 3.5. Complex prefix codes *) 13let repeat_previous_code_length = 16 14let repeat_zero_code_length = 17 15let code_length_codes = 18 (* repeat_zero_code_length + 1 *) 16let initial_repeated_code_length = 8 17 18(* Specification: 7.3. Encoding of the context map *) 19let context_map_max_rle = 16 20let max_context_map_symbols = max_number_of_block_types + context_map_max_rle 21let max_block_type_symbols = max_number_of_block_types + 2 22 23(* Specification: 7.1. Context modes and context ID lookup for literals *) 24let literal_context_bits = 6 25let num_literal_contexts = 1 lsl literal_context_bits (* 64 *) 26 27(* Specification: 7.2. Context ID for distances *) 28let distance_context_bits = 2 29let num_distance_contexts = 1 lsl distance_context_bits (* 4 *) 30 31(* Specification: 4. Encoding of distances *) 32let num_distance_short_codes = 16 33let max_npostfix = 3 34let max_ndirect = 120 35let max_distance_bits = 24 36 37(* Large window brotli *) 38let large_max_distance_bits = 62 39let large_min_wbits = 10 40let large_max_wbits = 30 41 42(* Calculate distance alphabet size *) 43let distance_alphabet_size ~npostfix ~ndirect ~max_nbits = 44 num_distance_short_codes + ndirect + (max_nbits lsl (npostfix + 1)) 45 46(* Standard distance alphabet size *) 47let num_distance_symbols = 48 distance_alphabet_size ~npostfix:max_npostfix ~ndirect:max_ndirect 49 ~max_nbits:large_max_distance_bits 50 51(* Maximum expressible distance with NPOSTFIX=0, NDIRECT=0 *) 52let max_distance = 0x3FFFFFC (* (1 lsl 26) - 4 *) 53 54(* Specification: 9.1. Format of the Stream Header *) 55let window_gap = 16 56let min_window_bits = 10 57let max_window_bits = 24 58 59let max_backward_limit wbits = (1 lsl wbits) - window_gap 60 61(* Huffman coding constants *) 62let huffman_max_code_length = 15 63let huffman_max_code_length_code_length = 5 64let huffman_max_table_bits = 8 (* Root table size for literals *) 65let huffman_max_command_table_bits = 10 (* Root table size for commands *) 66 67(* Code length code order (RFC 7932 section 3.5) *) 68let code_length_code_order = [| 69 1; 2; 3; 4; 0; 5; 17; 6; 16; 7; 8; 9; 10; 11; 12; 13; 14; 15 70|] 71 72(* Minimum dictionary word length *) 73let min_dictionary_word_length = 4 74let max_dictionary_word_length = 24 75 76(* Number of transforms *) 77let num_transforms = 121 78 79(* ============================================================ 80 Shared utility functions 81 ============================================================ *) 82 83(* Hash multiplier for 4-byte hash functions (from brotli-c) *) 84let hash_multiplier = 0x1e35a7bd 85 86(* Fast log2 approximation matching brotli-c FastLog2. 87 Returns floor(log2(v)) as a float, or 0.0 for v <= 0. *) 88let[@inline always] fast_log2 v = 89 if v <= 0 then 0.0 90 else 91 let rec log2_floor v acc = if v <= 1 then acc else log2_floor (v lsr 1) (acc + 1) in 92 float_of_int (log2_floor v 0) 93 94(* Hash a 4-byte sequence from a bytes buffer. 95 Returns a hash value with the specified number of bits. *) 96let[@inline always] hash4_bytes src pos bits = 97 let b0 = Char.code (Bytes.unsafe_get src pos) in 98 let b1 = Char.code (Bytes.unsafe_get src (pos + 1)) in 99 let b2 = Char.code (Bytes.unsafe_get src (pos + 2)) in 100 let b3 = Char.code (Bytes.unsafe_get src (pos + 3)) in 101 let v = b0 lor (b1 lsl 8) lor (b2 lsl 16) lor (b3 lsl 24) in 102 ((v * hash_multiplier) land 0xFFFFFFFF) lsr (32 - bits) 103 104(* Hash a 4-byte sequence from a string. 105 Returns a hash value with the specified number of bits. *) 106let[@inline always] hash4_string s pos bits = 107 let b0 = Char.code (String.unsafe_get s pos) in 108 let b1 = Char.code (String.unsafe_get s (pos + 1)) in 109 let b2 = Char.code (String.unsafe_get s (pos + 2)) in 110 let b3 = Char.code (String.unsafe_get s (pos + 3)) in 111 let v = b0 lor (b1 lsl 8) lor (b2 lsl 16) lor (b3 lsl 24) in 112 ((v * hash_multiplier) land 0xFFFFFFFF) lsr (32 - bits)