(** Zstandard format constants (RFC 8878) *)

(** {1 Magic numbers} *)

(** Magic number of a Zstandard frame, as an [int32] literal. *)
let zstd_magic_number = 0xFD2FB528l

(** Magic number of a dictionary, as an [int32] literal. *)
let dict_magic_number = 0xEC30A437l

(** First magic number of the skippable range. Together with
    {!skippable_magic_mask}, a value [m] is in the range when
    [Int32.logand m skippable_magic_mask = skippable_magic_start]. *)
let skippable_magic_start = 0x184D2A50l

(** Mask that clears the low 4 bits of a magic number before it is compared
    with {!skippable_magic_start}. *)
let skippable_magic_mask = 0xFFFFFFF0l

(** Size of a skippable header (presumably in bytes: a 4-byte magic number
    followed by a 4-byte size field; confirm against the decoder). *)
let skippable_header_size = 8

(** {1 Block size limits} *)

(** Maximum block size: 128 KB. *)
let block_size_max = 128 * 1024

(** Maximum size of a literals section; equal to {!block_size_max}. *)
let max_literals_size = block_size_max

(** {1 Maximum values} *)

let max_window_log = 31
let min_window_log = 10
let max_huffman_bits = 11
let max_fse_accuracy_log = 15
let max_huffman_symbols = 256
let max_fse_symbols = 256

(** {1 Block types} *)

type block_type =
  | Raw_block
  | RLE_block
  | Compressed_block
  | Reserved_block

(** [block_type_of_int n] decodes a block-type field. [0], [1] and [2] map to
    [Raw_block], [RLE_block] and [Compressed_block]; every other integer
    (including values outside [0..3]) maps to [Reserved_block], so callers
    are expected to pass an already-masked 2-bit value. *)
let block_type_of_int = function
  | 0 -> Raw_block
  | 1 -> RLE_block
  | 2 -> Compressed_block
  | _ -> Reserved_block

(* Block type integer values for encoding; they mirror the decoding performed
   by [block_type_of_int]. *)
let block_raw = 0
let block_rle = 1
let block_compressed = 2

(** {1 Literals block types} *)

type literals_block_type =
  | Raw_literals
  | RLE_literals
  | Compressed_literals
  | Treeless_literals

(** [literals_block_type_of_int n] decodes a literals-block-type field. [0],
    [1] and [2] map to [Raw_literals], [RLE_literals] and
    [Compressed_literals]; every other integer maps to
    [Treeless_literals]. *)
let literals_block_type_of_int = function
  | 0 -> Raw_literals
  | 1 -> RLE_literals
  | 2 -> Compressed_literals
  | _ -> Treeless_literals

(** {1 Sequence compression modes} *)

type seq_mode =
  | Predefined_mode
  | RLE_mode
  | FSE_mode
  | Repeat_mode

(** [seq_mode_of_int n] decodes a sequence compression mode field. [0], [1]
    and [2] map to [Predefined_mode], [RLE_mode] and [FSE_mode]; every other
    integer maps to [Repeat_mode]. *)
let seq_mode_of_int = function
  | 0 -> Predefined_mode
  | 1 -> RLE_mode
  | 2 -> FSE_mode
  | _ -> Repeat_mode

(** {1 Default FSE distribution tables for predefined mode}

    In each table the absolute values of the entries sum to
    [1 lsl accuracy_log]. NOTE(review): the [-1] entries are presumably the
    RFC's "less than 1" probabilities; confirm against RFC 8878. *)

(* Literals length default distribution (accuracy log 6, 64 states);
   36 entries, one per literals length code. *)
let ll_default_distribution = [|
  4; 3; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 2; 1; 1; 1;
  2; 2; 2; 2; 2; 2; 2; 2; 2; 3; 2; 1; 1; 1; 1; 1;
  -1; -1; -1; -1
|]

let ll_default_accuracy_log = 6
let ll_max_accuracy_log = 9

(* Match length default distribution (accuracy log 6, 64 states);
   53 entries, one per match length code. *)
let ml_default_distribution = [|
  1; 4; 3; 2; 2; 2; 2; 2; 2; 1; 1; 1; 1; 1; 1; 1;
  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1;
  1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; -1; -1;
  -1; -1; -1; -1; -1
|]

let ml_default_accuracy_log = 6
let ml_max_accuracy_log = 9

(* Offset default distribution (accuracy log 5, 32 states); 29 entries. *)
let of_default_distribution = [|
  1; 1; 1; 1; 1; 1; 2; 2; 2; 1; 1; 1; 1; 1; 1; 1;
  1; 1; 1; 1; 1; 1; 1; 1; -1; -1; -1; -1; -1
|]

let of_default_accuracy_log = 5
let of_max_accuracy_log = 8

(** {1 Sequence code baselines and extra bits}

    The baseline and extra-bit tables are indexed by code and have
    [max_code + 1] entries each. *)

(* Literals length: code 0-35 *)
let ll_baselines = [|
  0; 1; 2; 3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15;
  16; 18; 20; 22; 24; 28; 32; 40; 48; 64; 128; 256; 512; 1024; 2048; 4096;
  8192; 16384; 32768; 65536
|]

let ll_extra_bits = [|
  0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
  1; 1; 1; 1; 2; 2; 3; 3; 4; 6; 7; 8; 9; 10; 11; 12;
  13; 14; 15; 16
|]

let ll_max_code = 35

(* Match length: code 0-52 *)
let ml_baselines = [|
  3; 4; 5; 6; 7; 8; 9; 10; 11; 12; 13; 14; 15; 16; 17; 18;
  19; 20; 21; 22; 23; 24; 25; 26; 27; 28; 29; 30; 31; 32; 33; 34;
  35; 37; 39; 41; 43; 47; 51; 59; 67; 83; 99; 131; 259; 515; 1027; 2051;
  4099; 8195; 16387; 32771; 65539
|]

let ml_extra_bits = [|
  0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
  0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0; 0;
  1; 1; 1; 1; 2; 2; 3; 3; 4; 4; 5; 7; 8; 9; 10; 11;
  12; 13; 14; 15; 16
|]

let ml_max_code = 52

(* Offset codes: the code is the number of bits to read *)
let of_max_code = 31

(** {1 Initial repeat offsets} *)

let initial_repeat_offsets = [| 1; 4; 8 |]

(** {1 Error types} *)

type error =
  | Invalid_magic_number
  | Invalid_frame_header
  | Invalid_block_type
  | Invalid_block_size
  | Invalid_literals_header
  | Invalid_huffman_table
  | Invalid_fse_table
  | Invalid_sequence_header
  | Invalid_offset
  | Invalid_match_length
  | Truncated_input
  | Output_too_small
  | Checksum_mismatch
  | Dictionary_mismatch
  | Corruption

(** Exception carrying an {!error}. *)
exception Zstd_error of error

(** [error_message e] returns a short, single-line, human-readable
    description of [e]. Every constructor is listed explicitly so the
    compiler flags any constructor added to {!error} later. *)
let error_message = function
  | Invalid_magic_number -> "Invalid magic number"
  | Invalid_frame_header -> "Invalid frame header"
  | Invalid_block_type -> "Invalid block type"
  | Invalid_block_size -> "Invalid block size"
  | Invalid_literals_header -> "Invalid literals header"
  (* Kept on one line: a line break inside the literal would end up in the
     message itself. *)
  | Invalid_huffman_table -> "Invalid Huffman table"
  | Invalid_fse_table -> "Invalid FSE table"
  | Invalid_sequence_header -> "Invalid sequence header"
  | Invalid_offset -> "Invalid offset"
  | Invalid_match_length -> "Invalid match length"
  | Truncated_input -> "Truncated input"
  | Output_too_small -> "Output buffer too small"
  | Checksum_mismatch -> "Checksum mismatch"
  | Dictionary_mismatch -> "Dictionary mismatch"
  | Corruption -> "Data corruption detected"