Zstd compression in pure OCaml
at main 183 lines 6.2 kB view raw
1(** Pure OCaml implementation of Zstandard compression (RFC 8878). 2 3 {2 Decoder} 4 5 The decoder is fully compliant with the zstd format specification and can 6 decompress any valid zstd frame produced by any conforming encoder. It 7 supports all block types (raw, RLE, compressed), Huffman and FSE entropy 8 coding, and content checksums. 9 10 {2 Encoder} 11 12 The encoder produces valid zstd frames that can be decompressed by any 13 conforming decoder (including the reference C implementation). Current 14 encoding strategy: 15 16 - {b RLE blocks}: Data consisting of a single repeated byte is encoded as 17 RLE blocks (4 bytes total regardless of decompressed size) 18 - {b Raw blocks}: All other data is stored as raw (uncompressed) blocks 19 20 This means the encoder always produces valid output, but compression ratios 21 are not optimal for most data. The encoder is suitable for: 22 - Applications where decompression speed matters more than compressed size 23 - Data that is already compressed or has high entropy 24 - Testing zstd decoders 25 26 Future improvements planned: 27 - LZ77 match finding with sequence encoding 28 - Huffman compression for literals 29 - FSE-compressed blocks for better ratios 30 31 {2 Dictionary Support} 32 33 Dictionary decompression is supported. Dictionary compression is not yet 34 implemented (falls back to regular compression). *) 35 36type error = Constants.error = 37 | Invalid_magic_number 38 | Invalid_frame_header 39 | Invalid_block_type 40 | Invalid_block_size 41 | Invalid_literals_header 42 | Invalid_huffman_table 43 | Invalid_fse_table 44 | Invalid_sequence_header 45 | Invalid_offset 46 | Invalid_match_length 47 | Truncated_input 48 | Output_too_small 49 | Checksum_mismatch 50 | Dictionary_mismatch 51 | Corruption 52 53exception Zstd_error = Constants.Zstd_error 54 55type dictionary = Zstd_decode.dictionary 56 57let error_message = Constants.error_message 58 59(** Check if data starts with zstd magic number *) 60let is_zstd_frame s = 61 if String.length s < 4 then false 62 else 63 let b = Bytes.unsafe_of_string s in 64 let magic = Bytes.get_int32_le b 0 in 65 magic = Constants.zstd_magic_number 66 67(** Get decompressed size from frame header *) 68let get_decompressed_size s = 69 if String.length s < 5 then None 70 else 71 let b = Bytes.unsafe_of_string s in 72 Zstd_decode.get_decompressed_size b ~pos:0 ~len:(String.length s) 73 74(** Calculate maximum compressed size *) 75let compress_bound src_len = 76 (* zstd guarantees compressed size <= src_len + (src_len >> 8) + constant *) 77 src_len + (src_len lsr 8) + 64 78 79(** Load dictionary *) 80let load_dictionary s = 81 let b = Bytes.of_string s in 82 Zstd_decode.parse_dictionary b ~pos:0 ~len:(String.length s) 83 84(** Decompress bytes *) 85let decompress_bytes_exn src = 86 Zstd_decode.decompress_frame src ~pos:0 ~len:(Bytes.length src) 87 88let decompress_bytes src = 89 try Ok (decompress_bytes_exn src) 90 with Zstd_error e -> Error (error_message e) 91 92(** Decompress string *) 93let decompress_exn s = 94 let src = Bytes.unsafe_of_string s in 95 let result = Zstd_decode.decompress_frame src ~pos:0 ~len:(String.length s) in 96 Bytes.unsafe_to_string result 97 98let decompress s = 99 try Ok (decompress_exn s) 100 with Zstd_error e -> Error (error_message e) 101 102(** Decompress with dictionary *) 103let decompress_with_dict_exn dict s = 104 let src = Bytes.unsafe_of_string s in 105 let result = Zstd_decode.decompress_frame ~dict src ~pos:0 ~len:(String.length s) in 106 Bytes.unsafe_to_string result 107 108let decompress_with_dict dict s = 109 try Ok (decompress_with_dict_exn dict s) 110 with Zstd_error e -> Error (error_message e) 111 112(** Decompress into pre-allocated buffer *) 113let decompress_into ~src ~src_pos ~src_len ~dst ~dst_pos = 114 let result = Zstd_decode.decompress_frame src ~pos:src_pos ~len:src_len in 115 let result_len = Bytes.length result in 116 if dst_pos + result_len > Bytes.length dst then 117 raise (Zstd_error Output_too_small); 118 Bytes.blit result 0 dst dst_pos result_len; 119 result_len 120 121(** Compress string *) 122let compress ?(level=3) s = 123 Zstd_encode.compress ~level ~checksum:true s 124 125(** Compress bytes *) 126let compress_bytes ?(level=3) src = 127 let s = Bytes.unsafe_to_string src in 128 let result = Zstd_encode.compress ~level ~checksum:true s in 129 Bytes.of_string result 130 131let compress_with_dict ?level _dict s = 132 (* Dictionary compression uses same encoder but with preloaded tables *) 133 (* For now, just compress without dictionary *) 134 compress ?level s 135 136let compress_into ?(level=3) ~src ~src_pos ~src_len ~dst ~dst_pos () = 137 let input = Bytes.sub_string src src_pos src_len in 138 let result = Zstd_encode.compress ~level ~checksum:true input in 139 let result_len = String.length result in 140 if dst_pos + result_len > Bytes.length dst then 141 raise (Zstd_error Output_too_small); 142 Bytes.blit_string result 0 dst dst_pos result_len; 143 result_len 144 145(** Check if data starts with skippable frame magic *) 146let is_skippable_frame s = 147 let b = Bytes.unsafe_of_string s in 148 Zstd_decode.is_skippable_frame b ~pos:0 ~len:(String.length s) 149 150(** Get skippable frame variant (0-15) *) 151let get_skippable_variant s = 152 let b = Bytes.unsafe_of_string s in 153 Zstd_decode.get_skippable_variant b ~pos:0 ~len:(String.length s) 154 155(** Write a skippable frame *) 156let write_skippable_frame ?variant content = 157 Zstd_encode.write_skippable_frame ?variant content 158 159(** Read a skippable frame and return its content *) 160let read_skippable_frame s = 161 let b = Bytes.unsafe_of_string s in 162 let (content, _) = Zstd_decode.read_skippable_frame b ~pos:0 ~len:(String.length s) in 163 content 164 165(** Get total size of skippable frame *) 166let get_skippable_frame_size s = 167 let b = Bytes.unsafe_of_string s in 168 Zstd_decode.get_skippable_frame_size b ~pos:0 ~len:(String.length s) 169 170(** Find compressed size of first frame *) 171let find_frame_compressed_size s = 172 let b = Bytes.unsafe_of_string s in 173 Zstd_decode.find_frame_compressed_size b ~pos:0 ~len:(String.length s) 174 175(** Decompress all frames *) 176let decompress_all_exn s = 177 let b = Bytes.unsafe_of_string s in 178 let result = Zstd_decode.decompress_frames b ~pos:0 ~len:(String.length s) in 179 Bytes.unsafe_to_string result 180 181let decompress_all s = 182 try Ok (decompress_all_exn s) 183 with Zstd_error e -> Error (error_message e)