Zstd compression in pure OCaml
1(** Pure OCaml implementation of Zstandard compression (RFC 8878).
2
3 {2 Decoder}
4
5 The decoder is fully compliant with the zstd format specification and can
6 decompress any valid zstd frame produced by any conforming encoder. It
7 supports all block types (raw, RLE, compressed), Huffman and FSE entropy
8 coding, and content checksums.
9
10 {2 Encoder}
11
12 The encoder produces valid zstd frames that can be decompressed by any
13 conforming decoder (including the reference C implementation). Current
14 encoding strategy:
15
16 - {b RLE blocks}: Data consisting of a single repeated byte is encoded as
17 RLE blocks (4 bytes total regardless of decompressed size)
18 - {b Raw blocks}: All other data is stored as raw (uncompressed) blocks
19
20 This means the encoder always produces valid output, but compression ratios
21 are not optimal for most data. The encoder is suitable for:
22 - Applications where decompression speed matters more than compressed size
23 - Data that is already compressed or has high entropy
24 - Testing zstd decoders
25
26 Future improvements planned:
27 - LZ77 match finding with sequence encoding
28 - Huffman compression for literals
29 - FSE-compressed blocks for better ratios
30
31 {2 Dictionary Support}
32
33 Dictionary decompression is supported. Dictionary compression is not yet
34 implemented (falls back to regular compression). *)
35
36type error = Constants.error =
37 | Invalid_magic_number
38 | Invalid_frame_header
39 | Invalid_block_type
40 | Invalid_block_size
41 | Invalid_literals_header
42 | Invalid_huffman_table
43 | Invalid_fse_table
44 | Invalid_sequence_header
45 | Invalid_offset
46 | Invalid_match_length
47 | Truncated_input
48 | Output_too_small
49 | Checksum_mismatch
50 | Dictionary_mismatch
51 | Corruption
52
53exception Zstd_error = Constants.Zstd_error
54
55type dictionary = Zstd_decode.dictionary
56
57let error_message = Constants.error_message
58
59(** Check if data starts with zstd magic number *)
60let is_zstd_frame s =
61 if String.length s < 4 then false
62 else
63 let b = Bytes.unsafe_of_string s in
64 let magic = Bytes.get_int32_le b 0 in
65 magic = Constants.zstd_magic_number
66
67(** Get decompressed size from frame header *)
68let get_decompressed_size s =
69 if String.length s < 5 then None
70 else
71 let b = Bytes.unsafe_of_string s in
72 Zstd_decode.get_decompressed_size b ~pos:0 ~len:(String.length s)
73
74(** Calculate maximum compressed size *)
75let compress_bound src_len =
76 (* zstd guarantees compressed size <= src_len + (src_len >> 8) + constant *)
77 src_len + (src_len lsr 8) + 64
78
79(** Load dictionary *)
80let load_dictionary s =
81 let b = Bytes.of_string s in
82 Zstd_decode.parse_dictionary b ~pos:0 ~len:(String.length s)
83
84(** Decompress bytes *)
85let decompress_bytes_exn src =
86 Zstd_decode.decompress_frame src ~pos:0 ~len:(Bytes.length src)
87
88let decompress_bytes src =
89 try Ok (decompress_bytes_exn src)
90 with Zstd_error e -> Error (error_message e)
91
92(** Decompress string *)
93let decompress_exn s =
94 let src = Bytes.unsafe_of_string s in
95 let result = Zstd_decode.decompress_frame src ~pos:0 ~len:(String.length s) in
96 Bytes.unsafe_to_string result
97
98let decompress s =
99 try Ok (decompress_exn s)
100 with Zstd_error e -> Error (error_message e)
101
102(** Decompress with dictionary *)
103let decompress_with_dict_exn dict s =
104 let src = Bytes.unsafe_of_string s in
105 let result = Zstd_decode.decompress_frame ~dict src ~pos:0 ~len:(String.length s) in
106 Bytes.unsafe_to_string result
107
108let decompress_with_dict dict s =
109 try Ok (decompress_with_dict_exn dict s)
110 with Zstd_error e -> Error (error_message e)
111
112(** Decompress into pre-allocated buffer *)
113let decompress_into ~src ~src_pos ~src_len ~dst ~dst_pos =
114 let result = Zstd_decode.decompress_frame src ~pos:src_pos ~len:src_len in
115 let result_len = Bytes.length result in
116 if dst_pos + result_len > Bytes.length dst then
117 raise (Zstd_error Output_too_small);
118 Bytes.blit result 0 dst dst_pos result_len;
119 result_len
120
121(** Compress string *)
122let compress ?(level=3) s =
123 Zstd_encode.compress ~level ~checksum:true s
124
125(** Compress bytes *)
126let compress_bytes ?(level=3) src =
127 let s = Bytes.unsafe_to_string src in
128 let result = Zstd_encode.compress ~level ~checksum:true s in
129 Bytes.of_string result
130
131let compress_with_dict ?level _dict s =
132 (* Dictionary compression uses same encoder but with preloaded tables *)
133 (* For now, just compress without dictionary *)
134 compress ?level s
135
136let compress_into ?(level=3) ~src ~src_pos ~src_len ~dst ~dst_pos () =
137 let input = Bytes.sub_string src src_pos src_len in
138 let result = Zstd_encode.compress ~level ~checksum:true input in
139 let result_len = String.length result in
140 if dst_pos + result_len > Bytes.length dst then
141 raise (Zstd_error Output_too_small);
142 Bytes.blit_string result 0 dst dst_pos result_len;
143 result_len
144
145(** Check if data starts with skippable frame magic *)
146let is_skippable_frame s =
147 let b = Bytes.unsafe_of_string s in
148 Zstd_decode.is_skippable_frame b ~pos:0 ~len:(String.length s)
149
150(** Get skippable frame variant (0-15) *)
151let get_skippable_variant s =
152 let b = Bytes.unsafe_of_string s in
153 Zstd_decode.get_skippable_variant b ~pos:0 ~len:(String.length s)
154
155(** Write a skippable frame *)
156let write_skippable_frame ?variant content =
157 Zstd_encode.write_skippable_frame ?variant content
158
159(** Read a skippable frame and return its content *)
160let read_skippable_frame s =
161 let b = Bytes.unsafe_of_string s in
162 let (content, _) = Zstd_decode.read_skippable_frame b ~pos:0 ~len:(String.length s) in
163 content
164
165(** Get total size of skippable frame *)
166let get_skippable_frame_size s =
167 let b = Bytes.unsafe_of_string s in
168 Zstd_decode.get_skippable_frame_size b ~pos:0 ~len:(String.length s)
169
170(** Find compressed size of first frame *)
171let find_frame_compressed_size s =
172 let b = Bytes.unsafe_of_string s in
173 Zstd_decode.find_frame_compressed_size b ~pos:0 ~len:(String.length s)
174
175(** Decompress all frames *)
176let decompress_all_exn s =
177 let b = Bytes.unsafe_of_string s in
178 let result = Zstd_decode.decompress_frames b ~pos:0 ~len:(String.length s) in
179 Bytes.unsafe_to_string result
180
181let decompress_all s =
182 try Ok (decompress_all_exn s)
183 with Zstd_error e -> Error (error_message e)