Bitlevel streams for OCaml
1(** Bitstream - Bit-level I/O for binary formats.
2
3 This library provides efficient bit-level reading and writing for parsing
4 and generating binary formats. It supports both forward (start-to-end) and
5 backward (end-to-start) operations, as required by various compression
6 algorithms.
7
8 {1 Overview}
9
10 {[
11 (* Forward reading from a slice (zero-copy) *)
12 let slice = { Bitstream.Slice.bytes = data; first = 0; length = n } in
13 let r = Bitstream.Forward_reader.of_slice slice in
14 let magic = Bitstream.Forward_reader.read_bits r 32 in
15 let flags = Bitstream.Forward_reader.read_bits r 8 in
16
17 (* Get remaining data as a slice (zero-copy) *)
18 let remaining = Bitstream.Forward_reader.to_slice r in
19
20 (* Backward reading - for FSE/ANS entropy decoding *)
21 let r = Bitstream.Backward_reader.of_slice slice in
22 let symbol = Bitstream.Backward_reader.read_bits r num_bits
23 ]}
24
25 {1 Bytesrw Compatibility}
26
27 The {!Slice} type is structurally compatible with [Bytesrw.Bytes.Slice.t],
28 enabling zero-copy integration with bytesrw-based streaming. All reader
29 and writer constructors accept slices as the primary input type.
30
31 {1 Error Handling}
32
33 Operations raise exceptions on error:
34 - {!End_of_stream}: Reading past end of stream
35 - {!Invalid_state}: Operation requires specific state (e.g., byte alignment)
36 - {!Corrupted_stream}: Malformed stream data *)
37
38(** {1 Slice Type}
39
40 A slice is a view into a byte buffer. This type is structurally compatible
41 with [Bytesrw.Bytes.Slice.t], enabling zero-copy interop. *)
42
43module Slice : sig
44 type t = {
45 bytes : bytes;
46 first : int;
47 length : int;
48 }
49 (** A slice referencing [length] bytes starting at [first] in [bytes].
50 This is structurally identical to [Bytesrw.Bytes.Slice.t]. *)
51
52 val make : bytes -> first:int -> length:int -> t
53 (** [make bytes ~first ~length] creates a slice. *)
54
55 val of_bytes : ?first:int -> ?length:int -> bytes -> t
56 (** [of_bytes bytes] creates a slice for the entire buffer.
57 Optional [first] and [length] can restrict the range. *)
58
59 val to_bytes : t -> bytes
60 (** [to_bytes t] copies the slice contents to a new buffer. *)
61
62 val is_empty : t -> bool
63 (** [is_empty t] returns true if the slice has zero length. *)
64
65 val sub : t -> first:int -> length:int -> t
66 (** [sub t ~first ~length] creates a sub-slice. [first] is relative to [t]. *)
67end
68
69(** {1 Exceptions} *)
70
71exception End_of_stream
72(** Raised when attempting to read past the end of the stream. *)
73
74exception Invalid_state of string
75(** Raised when an operation requires a specific state (e.g., byte alignment). *)
76
77exception Corrupted_stream of string
78(** Raised when stream data is malformed (e.g., invalid padding marker). *)
79
80(** {1 Forward Bitstream Reader} *)
81
82module Forward_reader : sig
83 (** Forward bitstream reader state. *)
84 type t
85
86 val of_slice : Slice.t -> t
87 (** [of_slice slice] creates a reader from a slice. Zero-copy. *)
88
89 val of_bytes : bytes -> t
90 (** [of_bytes src] creates a reader for the entire byte buffer. *)
91
92 val create : bytes -> pos:int -> len:int -> t
93 (** [create src ~pos ~len] creates a reader for [len] bytes starting at [pos]. *)
94
95 val remaining : t -> int
96 (** [remaining t] returns the number of unread bits. *)
97
98 val is_byte_aligned : t -> bool
99 (** [is_byte_aligned t] returns true if the reader is at a byte boundary. *)
100
101 val read_bits : t -> int -> int
102 (** [read_bits t n] reads and returns [n] bits (1-57) in little-endian order.
103 @raise End_of_stream if not enough data available.
104 @raise Invalid_argument if [n > 57]. *)
105
106 val read_byte : t -> int
107 (** [read_byte t] reads and returns the next byte (0-255).
108 @raise Invalid_state if not byte aligned.
109 @raise End_of_stream if at end of stream. *)
110
111 val rewind_bits : t -> int -> unit
112 (** [rewind_bits t n] rewinds the stream by [n] bits.
113 @raise End_of_stream if rewinding past the start. *)
114
115 val align : t -> unit
116 (** [align t] advances to the next byte boundary if not already aligned. *)
117
118 val byte_position : t -> int
119 (** [byte_position t] returns the current byte position.
120 @raise Invalid_state if not byte aligned. *)
121
122 val get_slice : t -> int -> Slice.t
123 (** [get_slice t n] returns the next [n] bytes as a slice (zero-copy).
124 The slice references the underlying buffer directly.
125 @raise Invalid_state if not byte aligned.
126 @raise End_of_stream if not enough data. *)
127
128 val get_bytes : t -> int -> bytes
129 (** [get_bytes t n] reads and returns the next [n] bytes as a new buffer.
130 Equivalent to [Slice.to_bytes (get_slice t n)].
131 @raise Invalid_state if not byte aligned.
132 @raise End_of_stream if not enough data. *)
133
134 val to_slice : t -> Slice.t
135 (** [to_slice t] returns the remaining data as a slice (zero-copy).
136 @raise Invalid_state if not byte aligned. *)
137
138 val advance : t -> int -> unit
139 (** [advance t n] skips [n] bytes without returning them.
140 @raise Invalid_state if not byte aligned.
141 @raise End_of_stream if not enough data. *)
142
143 val sub : t -> int -> t
144 (** [sub t n] creates a sub-reader for the next [n] bytes and advances [t].
145 @raise Invalid_state if not byte aligned.
146 @raise End_of_stream if not enough data. *)
147
148 val remaining_bytes : t -> int
149 (** [remaining_bytes t] returns the number of unread bytes.
150 @raise Invalid_state if not byte aligned. *)
151
152 val skip_bits : t -> int -> unit
153 (** [skip_bits t n] skips [n] bits without returning them.
154 @raise End_of_stream if not enough data. *)
155end
156
157(** {1 Backward Bitstream Reader}
158
159 Reads bits from the end of a buffer towards the start. The stream format
160 includes a padding marker: the highest 1-bit in the final byte indicates
161 where actual data begins.
162
163 This format is used by FSE and ANS entropy coders. *)
164
165module Backward_reader : sig
166 (** Backward bitstream reader state. *)
167 type t
168
169 val of_slice : Slice.t -> t
170 (** [of_slice slice] creates a backward reader from a slice. Zero-copy.
171 @raise End_of_stream if slice is empty.
172 @raise Corrupted_stream if padding marker is invalid. *)
173
174 val of_bytes : bytes -> pos:int -> len:int -> t
175 (** [of_bytes src ~pos ~len] creates a backward reader.
176 The stream is read from position [pos + len - 1] towards [pos].
177 @raise End_of_stream if [len = 0].
178 @raise Corrupted_stream if padding marker is invalid. *)
179
180 val remaining : t -> int
181 (** [remaining t] returns the number of bits remaining. *)
182
183 val is_empty : t -> bool
184 (** [is_empty t] returns true if no more bits are available. *)
185
186 val read_bits : t -> int -> int
187 (** [read_bits t n] reads and returns [n] bits (1-57).
188 Returns 0 bits when reading past the beginning.
189 @raise Invalid_argument if [n > 57]. *)
190
191 val peek_bits : t -> int -> int
192 (** [peek_bits t n] returns the next [n] bits without consuming them.
193 @raise Invalid_argument if [n > 57]. *)
194end
195
196(** {1 Forward Bitstream Writer} *)
197
198module Forward_writer : sig
199 (** Forward bitstream writer state. *)
200 type t
201
202 val of_slice : Slice.t -> t
203 (** [of_slice slice] creates a writer into a slice. Zero-copy. *)
204
205 val of_bytes : bytes -> t
206 (** [of_bytes dst] creates a writer starting at position 0. *)
207
208 val create : bytes -> pos:int -> t
209 (** [create dst ~pos] creates a writer starting at [pos] in buffer [dst]. *)
210
211 val write_bits : t -> int -> int -> unit
212 (** [write_bits t value n] writes the lower [n] bits (1-57) of [value]
213 in little-endian order.
214 @raise Invalid_argument if [n > 57]. *)
215
216 val write_byte : t -> int -> unit
217 (** [write_byte t value] writes a single byte. Flushes any partial bits first. *)
218
219 val write_slice : t -> Slice.t -> unit
220 (** [write_slice t slice] writes bytes from a slice. Flushes any partial bits first. *)
221
222 val write_bytes : t -> bytes -> unit
223 (** [write_bytes t src] writes all bytes from [src]. Flushes any partial bits first. *)
224
225 val byte_position : t -> int
226 (** [byte_position t] returns the current output position including any partial byte. *)
227
228 val flush : t -> unit
229 (** [flush t] writes any accumulated bits as a partial byte. *)
230
231 val finalize : t -> int
232 (** [finalize t] flushes and returns the total number of bytes written. *)
233
234 val to_slice : t -> Slice.t
235 (** [to_slice t] flushes and returns the written data as a slice (zero-copy).
236 The slice references the underlying destination buffer. *)
237end
238
239(** {1 Backward Bitstream Writer}
240
241 Accumulates bits to produce output that will be read backwards.
242 Used for FSE and Huffman encoding. *)
243
244module Backward_writer : sig
245 (** Backward bitstream writer state. *)
246 type t
247
248 val create : int -> t
249 (** [create size] creates a writer with an internal buffer of [size] bytes. *)
250
251 val write_bits : t -> int -> int -> unit
252 (** [write_bits t value n] accumulates [n] bits from [value]. *)
253
254 val flush_bytes : t -> unit
255 (** [flush_bytes t] flushes complete bytes to the internal buffer. *)
256
257 val finalize_to_slice : t -> Slice.t
258 (** [finalize_to_slice t] adds the padding marker, flushes, and returns output
259 as a slice (zero-copy). The slice references the internal buffer. *)
260
261 val finalize : t -> bytes
262 (** [finalize t] adds the padding marker, flushes, and returns the output.
263 Equivalent to [Slice.to_bytes (finalize_to_slice t)]. *)
264
265 val current_size : t -> int
266 (** [current_size t] returns the current output size estimate. *)
267end