Bitlevel streams for OCaml
at main 267 lines 9.5 kB view raw
1(** Bitstream - Bit-level I/O for binary formats. 2 3 This library provides efficient bit-level reading and writing for parsing 4 and generating binary formats. It supports both forward (start-to-end) and 5 backward (end-to-start) operations, as required by various compression 6 algorithms. 7 8 {1 Overview} 9 10 {[ 11 (* Forward reading from a slice (zero-copy) *) 12 let slice = { Bitstream.Slice.bytes = data; first = 0; length = n } in 13 let r = Bitstream.Forward_reader.of_slice slice in 14 let magic = Bitstream.Forward_reader.read_bits r 32 in 15 let flags = Bitstream.Forward_reader.read_bits r 8 in 16 17 (* Get remaining data as a slice (zero-copy) *) 18 let remaining = Bitstream.Forward_reader.to_slice r in 19 20 (* Backward reading - for FSE/ANS entropy decoding *) 21 let r = Bitstream.Backward_reader.of_slice slice in 22 let symbol = Bitstream.Backward_reader.read_bits r num_bits 23 ]} 24 25 {1 Bytesrw Compatibility} 26 27 The {!Slice} type is structurally compatible with [Bytesrw.Bytes.Slice.t], 28 enabling zero-copy integration with bytesrw-based streaming. All reader 29 and writer constructors accept slices as the primary input type. 30 31 {1 Error Handling} 32 33 Operations raise exceptions on error: 34 - {!End_of_stream}: Reading past end of stream 35 - {!Invalid_state}: Operation requires specific state (e.g., byte alignment) 36 - {!Corrupted_stream}: Malformed stream data *) 37 38(** {1 Slice Type} 39 40 A slice is a view into a byte buffer. This type is structurally compatible 41 with [Bytesrw.Bytes.Slice.t], enabling zero-copy interop. *) 42 43module Slice : sig 44 type t = { 45 bytes : bytes; 46 first : int; 47 length : int; 48 } 49 (** A slice referencing [length] bytes starting at [first] in [bytes]. 50 This is structurally identical to [Bytesrw.Bytes.Slice.t]. *) 51 52 val make : bytes -> first:int -> length:int -> t 53 (** [make bytes ~first ~length] creates a slice. *) 54 55 val of_bytes : ?first:int -> ?length:int -> bytes -> t 56 (** [of_bytes bytes] creates a slice for the entire buffer. 57 Optional [first] and [length] can restrict the range. *) 58 59 val to_bytes : t -> bytes 60 (** [to_bytes t] copies the slice contents to a new buffer. *) 61 62 val is_empty : t -> bool 63 (** [is_empty t] returns true if the slice has zero length. *) 64 65 val sub : t -> first:int -> length:int -> t 66 (** [sub t ~first ~length] creates a sub-slice. [first] is relative to [t]. *) 67end 68 69(** {1 Exceptions} *) 70 71exception End_of_stream 72(** Raised when attempting to read past the end of the stream. *) 73 74exception Invalid_state of string 75(** Raised when an operation requires a specific state (e.g., byte alignment). *) 76 77exception Corrupted_stream of string 78(** Raised when stream data is malformed (e.g., invalid padding marker). *) 79 80(** {1 Forward Bitstream Reader} *) 81 82module Forward_reader : sig 83 (** Forward bitstream reader state. *) 84 type t 85 86 val of_slice : Slice.t -> t 87 (** [of_slice slice] creates a reader from a slice. Zero-copy. *) 88 89 val of_bytes : bytes -> t 90 (** [of_bytes src] creates a reader for the entire byte buffer. *) 91 92 val create : bytes -> pos:int -> len:int -> t 93 (** [create src ~pos ~len] creates a reader for [len] bytes starting at [pos]. *) 94 95 val remaining : t -> int 96 (** [remaining t] returns the number of unread bits. *) 97 98 val is_byte_aligned : t -> bool 99 (** [is_byte_aligned t] returns true if the reader is at a byte boundary. *) 100 101 val read_bits : t -> int -> int 102 (** [read_bits t n] reads and returns [n] bits (1-57) in little-endian order. 103 @raise End_of_stream if not enough data available. 104 @raise Invalid_argument if [n > 57]. *) 105 106 val read_byte : t -> int 107 (** [read_byte t] reads and returns the next byte (0-255). 108 @raise Invalid_state if not byte aligned. 109 @raise End_of_stream if at end of stream. *) 110 111 val rewind_bits : t -> int -> unit 112 (** [rewind_bits t n] rewinds the stream by [n] bits. 113 @raise End_of_stream if rewinding past the start. *) 114 115 val align : t -> unit 116 (** [align t] advances to the next byte boundary if not already aligned. *) 117 118 val byte_position : t -> int 119 (** [byte_position t] returns the current byte position. 120 @raise Invalid_state if not byte aligned. *) 121 122 val get_slice : t -> int -> Slice.t 123 (** [get_slice t n] returns the next [n] bytes as a slice (zero-copy). 124 The slice references the underlying buffer directly. 125 @raise Invalid_state if not byte aligned. 126 @raise End_of_stream if not enough data. *) 127 128 val get_bytes : t -> int -> bytes 129 (** [get_bytes t n] reads and returns the next [n] bytes as a new buffer. 130 Equivalent to [Slice.to_bytes (get_slice t n)]. 131 @raise Invalid_state if not byte aligned. 132 @raise End_of_stream if not enough data. *) 133 134 val to_slice : t -> Slice.t 135 (** [to_slice t] returns the remaining data as a slice (zero-copy). 136 @raise Invalid_state if not byte aligned. *) 137 138 val advance : t -> int -> unit 139 (** [advance t n] skips [n] bytes without returning them. 140 @raise Invalid_state if not byte aligned. 141 @raise End_of_stream if not enough data. *) 142 143 val sub : t -> int -> t 144 (** [sub t n] creates a sub-reader for the next [n] bytes and advances [t]. 145 @raise Invalid_state if not byte aligned. 146 @raise End_of_stream if not enough data. *) 147 148 val remaining_bytes : t -> int 149 (** [remaining_bytes t] returns the number of unread bytes. 150 @raise Invalid_state if not byte aligned. *) 151 152 val skip_bits : t -> int -> unit 153 (** [skip_bits t n] skips [n] bits without returning them. 154 @raise End_of_stream if not enough data. *) 155end 156 157(** {1 Backward Bitstream Reader} 158 159 Reads bits from the end of a buffer towards the start. The stream format 160 includes a padding marker: the highest 1-bit in the final byte indicates 161 where actual data begins. 162 163 This format is used by FSE and ANS entropy coders. *) 164 165module Backward_reader : sig 166 (** Backward bitstream reader state. *) 167 type t 168 169 val of_slice : Slice.t -> t 170 (** [of_slice slice] creates a backward reader from a slice. Zero-copy. 171 @raise End_of_stream if slice is empty. 172 @raise Corrupted_stream if padding marker is invalid. *) 173 174 val of_bytes : bytes -> pos:int -> len:int -> t 175 (** [of_bytes src ~pos ~len] creates a backward reader. 176 The stream is read from position [pos + len - 1] towards [pos]. 177 @raise End_of_stream if [len = 0]. 178 @raise Corrupted_stream if padding marker is invalid. *) 179 180 val remaining : t -> int 181 (** [remaining t] returns the number of bits remaining. *) 182 183 val is_empty : t -> bool 184 (** [is_empty t] returns true if no more bits are available. *) 185 186 val read_bits : t -> int -> int 187 (** [read_bits t n] reads and returns [n] bits (1-57). 188 Returns 0 bits when reading past the beginning. 189 @raise Invalid_argument if [n > 57]. *) 190 191 val peek_bits : t -> int -> int 192 (** [peek_bits t n] returns the next [n] bits without consuming them. 193 @raise Invalid_argument if [n > 57]. *) 194end 195 196(** {1 Forward Bitstream Writer} *) 197 198module Forward_writer : sig 199 (** Forward bitstream writer state. *) 200 type t 201 202 val of_slice : Slice.t -> t 203 (** [of_slice slice] creates a writer into a slice. Zero-copy. *) 204 205 val of_bytes : bytes -> t 206 (** [of_bytes dst] creates a writer starting at position 0. *) 207 208 val create : bytes -> pos:int -> t 209 (** [create dst ~pos] creates a writer starting at [pos] in buffer [dst]. *) 210 211 val write_bits : t -> int -> int -> unit 212 (** [write_bits t value n] writes the lower [n] bits (1-57) of [value] 213 in little-endian order. 214 @raise Invalid_argument if [n > 57]. *) 215 216 val write_byte : t -> int -> unit 217 (** [write_byte t value] writes a single byte. Flushes any partial bits first. *) 218 219 val write_slice : t -> Slice.t -> unit 220 (** [write_slice t slice] writes bytes from a slice. Flushes any partial bits first. *) 221 222 val write_bytes : t -> bytes -> unit 223 (** [write_bytes t src] writes all bytes from [src]. Flushes any partial bits first. *) 224 225 val byte_position : t -> int 226 (** [byte_position t] returns the current output position including any partial byte. *) 227 228 val flush : t -> unit 229 (** [flush t] writes any accumulated bits as a partial byte. *) 230 231 val finalize : t -> int 232 (** [finalize t] flushes and returns the total number of bytes written. *) 233 234 val to_slice : t -> Slice.t 235 (** [to_slice t] flushes and returns the written data as a slice (zero-copy). 236 The slice references the underlying destination buffer. *) 237end 238 239(** {1 Backward Bitstream Writer} 240 241 Accumulates bits to produce output that will be read backwards. 242 Used for FSE and Huffman encoding. *) 243 244module Backward_writer : sig 245 (** Backward bitstream writer state. *) 246 type t 247 248 val create : int -> t 249 (** [create size] creates a writer with an internal buffer of [size] bytes. *) 250 251 val write_bits : t -> int -> int -> unit 252 (** [write_bits t value n] accumulates [n] bits from [value]. *) 253 254 val flush_bytes : t -> unit 255 (** [flush_bytes t] flushes complete bytes to the internal buffer. *) 256 257 val finalize_to_slice : t -> Slice.t 258 (** [finalize_to_slice t] adds the padding marker, flushes, and returns output 259 as a slice (zero-copy). The slice references the internal buffer. *) 260 261 val finalize : t -> bytes 262 (** [finalize t] adds the padding marker, flushes, and returns the output. 263 Equivalent to [Slice.to_bytes (finalize_to_slice t)]. *) 264 265 val current_size : t -> int 266 (** [current_size t] returns the current output size estimate. *) 267end