Bit-level streams for OCaml

Initial commit of ocaml-bitstream

+936
+1
.gitignore
··· 1 + _build
+34
bitstream.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + synopsis: "Bit-level I/O for binary format parsing and generation" 4 + description: """ 5 + Forward and backward bitstream reading/writing for binary formats. 6 + Supports bit-level operations required by compression algorithms like 7 + FSE, ANS, and Huffman coding.""" 8 + maintainer: ["Anil Madhavapeddy <anil@recoil.org>"] 9 + authors: ["Anil Madhavapeddy <anil@recoil.org>"] 10 + license: "ISC" 11 + homepage: "https://tangled.org/anil.recoil.org/ocaml-bitstream" 12 + bug-reports: "https://tangled.org/anil.recoil.org/ocaml-bitstream/issues" 13 + depends: [ 14 + "dune" {>= "3.21"} 15 + "ocaml" {>= "5.2.0"} 16 + "alcotest" {with-test & >= "1.7.0"} 17 + "odoc" {with-doc} 18 + ] 19 + build: [ 20 + ["dune" "subst"] {dev} 21 + [ 22 + "dune" 23 + "build" 24 + "-p" 25 + name 26 + "-j" 27 + jobs 28 + "@install" 29 + "@runtest" {with-test} 30 + "@doc" {with-doc} 31 + ] 32 + ] 33 + dev-repo: "git+https://tangled.org/anil.recoil.org/ocaml-bitstream" 34 + x-maintenance-intent: ["(latest)"]
+1
dune
··· 1 + (vendored_dirs vendor)
+20
dune-project
··· 1 + (lang dune 3.21) 2 + (name bitstream) 3 + 4 + (generate_opam_files true) 5 + 6 + (license ISC) 7 + (authors "Anil Madhavapeddy <anil@recoil.org>") 8 + (maintainers "Anil Madhavapeddy <anil@recoil.org>") 9 + (source (tangled anil.recoil.org/ocaml-bitstream)) 10 + 11 + (package 12 + (name bitstream) 13 + (synopsis "Bit-level I/O for binary format parsing and generation") 14 + (description 15 + "Forward and backward bitstream reading/writing for binary formats. 16 + Supports bit-level operations required by compression algorithms like 17 + FSE, ANS, and Huffman coding.") 18 + (depends 19 + (ocaml (>= 5.2.0)) 20 + (alcotest (and :with-test (>= 1.7.0)))))
+385
src/bitstream.ml
(** Bitstream - Bit-level I/O for binary formats.

    Provides forward and backward bitstream reading and writing for parsing
    and generating binary formats that operate at the bit level.

    Forward streams read/write from the start of a buffer towards the end.
    Backward streams read/write from the end of a buffer towards the start,
    which is required by some compression algorithms (FSE, ANS). *)

(** {1 Slice Type} *)

module Slice = struct
  (* A view of [length] bytes of [bytes] starting at index [first].
     No bounds validation is performed here; out-of-range slices surface as
     [Invalid_argument] from [Bytes] when they are actually accessed. *)
  type t = {
    bytes : bytes;
    first : int;
    length : int;
  }

  let make bytes ~first ~length = { bytes; first; length }

  (* [length] defaults to "everything from [first] to the end of [bytes]". *)
  let of_bytes ?(first = 0) ?length bytes =
    let length =
      match length with
      | Some length -> length
      | None -> Bytes.length bytes - first
    in
    { bytes; first; length }

  (* Copies the viewed range into a fresh buffer. *)
  let to_bytes t = Bytes.sub t.bytes t.first t.length

  let is_empty t = t.length = 0

  (* [first] is relative to the start of [t], not to the underlying buffer. *)
  let sub t ~first ~length =
    { bytes = t.bytes; first = t.first + first; length }
end

(** {1 Exceptions} *)

exception End_of_stream
(** Raised when attempting to read past the end of the stream. *)

exception Invalid_state of string
(** Raised when an operation requires a specific state (e.g., byte alignment). *)

exception Corrupted_stream of string
(** Raised when stream data is malformed (e.g., invalid padding marker). *)

(** {1 Forward Bitstream Reader} *)

module Forward_reader = struct
  type t = {
    src : bytes;
    start_pos : int;          (* index of the first byte of the stream *)
    limit : int;              (* index one past the last byte of the stream *)
    mutable byte_pos : int;   (* index of the byte currently being read *)
    mutable bit_pos : int;    (* 0-7, bits consumed in current byte *)
  }

  let of_slice (slice : Slice.t) =
    { src = slice.bytes;
      start_pos = slice.first;
      limit = slice.first + slice.length;
      byte_pos = slice.first;
      bit_pos = 0 }

  let of_bytes src = of_slice (Slice.of_bytes src)

  let create src ~pos ~len = of_slice (Slice.make src ~first:pos ~length:len)

  (* Number of unread bits. *)
  let[@inline] remaining t = (t.limit - t.byte_pos) * 8 - t.bit_pos

  let[@inline] is_byte_aligned t = t.bit_pos = 0

  (* Raises [Invalid_state "<fn>: not byte aligned"] unless at a byte
     boundary. The message format is relied upon by callers and tests. *)
  let require_aligned t fn =
    if t.bit_pos <> 0 then raise (Invalid_state (fn ^ ": not byte aligned"))

  (* Validates a byte count for the byte-oriented operations.
     Written as [n > limit - byte_pos] so that a huge [n] cannot overflow. *)
  let require_bytes t fn n =
    if n < 0 then invalid_arg (fn ^ ": negative byte count");
    if n > t.limit - t.byte_pos then raise End_of_stream

  (* Reads [n] bits, least-significant bit first. The availability check is
     done up front so that a failing read leaves the reader untouched. *)
  let[@inline] read_bits t n =
    if n <= 0 then 0
    else if n > 57 then invalid_arg "read_bits: n > 57"
    else if n > remaining t then raise End_of_stream
    else begin
      let result = ref 0 in
      let bits_read = ref 0 in
      while !bits_read < n do
        let byte = Bytes.get_uint8 t.src t.byte_pos in
        (* Take as many bits as the current byte still holds, at most what
           is still needed. *)
        let to_read = min (8 - t.bit_pos) (n - !bits_read) in
        let bits = (byte lsr t.bit_pos) land ((1 lsl to_read) - 1) in
        result := !result lor (bits lsl !bits_read);
        bits_read := !bits_read + to_read;
        t.bit_pos <- t.bit_pos + to_read;
        if t.bit_pos >= 8 then begin
          t.bit_pos <- 0;
          t.byte_pos <- t.byte_pos + 1
        end
      done;
      !result
    end

  let[@inline] read_byte t =
    require_aligned t "read_byte";
    if t.byte_pos >= t.limit then raise End_of_stream;
    let b = Bytes.get_uint8 t.src t.byte_pos in
    t.byte_pos <- t.byte_pos + 1;
    b

  (* Moves the cursor back by [n] bits. The new position must stay inside
     the stream in both directions (a negative [n] seeks forward). *)
  let rewind_bits t n =
    let total_bits = (t.byte_pos - t.start_pos) * 8 + t.bit_pos in
    let new_total = total_bits - n in
    if new_total < 0 || new_total > (t.limit - t.start_pos) * 8 then
      raise End_of_stream;
    t.byte_pos <- t.start_pos + new_total / 8;
    t.bit_pos <- new_total mod 8

  (* Drops the unread bits of a partially consumed byte. *)
  let align t =
    if t.bit_pos <> 0 then begin
      t.bit_pos <- 0;
      t.byte_pos <- t.byte_pos + 1
    end

  (* Absolute index into the underlying buffer. *)
  let byte_position t =
    require_aligned t "byte_position";
    t.byte_pos

  let get_slice t n : Slice.t =
    require_aligned t "get_slice";
    require_bytes t "get_slice" n;
    let result = Slice.make t.src ~first:t.byte_pos ~length:n in
    t.byte_pos <- t.byte_pos + n;
    result

  let get_bytes t n = Slice.to_bytes (get_slice t n)

  (* Remaining data as a slice; does not advance the reader. *)
  let to_slice t : Slice.t =
    require_aligned t "to_slice";
    Slice.make t.src ~first:t.byte_pos ~length:(t.limit - t.byte_pos)

  let advance t n =
    require_aligned t "advance";
    require_bytes t "advance" n;
    t.byte_pos <- t.byte_pos + n

  (* Independent reader over the next [n] bytes; [t] skips past them. *)
  let sub t n =
    require_aligned t "sub";
    require_bytes t "sub" n;
    let result = of_slice (Slice.make t.src ~first:t.byte_pos ~length:n) in
    t.byte_pos <- t.byte_pos + n;
    result

  let remaining_bytes t =
    require_aligned t "remaining_bytes";
    t.limit - t.byte_pos

  (* Pure cursor arithmetic, so any bit count is accepted (not limited to
     the 57 bits a single [read_bits] can return). *)
  let skip_bits t n =
    if n > 0 then begin
      if n > remaining t then raise End_of_stream;
      let total = t.bit_pos + n in
      t.byte_pos <- t.byte_pos + total / 8;
      t.bit_pos <- total mod 8
    end
end

(** {1 Backward Bitstream Reader}

    Reads bits from the end of a buffer towards the start. The stream
    starts with a padding marker (highest 1-bit indicates start of data). *)

module Backward_reader = struct
  type t = {
    src : bytes;
    start_pos : int;
    mutable bit_offset : int;  (* Bits remaining from end, decreasing;
                                  becomes negative after an over-read *)
  }

  let of_slice (slice : Slice.t) =
    if slice.length = 0 then raise End_of_stream;
    let last_byte =
      Bytes.get_uint8 slice.bytes (slice.first + slice.length - 1)
    in
    if last_byte = 0 then raise (Corrupted_stream "invalid padding marker");
    (* The highest set bit of the final byte is the padding marker.
       [last_byte] is non-zero, so the search always terminates. *)
    let rec marker_bit bit =
      if last_byte land (1 lsl bit) <> 0 then bit else marker_bit (bit - 1)
    in
    (* The marker itself and every bit above it are padding. *)
    let padding = 8 - marker_bit 7 in
    { src = slice.bytes;
      start_pos = slice.first;
      bit_offset = slice.length * 8 - padding }

  let of_bytes src ~pos ~len = of_slice (Slice.make src ~first:pos ~length:len)

  let[@inline] remaining t = t.bit_offset

  let[@inline] is_empty t = t.bit_offset <= 0

  (* Reads the [n] bits located just below the current offset. When fewer
     than [n] bits are left, the available bits are returned in the high
     positions of the result and the missing low bits read as zero. *)
  let[@inline] read_bits t n =
    if n <= 0 then 0
    else if n > 57 then invalid_arg "read_bits: n > 57"
    else begin
      t.bit_offset <- t.bit_offset - n;
      (* Number of requested bits that lie before the start of the stream. *)
      let underflow = if t.bit_offset < 0 then - t.bit_offset else 0 in
      let actual_bits = n - underflow in
      if actual_bits <= 0 then 0
      else begin
        let first_bit = t.bit_offset + underflow in
        let current_byte = ref (t.start_pos + first_bit / 8) in
        let current_bit = ref (first_bit mod 8) in
        let result = ref 0 in
        let bits_read = ref 0 in
        while !bits_read < actual_bits do
          let byte = Bytes.get_uint8 t.src !current_byte in
          let to_read = min (8 - !current_bit) (actual_bits - !bits_read) in
          let bits = (byte lsr !current_bit) land ((1 lsl to_read) - 1) in
          result := !result lor (bits lsl !bits_read);
          bits_read := !bits_read + to_read;
          current_bit := !current_bit + to_read;
          if !current_bit >= 8 then begin
            current_bit := 0;
            incr current_byte
          end
        done;
        (* If we read past the beginning, shift the result *)
        !result lsl underflow
      end
    end

  (* Same as [read_bits] but restores the offset afterwards. *)
  let peek_bits t n =
    let saved_offset = t.bit_offset in
    let result = read_bits t n in
    t.bit_offset <- saved_offset;
    result
end

(** {1 Forward Bitstream Writer} *)

module Forward_writer = struct
  type t = {
    dst : bytes;
    start_pos : int;
    limit : int;                  (* index one past the last writable byte *)
    mutable byte_pos : int;
    mutable bit_pos : int;        (* 0-7, bits written in current byte *)
    mutable current_byte : int;   (* partial byte not yet stored in [dst] *)
  }

  let of_slice (slice : Slice.t) =
    { dst = slice.bytes;
      start_pos = slice.first;
      limit = slice.first + slice.length;
      byte_pos = slice.first;
      bit_pos = 0;
      current_byte = 0 }

  let of_bytes dst = of_slice (Slice.of_bytes dst)

  let create dst ~pos =
    of_slice (Slice.make dst ~first:pos ~length:(Bytes.length dst - pos))

  (* Stores one byte at the cursor. Writes are confined to the slice the
     writer was created from, so a writer over a sub-range can never
     clobber the bytes that follow it in the same buffer. *)
  let emit_byte t byte =
    if t.byte_pos >= t.limit then
      invalid_arg "Forward_writer: write past end of slice";
    Bytes.set_uint8 t.dst t.byte_pos byte;
    t.byte_pos <- t.byte_pos + 1

  (* Writes the pending partial byte (unused high bits are zero). *)
  let flush t =
    if t.bit_pos > 0 then begin
      emit_byte t t.current_byte;
      t.bit_pos <- 0;
      t.current_byte <- 0
    end

  (* Writes the low [n] bits of [value], least-significant bit first. *)
  let[@inline] write_bits t value n =
    if n <= 0 then ()
    else if n > 57 then invalid_arg "write_bits: n > 57"
    else begin
      let value = ref value in
      let remaining = ref n in
      while !remaining > 0 do
        let to_write = min (8 - t.bit_pos) !remaining in
        let mask = (1 lsl to_write) - 1 in
        t.current_byte <-
          t.current_byte lor ((!value land mask) lsl t.bit_pos);
        value := !value lsr to_write;
        remaining := !remaining - to_write;
        t.bit_pos <- t.bit_pos + to_write;
        if t.bit_pos = 8 then begin
          emit_byte t t.current_byte;
          t.bit_pos <- 0;
          t.current_byte <- 0
        end
      done
    end

  let write_byte t value =
    if t.bit_pos <> 0 then flush t;
    emit_byte t value

  let write_slice t (slice : Slice.t) =
    if t.bit_pos <> 0 then flush t;
    if slice.length > t.limit - t.byte_pos then
      invalid_arg "Forward_writer: write past end of slice";
    Bytes.blit slice.bytes slice.first t.dst t.byte_pos slice.length;
    t.byte_pos <- t.byte_pos + slice.length

  let write_bytes t src = write_slice t (Slice.of_bytes src)

  (* Absolute index in [dst]; a pending partial byte counts as written. *)
  let byte_position t =
    if t.bit_pos > 0 then t.byte_pos + 1 else t.byte_pos

  let finalize t =
    flush t;
    t.byte_pos - t.start_pos

  let to_slice t : Slice.t =
    flush t;
    Slice.make t.dst ~first:t.start_pos ~length:(t.byte_pos - t.start_pos)
end

(** {1 Backward Bitstream Writer}

    Accumulates bits to be read backwards. Used for FSE and Huffman encoding. *)

module Backward_writer = struct
  type t = {
    mutable bits : int64;     (* pending bits, oldest in the low positions *)
    mutable num_bits : int;   (* number of valid bits in [bits] *)
    buffer : bytes;
    mutable buf_pos : int;    (* flushed bytes occupy [buf_pos, length) *)
  }

  let create size =
    { bits = 0L; num_bits = 0; buffer = Bytes.create size; buf_pos = size }

  (* Moves every complete pending byte into the buffer. Bytes are stored
     from the end of the buffer downwards; [finalize_to_slice] reverses the
     region so that the oldest byte ends up first. *)
  let flush_bytes t =
    while t.num_bits >= 8 do
      if t.buf_pos <= 0 then invalid_arg "Backward_writer: buffer full";
      t.buf_pos <- t.buf_pos - 1;
      Bytes.set_uint8 t.buffer t.buf_pos
        (Int64.to_int (Int64.logand t.bits 0xFFL));
      t.bits <- Int64.shift_right_logical t.bits 8;
      t.num_bits <- t.num_bits - 8
    done

  (* Appends the low [n] bits of [value].

     Complete bytes are flushed first, leaving at most 7 pending bits, and
     at most 56 bits are added per step, so the 64-bit accumulator can
     never overflow no matter how many bits are written in total. [value]
     is masked so that stray high bits cannot leak into later writes. *)
  let rec write_bits t value n =
    if n > 0 then begin
      flush_bytes t;
      let chunk = min n 56 in
      let mask = Int64.pred (Int64.shift_left 1L chunk) in
      let low = Int64.logand (Int64.of_int value) mask in
      t.bits <- Int64.logor t.bits (Int64.shift_left low t.num_bits);
      t.num_bits <- t.num_bits + chunk;
      if n > chunk then write_bits t (value lsr chunk) (n - chunk)
    end

  (* Appends the padding marker (a single 1 bit), zero-pads to a byte
     boundary and returns the stream. Intended to be called once: a second
     call would append another marker and reverse the bytes again. *)
  let finalize_to_slice t : Slice.t =
    write_bits t 1 1;
    (* Round up to a whole byte; the added high bits are already zero. *)
    t.num_bits <- ((t.num_bits + 7) / 8) * 8;
    flush_bytes t;
    let len = Bytes.length t.buffer - t.buf_pos in
    (* Reverse bytes in place so marker ends up at the end *)
    for i = 0 to len / 2 - 1 do
      let j = t.buf_pos + i in
      let k = t.buf_pos + len - 1 - i in
      let tmp = Bytes.get t.buffer j in
      Bytes.set t.buffer j (Bytes.get t.buffer k);
      Bytes.set t.buffer k tmp
    done;
    Slice.make t.buffer ~first:t.buf_pos ~length:len

  let finalize t = Slice.to_bytes (finalize_to_slice t)

  (* Flushed bytes plus the bytes needed for the pending bits; the padding
     marker added by finalization is not included. *)
  let current_size t =
    Bytes.length t.buffer - t.buf_pos + (t.num_bits + 7) / 8
end
+267
src/bitstream.mli
(** Bitstream - Bit-level I/O for binary formats.

    Bit-granular readers and writers for parsing and producing binary
    formats. Two directions are supported: forward (start-to-end) and
    backward (end-to-start), the latter being what several entropy coders
    require.

    {1 Overview}

    {[
      (* Forward reading from a slice (zero-copy) *)
      let slice = { Bitstream.Slice.bytes = data; first = 0; length = n } in
      let r = Bitstream.Forward_reader.of_slice slice in
      let magic = Bitstream.Forward_reader.read_bits r 32 in
      let flags = Bitstream.Forward_reader.read_bits r 8 in

      (* Get remaining data as a slice (zero-copy) *)
      let remaining = Bitstream.Forward_reader.to_slice r in

      (* Backward reading - for FSE/ANS entropy decoding *)
      let r = Bitstream.Backward_reader.of_slice slice in
      let symbol = Bitstream.Backward_reader.read_bits r num_bits
    ]}

    {1 Bytesrw Compatibility}

    {!Slice.t} has the same record shape as [Bytesrw.Bytes.Slice.t], which
    allows zero-copy integration with bytesrw-based streaming. Slices are the
    primary input of the reader and writer constructors.

    {1 Error Handling}

    Errors are reported with exceptions:
    - {!End_of_stream}: reading past the end of the stream
    - {!Invalid_state}: the operation needs a specific state (e.g., byte
      alignment)
    - {!Corrupted_stream}: the stream data is malformed *)

(** {1 Slice Type}

    A slice is a view into a byte buffer, with the same record shape as
    [Bytesrw.Bytes.Slice.t] for zero-copy interop. *)

module Slice : sig
  type t = {
    bytes : bytes;
    first : int;
    length : int;
  }
  (** A view of [length] bytes of [bytes], beginning at index [first].
      Structurally identical to [Bytesrw.Bytes.Slice.t]. *)

  val make : bytes -> first:int -> length:int -> t
  (** [make bytes ~first ~length] builds a slice from its three components. *)

  val of_bytes : ?first:int -> ?length:int -> bytes -> t
  (** [of_bytes bytes] is a slice covering the whole buffer.
      [first] defaults to [0]; [length] defaults to everything from [first]
      to the end of [bytes]. *)

  val to_bytes : t -> bytes
  (** [to_bytes t] is a fresh buffer holding a copy of the slice contents. *)

  val is_empty : t -> bool
  (** [is_empty t] is [true] iff the slice length is zero. *)

  val sub : t -> first:int -> length:int -> t
  (** [sub t ~first ~length] is a sub-slice of [t] sharing the same buffer.
      [first] is an offset from the start of [t]. *)
end

(** {1 Exceptions} *)

exception End_of_stream
(** Raised on an attempt to read past the end of the stream. *)

exception Invalid_state of string
(** Raised when an operation needs a specific state (e.g., byte alignment). *)

exception Corrupted_stream of string
(** Raised when the stream data is malformed (e.g., invalid padding marker). *)

(** {1 Forward Bitstream Reader} *)

module Forward_reader : sig
  (** Forward bitstream reader state. *)
  type t

  val of_slice : Slice.t -> t
  (** [of_slice slice] is a reader positioned at the start of [slice].
      The underlying buffer is shared, not copied. *)

  val of_bytes : bytes -> t
  (** [of_bytes src] is a reader over the whole of [src]. *)

  val create : bytes -> pos:int -> len:int -> t
  (** [create src ~pos ~len] is a reader over the [len] bytes of [src]
      beginning at index [pos]. *)

  val remaining : t -> int
  (** [remaining t] is the number of bits not yet read. *)

  val is_byte_aligned : t -> bool
  (** [is_byte_aligned t] is [true] iff the reader sits on a byte boundary. *)

  val read_bits : t -> int -> int
  (** [read_bits t n] consumes [n] bits (1-57) and returns them in
      little-endian bit order: the first bit read is the least significant
      bit of the result. [n <= 0] yields [0] and consumes nothing.
      @raise End_of_stream if not enough data available.
      @raise Invalid_argument if [n > 57]. *)

  val read_byte : t -> int
  (** [read_byte t] consumes one byte and returns it (0-255).
      @raise Invalid_state if not byte aligned.
      @raise End_of_stream if at end of stream. *)

  val rewind_bits : t -> int -> unit
  (** [rewind_bits t n] moves the read position back by [n] bits.
      @raise End_of_stream if rewinding past the start. *)

  val align : t -> unit
  (** [align t] skips the unread bits of the current byte, if any, so that
      the reader sits on a byte boundary. No effect when already aligned. *)

  val byte_position : t -> int
  (** [byte_position t] is the index, in the underlying buffer, of the next
      byte to read.
      @raise Invalid_state if not byte aligned. *)

  val get_slice : t -> int -> Slice.t
  (** [get_slice t n] consumes the next [n] bytes and returns them as a
      slice sharing the underlying buffer (zero-copy).
      @raise Invalid_state if not byte aligned.
      @raise End_of_stream if not enough data. *)

  val get_bytes : t -> int -> bytes
  (** [get_bytes t n] consumes the next [n] bytes and returns a copy of them.
      Equivalent to [Slice.to_bytes (get_slice t n)].
      @raise Invalid_state if not byte aligned.
      @raise End_of_stream if not enough data. *)

  val to_slice : t -> Slice.t
  (** [to_slice t] is the unread remainder as a slice (zero-copy). The
      reader position is left unchanged.
      @raise Invalid_state if not byte aligned. *)

  val advance : t -> int -> unit
  (** [advance t n] consumes [n] bytes and discards them.
      @raise Invalid_state if not byte aligned.
      @raise End_of_stream if not enough data. *)

  val sub : t -> int -> t
  (** [sub t n] is a new reader over the next [n] bytes; [t] itself is moved
      past those bytes.
      @raise Invalid_state if not byte aligned.
      @raise End_of_stream if not enough data. *)

  val remaining_bytes : t -> int
  (** [remaining_bytes t] is the number of bytes not yet read.
      @raise Invalid_state if not byte aligned. *)

  val skip_bits : t -> int -> unit
  (** [skip_bits t n] consumes [n] bits and discards them.
      @raise End_of_stream if not enough data. *)
end

(** {1 Backward Bitstream Reader}

    Reads bits from the end of a buffer towards the start. The final byte
    of the stream carries a padding marker: its highest 1-bit marks where
    the actual data begins.

    This format is used by FSE and ANS entropy coders. *)

module Backward_reader : sig
  (** Backward bitstream reader state. *)
  type t

  val of_slice : Slice.t -> t
  (** [of_slice slice] is a backward reader over [slice], positioned just
      below the padding marker. The underlying buffer is shared.
      @raise End_of_stream if slice is empty.
      @raise Corrupted_stream if padding marker is invalid. *)

  val of_bytes : bytes -> pos:int -> len:int -> t
  (** [of_bytes src ~pos ~len] is a backward reader over [len] bytes of
      [src]; reading proceeds from index [pos + len - 1] down to [pos].
      @raise End_of_stream if [len = 0].
      @raise Corrupted_stream if padding marker is invalid. *)

  val remaining : t -> int
  (** [remaining t] is the number of bits remaining. *)

  val is_empty : t -> bool
  (** [is_empty t] is [true] iff no more bits are available. *)

  val read_bits : t -> int -> int
  (** [read_bits t n] consumes [n] bits (1-57) and returns them.
      Bits requested beyond the beginning of the stream read as zero.
      [n <= 0] yields [0] and consumes nothing.
      @raise Invalid_argument if [n > 57]. *)

  val peek_bits : t -> int -> int
  (** [peek_bits t n] is what [read_bits t n] would return, but the read
      position is left unchanged.
      @raise Invalid_argument if [n > 57]. *)
end

(** {1 Forward Bitstream Writer} *)

module Forward_writer : sig
  (** Forward bitstream writer state. *)
  type t

  val of_slice : Slice.t -> t
  (** [of_slice slice] is a writer whose output starts at the first byte of
      [slice]. It writes directly into the slice's buffer (zero-copy). *)

  val of_bytes : bytes -> t
  (** [of_bytes dst] is a writer whose output starts at index 0 of [dst]. *)

  val create : bytes -> pos:int -> t
  (** [create dst ~pos] is a writer whose output starts at index [pos] of
      [dst]. *)

  val write_bits : t -> int -> int -> unit
  (** [write_bits t value n] emits the [n] low-order bits (1-57) of [value]
      in little-endian bit order. [n <= 0] writes nothing.
      @raise Invalid_argument if [n > 57]. *)

  val write_byte : t -> int -> unit
  (** [write_byte t value] emits one whole byte. Any pending partial byte is
      flushed first. *)

  val write_slice : t -> Slice.t -> unit
  (** [write_slice t slice] emits the bytes of [slice]. Any pending partial
      byte is flushed first. *)

  val write_bytes : t -> bytes -> unit
  (** [write_bytes t src] emits every byte of [src]. Any pending partial
      byte is flushed first. *)

  val byte_position : t -> int
  (** [byte_position t] is the current output position in the destination
      buffer, counting a pending partial byte as already written. *)

  val flush : t -> unit
  (** [flush t] emits the pending bits, if any, as one partial byte. *)

  val finalize : t -> int
  (** [finalize t] flushes and returns how many bytes have been written in
      total. *)

  val to_slice : t -> Slice.t
  (** [to_slice t] flushes and returns the written data as a slice sharing
      the destination buffer (zero-copy). *)
end

(** {1 Backward Bitstream Writer}

    Accumulates bits into output meant to be read backwards.
    Used for FSE and Huffman encoding. *)

module Backward_writer : sig
  (** Backward bitstream writer state. *)
  type t

  val create : int -> t
  (** [create size] is a writer backed by an internal buffer of [size]
      bytes. *)

  val write_bits : t -> int -> int -> unit
  (** [write_bits t value n] accumulates [n] bits taken from [value].
      [value] is expected to fit in [n] bits. [n <= 0] writes nothing. *)

  val flush_bytes : t -> unit
  (** [flush_bytes t] moves every complete pending byte into the internal
      buffer. *)

  val finalize_to_slice : t -> Slice.t
  (** [finalize_to_slice t] appends the padding marker, flushes, and returns
      the output as a slice sharing the internal buffer (zero-copy). *)

  val finalize : t -> bytes
  (** [finalize t] appends the padding marker, flushes, and returns a copy
      of the output. Equivalent to [Slice.to_bytes (finalize_to_slice t)]. *)

  val current_size : t -> int
  (** [current_size t] is an estimate of the current output size in bytes:
      the bytes flushed so far plus the bytes needed for the pending bits. *)
end
+4
src/dune
··· 1 + (library 2 + (name bitstream) 3 + (public_name bitstream) 4 + (ocamlopt_flags (:standard -O3)))
+3
test/dune
··· 1 + (test 2 + (name test_bitstream) 3 + (libraries bitstream alcotest))
+221
test/test_bitstream.ml
(** Tests for Bitstream library *)

module FR = Bitstream.Forward_reader
module FW = Bitstream.Forward_writer
module BR = Bitstream.Backward_reader
module BW = Bitstream.Backward_writer

(* Shorthand for the Alcotest checkers used throughout. *)
let check_int = Alcotest.(check int)
let check_bool = Alcotest.(check bool)
let check_string = Alcotest.(check string)

(* Backward reader spanning the whole of [data]. *)
let backward_reader_over data =
  BR.of_bytes data ~pos:0 ~len:(Bytes.length data)

(* Byte-wise reads walk the buffer in order and fail once it is exhausted. *)
let test_forward_reader_bytes () =
  let r = FR.of_bytes (Bytes.of_string "\x12\x34\x56\x78") in
  check_int "byte 0" 0x12 (FR.read_byte r);
  check_int "byte 1" 0x34 (FR.read_byte r);
  check_int "byte 2" 0x56 (FR.read_byte r);
  check_int "byte 3" 0x78 (FR.read_byte r);
  (* Reading past end raises End_of_stream *)
  Alcotest.check_raises "past end" Bitstream.End_of_stream (fun () ->
      ignore (FR.read_byte r))

(* 0x12 = 0001_0010, 0x34 = 0011_0100 in little-endian bits:
   Reading 4 bits: 0010 = 2
   Reading 4 bits: 0001 = 1
   Reading 8 bits: 0011_0100 = 0x34 *)
let test_forward_reader_bits () =
  let r = FR.of_bytes (Bytes.of_string "\x12\x34") in
  check_int "4 bits" 0x2 (FR.read_bits r 4);
  check_int "4 bits" 0x1 (FR.read_bits r 4);
  check_int "8 bits" 0x34 (FR.read_bits r 8)

(* Bit reads followed by byte-level accessors on the same reader. *)
let test_forward_reader_mixed () =
  let r = FR.of_bytes (Bytes.of_string "\xFF\x00\xAB\xCD") in
  check_int "16 bits LE" 0x00FF (FR.read_bits r 16);
  check_int "remaining bits" 16 (FR.remaining r);
  check_int "remaining bytes" 2 (FR.remaining_bytes r);
  let rest = FR.get_bytes r 2 in
  check_string "get_bytes" "\xAB\xCD" (Bytes.to_string rest)

(* A 32-bit read assembles the bytes in little-endian order. *)
let test_forward_reader_32bit () =
  let r = FR.of_bytes (Bytes.of_string "\x78\x56\x34\x12") in
  check_int "32 bits LE" 0x12345678 (FR.read_bits r 32)

(* Rewinding 4 of 12 consumed bits lands on the start of byte 1. *)
let test_forward_reader_rewind () =
  let r = FR.of_bytes (Bytes.of_string "\x12\x34\x56") in
  let first = FR.read_bits r 12 in
  (* 0x12 + lower 4 of 0x34 *)
  check_int "first read 12 bits" 0x412 first;
  FR.rewind_bits r 4;
  check_int "remaining after rewind" 16 (FR.remaining r);
  let second = FR.read_bits r 8 in
  (* now at byte 1 *)
  check_int "read after rewind" 0x34 second

(* [align] discards the rest of a partially read byte. *)
let test_forward_reader_align () =
  let r = FR.of_bytes (Bytes.of_string "\xFF\xAA\xBB") in
  ignore (FR.read_bits r 3);
  check_bool "not aligned" false (FR.is_byte_aligned r);
  FR.align r;
  check_bool "aligned" true (FR.is_byte_aligned r);
  check_int "after align" 0xAA (FR.read_byte r)

(* A sub-reader covers the next bytes; the parent resumes after them. *)
let test_forward_reader_sub () =
  let r = FR.of_bytes (Bytes.of_string "\x12\x34\x56\x78\x9A") in
  ignore (FR.read_byte r);
  let sub = FR.sub r 2 in
  check_int "sub byte 0" 0x34 (FR.read_byte sub);
  check_int "sub byte 1" 0x56 (FR.read_byte sub);
  check_int "parent continues" 0x78 (FR.read_byte r)

(* Whole bytes are stored in order and counted by [finalize]. *)
let test_forward_writer_bytes () =
  let buf = Bytes.create 8 in
  let w = FW.of_bytes buf in
  FW.write_byte w 0x12;
  FW.write_byte w 0x34;
  let len = FW.finalize w in
  check_int "length" 2 len;
  check_int "byte 0" 0x12 (Bytes.get_uint8 buf 0);
  check_int "byte 1" 0x34 (Bytes.get_uint8 buf 1)

(* Two nibbles fill one byte, low nibble first. *)
let test_forward_writer_bits () =
  let buf = Bytes.create 8 in
  let w = FW.of_bytes buf in
  FW.write_bits w 0x2 4;  (* lower 4 bits *)
  FW.write_bits w 0x1 4;  (* upper 4 bits *)
  FW.write_bits w 0x34 8;
  let len = FW.finalize w in
  check_int "length" 2 len;
  check_int "byte 0" 0x12 (Bytes.get_uint8 buf 0);
  check_int "byte 1" 0x34 (Bytes.get_uint8 buf 1)

(* A 32-bit write is laid out little-endian. *)
let test_forward_writer_32bit () =
  let buf = Bytes.create 8 in
  let w = FW.of_bytes buf in
  FW.write_bits w 0x12345678 32;
  let len = FW.finalize w in
  check_int "length" 4 len;
  check_int "byte 0" 0x78 (Bytes.get_uint8 buf 0);
  check_int "byte 1" 0x56 (Bytes.get_uint8 buf 1);
  check_int "byte 2" 0x34 (Bytes.get_uint8 buf 2);
  check_int "byte 3" 0x12 (Bytes.get_uint8 buf 3)

(* Write various bit patterns, then read them back *)
let test_forward_roundtrip () =
  let buf = Bytes.create 64 in
  let w = FW.of_bytes buf in
  FW.write_bits w 0b101 3;
  FW.write_bits w 0b11001 5;
  FW.write_bits w 0xABCD 16;
  FW.write_bits w 0b1111 4;
  let len = FW.finalize w in
  let r = FR.create buf ~pos:0 ~len in
  check_int "3 bits" 0b101 (FR.read_bits r 3);
  check_int "5 bits" 0b11001 (FR.read_bits r 5);
  check_int "16 bits" 0xABCD (FR.read_bits r 16);
  check_int "4 bits" 0b1111 (FR.read_bits r 4)

(* Backward streams are read in REVERSE order of writing.
   This matches FSE: encode in reverse order, decode in forward order. *)
let test_backward_roundtrip () =
  let w = BW.create 64 in
  BW.write_bits w 0b101 3;
  BW.write_bits w 0b11001 5;
  BW.write_bits w 0xAB 8;
  let r = backward_reader_over (BW.finalize w) in
  (* Read in REVERSE order (last written = first read) *)
  check_int "8 bits (written last)" 0xAB (BR.read_bits r 8);
  check_int "5 bits" 0b11001 (BR.read_bits r 5);
  check_int "3 bits (written first)" 0b101 (BR.read_bits r 3)

(* For backward streams, bits are read from MSB to LSB within accumulated data.
   0x5A = 0101_1010 binary. Reading 4 bits at a time gives:
   - First 4 bits (high nibble): 0101 = 0x5
   - Last 4 bits (low nibble): 1010 = 0xA *)
let test_backward_reader_peek () =
  let w = BW.create 64 in
  BW.write_bits w 0x5A 8;
  let r = backward_reader_over (BW.finalize w) in
  check_int "peek 4 (high nibble)" 0x5 (BR.peek_bits r 4);
  check_int "peek 4 again" 0x5 (BR.peek_bits r 4);
  check_int "read 4" 0x5 (BR.read_bits r 4);
  check_int "read 4 (low nibble)" 0xA (BR.read_bits r 4)

(* The reader reports empty once every payload bit has been consumed. *)
let test_backward_is_empty () =
  let w = BW.create 64 in
  BW.write_bits w 0xFF 8;
  let r = backward_reader_over (BW.finalize w) in
  check_bool "not empty" false (BR.is_empty r);
  ignore (BR.read_bits r 8);
  check_bool "empty after read" true (BR.is_empty r)

(* Empty stream should raise End_of_stream *)
let test_backward_empty_stream () =
  Alcotest.check_raises "empty stream" Bitstream.End_of_stream (fun () ->
      ignore (BR.of_bytes Bytes.empty ~pos:0 ~len:0))

(* Zero byte has no padding marker - should raise *)
let test_backward_invalid_padding () =
  let data = Bytes.of_string "\x00" in
  Alcotest.check_raises "zero padding"
    (Bitstream.Corrupted_stream "invalid padding marker")
    (fun () -> ignore (BR.of_bytes data ~pos:0 ~len:1))

(* Zero-width writes and reads are no-ops. *)
let test_edge_cases () =
  (* Zero bits *)
  let w = FW.of_bytes (Bytes.create 8) in
  FW.write_bits w 0 0;
  check_int "zero bits" 0 (FW.finalize w);
  (* Read zero bits *)
  let r = FR.of_bytes (Bytes.of_string "\xFF") in
  check_int "read 0 bits" 0 (FR.read_bits r 0);
  check_int "byte still available" 0xFF (FR.read_byte r)

(* Byte-oriented accessors refuse to run in the middle of a byte. *)
let test_not_aligned_errors () =
  let r = FR.of_bytes (Bytes.of_string "\xFF\xAA") in
  ignore (FR.read_bits r 3);
  Alcotest.check_raises "read_byte not aligned"
    (Bitstream.Invalid_state "read_byte: not byte aligned")
    (fun () -> ignore (FR.read_byte r));
  Alcotest.check_raises "byte_position not aligned"
    (Bitstream.Invalid_state "byte_position: not byte aligned")
    (fun () -> ignore (FR.byte_position r));
  Alcotest.check_raises "remaining_bytes not aligned"
    (Bitstream.Invalid_state "remaining_bytes: not byte aligned")
    (fun () -> ignore (FR.remaining_bytes r))

(* Every case is registered as a quick test, in declaration order. *)
let tests =
  List.map
    (fun (name, run) -> (name, `Quick, run))
    [
      ("forward reader bytes", test_forward_reader_bytes);
      ("forward reader bits", test_forward_reader_bits);
      ("forward reader mixed", test_forward_reader_mixed);
      ("forward reader 32bit", test_forward_reader_32bit);
      ("forward reader rewind", test_forward_reader_rewind);
      ("forward reader align", test_forward_reader_align);
      ("forward reader sub", test_forward_reader_sub);
      ("forward writer bytes", test_forward_writer_bytes);
      ("forward writer bits", test_forward_writer_bits);
      ("forward writer 32bit", test_forward_writer_32bit);
      ("forward roundtrip", test_forward_roundtrip);
      ("backward roundtrip", test_backward_roundtrip);
      ("backward reader peek", test_backward_reader_peek);
      ("backward is_empty", test_backward_is_empty);
      ("backward empty stream", test_backward_empty_stream);
      ("backward invalid padding", test_backward_invalid_padding);
      ("edge cases", test_edge_cases);
      ("not aligned errors", test_not_aligned_errors);
    ]

let () = Alcotest.run "Bitstream" [ ("bitstream", tests) ]