type order = LSB | MSB type error = Invalid_code of int | Unexpected_eof | Buffer_overflow let error_to_string = function | Invalid_code c -> Printf.sprintf "invalid LZW code: %d" c | Unexpected_eof -> "unexpected end of compressed data" | Buffer_overflow -> "decompressed data too large" let clear_code = 256 let eof_code = 257 let initial_dict_size = 258 let max_code_bits = 12 let max_dict_size = 1 lsl max_code_bits type bit_reader = { data : Cstruct.t; mutable pos : int; mutable bits_buf : int; mutable bits_count : int; } let make_bit_reader data = { data; pos = 0; bits_buf = 0; bits_count = 0 } let read_bits_lsb reader n = while reader.bits_count < n do if reader.pos >= Cstruct.length reader.data then raise Exit else begin let byte = Cstruct.get_uint8 reader.data reader.pos in reader.bits_buf <- reader.bits_buf lor (byte lsl reader.bits_count); reader.bits_count <- reader.bits_count + 8; reader.pos <- reader.pos + 1 end done; let result = reader.bits_buf land ((1 lsl n) - 1) in reader.bits_buf <- reader.bits_buf lsr n; reader.bits_count <- reader.bits_count - n; result let decompress_to_buffer ~src ~dst = try let reader = make_bit_reader src in let out_pos = ref 0 in let dst_len = Cstruct.length dst in let dict = Array.make max_dict_size (Cstruct.empty, 0) in for i = 0 to 255 do dict.(i) <- (Cstruct.of_string (String.make 1 (Char.chr i)), 1) done; dict.(clear_code) <- (Cstruct.empty, 0); dict.(eof_code) <- (Cstruct.empty, 0); let dict_size = ref initial_dict_size in let code_bits = ref 9 in let prev_code = ref (-1) in let write_entry (entry, len) = if !out_pos + len > dst_len then raise (Failure "overflow"); Cstruct.blit entry 0 dst !out_pos len; out_pos := !out_pos + len in let add_to_dict first_byte = if !dict_size < max_dict_size && !prev_code >= 0 then begin let prev_entry, prev_len = dict.(!prev_code) in let new_entry = Cstruct.create (prev_len + 1) in Cstruct.blit prev_entry 0 new_entry 0 prev_len; Cstruct.set_uint8 new_entry prev_len first_byte; dict.(!dict_size) <- (new_entry, prev_len + 1); incr dict_size; if !dict_size >= 1 lsl !code_bits && !code_bits < max_code_bits then incr code_bits end in let reset_dict () = dict_size := initial_dict_size; code_bits := 9; prev_code := -1 in let rec decode_loop () = let code = read_bits_lsb reader !code_bits in if code = eof_code then () else if code = clear_code then begin reset_dict (); decode_loop () end else begin let entry, len, first_byte = if code < !dict_size then let e, l = dict.(code) in (e, l, Cstruct.get_uint8 e 0) else if code = !dict_size && !prev_code >= 0 then ( let prev_entry, prev_len = dict.(!prev_code) in let first = Cstruct.get_uint8 prev_entry 0 in let new_entry = Cstruct.create (prev_len + 1) in Cstruct.blit prev_entry 0 new_entry 0 prev_len; Cstruct.set_uint8 new_entry prev_len first; (new_entry, prev_len + 1, first)) else raise (Failure "invalid") in write_entry (entry, len); add_to_dict first_byte; prev_code := code; decode_loop () end in decode_loop (); Ok !out_pos with | Exit -> Error Unexpected_eof | Failure msg when msg = "overflow" -> Error Buffer_overflow | Failure msg when msg = "invalid" -> Error (Invalid_code 0) | _ -> Error (Invalid_code 0) let decompress_cstruct src = let estimated_size = max (Cstruct.length src * 4) 4096 in let dst = Cstruct.create estimated_size in match decompress_to_buffer ~src ~dst with | Ok len -> Ok (Cstruct.sub dst 0 len) | Error Buffer_overflow -> ( let larger = Cstruct.create (estimated_size * 4) in match decompress_to_buffer ~src ~dst:larger with | Ok len -> Ok (Cstruct.sub larger 0 len) | Error e -> Error e) | Error e -> Error e let decompress ?(order = LSB) ?(lit_width = 8) data = if order <> LSB then Error (Invalid_code 0) else if lit_width <> 8 then Error (Invalid_code 0) else let src = Cstruct.of_string data in match decompress_cstruct src with | Ok cs -> Ok (Cstruct.to_string cs) | Error e -> Error e let decompress_lsb8 data = decompress ~order:LSB ~lit_width:8 data