type order = LSB | MSB type error = Invalid_code of int | Unexpected_eof | Buffer_overflow let error_to_string = function | Invalid_code c -> Printf.sprintf "invalid LZW code: %d" c | Unexpected_eof -> "unexpected end of compressed data" | Buffer_overflow -> "decompressed data too large" let clear_code = 256 let eof_code = 257 let initial_dict_size = 258 let max_code_bits = 12 let max_dict_size = 1 lsl max_code_bits type bit_reader = { data : string; mutable pos : int; mutable bit_pos : int; mutable bits_buf : int; mutable bits_count : int; } let make_bit_reader data = { data; pos = 0; bit_pos = 0; bits_buf = 0; bits_count = 0 } let read_bits_lsb reader n = while reader.bits_count < n do if reader.pos >= String.length reader.data then raise (Failure "eof") else begin let byte = Char.code reader.data.[reader.pos] in reader.bits_buf <- reader.bits_buf lor (byte lsl reader.bits_count); reader.bits_count <- reader.bits_count + 8; reader.pos <- reader.pos + 1 end done; let result = reader.bits_buf land ((1 lsl n) - 1) in reader.bits_buf <- reader.bits_buf lsr n; reader.bits_count <- reader.bits_count - n; result let decompress ?(order = LSB) ?(lit_width = 8) data = if order <> LSB then Error (Invalid_code 0) else if lit_width <> 8 then Error (Invalid_code 0) else try let reader = make_bit_reader data in let output = Buffer.create (String.length data * 2) in let dict = Array.make max_dict_size "" in for i = 0 to 255 do dict.(i) <- String.make 1 (Char.chr i) done; dict.(clear_code) <- ""; dict.(eof_code) <- ""; let dict_size = ref initial_dict_size in let code_bits = ref 9 in let prev_string = ref "" in let add_to_dict s = if !dict_size < max_dict_size then begin dict.(!dict_size) <- s; incr dict_size; if !dict_size >= 1 lsl !code_bits && !code_bits < max_code_bits then incr code_bits end in let reset_dict () = dict_size := initial_dict_size; code_bits := 9; prev_string := "" in let rec decode_loop () = let code = read_bits_lsb reader !code_bits in if code = eof_code then () else if code = clear_code then begin reset_dict (); decode_loop () end else begin let current_string = if code < !dict_size then dict.(code) else if code = !dict_size then !prev_string ^ String.make 1 !prev_string.[0] else raise (Failure (Printf.sprintf "invalid code %d >= %d" code !dict_size)) in Buffer.add_string output current_string; if !prev_string <> "" then add_to_dict (!prev_string ^ String.make 1 current_string.[0]); prev_string := current_string; decode_loop () end in decode_loop (); Ok (Buffer.contents output) with | Failure msg when msg = "eof" -> Error Unexpected_eof | Failure msg when String.length msg > 12 && String.sub msg 0 12 = "invalid code" -> Error (Invalid_code 0) | _ -> Error (Invalid_code 0) let decompress_lsb8 data = decompress ~order:LSB ~lit_width:8 data