···84 (match v with `Malformed _ -> input_malformed := true | _ -> ());
85 (pp_decode inf d) Format.std_formatter v
8687-88let dump_ inf encoding nln src =
89 let rec loop inf d = match Uutf.decode d with `Await -> assert false
90 | v ->
···182(* Trip *)
183184let trip_ inf nln ie oe src dst =
185- let malformed = log_malformed inf in
00186 let rec loop d e = function `Await -> assert false
187 | `Uchar _ as v -> ignore (Uutf.encode e v); loop d e (Uutf.decode d)
188 | `End -> ignore (Uutf.encode e `End)
189- | `Malformed _ as v -> malformed d v; loop d e (Uutf.decode d)
190 in
191 let d = Uutf.decoder ?nln ?encoding:ie src in
192 let e, first = match oe with
···203 loop d e first; close_src src
204205let trip_unix inf usize nln ie oe fdi fdo =
206- let malformed = log_malformed inf in
00207 let rec loop fdi fdo ds es d e = function
208 | `Uchar _ as v ->
209 encode_unix fdo es e v; loop fdi fdo ds es d e (Uutf.decode d)
210 | `End -> encode_unix fdo es e `End
211- | `Malformed _ as v -> malformed d v; loop fdi fdo ds es d e (Uutf.decode d)
212 | `Await ->
213 let rc = unix_read fdi ds 0 (String.length ds) in
214 Uutf.Manual.src d ds 0 rc; loop fdi fdo ds es d e (Uutf.decode d)
···331 Arg.(value & pos 0 string "-" & info [] ~doc ~docv:"FILE")
332333let cmd =
334- let doc = "Output the input text as Unicode scalar values, one per line,
335- in the US-ASCII charset with their position
336- (see POSITION INFORMATION for more details)."
337 in
338 let ascii = `Ascii, Arg.info ["a"; "ascii"] ~doc in
339- let doc = "Only guess the encoding." in
00340 let guess = `Guess, Arg.info ["g"; "guess"] ~doc in
341 let doc = "Decode only, no encoding." in
342 let dec = `Decode, Arg.info ["decode"] ~doc in
···352 to stdout in various ways. If no input encoding is specified,
353 it is guessed. If no output encoding is specified, the input
354 encoding is used.";
00355 `S "POSITION INFORMATION";
356 `P "The format for position information is:";
357 `P "filename:line.col:(count,byte)";
···84 (match v with `Malformed _ -> input_malformed := true | _ -> ());
85 (pp_decode inf d) Format.std_formatter v
86087let dump_ inf encoding nln src =
88 let rec loop inf d = match Uutf.decode d with `Await -> assert false
89 | v ->
···181(* Trip *)
182183let trip_ inf nln ie oe src dst =
184+ let malformed d v e =
185+ log_malformed inf d v; ignore (Uutf.encode e (`Uchar Uutf.u_rep))
186+ in
187 let rec loop d e = function `Await -> assert false
188 | `Uchar _ as v -> ignore (Uutf.encode e v); loop d e (Uutf.decode d)
189 | `End -> ignore (Uutf.encode e `End)
190+ | `Malformed _ as v -> malformed d v e; loop d e (Uutf.decode d)
191 in
192 let d = Uutf.decoder ?nln ?encoding:ie src in
193 let e, first = match oe with
···204 loop d e first; close_src src
205206let trip_unix inf usize nln ie oe fdi fdo =
207+ let malformed d v e =
208+ log_malformed inf d v; ignore (Uutf.encode e (`Uchar Uutf.u_rep))
209+ in
210 let rec loop fdi fdo ds es d e = function
211 | `Uchar _ as v ->
212 encode_unix fdo es e v; loop fdi fdo ds es d e (Uutf.decode d)
213 | `End -> encode_unix fdo es e `End
214+ | `Malformed _ as v -> malformed d v e; loop fdi fdo ds es d e (Uutf.decode d)
215 | `Await ->
216 let rc = unix_read fdi ds 0 (String.length ds) in
217 Uutf.Manual.src d ds 0 rc; loop fdi fdo ds es d e (Uutf.decode d)
···334 Arg.(value & pos 0 string "-" & info [] ~doc ~docv:"FILE")
335336let cmd =
337+ let doc = "Output the input text as Unicode scalar values or malformed
338+ sequences, one per line, in the US-ASCII charset with their
339+ position (see POSITION INFORMATION for more details)."
340 in
341 let ascii = `Ascii, Arg.info ["a"; "ascii"] ~doc in
342+ let doc = "Only guess an UTF encoding. The result of a guess can only be
343+ UTF-8 or UTF-16{LE,BE}."
344+ in
345 let guess = `Guess, Arg.info ["g"; "guess"] ~doc in
346 let doc = "Decode only, no encoding." in
347 let dec = `Decode, Arg.info ["decode"] ~doc in
···357 to stdout in various ways. If no input encoding is specified,
358 it is guessed. If no output encoding is specified, the input
359 encoding is used.";
360+ `P "Invalid byte sequences in the input are reported on stderr and
361+ replaced by the Unicode replacement character (U+FFFD) in the output.";
362 `S "POSITION INFORMATION";
363 `P "The format for position information is:";
364 `P "filename:line.col:(count,byte)";