···8484 (match v with `Malformed _ -> input_malformed := true | _ -> ());
8585 (pp_decode inf d) Format.std_formatter v
86868787-8887let dump_ inf encoding nln src =
8988 let rec loop inf d = match Uutf.decode d with `Await -> assert false
9089 | v ->
···182181(* Trip *)
183182184183let trip_ inf nln ie oe src dst =
185185- let malformed = log_malformed inf in
184184+ let malformed d v e =
185185+ log_malformed inf d v; ignore (Uutf.encode e (`Uchar Uutf.u_rep))
186186+ in
186187 let rec loop d e = function `Await -> assert false
187188 | `Uchar _ as v -> ignore (Uutf.encode e v); loop d e (Uutf.decode d)
188189 | `End -> ignore (Uutf.encode e `End)
189189- | `Malformed _ as v -> malformed d v; loop d e (Uutf.decode d)
190190+ | `Malformed _ as v -> malformed d v e; loop d e (Uutf.decode d)
190191 in
191192 let d = Uutf.decoder ?nln ?encoding:ie src in
192193 let e, first = match oe with
···203204 loop d e first; close_src src
204205205206let trip_unix inf usize nln ie oe fdi fdo =
206206- let malformed = log_malformed inf in
207207+ let malformed d v e =
208208+ log_malformed inf d v; ignore (Uutf.encode e (`Uchar Uutf.u_rep))
209209+ in
207210 let rec loop fdi fdo ds es d e = function
208211 | `Uchar _ as v ->
209212 encode_unix fdo es e v; loop fdi fdo ds es d e (Uutf.decode d)
210213 | `End -> encode_unix fdo es e `End
211211- | `Malformed _ as v -> malformed d v; loop fdi fdo ds es d e (Uutf.decode d)
214214+ | `Malformed _ as v -> malformed d v e; loop fdi fdo ds es d e (Uutf.decode d)
212215 | `Await ->
213216 let rc = unix_read fdi ds 0 (String.length ds) in
214217 Uutf.Manual.src d ds 0 rc; loop fdi fdo ds es d e (Uutf.decode d)
···331334 Arg.(value & pos 0 string "-" & info [] ~doc ~docv:"FILE")
332335333336let cmd =
334334- let doc = "Output the input text as Unicode scalar values, one per line,
335335- in the US-ASCII charset with their position
336336- (see POSITION INFORMATION for more details)."
337337+ let doc = "Output the input text as Unicode scalar values or malformed
338338+ sequences, one per line, in the US-ASCII charset with their
339339+ position (see POSITION INFORMATION for more details)."
337340 in
338341 let ascii = `Ascii, Arg.info ["a"; "ascii"] ~doc in
339339- let doc = "Only guess the encoding." in
342342+ let doc = "Only guess an UTF encoding. The result of a guess can only be
343343+ UTF-8 or UTF-16{LE,BE}."
344344+ in
340345 let guess = `Guess, Arg.info ["g"; "guess"] ~doc in
341346 let doc = "Decode only, no encoding." in
342347 let dec = `Decode, Arg.info ["decode"] ~doc in
···352357 to stdout in various ways. If no input encoding is specified,
353358 it is guessed. If no output encoding is specified, the input
354359 encoding is used.";
360360+ `P "Invalid byte sequences in the input are reported on stderr and
361361+ replaced by the Unicode replacement character (U+FFFD) in the output.";
355362 `S "POSITION INFORMATION";
356363 `P "The format for position information is:";
357364 `P "filename:line.col:(count,byte)";