···11-(* The [Lexing] module keeps track of only byte offsets into the input. To get
22- line/column locations, the lexer usually has to call [Lexing.new_line] on
33- every newline character.
11+(* odoc uses an ocamllex lexer. The "engine" for such lexers is the standard
22+ [Lexing] module.
33+44+ As the [Lexing] module reads the input, it keeps track of only the byte
55+ offset into the input. It is normally the job of each particular lexer
66+ implementation to decide which character sequences count as newlines, and
77+ keep track of line/column locations. This is usually done by writing several
88+ extra regular expressions, and calling [Lexing.new_line] at the right time.
99+1010+ Keeping track of newlines like this makes the odoc lexer somewhat too
1111+ difficult to read, however. To factor the aspect of keeping track of newlines
1212+ fully out of the odoc lexer, instead of having it keep track of newlines as
1313+ it's scanning the input, the input is pre-scanned before feeding it into the
1414+ lexer. A table of all the newlines is assembled, and used to convert offsets
1515+ into line/column pairs after the lexer emits tokens.
41655- However, to keep the odoc lexer simple, it doesn't do that. Instead, this
66- function is given the input string, and it returns a function which converts
77- absolute offsets into the input into line/byte offset within line pairs. *)
88-let make_offset_to_location_function
99- : string -> (int -> Model.Location_.point) = fun s ->
1717+ [offset_to_location ~input ~comment_location offset] converts the byte
1818+ [offset], relative to the beginning of a comment, into a location, relative
1919+ to the beginning of the file containing the comment. [input] is the comment
2020+ text, and [comment_location] is the location of the comment within its file.
2121+ The function is meant to be partially applied to its first two arguments, at
2222+ which point it creates the table described above. The remaining function is
2323+ then passed to the lexer, so it can apply the table to its emitted tokens. *)
2424+let offset_to_location
2525+ : input:string -> comment_location:Lexing.position ->
2626+ (int -> Model.Location_.point) =
2727+ fun ~input ~comment_location ->
10281129 let rec find_newlines line_number input_index newlines_accumulator =
1212- if input_index >= String.length s then
3030+ if input_index >= String.length input then
1331 newlines_accumulator
1432 else
1515- if s.[input_index] = '\n' then
3333+ (* This is good enough to detect CR-LF also. *)
3434+ if input.[input_index] = '\n' then
1635 find_newlines
1736 (line_number + 1) (input_index + 1)
1837 ((line_number + 1, input_index + 1)::newlines_accumulator)
···2342 let reversed_newlines : (int * int) list =
2443 find_newlines 1 0 [(1, 0)] in
25442626- fun absolute_offset ->
4545+ fun byte_offset ->
2746 let rec scan_to_last_newline reversed_newlines_prefix =
2847 match reversed_newlines_prefix with
2948 | [] ->
3049 assert false
3131- | (line_number, line_start_offset)::prefix ->
3232- if line_start_offset <= absolute_offset then
3333- {
3434- Model.Location_.line = line_number;
3535- column = absolute_offset - line_start_offset
3636- }
5050+ | (line_in_comment, line_start_offset)::prefix ->
5151+ if line_start_offset > byte_offset then
5252+ scan_to_last_newline prefix
3753 else
3838- scan_to_last_newline prefix
5454+ let column_in_comment = byte_offset - line_start_offset in
5555+ let line_in_file =
5656+ line_in_comment + comment_location.Lexing.pos_lnum - 1 in
5757+ let column_in_file =
5858+ if line_in_comment = 1 then
5959+ column_in_comment +
6060+ comment_location.Lexing.pos_cnum -
6161+ comment_location.Lexing.pos_bol
6262+ else
6363+ column_in_comment
6464+ in
6565+ {Model.Location_.line = line_in_file; column = column_in_file}
3966 in
4067 scan_to_last_newline reversed_newlines
4168···4471let parse_comment
4572 ~permissive ~sections_allowed ~containing_definition ~location ~text =
46734747- (* Converts byte offsets into the comment to line, column pairs, which are
4848- relative to the start of the file that contains the comment. *)
4949- let offset_to_location =
5050- let offset_to_location_relative_to_start_of_comment =
5151- lazy (make_offset_to_location_function text) in
5252-5353- let offset_to_location_relative_to_start_of_file offset =
5454- let in_comment =
5555- (Lazy.force offset_to_location_relative_to_start_of_comment) offset in
5656-5757- let line_in_file = in_comment.line + location.Lexing.pos_lnum - 1 in
5858- let offset_in_line =
5959- if in_comment.line = 1 then
6060- in_comment.column + location.Lexing.pos_cnum - location.Lexing.pos_bol
6161- else
6262- in_comment.column
6363- in
6464-6565- {Model.Location_.line = line_in_file; column = offset_in_line}
6666- in
6767-6868- offset_to_location_relative_to_start_of_file
6969- in
7070-7174 let token_stream =
7275 let lexbuf = Lexing.from_string text in
7676+ let offset_to_location =
7777+ offset_to_location ~input:text ~comment_location:location in
7378 let input : Lexer.input =
7479 {
7580 file = location.Lexing.pos_fname;