The unpac monorepo manager self-hosting as a monorepo using unpac

Return sentinel value on all parsing errors

Completely reimplements the best-effort parsing mode. The previous
attempt failed if a lexing or parsing error occurred before the
opam-version item had been fully parsed.

This revised version begins by reading three tokens from the lexer
directly to parse the opam-version, if it's present. It _then_ calls the
ocamlyacc parser (feeding the three tokens back to it).

If the opam-version parsed is greater than the library's internal
version, then _any_ exception causes just the parsed opam-version header
to be returned (which is sufficient for the client to display that the
file is newer than itself).

Finally, in order to permit, for example, opam 2.2 to have new fields
but no new lexer or parser, exceptions cause the opam-version variable
to be returned together with a sentinel group of kind `#`, which the
client can use to determine that a parsing/lexing error occurred (and
where).

+180 -99
+2 -1
vendor/opam/opam-file-format/CHANGES
··· 3 3 * opam-version: "2.1" must appear at most once and as the first non-comment 4 4 item. If opam-version is at the start and is greater than the library version, 5 5 `OpamLexer.Error` and `Parsing.Parse_error` are no longer raised; instead the 6 - items parsed so far are returned. [#43 @dra27] 6 + opam-version variable is returned along with an invalid group to signal the 7 + parsing error, giving the client enough information to act. [#43, #44 @dra27] 7 8 8 9 2.1.2 [07 Jan 2021] 9 10 * Some hash-consing for strings [#27 @AltGr]
+127 -87
vendor/opam/opam-file-format/src/opamBaseParser.mly
··· 15 15 16 16 (** Opam config file generic type parser *) 17 17 18 - let get_pos_full ?(s=1) n = 19 - let spos = Parsing.rhs_start_pos s in 20 - let epos = Parsing.rhs_end_pos n in 18 + let pos_of_lexing_pos spos epos = 21 19 Lexing.({ 22 - filename = spos.pos_fname; 23 - start = spos.pos_lnum, spos.pos_cnum - spos.pos_bol; 24 - stop = epos.pos_lnum, epos.pos_cnum - epos.pos_bol; 25 - }) 20 + filename = spos.pos_fname; 21 + start = spos.pos_lnum, spos.pos_cnum - spos.pos_bol; 22 + stop = epos.pos_lnum, epos.pos_cnum - epos.pos_bol; 23 + }) 24 + 25 + let get_pos_full ?(s=1) n = 26 + pos_of_lexing_pos (Parsing.rhs_start_pos s) (Parsing.rhs_end_pos n) 26 27 27 28 let get_pos n = get_pos_full ~s:n n 28 - 29 - let parsed_so_far = ref [] 30 - 31 - let record_token t = 32 - parsed_so_far := t :: !parsed_so_far; t 33 29 34 30 (* This must match up with the package's version; checked by the build system *) 35 31 let version = (2, 1) ··· 69 65 %% 70 66 71 67 main: 72 - | items EOF { parsed_so_far := []; fun file_name -> 68 + | items EOF { fun file_name -> 73 69 { file_contents = $1; file_name } } 74 70 ; 75 71 ··· 80 76 81 77 item: 82 78 | IDENT COLON value { 83 - record_token 84 79 { pos = get_pos_full 3; 85 80 pelem = 86 81 Variable ({ pos = get_pos 1; pelem = $1 }, $3); 87 82 } 88 83 } 89 84 | IDENT LBRACE items RBRACE { 90 - record_token 91 85 { pos = get_pos_full 4; 92 86 pelem = 93 87 Section ({section_kind = { pos = get_pos 1; pelem = $1 }; ··· 98 92 } 99 93 } 100 94 | IDENT STRING LBRACE items RBRACE { 101 - record_token 102 95 { pos = get_pos_full 4; 103 96 pelem = 104 97 Section ({section_kind = { pos = get_pos 1; pelem = $1 }; ··· 165 158 Parsing.clear_parser (); 166 159 raise e 167 160 168 - exception Nothing 169 - 170 - let reset_lexbuf l file_name (start_line, start_col) (end_line, end_col) = 161 + (* Update a lexbuf with position information prior to raising an exception *) 162 + let reset_lexbuf_and_abort l file_name (start_line, start_col) (end_line, 
end_col) exn = 171 163 let open Lexing in 172 164 l.lex_start_p <- {pos_fname = file_name; pos_lnum = start_line; pos_bol = 0; pos_cnum = start_col}; 173 165 l.lex_curr_p <- {pos_fname = file_name; pos_lnum = end_line; pos_bol = 0; pos_cnum = end_col}; 174 - true 166 + exn () 175 167 176 - let main t l file_name = 177 - (* Always return a result from parsing/lexing, but note if an exception 178 - occurred. *) 179 - let parsing_exception = ref Lexing.(Nothing, dummy_pos, dummy_pos) in 180 - let raise_if_parsing_failed = function 181 - | false -> () 182 - | true -> 183 - match parsing_exception with 184 - | {contents = (Nothing, _, _)} -> () 185 - | {contents = (e, start, curr)} -> 186 - let open Lexing in 187 - l.lex_start_p <- start; 188 - l.lex_curr_p <- curr; 189 - raise e 168 + (* cf. OpamStd.fatal - always allow standard exceptions to propagate. *) 169 + let not_fatal = function 170 + | Sys.Break 171 + | Assert_failure _ 172 + | Match_failure _ -> false 173 + | _ -> true 174 + 175 + (* Wrap the ocamlyacc parser *) 176 + let main lexer lexbuf file_name = 177 + (* Extract the exceptions for opam-version not at the top of the file and 178 + opam-version duplicated. OpamLexer has special cases for these two 179 + constants. If OpamLexer.token isn't used, raise Parse_error instead. *) 180 + let exn_not_first () = 181 + let _ = lexer (Lexing.from_string "version: \"42\"\nopam-version: \"2.1\"") in 182 + raise Parsing.Parse_error 183 + and exn_duplicate () = 184 + let _ = lexer (Lexing.from_string "opam-version: \"2.1\"\nopam-version: \"z\"") in 185 + raise Parsing.Parse_error 186 + and restore_pos (start, curr) = 187 + let open Lexing in 188 + lexbuf.lex_start_p <- start; 189 + lexbuf.lex_curr_p <- curr 190 + in 191 + (* Raises the exn_not_first or exn_duplicate exceptions if an invalid 192 + opam-version variable is found in the result. 
*) 193 + let scan_opam_version_variable format_2_1_or_greater = function 194 + | {pelem = Variable({pelem = "opam-version"; _}, {pelem = String ver; _}); pos = {start; stop; _}} -> 195 + if format_2_1_or_greater then 196 + (* [opam-version] can only appear once for 2.1+ *) 197 + reset_lexbuf_and_abort lexbuf file_name start stop exn_duplicate 198 + else if nopatch ver > (2, 0) then 199 + (* Only [opam-version: "2.0"] can appear after the first non-comment/whitespace line of the file *) 200 + reset_lexbuf_and_abort lexbuf file_name start stop exn_not_first 201 + else 202 + () 203 + | _ -> () 190 204 in 191 - let t l = 192 - try t l 205 + (* Now parse the header of the file manually. The smallest valid opam file 206 + is `ident: atom`, so if we can't read three tokens we have a parse error *) 207 + let ((((_, p0) as initial_pos), ((_, p1) as pos1), ((_, p2) as pos2), ((_, p3) as pos3)), (t1, t2, t3)) = 208 + let open Lexing in 209 + try 210 + let p0 = lexbuf.lex_start_p, lexbuf.lex_curr_p in 211 + let t1 = lexer lexbuf in 212 + let p1 = lexbuf.lex_start_p, lexbuf.lex_curr_p in 213 + let t2 = lexer lexbuf in 214 + let p2 = lexbuf.lex_start_p, lexbuf.lex_curr_p in 215 + let t3 = lexer lexbuf in 216 + let p3 = lexbuf.lex_start_p, lexbuf.lex_curr_p in 217 + ((p0, p1, p2, p3), (t1, t2, t3)) 193 218 with 194 - | Sys.Break 195 - | Assert_failure _ 196 - | Match_failure _ as e -> raise e 197 - | e -> parsing_exception := Lexing.(e, l.lex_start_p, l.lex_curr_p); EOF 219 + | e when not_fatal e -> raise Parsing.Parse_error 198 220 in 199 - let r = 200 - try with_clear_parser (main t l) file_name 201 - with Parsing.Parse_error as e -> 202 - parsing_exception := Lexing.(e, l.lex_start_p, l.lex_curr_p); 203 - (* Record the tokens captured so far *) 204 - let r = {file_contents = List.rev !parsed_so_far; file_name} in 205 - parsed_so_far := []; 206 - r 207 - in 208 - match r with 209 - | {file_contents = {pelem = Variable({pelem = "opam-version"; _}, {pelem = String ver; _}); 
_}::items; _} 210 - when nopatch ver >= (2, 1) -> 211 - let opam_version_variable = function 212 - | {pelem = Variable({pelem = "opam-version"; _}, _); pos = {start; stop; _}} -> 213 - reset_lexbuf l file_name start stop 214 - | _ -> false 221 + (* Parse those three tokens if they are [opam-version: ver] *) 222 + let (header, format_2_1_or_greater, trap_exceptions) = 223 + match (t1, t2, t3) with 224 + | (IDENT "opam-version", COLON, STRING ver) -> 225 + let header = 226 + (* Parsing or lexing errors immediate following opam-version may cause 227 + an exception to be raised before the element has been fully parsed. 228 + In this case, we generate a single opam-version Variable to return. 229 + *) 230 + {pelem = Variable({pelem = "opam-version"; pos = pos_of_lexing_pos p0 p1}, 231 + {pelem = String ver; pos = pos_of_lexing_pos p2 p3}); 232 + pos = pos_of_lexing_pos p0 p3} 233 + in 234 + (header, (nopatch ver >= (2, 1)), (nopatch ver > version)) 235 + | _ -> 236 + (* Default is [opam-version: "2.0"] *) 237 + let pos = {filename = ""; start = (0, 0); stop = (0, 0)} in 238 + ({pelem = Variable ({pelem = ""; pos}, {pelem = Int 42; pos}); pos}, false, false) 239 + in 240 + (* The parser will use position information from the lexbuf, so replay the 241 + positions, even if we're not actually reading anything. *) 242 + restore_pos initial_pos; 243 + (* Wrap the lexer to simulate reading those three tokens a second time *) 244 + let lexer = 245 + let tokens = ref [t1, pos1; t2, pos2; t3, pos3] in 246 + fun lexbuf -> 247 + match tokens with 248 + | {contents = (t, p)::rest} -> 249 + tokens := rest; 250 + restore_pos p; 251 + t 252 + | {contents = []} -> 253 + lexer lexbuf 254 + in 255 + let result = 256 + try with_clear_parser (main lexer lexbuf) file_name 257 + with e when trap_exceptions && not_fatal e -> 258 + (* Append a syntactically invalid sentinel section "#" to the version 259 + header which was manually parsed. 
That is then sufficient 260 + information for a client to determine that the file was invalid. 261 + If OpamBaseParser.version = (2, 1), this would allow 262 + `opam-version: "2.2"`, containing no lexer or parser changes, still to 263 + report syntax errors in opam 2.2, by using this sentinel group to 264 + detect the parsing error. *) 265 + let sentinel = 266 + let pos = 267 + Lexing.(pos_of_lexing_pos lexbuf.lex_start_p lexbuf.lex_curr_p) 215 268 in 216 - (* For opam-version: 2.1 and later, there must be no other opam-version 217 - fields. *) 218 - if List.exists opam_version_variable items then 219 - raise Parsing.Parse_error; 220 - (* Parsing and lexing errors from future versions of opam are ignored: 221 - the intent is that the tool will abort/ignore because of the 222 - opam-version field rather than through lexer/parser errors. *) 223 - raise_if_parsing_failed (nopatch ver <= version); 224 - r 225 - | {file_contents = items; _} -> 226 - let opam_version_greater_2_0 = function 227 - | {pelem = Variable({pelem = "opam-version"; _}, {pelem = String ver; _}); pos = {start; stop; _}} 228 - when nopatch ver > (2, 0) -> 229 - reset_lexbuf l file_name start stop 230 - | _ -> false 269 + let section = 270 + {section_kind = {pelem = "#"; pos}; 271 + section_name = None; 272 + section_items = {pelem = []; pos}} 231 273 in 232 - (* opam-version: 2.1 or later must be the first item. *) 233 - if List.exists opam_version_greater_2_0 items then 234 - raise Parsing.Parse_error; 235 - (* If no opam-version field was given, all exceptions must be 236 - raised. *) 237 - raise_if_parsing_failed true; 238 - r 274 + {pelem = Section section; pos} 275 + in 276 + {file_contents = [header; sentinel]; file_name} 277 + in 278 + begin 279 + match result with 280 + | {file_contents = _::items; _} -> 281 + (* Ensure that there are no `opam-version` fields with a value >= "2.1" 282 + further down the file. 
*) 283 + List.iter (scan_opam_version_variable format_2_1_or_greater) items 284 + | _ -> () 285 + end; 286 + result 239 287 240 - let value t l = 241 - try 242 - let r = value t l in 243 - Parsing.clear_parser (); 244 - r 245 - with 246 - | e -> 247 - Parsing.clear_parser (); 248 - raise e 288 + let value t l = with_clear_parser (value t) l
+7
vendor/opam/opam-file-format/src/opamLexer.mll
··· 138 138 | pfxop { PFXOP (FullPos.pfxop (Lexing.lexeme lexbuf)) } 139 139 | envop { ENVOP (FullPos.env_update_op (Lexing.lexeme lexbuf)) } 140 140 | eof { EOF } 141 + (* OpamBaseParser can't directly access OpamLexer.Error so it uses these 142 + constants (which would parse that way) to extract the exception values. 143 + *) 144 + | "opam-version: \"2.1\"\nopam-version: \"z\"" eof 145 + { error "opam-version cannot be repeated" } 146 + | "version: \"42\"\nopam-version: \"2.1\"" eof 147 + { error "opam-version must be the first non-comment line" } 141 148 | _ { let token = Lexing.lexeme lexbuf in 142 149 error "'%s' is not a valid token" token } 143 150
+44 -11
vendor/opam/opam-file-format/tests/versions.ml
··· 11 11 module A = Alcotest 12 12 13 13 let tests_exn = [ 14 - "opam-version > 2.0 not at start 1", 14 + "opam-version > 2.0 not at start 1", OpamLexer.Error("opam-version must be the first non-comment line"), 15 15 {| 16 16 version: "2.1" 17 17 opam-version: "2.1" 18 18 |}; 19 - "opam-version > 2.1 repeated", 19 + "opam-version > 2.1 repeated", OpamLexer.Error("opam-version cannot be repeated"), 20 20 {| 21 21 opam-version: "2.1" 22 22 opam-version: "2.1" 23 23 |}; 24 - "no opam-version and parsing error", 24 + "no opam-version and parsing error", Parsing.Parse_error, 25 25 {| 26 26 build: [ "echo" 27 27 |}; 28 - "opam-version 2.1 and parsing error", 28 + "opam-version 2.1 and lexing error", OpamLexer.Error "'@' is not a valid token", 29 + {| 30 + opam-version: "2.1" 31 + @ 32 + |}; 33 + "opam-version 2.1 and parsing error", Parsing.Parse_error, 29 34 {| 30 35 opam-version: "2.1" 31 36 build: [ "echo" 32 37 |}; 33 - ] |> List.map (fun (name, content) -> 38 + "opam-version 2.1 and immediate parsing error", Parsing.Parse_error, 39 + {| 40 + opam-version: "2.1" 41 + !! 42 + |}; 43 + ] |> List.map (fun (name, exn, content) -> 34 44 name, (fun () -> 35 - A.check_raises name Parsing.Parse_error (fun () -> 45 + A.check_raises name exn (fun () -> 36 46 OpamParser.FullPos.string content "broken.opam" |> ignore))) 37 47 48 + let has_sentinel = 49 + let open OpamParserTypes.FullPos in 50 + fun {file_contents; _} -> 51 + match List.rev file_contents with 52 + | {pelem = Section {section_kind = {pelem = "#"; _}; _}; _}::_ -> true 53 + | _ -> false 54 + 38 55 let tests_noexn = [ 39 - "opam-version 2.2 and parsing error", 56 + "opam-version 42.0 and parsing error", 57 + {| 58 + opam-version: "42.0" 59 + version: "42.0" 60 + !! 61 + |}; 62 + "opam-version 42.0 and evil parsing error", 63 + {| 64 + opam-version: "42.0" < 65 + |}; 66 + "opam-version 42.0 and immediate parsing error", 67 + {| 68 + opam-version: "42.0" 69 + !! 
70 + |}; 71 + "opam-version 42.0 and lexing error", 40 72 {| 41 - opam-version: "2.2" 42 - build: [ "echo" 73 + opam-version: "42.0" 74 + @ 43 75 |}; 44 76 ] |> List.map (fun (name, content) -> 45 77 name, (fun () -> 46 - A.check A.unit name () 47 - (OpamParser.FullPos.string content "broken.opam" |> ignore))) 78 + A.check A.bool name true 79 + (OpamParser.FullPos.string content "broken.opam" 80 + |> has_sentinel))) 48 81 49 82 let tests = 50 83 ["opam-version", tests_exn @ tests_noexn]