···11+{0 Building a REPL}
22+33+@scrolly Building a REPL in OCaml
44+{ol
55+{li
66+ {b The Expression Type}
77+88+ A REPL evaluates expressions. We start with a tiny language:
99+ integer literals, addition, let-bindings, and variables.
1010+ Four constructors is all we need.
1111+1212+ {[
1313+type expr =
1414+ | Lit of int
1515+ | Add of expr * expr
1616+ | Let of string * expr * expr
1717+ | Var of string
1818+ ]}
1919+}
2020+{li
2121+ {b Values and Environments}
2222+2323+ Evaluation produces values. For now, just integers. An
2424+ environment maps variable names to their values using a
2525+ simple association list.
2626+2727+ {[
2828+type expr =
2929+ | Lit of int
3030+ | Add of expr * expr
3131+ | Let of string * expr * expr
3232+ | Var of string
3333+3434+type value = Int of int
3535+3636+type env = (string * value) list
3737+3838+let empty_env : env = []
3939+4040+let extend env name v = (name, v) :: env
4141+4242+let lookup env name =
4343+ match List.assoc_opt name env with
4444+ | Some v -> v
4545+ | None -> failwith ("unbound: " ^ name)
4646+ ]}
4747+}
4848+{li
4949+ {b The Evaluator}
5050+5151+ Pattern matching makes the evaluator beautifully direct.
5252+ Each expression form maps to a straightforward computation.
5353+ Let-bindings extend the environment for the body expression.
5454+5555+ {[
5656+type expr =
5757+ | Lit of int
5858+ | Add of expr * expr
5959+ | Let of string * expr * expr
6060+ | Var of string
6161+6262+type value = Int of int
6363+6464+type env = (string * value) list
6565+6666+let empty_env : env = []
6767+6868+let extend env name v = (name, v) :: env
6969+7070+let lookup env name =
7171+ match List.assoc_opt name env with
7272+ | Some v -> v
7373+ | None -> failwith ("unbound: " ^ name)
7474+7575+let rec eval env = function
7676+ | Lit n -> Int n
7777+ | Add (a, b) ->
7878+ let (Int x) = eval env a in
7979+ let (Int y) = eval env b in
8080+ Int (x + y)
8181+ | Let (name, rhs, body) ->
8282+ let v = eval env rhs in
8383+ eval (extend env name v) body
8484+ | Var name -> lookup env name
8585+ ]}
8686+}
8787+{li
8888+ {b A Tiny Tokenizer}
8989+9090+ To read user input, we need a tokenizer. It splits a string
9191+ into meaningful chunks: numbers, identifiers, operators, and
9292+ parentheses. Whitespace is consumed but not produced.
9393+9494+ {[
9595+type expr =
9696+ | Lit of int
9797+ | Add of expr * expr
9898+ | Let of string * expr * expr
9999+ | Var of string
100100+101101+type value = Int of int
102102+type env = (string * value) list
103103+let empty_env : env = []
104104+let extend env name v = (name, v) :: env
105105+let lookup env name =
106106+ match List.assoc_opt name env with
107107+ | Some v -> v
108108+ | None -> failwith ("unbound: " ^ name)
109109+110110+let rec eval env = function
111111+ | Lit n -> Int n
112112+ | Add (a, b) ->
113113+ let (Int x) = eval env a in
114114+ let (Int y) = eval env b in
115115+ Int (x + y)
116116+ | Let (name, rhs, body) ->
117117+ let v = eval env rhs in
118118+ eval (extend env name v) body
119119+ | Var name -> lookup env name
120120+121121+type token =
122122+ | TNum of int
123123+ | TIdent of string
124124+ | TPlus | TEqual
125125+ | TLParen | TRParen
126126+ | TLet | TIn
127127+128128+let is_alpha c =
129129+ (c >= 'a' && c <= 'z')
130130+ || (c >= 'A' && c <= 'Z')
131131+ || c = '_'
132132+133133+let is_digit c = c >= '0' && c <= '9'
134134+135135+let tokenize input =
136136+ let len = String.length input in
137137+ let pos = ref 0 in
138138+ let tokens = ref [] in
139139+ while !pos < len do
140140+ let c = input.[!pos] in
141141+ if c = ' ' || c = '\t' || c = '\n' then
142142+ incr pos
143143+ else if is_digit c then begin
144144+ let start = !pos in
145145+ while !pos < len && is_digit input.[!pos] do
146146+ incr pos done;
147147+ let s = String.sub input start (!pos - start) in
148148+ tokens := TNum (int_of_string s) :: !tokens
149149+ end else if is_alpha c then begin
150150+ let start = !pos in
151151+ while !pos < len && is_alpha input.[!pos] do
152152+ incr pos done;
153153+ let s = String.sub input start (!pos - start) in
154154+ let tok = match s with
155155+ | "let" -> TLet | "in" -> TIn
156156+ | _ -> TIdent s in
157157+ tokens := tok :: !tokens
158158+ end else begin
159159+ let tok = match c with
160160+ | '+' -> TPlus | '=' -> TEqual
161161+ | '(' -> TLParen | ')' -> TRParen
162162+ | _ -> failwith "unexpected char" in
163163+ tokens := tok :: !tokens;
164164+ incr pos
165165+ end
166166+ done;
167167+ List.rev !tokens
168168+ ]}
169169+}
170170+{li
171171+ {b The Parser}
172172+173173+ A recursive descent parser turns tokens into our expression AST.
174174+ It handles operator precedence naturally: addition is parsed as
175175+ a left-associative chain of atoms.
176176+177177+ {[
178178+type expr =
179179+ | Lit of int
180180+ | Add of expr * expr
181181+ | Let of string * expr * expr
182182+ | Var of string
183183+184184+type value = Int of int
185185+type env = (string * value) list
186186+let empty_env : env = []
187187+let extend env name v = (name, v) :: env
188188+let lookup env name =
189189+ match List.assoc_opt name env with
190190+ | Some v -> v
191191+ | None -> failwith ("unbound: " ^ name)
192192+193193+let rec eval env = function
194194+ | Lit n -> Int n
195195+ | Add (a, b) ->
196196+ let (Int x) = eval env a in
197197+ let (Int y) = eval env b in
198198+ Int (x + y)
199199+ | Let (name, rhs, body) ->
200200+ let v = eval env rhs in
201201+ eval (extend env name v) body
202202+ | Var name -> lookup env name
203203+204204+type token =
205205+ | TNum of int | TIdent of string
206206+ | TPlus | TEqual
207207+ | TLParen | TRParen
208208+ | TLet | TIn
209209+210210+let is_alpha c =
211211+ (c >= 'a' && c <= 'z')
212212+ || (c >= 'A' && c <= 'Z') || c = '_'
213213+let is_digit c = c >= '0' && c <= '9'
214214+215215+let tokenize input =
216216+ let len = String.length input in
217217+ let pos = ref 0 in
218218+ let tokens = ref [] in
219219+ while !pos < len do
220220+ let c = input.[!pos] in
221221+ if c = ' ' || c = '\t' || c = '\n' then
222222+ incr pos
223223+ else if is_digit c then begin
224224+ let start = !pos in
225225+ while !pos < len && is_digit input.[!pos]
226226+ do incr pos done;
227227+ let s = String.sub input start
228228+ (!pos - start) in
229229+ tokens := TNum (int_of_string s) :: !tokens
230230+ end else if is_alpha c then begin
231231+ let start = !pos in
232232+ while !pos < len && is_alpha input.[!pos]
233233+ do incr pos done;
234234+ let s = String.sub input start
235235+ (!pos - start) in
236236+ let tok = match s with
237237+ | "let" -> TLet | "in" -> TIn
238238+ | _ -> TIdent s in
239239+ tokens := tok :: !tokens
240240+ end else begin
241241+ let tok = match c with
242242+ | '+' -> TPlus | '=' -> TEqual
243243+ | '(' -> TLParen | ')' -> TRParen
244244+ | _ -> failwith "unexpected char" in
245245+ tokens := tok :: !tokens; incr pos
246246+ end
247247+ done;
248248+ List.rev !tokens
249249+250250+let parse tokens =
251251+ let toks = ref tokens in
252252+ let next () =
253253+ match !toks with
254254+ | [] -> failwith "unexpected end"
255255+ | t :: rest -> toks := rest; t in
256256+ let peek () =
257257+ match !toks with [] -> None | t :: _ -> Some t in
258258+ let rec parse_expr () =
259259+ let left = parse_atom () in
260260+ parse_add left
261261+ and parse_add left =
262262+ match peek () with
263263+ | Some TPlus ->
264264+ ignore (next ());
265265+ let right = parse_atom () in
266266+ parse_add (Add (left, right))
267267+ | _ -> left
268268+ and parse_atom () =
269269+ match next () with
270270+ | TNum n -> Lit n
271271+ | TIdent s -> Var s
272272+ | TLParen ->
273273+ let e = parse_expr () in
274274+ ignore (next ()); e
275275+ | TLet ->
276276+ let (TIdent name) = next () in
277277+ ignore (next ());
278278+ let rhs = parse_expr () in
279279+ ignore (next ());
280280+ let body = parse_expr () in
281281+ Let (name, rhs, body)
282282+ | _ -> failwith "unexpected token" in
283283+ parse_expr ()
284284+ ]}
285285+}
286286+{li
287287+ {b The Read-Eval-Print Loop}
288288+289289+ Now we connect all the pieces. The REPL reads a line,
290290+ tokenizes it, parses the tokens, evaluates the expression,
291291+ and prints the result. A persistent environment accumulates
292292+ bindings across interactions.
293293+294294+ {[
295295+type expr =
296296+ | Lit of int
297297+ | Add of expr * expr
298298+ | Let of string * expr * expr
299299+ | Var of string
300300+301301+type value = Int of int
302302+type env = (string * value) list
303303+let empty_env : env = []
304304+let extend env name v = (name, v) :: env
305305+let lookup env name =
306306+ match List.assoc_opt name env with
307307+ | Some v -> v
308308+ | None -> failwith ("unbound: " ^ name)
309309+310310+let rec eval env = function
311311+ | Lit n -> Int n
312312+ | Add (a, b) ->
313313+ let (Int x) = eval env a in
314314+ let (Int y) = eval env b in
315315+ Int (x + y)
316316+ | Let (name, rhs, body) ->
317317+ let v = eval env rhs in
318318+ eval (extend env name v) body
319319+ | Var name -> lookup env name
320320+321321+type token =
322322+ | TNum of int | TIdent of string
323323+ | TPlus | TEqual
324324+ | TLParen | TRParen
325325+ | TLet | TIn
326326+327327+let is_alpha c =
328328+ (c >= 'a' && c <= 'z')
329329+ || (c >= 'A' && c <= 'Z') || c = '_'
330330+let is_digit c = c >= '0' && c <= '9'
331331+332332+let tokenize input =
333333+ let len = String.length input in
334334+ let pos = ref 0 in
335335+ let tokens = ref [] in
336336+ while !pos < len do
337337+ let c = input.[!pos] in
338338+ if c = ' ' || c = '\t' || c = '\n' then
339339+ incr pos
340340+ else if is_digit c then begin
341341+ let start = !pos in
342342+ while !pos < len && is_digit input.[!pos]
343343+ do incr pos done;
344344+ tokens := TNum (int_of_string
345345+ (String.sub input start
346346+ (!pos - start))) :: !tokens
347347+ end else if is_alpha c then begin
348348+ let start = !pos in
349349+ while !pos < len && is_alpha input.[!pos]
350350+ do incr pos done;
351351+ let s = String.sub input start
352352+ (!pos - start) in
353353+ tokens := (match s with
354354+ | "let" -> TLet | "in" -> TIn
355355+ | _ -> TIdent s) :: !tokens
356356+ end else begin
357357+ tokens := (match c with
358358+ | '+' -> TPlus | '=' -> TEqual
359359+ | '(' -> TLParen | ')' -> TRParen
360360+ | _ -> failwith "unexpected") :: !tokens;
361361+ incr pos
362362+ end
363363+ done; List.rev !tokens
364364+365365+let parse tokens =
366366+ let toks = ref tokens in
367367+ let next () = match !toks with
368368+ | [] -> failwith "end"
369369+ | t :: r -> toks := r; t in
370370+ let peek () = match !toks with
371371+ | [] -> None | t :: _ -> Some t in
372372+ let rec expr () =
373373+ let l = atom () in add l
374374+ and add left = match peek () with
375375+ | Some TPlus ->
376376+ ignore (next ());
377377+ add (Add (left, atom ()))
378378+ | _ -> left
379379+ and atom () = match next () with
380380+ | TNum n -> Lit n
381381+ | TIdent s -> Var s
382382+ | TLParen ->
383383+ let e = expr () in
384384+ ignore (next ()); e
385385+ | TLet ->
386386+ let (TIdent name) = next () in
387387+ ignore (next ());
388388+ let rhs = expr () in
389389+ ignore (next ());
390390+ Let (name, rhs, expr ())
391391+ | _ -> failwith "unexpected" in
392392+ expr ()
393393+394394+let print_value = function
395395+ | Int n -> Printf.printf "=> %d\n" n
396396+397397+let repl () =
398398+ let env = ref empty_env in
399399+ try while true do
400400+ print_string "> ";
401401+ let line = input_line stdin in
402402+ let tokens = tokenize line in
403403+ let ast = parse tokens in
404404+ let result = eval !env ast in
405405+ print_value result
406406+ done with End_of_file ->
407407+ print_endline "Goodbye."
408408+409409+let () = repl ()
410410+ ]}
411411+}
412412+}
···11+{0 Building a Test Framework}
22+33+@scrolly Building a Test Framework in OCaml
44+{ol
55+{li
66+ {b A Single Assertion}
77+88+ The simplest possible test: check that a condition holds.
99+ If it fails, raise an exception with a message. This is
1010+ the foundation everything else builds on.
1111+1212+ {[
1313+exception Test_failure of string
1414+1515+let assert_equal ~expected ~actual msg =
1616+ if expected <> actual then
1717+ raise (Test_failure
1818+ (Printf.sprintf "%s: expected %s, got %s"
1919+ msg
2020+ (string_of_int expected)
2121+ (string_of_int actual)))
2222+ ]}
2323+}
2424+{li
2525+ {b Collecting Tests}
2626+2727+ A test is a named function. We store tests in a mutable list
2828+ so they can be registered declaratively with a simple helper.
2929+ Each test is just a unit function that might raise.
3030+3131+ {[
3232+exception Test_failure of string
3333+3434+let assert_equal ~expected ~actual msg =
3535+ if expected <> actual then
3636+ raise (Test_failure
3737+ (Printf.sprintf "%s: expected %s, got %s"
3838+ msg
3939+ (string_of_int expected)
4040+ (string_of_int actual)))
4141+4242+type test = {
4343+ name : string;
4444+ fn : unit -> unit;
4545+}
4646+4747+let tests : test list ref = ref []
4848+4949+let register name fn =
5050+ tests := { name; fn } :: !tests
5151+5252+let () = register "addition" (fun () ->
5353+ assert_equal ~expected:4 ~actual:(2 + 2)
5454+ "two plus two")
5555+5656+let () = register "multiplication" (fun () ->
5757+ assert_equal ~expected:6 ~actual:(2 * 3)
5858+ "two times three")
5959+ ]}
6060+}
6161+{li
6262+ {b A Test Runner}
6363+6464+ The runner iterates through registered tests, catching
6565+ exceptions to report pass or fail. It counts results
6666+ and prints a summary at the end.
6767+6868+ {[
6969+exception Test_failure of string
7070+7171+let assert_equal ~expected ~actual msg =
7272+ if expected <> actual then
7373+ raise (Test_failure
7474+ (Printf.sprintf "%s: expected %s, got %s"
7575+ msg
7676+ (string_of_int expected)
7777+ (string_of_int actual)))
7878+7979+type test = {
8080+ name : string;
8181+ fn : unit -> unit;
8282+}
8383+8484+let tests : test list ref = ref []
8585+8686+let register name fn =
8787+ tests := { name; fn } :: !tests
8888+8989+type result =
9090+ | Pass
9191+ | Fail of string
9292+9393+let run_one test =
9494+ try test.fn (); Pass
9595+ with
9696+ | Test_failure msg -> Fail msg
9797+ | exn -> Fail (Printexc.to_string exn)
9898+9999+let run_all () =
100100+ let results =
101101+ List.rev !tests
102102+ |> List.map (fun t -> (t.name, run_one t))
103103+ in
104104+ let passed =
105105+ List.length
106106+ (List.filter
107107+ (fun (_, r) -> r = Pass) results)
108108+ in
109109+ let total = List.length results in
110110+ List.iter (fun (name, result) ->
111111+ match result with
112112+ | Pass ->
113113+ Printf.printf " PASS %s\n" name
114114+ | Fail msg ->
115115+ Printf.printf " FAIL %s: %s\n" name msg
116116+ ) results;
117117+ Printf.printf "\n%d/%d tests passed\n"
118118+ passed total;
119119+ if passed < total then exit 1
120120+ ]}
121121+}
122122+{li
123123+ {b Better Assertions}
124124+125125+ Real frameworks need more than integer equality. We add
126126+ string comparison, boolean checks, and a generic raises
127127+ assertion that checks an exception is thrown.
128128+129129+ {[
130130+exception Test_failure of string
131131+132132+let assert_equal ~expected ~actual msg =
133133+ if expected <> actual then
134134+ raise (Test_failure
135135+ (Printf.sprintf "%s: expected %s, got %s"
136136+ msg
137137+ (string_of_int expected)
138138+ (string_of_int actual)))
139139+140140+let assert_string_equal ~expected ~actual msg =
141141+ if expected <> actual then
142142+ raise (Test_failure
143143+ (Printf.sprintf
144144+ "%s: expected %S, got %S"
145145+ msg expected actual))
146146+147147+let assert_true condition msg =
148148+ if not condition then
149149+ raise (Test_failure msg)
150150+151151+let assert_raises fn msg =
152152+ try fn ();
153153+ raise (Test_failure
154154+ (msg ^ ": expected exception"))
155155+ with
156156+ | Test_failure _ as e -> raise e
157157+ | _ -> ()
158158+159159+type test = {
160160+ name : string;
161161+ fn : unit -> unit;
162162+}
163163+164164+let tests : test list ref = ref []
165165+166166+let register name fn =
167167+ tests := { name; fn } :: !tests
168168+169169+type result = Pass | Fail of string
170170+171171+let run_one test =
172172+ try test.fn (); Pass
173173+ with
174174+ | Test_failure msg -> Fail msg
175175+ | exn -> Fail (Printexc.to_string exn)
176176+177177+let run_all () =
178178+ let results =
179179+ List.rev !tests
180180+ |> List.map (fun t -> (t.name, run_one t))
181181+ in
182182+ let passed = List.length
183183+ (List.filter
184184+ (fun (_, r) -> r = Pass) results) in
185185+ let total = List.length results in
186186+ List.iter (fun (name, result) ->
187187+ match result with
188188+ | Pass ->
189189+ Printf.printf " PASS %s\n" name
190190+ | Fail msg ->
191191+ Printf.printf " FAIL %s: %s\n"
192192+ name msg
193193+ ) results;
194194+ Printf.printf "\n%d/%d tests passed\n"
195195+ passed total;
196196+ if passed < total then exit 1
197197+ ]}
198198+}
199199+{li
200200+ {b Test Suites}
201201+202202+ As projects grow, tests need organization. We add a suite
203203+ concept that groups related tests under a name. Suites
204204+ can be nested and run independently.
205205+206206+ {[
207207+exception Test_failure of string
208208+209209+let assert_equal ~expected ~actual msg =
210210+ if expected <> actual then
211211+ raise (Test_failure
212212+ (Printf.sprintf "%s: expected %s, got %s"
213213+ msg
214214+ (string_of_int expected)
215215+ (string_of_int actual)))
216216+217217+let assert_string_equal ~expected ~actual msg =
218218+ if expected <> actual then
219219+ raise (Test_failure
220220+ (Printf.sprintf "%s: expected %S, got %S"
221221+ msg expected actual))
222222+223223+let assert_true condition msg =
224224+ if not condition then
225225+ raise (Test_failure msg)
226226+227227+let assert_raises fn msg =
228228+ try fn ();
229229+ raise (Test_failure
230230+ (msg ^ ": expected exception"))
231231+ with Test_failure _ as e -> raise e | _ -> ()
232232+233233+type test = { name : string; fn : unit -> unit }
234234+type result = Pass | Fail of string
235235+236236+type suite = {
237237+ suite_name : string;
238238+ mutable suite_tests : test list;
239239+}
240240+241241+let suites : suite list ref = ref []
242242+243243+let create_suite name =
244244+ let s = { suite_name = name;
245245+ suite_tests = [] } in
246246+ suites := s :: !suites; s
247247+248248+let add_test suite name fn =
249249+ suite.suite_tests <-
250250+ { name; fn } :: suite.suite_tests
251251+252252+let run_one test =
253253+ try test.fn (); Pass
254254+ with
255255+ | Test_failure msg -> Fail msg
256256+ | exn -> Fail (Printexc.to_string exn)
257257+258258+let run_suite suite =
259259+ Printf.printf "Suite: %s\n" suite.suite_name;
260260+ let results =
261261+ List.rev suite.suite_tests
262262+ |> List.map (fun t ->
263263+ (t.name, run_one t)) in
264264+ let passed = List.length
265265+ (List.filter
266266+ (fun (_, r) -> r = Pass) results) in
267267+ let total = List.length results in
268268+ List.iter (fun (name, result) ->
269269+ match result with
270270+ | Pass ->
271271+ Printf.printf " PASS %s\n" name
272272+ | Fail msg ->
273273+ Printf.printf " FAIL %s: %s\n"
274274+ name msg
275275+ ) results;
276276+ Printf.printf " %d/%d passed\n\n"
277277+ passed total;
278278+ passed = total
279279+280280+let run_all_suites () =
281281+ let all_ok = List.for_all run_suite
282282+ (List.rev !suites) in
283283+ if not all_ok then exit 1
284284+ ]}
285285+}
286286+{li
287287+ {b Expect Tests}
288288+289289+ The crown jewel: expect tests capture actual output and
290290+ compare it to an expected snapshot. On first run, they
291291+ record the output. On later runs, they detect regressions.
292292+ This is how tools like ppx_expect and Cram tests work.
293293+294294+ {[
295295+exception Test_failure of string
296296+297297+let assert_equal ~expected ~actual msg =
298298+ if expected <> actual then
299299+ raise (Test_failure
300300+ (Printf.sprintf "%s: expected %s, got %s"
301301+ msg
302302+ (string_of_int expected)
303303+ (string_of_int actual)))
304304+305305+let assert_string_equal ~expected ~actual msg =
306306+ if expected <> actual then
307307+ raise (Test_failure
308308+ (Printf.sprintf "%s: expected %S, got %S"
309309+ msg expected actual))
310310+311311+let assert_true condition msg =
312312+ if not condition then
313313+ raise (Test_failure msg)
314314+315315+let assert_raises fn msg =
316316+ try fn ();
317317+ raise (Test_failure
318318+ (msg ^ ": expected exception"))
319319+ with Test_failure _ as e -> raise e | _ -> ()
320320+321321+type test = { name : string; fn : unit -> unit }
322322+type result = Pass | Fail of string
323323+324324+type suite = {
325325+ suite_name : string;
326326+ mutable suite_tests : test list;
327327+}
328328+329329+let suites : suite list ref = ref []
330330+331331+let create_suite name =
332332+ let s = { suite_name = name;
333333+ suite_tests = [] } in
334334+ suites := s :: !suites; s
335335+336336+let add_test suite name fn =
337337+ suite.suite_tests <-
338338+ { name; fn } :: suite.suite_tests
339339+340340+let run_one test =
341341+ try test.fn (); Pass
342342+ with
343343+ | Test_failure msg -> Fail msg
344344+ | exn -> Fail (Printexc.to_string exn)
345345+346346+(* Expect test infrastructure *)
347347+let expect_dir = "_expect"
348348+349349+let expect_test suite name fn =
350350+ add_test suite name (fun () ->
351351+ let buf = Buffer.create 256 in
352352+ fn (Buffer.add_string buf);
353353+ let actual = Buffer.contents buf in
354354+ let path = Printf.sprintf "%s/%s/%s.expected"
355355+ expect_dir suite.suite_name name in
356356+ if Sys.file_exists path then begin
357357+ let ic = open_in path in
358358+ let expected = really_input_string ic
359359+ (in_channel_length ic) in
360360+ close_in ic;
361361+ assert_string_equal
362362+ ~expected ~actual
363363+ (name ^ " snapshot")
364364+ end else begin
365365+ let dir = Filename.dirname path in
366366+ ignore (Sys.command
367367+ ("mkdir -p " ^ dir));
368368+ let oc = open_out path in
369369+ output_string oc actual;
370370+ close_out oc;
371371+ Printf.printf
372372+ " NEW %s (snapshot saved)\n" name
373373+ end)
374374+375375+let run_suite suite =
376376+ Printf.printf "Suite: %s\n" suite.suite_name;
377377+ let results =
378378+ List.rev suite.suite_tests
379379+ |> List.map (fun t ->
380380+ (t.name, run_one t)) in
381381+ let passed = List.length
382382+ (List.filter
383383+ (fun (_, r) -> r = Pass) results) in
384384+ let total = List.length results in
385385+ List.iter (fun (name, result) ->
386386+ match result with
387387+ | Pass ->
388388+ Printf.printf " PASS %s\n" name
389389+ | Fail msg ->
390390+ Printf.printf " FAIL %s: %s\n"
391391+ name msg
392392+ ) results;
393393+ Printf.printf " %d/%d passed\n\n"
394394+ passed total;
395395+ passed = total
396396+397397+let run_all_suites () =
398398+ let all_ok = List.for_all run_suite
399399+ (List.rev !suites) in
400400+ if not all_ok then exit 1
401401+ ]}
402402+}
403403+}
+384
doc/warm_parser.mld
···11+{0 Building a JSON Parser}
22+33+@scrolly Building a JSON Parser in OCaml
44+{ol
55+{li
66+ {b Defining the Value Type}
77+88+ Every parser starts with a type. JSON has six kinds of values:
99+ null, booleans, numbers, strings, arrays, and objects.
1010+ We encode this directly as an OCaml variant.
1111+1212+ {[
1313+(* >type json =
1414+(* > | Null
1515+(* > | Bool of bool
1616+(* > | Number of float
1717+(* > | String of string
1818+(* > | Array of json list
1919+(* > | Object of (string * json) list
2020+ ]}
2121+}
2222+{li
2323+ {b A Simple Scanner}
2424+2525+ Before parsing structure, we need to skip whitespace and
2626+ peek at the next meaningful character. Our scanner works
2727+ on a string with a mutable position index.
2828+2929+ {[
3030+type json =
3131+ | Null
3232+ | Bool of bool
3333+ | Number of float
3434+ | String of string
3535+ | Array of json list
3636+ | Object of (string * json) list
3737+3838+(* >type scanner = {
3939+(* > input : string;
4040+(* > mutable pos : int;
4141+(* >}
4242+(* >
4343+(* >let peek s =
4444+(* > while s.pos < String.length s.input
4545+(* > && s.input.[s.pos] = ' ' do
4646+(* > s.pos <- s.pos + 1
4747+(* > done;
4848+(* > if s.pos < String.length s.input
4949+(* > then Some s.input.[s.pos]
5050+(* > else None
5151+(* >
5252+(* >let advance s = s.pos <- s.pos + 1
5353+ ]}
5454+}
5555+{li
5656+ {b Parsing Strings}
5757+5858+ JSON strings are delimited by double quotes. We scan character
5959+ by character, collecting into a buffer. This handles the simple
6060+ case without escape sequences.
6161+6262+ {[
6363+type json =
6464+ | Null
6565+ | Bool of bool
6666+ | Number of float
6767+ | String of string
6868+ | Array of json list
6969+ | Object of (string * json) list
7070+7171+type scanner = {
7272+ input : string;
7373+ mutable pos : int;
7474+}
7575+7676+let peek s =
7777+ while s.pos < String.length s.input
7878+ && s.input.[s.pos] = ' ' do
7979+ s.pos <- s.pos + 1
8080+ done;
8181+ if s.pos < String.length s.input
8282+ then Some s.input.[s.pos]
8383+ else None
8484+8585+let advance s = s.pos <- s.pos + 1
8686+8787+(* >let parse_string s =
8888+(* > advance s;
8989+(* > let buf = Buffer.create 64 in
9090+(* > while s.pos < String.length s.input
9191+(* > && s.input.[s.pos] <> '"' do
9292+(* > Buffer.add_char buf s.input.[s.pos];
9393+(* > advance s
9494+(* > done;
9595+(* > advance s;
9696+(* > Buffer.contents buf
9797+ ]}
9898+}
9999+{li
100100+ {b Parsing Numbers}
101101+102102+ Numbers in JSON can be integers or floats. We collect consecutive
103103+ digit and dot characters, then use float_of_string to parse them.
104104+ A production parser would handle exponents too.
105105+106106+ {[
107107+type json =
108108+ | Null
109109+ | Bool of bool
110110+ | Number of float
111111+ | String of string
112112+ | Array of json list
113113+ | Object of (string * json) list
114114+115115+type scanner = {
116116+ input : string;
117117+ mutable pos : int;
118118+}
119119+120120+let peek s =
121121+ while s.pos < String.length s.input
122122+ && s.input.[s.pos] = ' ' do
123123+ s.pos <- s.pos + 1
124124+ done;
125125+ if s.pos < String.length s.input
126126+ then Some s.input.[s.pos]
127127+ else None
128128+129129+let advance s = s.pos <- s.pos + 1
130130+131131+let parse_string s =
132132+ advance s;
133133+ let buf = Buffer.create 64 in
134134+ while s.pos < String.length s.input
135135+ && s.input.[s.pos] <> '"' do
136136+ Buffer.add_char buf s.input.[s.pos];
137137+ advance s
138138+ done;
139139+ advance s;
140140+ Buffer.contents buf
141141+142142+(* >let is_digit c = c >= '0' && c <= '9'
143143+(* >
144144+(* >let parse_number s =
145145+(* > let start = s.pos in
146146+(* > while s.pos < String.length s.input
147147+(* > && (is_digit s.input.[s.pos]
148148+(* > || s.input.[s.pos] = '.'
149149+(* > || s.input.[s.pos] = '-') do
150150+(* > advance s
151151+(* > done;
152152+(* > float_of_string
153153+(* > (String.sub s.input start (s.pos - start))
154154+ ]}
155155+}
156156+{li
157157+ {b The Recursive Core}
158158+159159+ Now the magic: parse_value dispatches on the next character
160160+ to decide what kind of JSON value to parse. For atoms like
161161+ null, true, false we match literal strings. For compound
162162+ structures, we recurse.
163163+164164+ {[
165165+type json =
166166+ | Null
167167+ | Bool of bool
168168+ | Number of float
169169+ | String of string
170170+ | Array of json list
171171+ | Object of (string * json) list
172172+173173+type scanner = {
174174+ input : string;
175175+ mutable pos : int;
176176+}
177177+178178+let peek s =
179179+ while s.pos < String.length s.input
180180+ && s.input.[s.pos] = ' ' do
181181+ s.pos <- s.pos + 1
182182+ done;
183183+ if s.pos < String.length s.input
184184+ then Some s.input.[s.pos]
185185+ else None
186186+187187+let advance s = s.pos <- s.pos + 1
188188+189189+let parse_string s =
190190+ advance s;
191191+ let buf = Buffer.create 64 in
192192+ while s.pos < String.length s.input
193193+ && s.input.[s.pos] <> '"' do
194194+ Buffer.add_char buf s.input.[s.pos];
195195+ advance s
196196+ done;
197197+ advance s;
198198+ Buffer.contents buf
199199+200200+let is_digit c = c >= '0' && c <= '9'
201201+202202+let parse_number s =
203203+ let start = s.pos in
204204+ while s.pos < String.length s.input
205205+ && (is_digit s.input.[s.pos]
206206+ || s.input.[s.pos] = '.'
207207+ || s.input.[s.pos] = '-') do
208208+ advance s
209209+ done;
210210+ float_of_string
211211+ (String.sub s.input start (s.pos - start))
212212+213213+(* >let expect s c =
214214+(* > match peek s with
215215+(* > | Some c' when c' = c -> advance s
216216+(* > | _ -> failwith "unexpected character"
217217+(* >
218218+(* >let rec parse_value s =
219219+(* > match peek s with
220220+(* > | Some '"' -> String (parse_string s)
221221+(* > | Some c when is_digit c || c = '-' ->
222222+(* > Number (parse_number s)
223223+(* > | Some 't' ->
224224+(* > s.pos <- s.pos + 4; Bool true
225225+(* > | Some 'f' ->
226226+(* > s.pos <- s.pos + 5; Bool false
227227+(* > | Some 'n' ->
228228+(* > s.pos <- s.pos + 4; Null
229229+(* > | Some '[' -> parse_array s
230230+(* > | Some '{' -> parse_object s
231231+(* > | _ -> failwith "unexpected token"
232232+(* >
233233+(* >and parse_array s =
234234+(* > advance s;
235235+(* > let items = ref [] in
236236+(* > (match peek s with
237237+(* > | Some ']' -> advance s
238238+(* > | _ ->
239239+(* > items := [parse_value s];
240240+(* > while peek s = Some ',' do
241241+(* > advance s;
242242+(* > items := parse_value s :: !items
243243+(* > done;
244244+(* > expect s ']');
245245+(* > Array (List.rev !items)
246246+(* >
247247+(* >and parse_object s =
248248+(* > advance s;
249249+(* > let pairs = ref [] in
250250+(* > (match peek s with
251251+(* > | Some '}' -> advance s
252252+(* > | _ ->
253253+(* > let key = parse_string s in
254254+(* > expect s ':';
255255+(* > let value = parse_value s in
256256+(* > pairs := [(key, value)];
257257+(* > while peek s = Some ',' do
258258+(* > advance s;
259259+(* > let k = parse_string s in
260260+(* > expect s ':';
261261+(* > let v = parse_value s in
262262+(* > pairs := (k, v) :: !pairs
263263+(* > done;
264264+(* > expect s '}');
265265+(* > Object (List.rev !pairs)
266266+ ]}
267267+}
268268+{li
269269+ {b The Public API}
270270+271271+ Finally we wrap the scanner in a clean top-level function.
272272+ Pass a string in, get a JSON value out. The entire parser
273273+ is about 80 lines of OCaml — no dependencies, no magic.
274274+275275+ {[
276276+type json =
277277+ | Null
278278+ | Bool of bool
279279+ | Number of float
280280+ | String of string
281281+ | Array of json list
282282+ | Object of (string * json) list
283283+284284+type scanner = {
285285+ input : string;
286286+ mutable pos : int;
287287+}
288288+289289+let peek s =
290290+ while s.pos < String.length s.input
291291+ && s.input.[s.pos] = ' ' do
292292+ s.pos <- s.pos + 1
293293+ done;
294294+ if s.pos < String.length s.input
295295+ then Some s.input.[s.pos]
296296+ else None
297297+298298+let advance s = s.pos <- s.pos + 1
299299+300300+let parse_string s =
301301+ advance s;
302302+ let buf = Buffer.create 64 in
303303+ while s.pos < String.length s.input
304304+ && s.input.[s.pos] <> '"' do
305305+ Buffer.add_char buf s.input.[s.pos];
306306+ advance s
307307+ done;
308308+ advance s;
309309+ Buffer.contents buf
310310+311311+let is_digit c = c >= '0' && c <= '9'
312312+313313+let parse_number s =
314314+ let start = s.pos in
315315+ while s.pos < String.length s.input
316316+ && (is_digit s.input.[s.pos]
317317+ || s.input.[s.pos] = '.'
318318+ || s.input.[s.pos] = '-') do
319319+ advance s
320320+ done;
321321+ float_of_string
322322+ (String.sub s.input start (s.pos - start))
323323+324324+let expect s c =
325325+ match peek s with
326326+ | Some c' when c' = c -> advance s
327327+ | _ -> failwith "unexpected character"
328328+329329+let rec parse_value s =
330330+ match peek s with
331331+ | Some '"' -> String (parse_string s)
332332+ | Some c when is_digit c || c = '-' ->
333333+ Number (parse_number s)
334334+ | Some 't' ->
335335+ s.pos <- s.pos + 4; Bool true
336336+ | Some 'f' ->
337337+ s.pos <- s.pos + 5; Bool false
338338+ | Some 'n' ->
339339+ s.pos <- s.pos + 4; Null
340340+ | Some '[' -> parse_array s
341341+ | Some '{' -> parse_object s
342342+ | _ -> failwith "unexpected token"
343343+344344+and parse_array s =
345345+ advance s;
346346+ let items = ref [] in
347347+ (match peek s with
348348+ | Some ']' -> advance s
349349+ | _ ->
350350+ items := [parse_value s];
351351+ while peek s = Some ',' do
352352+ advance s;
353353+ items := parse_value s :: !items
354354+ done;
355355+ expect s ']');
356356+ Array (List.rev !items)
357357+358358+and parse_object s =
359359+ advance s;
360360+ let pairs = ref [] in
361361+ (match peek s with
362362+ | Some '}' -> advance s
363363+ | _ ->
364364+ let key = parse_string s in
365365+ expect s ':';
366366+ let value = parse_value s in
367367+ pairs := [(key, value)];
368368+ while peek s = Some ',' do
369369+ advance s;
370370+ let k = parse_string s in
371371+ expect s ':';
372372+ let v = parse_value s in
373373+ pairs := (k, v) :: !pairs
374374+ done;
375375+ expect s '}');
376376+ Object (List.rev !pairs)
377377+378378+(* >let parse input =
379379+(* > let s = { input; pos = 0 } in
380380+(* > let v = parse_value s in
381381+(* > v
382382+ ]}
383383+}
384384+}