A fork of mtelver's day10 project
1{0 Building a REPL}
2
3@scrolly Building a REPL in OCaml
4{ol
5{li
6 {b The Expression Type}
7
8 A REPL evaluates expressions. We start with a tiny language:
9 integer literals, addition, let-bindings, and variables.
10 Four constructors is all we need.
11
12 {[
13type expr =
14 | Lit of int
15 | Add of expr * expr
16 | Let of string * expr * expr
17 | Var of string
18 ]}
19}
20{li
21 {b Values and Environments}
22
23 Evaluation produces values. For now, just integers. An
24 environment maps variable names to their values using a
25 simple association list.
26
27 {[
28type expr =
29 | Lit of int
30 | Add of expr * expr
31 | Let of string * expr * expr
32 | Var of string
33
34type value = Int of int
35
36type env = (string * value) list
37
38let empty_env : env = []
39
40let extend env name v = (name, v) :: env
41
42let lookup env name =
43 match List.assoc_opt name env with
44 | Some v -> v
45 | None -> failwith ("unbound: " ^ name)
46 ]}
47}
48{li
49 {b The Evaluator}
50
51 Pattern matching makes the evaluator beautifully direct.
52 Each expression form maps to a straightforward computation.
53 Let-bindings extend the environment for the body expression.
54
55 {[
56type expr =
57 | Lit of int
58 | Add of expr * expr
59 | Let of string * expr * expr
60 | Var of string
61
62type value = Int of int
63
64type env = (string * value) list
65
66let empty_env : env = []
67
68let extend env name v = (name, v) :: env
69
70let lookup env name =
71 match List.assoc_opt name env with
72 | Some v -> v
73 | None -> failwith ("unbound: " ^ name)
74
75let rec eval env = function
76 | Lit n -> Int n
77 | Add (a, b) ->
78 let (Int x) = eval env a in
79 let (Int y) = eval env b in
80 Int (x + y)
81 | Let (name, rhs, body) ->
82 let v = eval env rhs in
83 eval (extend env name v) body
84 | Var name -> lookup env name
85 ]}
86}
87{li
88 {b A Tiny Tokenizer}
89
90 To read user input, we need a tokenizer. It splits a string
91 into meaningful chunks: numbers, identifiers, operators, and
92 parentheses. Whitespace is consumed but not produced.
93
94 {[
95type expr =
96 | Lit of int
97 | Add of expr * expr
98 | Let of string * expr * expr
99 | Var of string
100
101type value = Int of int
102type env = (string * value) list
103let empty_env : env = []
104let extend env name v = (name, v) :: env
105let lookup env name =
106 match List.assoc_opt name env with
107 | Some v -> v
108 | None -> failwith ("unbound: " ^ name)
109
110let rec eval env = function
111 | Lit n -> Int n
112 | Add (a, b) ->
113 let (Int x) = eval env a in
114 let (Int y) = eval env b in
115 Int (x + y)
116 | Let (name, rhs, body) ->
117 let v = eval env rhs in
118 eval (extend env name v) body
119 | Var name -> lookup env name
120
121type token =
122 | TNum of int
123 | TIdent of string
124 | TPlus | TEqual
125 | TLParen | TRParen
126 | TLet | TIn
127
128let is_alpha c =
129 (c >= 'a' && c <= 'z')
130 || (c >= 'A' && c <= 'Z')
131 || c = '_'
132
133let is_digit c = c >= '0' && c <= '9'
134
135let tokenize input =
136 let len = String.length input in
137 let pos = ref 0 in
138 let tokens = ref [] in
139 while !pos < len do
140 let c = input.[!pos] in
141 if c = ' ' || c = '\t' || c = '\n' then
142 incr pos
143 else if is_digit c then begin
144 let start = !pos in
145 while !pos < len && is_digit input.[!pos] do
146 incr pos done;
147 let s = String.sub input start (!pos - start) in
148 tokens := TNum (int_of_string s) :: !tokens
149 end else if is_alpha c then begin
150 let start = !pos in
151 while !pos < len && is_alpha input.[!pos] do
152 incr pos done;
153 let s = String.sub input start (!pos - start) in
154 let tok = match s with
155 | "let" -> TLet | "in" -> TIn
156 | _ -> TIdent s in
157 tokens := tok :: !tokens
158 end else begin
159 let tok = match c with
160 | '+' -> TPlus | '=' -> TEqual
161 | '(' -> TLParen | ')' -> TRParen
162 | _ -> failwith "unexpected char" in
163 tokens := tok :: !tokens;
164 incr pos
165 end
166 done;
167 List.rev !tokens
168 ]}
169}
170{li
171 {b The Parser}
172
173 A recursive descent parser turns tokens into our expression AST.
174 It handles operator precedence naturally: addition is parsed as
175 a left-associative chain of atoms.
176
177 {[
178type expr =
179 | Lit of int
180 | Add of expr * expr
181 | Let of string * expr * expr
182 | Var of string
183
184type value = Int of int
185type env = (string * value) list
186let empty_env : env = []
187let extend env name v = (name, v) :: env
188let lookup env name =
189 match List.assoc_opt name env with
190 | Some v -> v
191 | None -> failwith ("unbound: " ^ name)
192
193let rec eval env = function
194 | Lit n -> Int n
195 | Add (a, b) ->
196 let (Int x) = eval env a in
197 let (Int y) = eval env b in
198 Int (x + y)
199 | Let (name, rhs, body) ->
200 let v = eval env rhs in
201 eval (extend env name v) body
202 | Var name -> lookup env name
203
204type token =
205 | TNum of int | TIdent of string
206 | TPlus | TEqual
207 | TLParen | TRParen
208 | TLet | TIn
209
210let is_alpha c =
211 (c >= 'a' && c <= 'z')
212 || (c >= 'A' && c <= 'Z') || c = '_'
213let is_digit c = c >= '0' && c <= '9'
214
215let tokenize input =
216 let len = String.length input in
217 let pos = ref 0 in
218 let tokens = ref [] in
219 while !pos < len do
220 let c = input.[!pos] in
221 if c = ' ' || c = '\t' || c = '\n' then
222 incr pos
223 else if is_digit c then begin
224 let start = !pos in
225 while !pos < len && is_digit input.[!pos]
226 do incr pos done;
227 let s = String.sub input start
228 (!pos - start) in
229 tokens := TNum (int_of_string s) :: !tokens
230 end else if is_alpha c then begin
231 let start = !pos in
232 while !pos < len && is_alpha input.[!pos]
233 do incr pos done;
234 let s = String.sub input start
235 (!pos - start) in
236 let tok = match s with
237 | "let" -> TLet | "in" -> TIn
238 | _ -> TIdent s in
239 tokens := tok :: !tokens
240 end else begin
241 let tok = match c with
242 | '+' -> TPlus | '=' -> TEqual
243 | '(' -> TLParen | ')' -> TRParen
244 | _ -> failwith "unexpected char" in
245 tokens := tok :: !tokens; incr pos
246 end
247 done;
248 List.rev !tokens
249
250let parse tokens =
251 let toks = ref tokens in
252 let next () =
253 match !toks with
254 | [] -> failwith "unexpected end"
255 | t :: rest -> toks := rest; t in
256 let peek () =
257 match !toks with [] -> None | t :: _ -> Some t in
258 let rec parse_expr () =
259 let left = parse_atom () in
260 parse_add left
261 and parse_add left =
262 match peek () with
263 | Some TPlus ->
264 ignore (next ());
265 let right = parse_atom () in
266 parse_add (Add (left, right))
267 | _ -> left
268 and parse_atom () =
269 match next () with
270 | TNum n -> Lit n
271 | TIdent s -> Var s
272 | TLParen ->
273 let e = parse_expr () in
274 ignore (next ()); e
275 | TLet ->
276 let (TIdent name) = next () in
277 ignore (next ());
278 let rhs = parse_expr () in
279 ignore (next ());
280 let body = parse_expr () in
281 Let (name, rhs, body)
282 | _ -> failwith "unexpected token" in
283 parse_expr ()
284 ]}
285}
286{li
287 {b The Read-Eval-Print Loop}
288
289 Now we connect all the pieces. The REPL reads a line,
290 tokenizes it, parses the tokens, evaluates the expression,
291 and prints the result. A persistent environment accumulates
292 bindings across interactions.
293
294 {[
295type expr =
296 | Lit of int
297 | Add of expr * expr
298 | Let of string * expr * expr
299 | Var of string
300
301type value = Int of int
302type env = (string * value) list
303let empty_env : env = []
304let extend env name v = (name, v) :: env
305let lookup env name =
306 match List.assoc_opt name env with
307 | Some v -> v
308 | None -> failwith ("unbound: " ^ name)
309
310let rec eval env = function
311 | Lit n -> Int n
312 | Add (a, b) ->
313 let (Int x) = eval env a in
314 let (Int y) = eval env b in
315 Int (x + y)
316 | Let (name, rhs, body) ->
317 let v = eval env rhs in
318 eval (extend env name v) body
319 | Var name -> lookup env name
320
321type token =
322 | TNum of int | TIdent of string
323 | TPlus | TEqual
324 | TLParen | TRParen
325 | TLet | TIn
326
327let is_alpha c =
328 (c >= 'a' && c <= 'z')
329 || (c >= 'A' && c <= 'Z') || c = '_'
330let is_digit c = c >= '0' && c <= '9'
331
332let tokenize input =
333 let len = String.length input in
334 let pos = ref 0 in
335 let tokens = ref [] in
336 while !pos < len do
337 let c = input.[!pos] in
338 if c = ' ' || c = '\t' || c = '\n' then
339 incr pos
340 else if is_digit c then begin
341 let start = !pos in
342 while !pos < len && is_digit input.[!pos]
343 do incr pos done;
344 tokens := TNum (int_of_string
345 (String.sub input start
346 (!pos - start))) :: !tokens
347 end else if is_alpha c then begin
348 let start = !pos in
349 while !pos < len && is_alpha input.[!pos]
350 do incr pos done;
351 let s = String.sub input start
352 (!pos - start) in
353 tokens := (match s with
354 | "let" -> TLet | "in" -> TIn
355 | _ -> TIdent s) :: !tokens
356 end else begin
357 tokens := (match c with
358 | '+' -> TPlus | '=' -> TEqual
359 | '(' -> TLParen | ')' -> TRParen
360 | _ -> failwith "unexpected") :: !tokens;
361 incr pos
362 end
363 done; List.rev !tokens
364
365let parse tokens =
366 let toks = ref tokens in
367 let next () = match !toks with
368 | [] -> failwith "end"
369 | t :: r -> toks := r; t in
370 let peek () = match !toks with
371 | [] -> None | t :: _ -> Some t in
372 let rec expr () =
373 let l = atom () in add l
374 and add left = match peek () with
375 | Some TPlus ->
376 ignore (next ());
377 add (Add (left, atom ()))
378 | _ -> left
379 and atom () = match next () with
380 | TNum n -> Lit n
381 | TIdent s -> Var s
382 | TLParen ->
383 let e = expr () in
384 ignore (next ()); e
385 | TLet ->
386 let (TIdent name) = next () in
387 ignore (next ());
388 let rhs = expr () in
389 ignore (next ());
390 Let (name, rhs, expr ())
391 | _ -> failwith "unexpected" in
392 expr ()
393
394let print_value = function
395 | Int n -> Printf.printf "=> %d\n" n
396
397let repl () =
398 let env = ref empty_env in
399 try while true do
400 print_string "> ";
401 let line = input_line stdin in
402 let tokens = tokenize line in
403 let ast = parse tokens in
404 let result = eval !env ast in
405 print_value result
406 done with End_of_file ->
407 print_endline "Goodbye."
408
409let () = repl ()
410 ]}
411}
412}