A fork of mtelver's day10 project
at main2 412 lines 10 kB view raw
1{0 Building a REPL} 2 3@scrolly Building a REPL in OCaml 4{ol 5{li 6 {b The Expression Type} 7 8 A REPL evaluates expressions. We start with a tiny language: 9 integer literals, addition, let-bindings, and variables. 10 Four constructors is all we need. 11 12 {[ 13type expr = 14 | Lit of int 15 | Add of expr * expr 16 | Let of string * expr * expr 17 | Var of string 18 ]} 19} 20{li 21 {b Values and Environments} 22 23 Evaluation produces values. For now, just integers. An 24 environment maps variable names to their values using a 25 simple association list. 26 27 {[ 28type expr = 29 | Lit of int 30 | Add of expr * expr 31 | Let of string * expr * expr 32 | Var of string 33 34type value = Int of int 35 36type env = (string * value) list 37 38let empty_env : env = [] 39 40let extend env name v = (name, v) :: env 41 42let lookup env name = 43 match List.assoc_opt name env with 44 | Some v -> v 45 | None -> failwith ("unbound: " ^ name) 46 ]} 47} 48{li 49 {b The Evaluator} 50 51 Pattern matching makes the evaluator beautifully direct. 52 Each expression form maps to a straightforward computation. 53 Let-bindings extend the environment for the body expression. 54 55 {[ 56type expr = 57 | Lit of int 58 | Add of expr * expr 59 | Let of string * expr * expr 60 | Var of string 61 62type value = Int of int 63 64type env = (string * value) list 65 66let empty_env : env = [] 67 68let extend env name v = (name, v) :: env 69 70let lookup env name = 71 match List.assoc_opt name env with 72 | Some v -> v 73 | None -> failwith ("unbound: " ^ name) 74 75let rec eval env = function 76 | Lit n -> Int n 77 | Add (a, b) -> 78 let (Int x) = eval env a in 79 let (Int y) = eval env b in 80 Int (x + y) 81 | Let (name, rhs, body) -> 82 let v = eval env rhs in 83 eval (extend env name v) body 84 | Var name -> lookup env name 85 ]} 86} 87{li 88 {b A Tiny Tokenizer} 89 90 To read user input, we need a tokenizer. It splits a string 91 into meaningful chunks: numbers, identifiers, operators, and 92 parentheses. Whitespace is consumed but not produced. 93 94 {[ 95type expr = 96 | Lit of int 97 | Add of expr * expr 98 | Let of string * expr * expr 99 | Var of string 100 101type value = Int of int 102type env = (string * value) list 103let empty_env : env = [] 104let extend env name v = (name, v) :: env 105let lookup env name = 106 match List.assoc_opt name env with 107 | Some v -> v 108 | None -> failwith ("unbound: " ^ name) 109 110let rec eval env = function 111 | Lit n -> Int n 112 | Add (a, b) -> 113 let (Int x) = eval env a in 114 let (Int y) = eval env b in 115 Int (x + y) 116 | Let (name, rhs, body) -> 117 let v = eval env rhs in 118 eval (extend env name v) body 119 | Var name -> lookup env name 120 121type token = 122 | TNum of int 123 | TIdent of string 124 | TPlus | TEqual 125 | TLParen | TRParen 126 | TLet | TIn 127 128let is_alpha c = 129 (c >= 'a' && c <= 'z') 130 || (c >= 'A' && c <= 'Z') 131 || c = '_' 132 133let is_digit c = c >= '0' && c <= '9' 134 135let tokenize input = 136 let len = String.length input in 137 let pos = ref 0 in 138 let tokens = ref [] in 139 while !pos < len do 140 let c = input.[!pos] in 141 if c = ' ' || c = '\t' || c = '\n' then 142 incr pos 143 else if is_digit c then begin 144 let start = !pos in 145 while !pos < len && is_digit input.[!pos] do 146 incr pos done; 147 let s = String.sub input start (!pos - start) in 148 tokens := TNum (int_of_string s) :: !tokens 149 end else if is_alpha c then begin 150 let start = !pos in 151 while !pos < len && is_alpha input.[!pos] do 152 incr pos done; 153 let s = String.sub input start (!pos - start) in 154 let tok = match s with 155 | "let" -> TLet | "in" -> TIn 156 | _ -> TIdent s in 157 tokens := tok :: !tokens 158 end else begin 159 let tok = match c with 160 | '+' -> TPlus | '=' -> TEqual 161 | '(' -> TLParen | ')' -> TRParen 162 | _ -> failwith "unexpected char" in 163 tokens := tok :: !tokens; 164 incr pos 165 end 166 done; 167 List.rev !tokens 168 ]} 169} 170{li 171 {b The Parser} 172 173 A recursive descent parser turns tokens into our expression AST. 174 It handles operator precedence naturally: addition is parsed as 175 a left-associative chain of atoms. 176 177 {[ 178type expr = 179 | Lit of int 180 | Add of expr * expr 181 | Let of string * expr * expr 182 | Var of string 183 184type value = Int of int 185type env = (string * value) list 186let empty_env : env = [] 187let extend env name v = (name, v) :: env 188let lookup env name = 189 match List.assoc_opt name env with 190 | Some v -> v 191 | None -> failwith ("unbound: " ^ name) 192 193let rec eval env = function 194 | Lit n -> Int n 195 | Add (a, b) -> 196 let (Int x) = eval env a in 197 let (Int y) = eval env b in 198 Int (x + y) 199 | Let (name, rhs, body) -> 200 let v = eval env rhs in 201 eval (extend env name v) body 202 | Var name -> lookup env name 203 204type token = 205 | TNum of int | TIdent of string 206 | TPlus | TEqual 207 | TLParen | TRParen 208 | TLet | TIn 209 210let is_alpha c = 211 (c >= 'a' && c <= 'z') 212 || (c >= 'A' && c <= 'Z') || c = '_' 213let is_digit c = c >= '0' && c <= '9' 214 215let tokenize input = 216 let len = String.length input in 217 let pos = ref 0 in 218 let tokens = ref [] in 219 while !pos < len do 220 let c = input.[!pos] in 221 if c = ' ' || c = '\t' || c = '\n' then 222 incr pos 223 else if is_digit c then begin 224 let start = !pos in 225 while !pos < len && is_digit input.[!pos] 226 do incr pos done; 227 let s = String.sub input start 228 (!pos - start) in 229 tokens := TNum (int_of_string s) :: !tokens 230 end else if is_alpha c then begin 231 let start = !pos in 232 while !pos < len && is_alpha input.[!pos] 233 do incr pos done; 234 let s = String.sub input start 235 (!pos - start) in 236 let tok = match s with 237 | "let" -> TLet | "in" -> TIn 238 | _ -> TIdent s in 239 tokens := tok :: !tokens 240 end else begin 241 let tok = match c with 242 | '+' -> TPlus | '=' -> TEqual 243 | '(' -> TLParen | ')' -> TRParen 244 | _ -> failwith "unexpected char" in 245 tokens := tok :: !tokens; incr pos 246 end 247 done; 248 List.rev !tokens 249 250let parse tokens = 251 let toks = ref tokens in 252 let next () = 253 match !toks with 254 | [] -> failwith "unexpected end" 255 | t :: rest -> toks := rest; t in 256 let peek () = 257 match !toks with [] -> None | t :: _ -> Some t in 258 let rec parse_expr () = 259 let left = parse_atom () in 260 parse_add left 261 and parse_add left = 262 match peek () with 263 | Some TPlus -> 264 ignore (next ()); 265 let right = parse_atom () in 266 parse_add (Add (left, right)) 267 | _ -> left 268 and parse_atom () = 269 match next () with 270 | TNum n -> Lit n 271 | TIdent s -> Var s 272 | TLParen -> 273 let e = parse_expr () in 274 ignore (next ()); e 275 | TLet -> 276 let (TIdent name) = next () in 277 ignore (next ()); 278 let rhs = parse_expr () in 279 ignore (next ()); 280 let body = parse_expr () in 281 Let (name, rhs, body) 282 | _ -> failwith "unexpected token" in 283 parse_expr () 284 ]} 285} 286{li 287 {b The Read-Eval-Print Loop} 288 289 Now we connect all the pieces. The REPL reads a line, 290 tokenizes it, parses the tokens, evaluates the expression, 291 and prints the result. A persistent environment accumulates 292 bindings across interactions. 293 294 {[ 295type expr = 296 | Lit of int 297 | Add of expr * expr 298 | Let of string * expr * expr 299 | Var of string 300 301type value = Int of int 302type env = (string * value) list 303let empty_env : env = [] 304let extend env name v = (name, v) :: env 305let lookup env name = 306 match List.assoc_opt name env with 307 | Some v -> v 308 | None -> failwith ("unbound: " ^ name) 309 310let rec eval env = function 311 | Lit n -> Int n 312 | Add (a, b) -> 313 let (Int x) = eval env a in 314 let (Int y) = eval env b in 315 Int (x + y) 316 | Let (name, rhs, body) -> 317 let v = eval env rhs in 318 eval (extend env name v) body 319 | Var name -> lookup env name 320 321type token = 322 | TNum of int | TIdent of string 323 | TPlus | TEqual 324 | TLParen | TRParen 325 | TLet | TIn 326 327let is_alpha c = 328 (c >= 'a' && c <= 'z') 329 || (c >= 'A' && c <= 'Z') || c = '_' 330let is_digit c = c >= '0' && c <= '9' 331 332let tokenize input = 333 let len = String.length input in 334 let pos = ref 0 in 335 let tokens = ref [] in 336 while !pos < len do 337 let c = input.[!pos] in 338 if c = ' ' || c = '\t' || c = '\n' then 339 incr pos 340 else if is_digit c then begin 341 let start = !pos in 342 while !pos < len && is_digit input.[!pos] 343 do incr pos done; 344 tokens := TNum (int_of_string 345 (String.sub input start 346 (!pos - start))) :: !tokens 347 end else if is_alpha c then begin 348 let start = !pos in 349 while !pos < len && is_alpha input.[!pos] 350 do incr pos done; 351 let s = String.sub input start 352 (!pos - start) in 353 tokens := (match s with 354 | "let" -> TLet | "in" -> TIn 355 | _ -> TIdent s) :: !tokens 356 end else begin 357 tokens := (match c with 358 | '+' -> TPlus | '=' -> TEqual 359 | '(' -> TLParen | ')' -> TRParen 360 | _ -> failwith "unexpected") :: !tokens; 361 incr pos 362 end 363 done; List.rev !tokens 364 365let parse tokens = 366 let toks = ref tokens in 367 let next () = match !toks with 368 | [] -> failwith "end" 369 | t :: r -> toks := r; t in 370 let peek () = match !toks with 371 | [] -> None | t :: _ -> Some t in 372 let rec expr () = 373 let l = atom () in add l 374 and add left = match peek () with 375 | Some TPlus -> 376 ignore (next ()); 377 add (Add (left, atom ())) 378 | _ -> left 379 and atom () = match next () with 380 | TNum n -> Lit n 381 | TIdent s -> Var s 382 | TLParen -> 383 let e = expr () in 384 ignore (next ()); e 385 | TLet -> 386 let (TIdent name) = next () in 387 ignore (next ()); 388 let rhs = expr () in 389 ignore (next ()); 390 Let (name, rhs, expr ()) 391 | _ -> failwith "unexpected" in 392 expr () 393 394let print_value = function 395 | Int n -> Printf.printf "=> %d\n" n 396 397let repl () = 398 let env = ref empty_env in 399 try while true do 400 print_string "> "; 401 let line = input_line stdin in 402 let tokens = tokenize line in 403 let ast = parse tokens in 404 let result = eval !env ast in 405 print_value result 406 done with End_of_file -> 407 print_endline "Goodbye." 408 409let () = repl () 410 ]} 411} 412}