My working unpac repository
at opam/upstream/seq 1060 lines 39 kB view raw
1(**************************************************************************) 2(* *) 3(* OCaml *) 4(* *) 5(* Frédéric Bour *) 6(* Gabriel Scherer, projet Partout, INRIA Saclay *) 7(* Basile Clément, projet Cambium, INRIA Paris *) 8(* *) 9(* Copyright 2020 Institut National de Recherche en Informatique et *) 10(* en Automatique. *) 11(* *) 12(* All rights reserved. This file is distributed under the terms of *) 13(* the GNU Lesser General Public License version 2.1, with the *) 14(* special exception on linking described in the file LICENSE. *) 15(* *) 16(**************************************************************************) 17 18open Lambda 19 20(* Error-reporting information for ambiguous TMC calls *) 21type tmc_call_information = { 22 loc: scoped_location; 23 explicit: bool; 24} 25type subterm_information = { 26 tmc_calls: tmc_call_information list; 27} 28type ambiguous_arguments = { 29 explicit: bool; 30 (** When [explicit = true], we have an ambiguity between 31 arguments containing calls that have been explicitly 32 marked [@tailcall]. Otherwise we have an ambiguity 33 between un-annotated calls. *) 34 arguments: subterm_information list; 35} 36 37type error = 38 | Ambiguous_constructor_arguments of ambiguous_arguments 39 40exception Error of Location.t * error 41 42 43type 'offset destination = { 44 var: Ident.t; 45 offset: 'offset; 46 loc : Debuginfo.Scoped_location.t; 47} 48and offset = Offset of lambda 49(** In the OCaml value model, interior pointers are not allowed. To 50 represent the "placeholder to mutate" in DPS code, we thus use a pair 51 of the block containing the placeholder, and the offset of the 52 placeholder within the block. 53 54 In the common case, this offset is an arbitrary lambda expression, typically 55 a constant integer or a variable. We define ['a destination] as parametrized 56 over the offset type to represent formal destination parameters (where 57 the offset is an Ident.t), and maybe in the future statically-known 58 offsets (where the offset is an integer). 59*) 60 61let offset_code (Offset t) = t 62 63let add_dst_params ({var; offset} : Ident.t destination) params = 64 (var, Pgenval) :: (offset, Pintval) :: params 65 66let add_dst_args ({var; offset} : offset destination) args = 67 Lvar var :: offset_code offset :: args 68 69let assign_to_dst {var; offset; loc} lam = 70 Lprim(Psetfield_computed(Pointer, Heap_initialization), 71 [Lvar var; offset_code offset; lam], loc) 72 73module Constr : sig 74 (** The type [Constr.t] represents a reified constructor with 75 a single hole, which can be either directly applied to a [lambda] 76 term, or be used to create a fresh [lambda destination] with 77 a placeholder. *) 78 type t = { 79 tag : int; 80 flag: Asttypes.mutable_flag; 81 shape : block_shape; 82 before: lambda list; 83 after: lambda list; 84 loc : Debuginfo.Scoped_location.t; 85 } 86 87 (** [apply constr e] plugs the expression [e] in the hole of the 88 constructor [const]. *) 89 val apply : t -> lambda -> lambda 90 91 (** [with_placeholder constr body] binds a placeholder 92 for the constructor [constr] within the scope of [body]. *) 93 val with_placeholder : t -> (offset destination -> lambda) -> lambda 94 95 (** We may want to delay the application of a constructor to a later 96 time. This may move the constructor application below some 97 effectful expressions (for example if we move into a context of 98 the form [foo; bar_with_tmc_inside]), and we want to preserve 99 the evaluation order of the other arguments of the 100 constructor. So we bind them before proceeding, unless they are 101 obviously side-effect free. 102 103 [delay_impure ~block_id constr body] binds all inpure arguments 104 of the constructor [constr] within the scope of [body], which is 105 passed a pure constructor. 106 107 [block_id] is a counter that is used as a suffix in the generated 108 variable names, for readability purposes. *) 109 val delay_impure : block_id:int -> t -> (t -> lambda) -> lambda 110end = struct 111 type t = { 112 tag : int; 113 flag: Asttypes.mutable_flag; 114 shape : block_shape; 115 before: lambda list; 116 after: lambda list; 117 loc : Debuginfo.Scoped_location.t; 118 } 119 120 let apply constr t = 121 let block_args = List.append constr.before @@ t :: constr.after in 122 Lprim (Pmakeblock (constr.tag, constr.flag, constr.shape), 123 block_args, constr.loc) 124 125 let tmc_placeholder = 126 (* we choose a placeholder whose tagged representation will be 127 reconizable. *) 128 Lambda.dummy_constant 129 130 let with_placeholder constr (body : offset destination -> lambda) = 131 let k_with_placeholder = 132 apply { constr with flag = Mutable } tmc_placeholder in 133 let placeholder_pos = List.length constr.before in 134 let placeholder_pos_lam = Lconst (Const_int placeholder_pos) in 135 let block_var = Ident.create_local "block" in 136 Llet (Strict, Pgenval, block_var, k_with_placeholder, 137 body { 138 var = block_var; 139 offset = Offset placeholder_pos_lam ; 140 loc = constr.loc; 141 }) 142 143 let delay_impure : block_id:int -> t -> (t -> lambda) -> lambda = 144 let bind_list ~block_id ~arg_offset lambdas k = 145 let can_be_delayed = 146 (* Note that the delayed subterms will be used 147 exactly once in the linear-static subterm. So 148 we are happy to delay constants, which we would 149 not want to duplicate. *) 150 function 151 | Lvar _ | Lconst _ -> true 152 | _ -> false in 153 let bindings, args = 154 lambdas 155 |> List.mapi (fun i lam -> 156 if can_be_delayed lam then (None, lam) 157 else begin 158 let v = Ident.create_local 159 (Printf.sprintf "block%d_arg%d" block_id (arg_offset + i)) in 160 (Some (v, lam), Lvar v) 161 end) 162 |> List.split in 163 let body = k args in 164 List.fold_right (fun binding body -> 165 match binding with 166 | None -> body 167 | Some (v, lam) -> Llet(Strict, Pgenval, v, lam, body) 168 ) bindings body in 169 fun ~block_id constr body -> 170 bind_list ~block_id ~arg_offset:0 constr.before @@ fun vbefore -> 171 let arg_offset = List.length constr.before + 1 in 172 bind_list ~block_id ~arg_offset constr.after @@ fun vafter -> 173 body { constr with before = vbefore; after = vafter } 174end 175 176(** The type ['a Dps.t] (destination-passing-style) represents a 177 version of ['a] that is parametrized over a [lambda destination]. 178 A [lambda Dps.t] is a code fragment in destination-passing-style, 179 a [(lambda * lambda) Dps.t] represents two subterms parametrized 180 over the same destination. *) 181module Dps : sig 182 type 'a dps = tail:bool -> dst:offset destination -> 'a 183 (** A term parameterized over a destination. The [tail] argument 184 is passed by the caller to indicate whether the term will be placed 185 in tail-position -- this allows to generate correct @tailcall 186 annotations. *) 187 188 type 'a t 189 190 val make : lambda dps -> lambda t 191 val run : lambda t -> lambda dps 192 val delay_constructor : Constr.t -> lambda t -> lambda t 193 194 val lambda : lambda -> lambda t 195 val map : ('a -> 'b) -> 'a t -> 'b t 196 val pair : 'a t -> 'b t -> ('a * 'b) t 197 val unit : unit t 198end = struct 199 type 'a dps = tail:bool -> dst:offset destination -> 'a 200 201 type 'a t = { 202 code : delayed:Constr.t list -> 'a dps; 203 delayed_use_count : int; 204 } 205 (** We want to optimize nested constructors, for example: 206 207 {[ 208 (x () :: y () :: tmc call) 209 ]} 210 211 which would naively generate (in a DPS context parametrized 212 over a location dst.i): 213 214 {[ 215 let dstx = x () :: Placeholder in 216 dst.i <- dstx; 217 let dsty = y () :: Placeholder in 218 dstx.1 <- dsty; 219 tmc dsty.1 call 220 ]} 221 222 when we would rather hope for 223 224 {[ 225 let vx = x () in 226 let dsty = y () :: Placeholder in 227 dst.i <- vx :: dsty; 228 tmc dsty.1 call 229 ]} 230 231 The idea is that the unoptimized version first creates a 232 destination site [dstx], which is then used by the following 233 code. If we keep track of the current destination: 234 235 {[ 236 (* Destination is [dst.i] *) 237 let dstx = x () :: Placeholder in 238 dst.i (* Destination *) <- dstx; 239 (* Destination is [dstx.1] *) 240 let dsty = y () :: Placeholder in 241 dstx.1 (* Destination *) <- dsty; 242 (* Destination is [dsty.1] *) 243 tmc dsty.1 call 244 ]} 245 246 Instead of binding the whole newly-created destination, we can 247 simply let-bind the non-placeholder arguments (in order to 248 preserve execution order), and keep track of a list of blocks to 249 be created along with the current destination. Instead of seeing 250 a DPS fragment as writing to a destination, we see it as a term 251 with shape [dst.i <- C .] where [C .] is a linear context consisting 252 only of constructor applications. 253 254 {[ 255 (* Destination is [dst.i <- C .] *) 256 let vx = x () in 257 (* Destination is [dst.i <- C (vx :: .)] *) 258 let vy = y () in 259 (* Destination is [dst.i <- C (vx :: vy :: .)] *) 260 (* Making a call: reify the destination *) 261 let dsty = vy :: Placeholder in 262 dst.i <- vx :: dsty; 263 tmc dsty.1 call 264 ]} 265 266 The [delayed] argument represents the context [C] as a list of 267 reified constructors, to allow both to build the final holey 268 block ([vy :: Placeholder]) at the recursive call site, and 269 the delayed constructor applications ([vx :: dsty]). 270 271 In practice, it is not desirable to perform this simplification 272 when there are multiple TMC calls (e.g. in different branches of 273 an [if] block), because it would cause duplication of the nested 274 constructor applications. The [delayed_use_count] field keeps track 275 of this information, it counts the number of syntactic use sites 276 of the delayed constructors, if any, in the generated code. 277 *) 278 279 let write_to_dst dst delayed t = 280 assign_to_dst dst @@ 281 List.fold_left (fun t constr -> Constr.apply constr t) t delayed 282 283 let lambda (v : lambda) : lambda t = { 284 code = (fun ~delayed ~tail:_ ~dst -> 285 write_to_dst dst delayed v 286 ); 287 delayed_use_count = 1; 288 } 289 (** Create a new destination-passing-style term which is simply 290 setting the destination with the given [v], hence "returning" 291 it. 292 *) 293 294 let unit : unit t = { 295 code = (fun ~delayed:_ ~tail:_ ~dst:_ -> 296 () 297 ); 298 delayed_use_count = 0; 299 } 300 301 let map (f : 'a -> 'b) (d : 'a t) : 'b t = { 302 code = (fun ~delayed ~tail ~dst -> 303 f @@ d.code ~delayed ~tail ~dst); 304 delayed_use_count = d.delayed_use_count; 305 } 306 307 let pair (da : 'a t) (db : 'b t) : ('a * 'b) t = { 308 code = (fun ~delayed ~tail ~dst -> 309 (da.code ~delayed ~tail ~dst, db.code ~delayed ~tail ~dst)); 310 delayed_use_count = 311 da.delayed_use_count + db.delayed_use_count; 312 } 313 314 let run (d : 'a t) : 'a dps = 315 fun ~tail ~dst -> 316 d.code ~tail ~dst ~delayed:[] 317 318 let reify_delay (dps : lambda dps) : lambda t = { 319 code = (fun ~delayed ~tail ~dst -> 320 match delayed with 321 | [] -> dps ~tail ~dst 322 | x :: xs -> 323 Constr.with_placeholder x @@ fun new_dst -> 324 Lsequence ( 325 write_to_dst dst xs (Lvar new_dst.var), 326 dps ~tail ~dst:new_dst) 327 ); 328 delayed_use_count = 1; 329 } 330 331 let ensures_affine (d : lambda t) : lambda t = 332 if d.delayed_use_count <= 1 then 333 d 334 else 335 reify_delay (run d) 336 (** Ensures that the resulting term does not duplicate delayed 337 constructors by reifying them now if needed. 338 *) 339 340 let make (dps : 'a dps) : 'a t = 341 reify_delay dps 342 343 let delay_constructor constr d = 344 let d = ensures_affine d in { 345 code = (fun ~delayed ~tail ~dst -> 346 let block_id = List.length delayed in 347 Constr.delay_impure ~block_id constr @@ fun constr -> 348 d.code ~tail ~dst ~delayed:(constr :: delayed)); 349 delayed_use_count = d.delayed_use_count; 350 } 351end 352 353(** The TMC transformation requires information flows in two opposite 354 directions: the information of which callsites can be rewritten in 355 destination-passing-style flows from the leaves of the code to the 356 root, and the information on whether we remain in tail-position 357 flows from the root to the leaves -- and also the knowledge of 358 which version of the function we currently want to generate, the 359 direct version or a destination-passing-style version. 360 361 To clarify this double flow of information, we split the TMC 362 transform in two steps: 363 364 1. A function [choice t] that takes a term and processes it from 365 leaves to root; it produces a "code choice", a piece of data of 366 type [lambda Choice.t], that contains information on how to transform the 367 input term [t] *parameterized* over the (still missing) contextual 368 information. 369 370 2. Code-production operators that have contextual information 371 to transform a "code choice" into the final code. 372 373 The code-production choices for a single term have type [lambda Choice.t]; 374 using a parametrized type ['a Choice.t] is useful to represent 375 simultaneous choices over several subterms; for example 376 [(lambda * lambda) Choice.t] makes a choice for a pair of terms, 377 for example the [then] and [else] cases of a conditional. With 378 this parameter, ['a Choice.t] has an applicative structure, which 379 is useful to write the actual code transformation in the {!choice} 380 function. 381*) 382module Choice = struct 383 type 'a t = { 384 dps : 'a Dps.t; 385 direct : unit -> 'a; 386 tmc_calls : tmc_call_information list; 387 benefits_from_dps: bool; 388 explicit_tailcall_request: bool; 389 } 390 (** 391 An ['a Choice.t] represents code that may be written 392 in destination-passing style if its usage context allows it. 393 More precisely: 394 395 - If the surrounding context is already in destination-passing 396 style, it has a destination available, we should produce the 397 code in [dps] -- a function parametrized over the destination. 398 399 - If the surrounding context is in direct style (no destination 400 is available), we should produce the fallback code from 401 [direct]. 402 403 (Note: [direct] is also a function (on [unit]) to ensure that any 404 effects performed during code production will only happen once we 405 do know that we want to produce the direct-style code.) 406 407 - [tmc_calls] tracks the function calls in the subterms that are 408 in tail-modulo-cons position and get rewritten into tailcalls 409 in the [dps] version. 410 411 - [benefits_from_dps] is true when the [dps] calls strictly more 412 TMC functions than the [direct] version. See the 413 {!choice_makeblock} case. 414 415 - [explicit_tailcall_request] is true when the user 416 used a [@tailcall] annotation on the optimizable callsite. 417 When one of several calls could be optimized, we expect that 418 exactly one of them will be annotated by the user, or fail 419 because the situation is ambiguous. 420 *) 421 422 let lambda (v : lambda) : lambda t = { 423 dps = Dps.lambda v; 424 direct = (fun () -> v); 425 tmc_calls = []; 426 benefits_from_dps = false; 427 explicit_tailcall_request = false; 428 } 429 430 let map f s = { 431 dps = Dps.map f s.dps; 432 direct = (fun () -> f (s.direct ())); 433 tmc_calls = s.tmc_calls; 434 benefits_from_dps = s.benefits_from_dps; 435 explicit_tailcall_request = s.explicit_tailcall_request; 436 } 437 (** Apply function [f] to the transformed term. *) 438 439 let direct (c : 'a t) : 'a = 440 c.direct () 441 442 let dps (c : lambda t) ~tail ~dst = 443 Dps.run c.dps ~tail ~dst 444 445 let pair ((c1, c2) : 'a t * 'b t) : ('a * 'b) t = { 446 dps = Dps.pair c1.dps c2.dps; 447 direct = (fun () -> (c1.direct (), c2.direct ())); 448 tmc_calls = 449 c1.tmc_calls @ c2.tmc_calls; 450 benefits_from_dps = 451 c1.benefits_from_dps || c2.benefits_from_dps; 452 explicit_tailcall_request = 453 c1.explicit_tailcall_request || c2.explicit_tailcall_request; 454 } 455 456 let unit = { 457 dps = Dps.unit; 458 direct = (fun () -> ()); 459 tmc_calls = []; 460 benefits_from_dps = false; 461 explicit_tailcall_request = false; 462 } 463 (* Remark: we could define [pure v] as [map (fun () -> v) unit], 464 but we prefer to have the code explicit about using [unit], 465 in particular as it ignores the destination argument. *) 466 467 module Syntax = struct 468 let (let+) a f = map f a 469 let (and+) a1 a2 = pair (a1, a2) 470 end 471 open Syntax 472 473 let option (c : 'a t option) : 'a option t = 474 match c with 475 | None -> let+ () = unit in None 476 | Some c -> let+ v = c in Some v 477 478 let rec list (c : 'a t list) : 'a list t = 479 match c with 480 | [] -> let+ () = unit in [] 481 | c :: cs -> 482 let+ v = c 483 and+ vs = list cs 484 in v :: vs 485 486 (** The [find_*] machinery is used to locate a single subterm to 487 optimize among a list of subterms. If there are several possible 488 choices, we require that exactly one of them be annotated with 489 [@tailcall], or we report an ambiguity. *) 490 type 'a tmc_call_search = 491 | No_tmc_call of 'a list 492 | Nonambiguous of 'a zipper 493 | Ambiguous of { explicit: bool; subterms: 'a t list; } 494 495 and 'a zipper = { 496 rev_before : 'a list; 497 choice : 'a t; 498 after: 'a list 499 } 500 501 let find_nonambiguous_tmc_call choices = 502 let has_tmc_calls c = c.tmc_calls <> [] in 503 let is_explicit s = s.explicit_tailcall_request in 504 let nonambiguous ~only_explicit_calls choices = 505 (* here is how we will compute the result once we know that there 506 is an unambiguously-determined tmc call, and whether 507 an explicit request was necessary to disambiguate *) 508 let rec split rev_before : 'a t list -> 'a zipper = function 509 | [] -> assert false (* we know there is at least one choice *) 510 | c :: rest -> 511 if has_tmc_calls c && (not only_explicit_calls || is_explicit c) then 512 { rev_before; choice = c; after = List.map direct rest } 513 else 514 split (direct c :: rev_before) rest 515 in split [] choices 516 in 517 let tmc_call_subterms = 518 List.filter (fun c -> has_tmc_calls c) choices 519 in 520 match tmc_call_subterms with 521 | [] -> 522 No_tmc_call (List.map direct choices) 523 | [ _one ] -> 524 Nonambiguous (nonambiguous ~only_explicit_calls:false choices) 525 | several_subterms -> 526 let explicit_subterms = List.filter is_explicit several_subterms in 527 begin match explicit_subterms with 528 | [] -> 529 Ambiguous { 530 explicit = false; 531 subterms = several_subterms; 532 } 533 | [ _one ] -> 534 Nonambiguous (nonambiguous ~only_explicit_calls:true choices) 535 | several_explicit_subterms -> 536 Ambiguous { 537 explicit = true; 538 subterms = several_explicit_subterms; 539 } 540 end 541end 542 543open Choice.Syntax 544 545type context = { 546 specialized: specialized Ident.Map.t; 547} 548and specialized = { 549 arity: int; 550 dps_id: Ident.t; 551 direct_kind: function_kind; 552} 553 554let llets lk vk bindings body = 555 List.fold_right (fun (var, def) body -> 556 Llet (lk, vk, var, def, body) 557 ) bindings body 558 559let find_candidate = function 560 | Lfunction lfun when lfun.attr.tmc_candidate -> Some lfun 561 | _ -> None 562 563let declare_binding ctx (var, def) = 564 match find_candidate def with 565 | None -> ctx 566 | Some lfun -> 567 let arity = List.length lfun.params in 568 let dps_id = Ident.create_local (Ident.name var ^ "_dps") in 569 let direct_kind = lfun.kind in 570 let cand = { arity; dps_id; direct_kind; } in 571 { specialized = Ident.Map.add var cand ctx.specialized } 572 573let rec choice ctx t = 574 let rec choice ctx ~tail t = 575 match t with 576 | (Lvar _ | Lmutvar _ | Lconst _ | Lfunction _ | Lsend _ 577 | Lassign _ | Lfor _ | Lwhile _) -> 578 let t = traverse ctx t in 579 Choice.lambda t 580 581 (* [choice_prim] handles most primitives, but the important case 582 of construction [Lprim(Pmakeblock(...), ...)] is handled by 583 [choice_makeblock] *) 584 | Lprim (prim, primargs, loc) -> 585 choice_prim ctx ~tail prim primargs loc 586 587 (* [choice_apply] handles applications, in particular tail-calls which 588 generate Set choices at the leaves *) 589 | Lapply apply -> 590 choice_apply ctx ~tail apply 591 (* other cases use the [lift] helper that takes the sub-terms in tail 592 position and the context around them, and generates a choice for 593 the whole term from choices for the tail subterms. *) 594 | Lsequence (l1, l2) -> 595 let l1 = traverse ctx l1 in 596 let+ l2 = choice ctx ~tail l2 in 597 Lsequence (l1, l2) 598 | Lifthenelse (l1, l2, l3) -> 599 let l1 = traverse ctx l1 in 600 let+ (l2, l3) = choice_pair ctx ~tail (l2, l3) in 601 Lifthenelse (l1, l2, l3) 602 | Lmutlet (vk, var, def, body) -> 603 (* mutable bindings are not TMC-specialized *) 604 let def = traverse ctx def in 605 let+ body = choice ctx ~tail body in 606 Lmutlet (vk, var, def, body) 607 | Llet (lk, vk, var, def, body) -> 608 let ctx, bindings = traverse_let ctx var def in 609 let+ body = choice ctx ~tail body in 610 llets lk vk bindings body 611 | Lletrec (bindings, body) -> 612 let ctx, bindings = traverse_letrec ctx bindings in 613 let+ body = choice ctx ~tail body in 614 Lletrec(bindings, body) 615 | Lswitch (l1, sw, loc) -> 616 (* decompose *) 617 let consts_lhs, consts_rhs = List.split sw.sw_consts in 618 let blocks_lhs, blocks_rhs = List.split sw.sw_blocks in 619 (* transform *) 620 let l1 = traverse ctx l1 in 621 let+ consts_rhs = choice_list ctx ~tail consts_rhs 622 and+ blocks_rhs = choice_list ctx ~tail blocks_rhs 623 and+ sw_failaction = choice_option ctx ~tail sw.sw_failaction in 624 (* rebuild *) 625 let sw_consts = List.combine consts_lhs consts_rhs in 626 let sw_blocks = List.combine blocks_lhs blocks_rhs in 627 let sw = { sw with sw_consts; sw_blocks; sw_failaction; } in 628 Lswitch (l1, sw, loc) 629 | Lstringswitch (l1, cases, fail, loc) -> 630 (* decompose *) 631 let cases_lhs, cases_rhs = List.split cases in 632 (* transform *) 633 let l1 = traverse ctx l1 in 634 let+ cases_rhs = choice_list ctx ~tail cases_rhs 635 and+ fail = choice_option ctx ~tail fail in 636 (* rebuild *) 637 let cases = List.combine cases_lhs cases_rhs in 638 Lstringswitch (l1, cases, fail, loc) 639 | Lstaticraise (id, ls) -> 640 let ls = traverse_list ctx ls in 641 Choice.lambda (Lstaticraise (id, ls)) 642 | Ltrywith (l1, id, l2) -> 643 (* in [try l1 with id -> l2], the term [l1] is 644 not in tail-call position (after it returns 645 we need to remove the exception handler) *) 646 let+ l1 = choice ctx ~tail:false l1 647 and+ l2 = choice ctx ~tail l2 in 648 Ltrywith (l1, id, l2) 649 | Lstaticcatch (l1, ids, l2) -> 650 (* In [static-catch l1 with ids -> l2], 651 the term [l1] is in fact in tail-position *) 652 let+ l1 = choice ctx ~tail l1 653 and+ l2 = choice ctx ~tail l2 in 654 Lstaticcatch (l1, ids, l2) 655 | Levent (lam, lev) -> 656 let+ lam = choice ctx ~tail lam in 657 Levent (lam, lev) 658 | Lifused (x, lam) -> 659 let+ lam = choice ctx ~tail lam in 660 Lifused (x, lam) 661 662 and choice_apply ctx ~tail apply = 663 let exception No_tmc in 664 try 665 let explicit_tailcall_request = 666 match apply.ap_tailcall with 667 | Default_tailcall -> false 668 | Tailcall_expectation true -> true 669 | Tailcall_expectation false -> raise No_tmc 670 in 671 match apply.ap_func with 672 | Lvar f -> 673 let specialized = 674 try Ident.Map.find f ctx.specialized 675 with Not_found -> 676 if tail then 677 Location.prerr_warning 678 (Debuginfo.Scoped_location.to_location apply.ap_loc) 679 Warnings.Tmc_breaks_tailcall; 680 raise No_tmc; 681 in 682 let args = 683 (* Support of tupled functions: the [function_kind] of the 684 direct-style function is identical to the one of the 685 input function, which may be Tupled, but the dps 686 function is always Curried. 687 688 [find_exact_application] is in charge of recovering the 689 "real" argument list of a possibly-tupled call. *) 690 let kind, arity = specialized.direct_kind, specialized.arity in 691 match Lambda.find_exact_application kind ~arity apply.ap_args with 692 | None -> raise No_tmc 693 | Some args -> args 694 in 695 let tailcall tail = 696 (* If we are calling a tmc-specializable function in tail 697 context, then both the direct-style and dps-style calls 698 must be tailcalls. *) 699 if tail 700 then Tailcall_expectation true 701 else Default_tailcall 702 in 703 { 704 Choice.dps = Dps.make (fun ~tail ~dst -> 705 Lapply { apply with 706 ap_func = Lvar specialized.dps_id; 707 ap_args = add_dst_args dst args; 708 ap_tailcall = tailcall tail; 709 }); 710 direct = (fun () -> 711 Lapply { apply with ap_tailcall = tailcall tail }); 712 explicit_tailcall_request; 713 tmc_calls = [{ 714 loc = apply.ap_loc; 715 explicit = explicit_tailcall_request; 716 }]; 717 benefits_from_dps = true; 718 } 719 | _nontail -> raise No_tmc 720 with No_tmc -> 721 let apply_no_bailout = 722 (* [@tailcall false] is interpreted as a bailout annotation: "we 723 are (knowingly) leaving the dps calling convention". It only 724 has sense in the DPS version of the generated code, not in 725 direct style. *) 726 let ap_tailcall = 727 match apply.ap_tailcall with 728 | Tailcall_expectation false when tail -> Default_tailcall 729 | other -> other 730 in 731 { apply with ap_tailcall } in 732 { (Choice.lambda (Lapply apply)) with 733 direct = (fun () -> Lapply apply_no_bailout); 734 } 735 736 and choice_makeblock ctx ~tail:_ (tag, flag, shape) blockargs loc = 737 let choices = List.map (choice ctx ~tail:false) blockargs in 738 match Choice.find_nonambiguous_tmc_call choices with 739 | Choice.No_tmc_call args -> 740 Choice.lambda @@ Lprim (Pmakeblock (tag, flag, shape), args, loc) 741 | Choice.Ambiguous { explicit; subterms = ambiguous_subterms } -> 742 (* An ambiguous term should not lead to an error if it not 743 used in TMC position. Consider for example: 744 745 {[ 746 type t = ... | K of t * (t * t) 747 let[@tail_mod_cons] rec map f = function 748 | [...] 749 | K (t, (u, v)) -> K ((map[@tailcall]) f t, (map f u, map f v)) 750 ]} 751 752 Calling [choice_makeblock] on the K constructor, we need to 753 determine whether its two arguments are ambiguous, which is 754 done by calling [choice] on each argument to see if they 755 would be TMC-able and if they are explicitly annotated. 756 757 These calls give the following results: 758 - there is an explicitly-requested tailcall in the first 759 argument 760 - the second argument is a nested pair whose arguments 761 themselves are ambiguous -- with no explicit annotation. 762 763 This determines that the arguments of K are not ambiguous, 764 as only one of them is annotated. But note that the nested 765 pair, in isolation, is ambiguous. This inner ambiguity is 766 innocuous and should not result in an error, as we never 767 use this inner pair in TMC position, only in direct style. 768 769 This example shows that it would be incorrect to fail with 770 an error whenever [choice] finds an ambiguity. Instead we 771 only error when generating the [dps] version of the 772 corresponding code; requesting the [direct] version is 773 accepted and produces the expected direct code. 774 *) 775 let term_choice = 776 let+ args = Choice.list choices in 777 Lprim (Pmakeblock(tag, flag, shape), args, loc) 778 in 779 { term_choice with 780 Choice.dps = Dps.make (fun ~tail:_ ~dst:_ -> 781 let arguments = 782 let info (t : lambda Choice.t) : subterm_information = { 783 tmc_calls = t.tmc_calls; 784 } in 785 { 786 explicit; 787 arguments = List.map info ambiguous_subterms; 788 } 789 in 790 raise (Error (Debuginfo.Scoped_location.to_location loc, 791 Ambiguous_constructor_arguments arguments)) 792 ); 793 } 794 | Choice.Nonambiguous { Choice.rev_before; choice; after } -> 795 let constr = Constr.{ 796 tag; 797 flag; 798 shape; 799 before = List.rev rev_before; 800 after; 801 loc; 802 } in 803 assert (choice.tmc_calls <> []); 804 { 805 Choice.direct = (fun () -> 806 if not choice.benefits_from_dps then 807 Constr.apply constr (Choice.direct choice) 808 else 809 Constr.with_placeholder constr @@ fun new_dst -> 810 Lsequence(Choice.dps choice ~tail:false ~dst:new_dst, 811 Lvar new_dst.var)); 812 benefits_from_dps = 813 (* Whether or not the caller provides a destination, 814 we can always provide a destination to our settable 815 subterm, so the number of TMC sub-calls is identical 816 in the [direct] and [dps] versions. *) 817 false; 818 dps = Dps.delay_constructor constr choice.dps; 819 tmc_calls = 820 choice.tmc_calls; 821 explicit_tailcall_request = 822 choice.explicit_tailcall_request; 823 } 824 825 and choice_prim ctx ~tail prim primargs loc = 826 match prim with 827 (* The important case is the construction case *) 828 | Pmakeblock (tag, flag, shape) -> 829 choice_makeblock ctx ~tail (tag, flag, shape) primargs loc 830 831 (* Some primitives have arguments in tail-position *) 832 | Popaque -> 833 let l1 = match primargs with 834 | [l1] -> l1 835 | _ -> invalid_arg "choice_prim" in 836 let+ l1 = choice ctx ~tail l1 in 837 Lprim (Popaque, [l1], loc) 838 839 (* in common cases we just return *) 840 | Pbytes_to_string | Pbytes_of_string 841 | Pgetglobal _ | Psetglobal _ 842 | Pfield _ | Pfield_computed 843 | Psetfield _ | Psetfield_computed _ 844 | Pfloatfield _ | Psetfloatfield _ 845 | Pccall _ 846 | Praise _ 847 | Pnot 848 | Pnegint | Paddint | Psubint | Pmulint | Pdivint _ | Pmodint _ 849 | Pandint | Porint | Pxorint 850 | Plslint | Plsrint | Pasrint 851 | Pintcomp _ 852 | Poffsetint _ | Poffsetref _ 853 | Pintoffloat | Pfloatofint 854 | Pnegfloat | Pabsfloat 855 | Paddfloat | Psubfloat | Pmulfloat | Pdivfloat 856 | Pfloatcomp _ 857 | Pstringlength | Pstringrefu | Pstringrefs 858 | Pbyteslength | Pbytesrefu | Pbytessetu | Pbytesrefs | Pbytessets 859 | Parraylength _ | Parrayrefu _ | Parraysetu _ | Parrayrefs _ | Parraysets _ 860 | Pisint | Pisout 861 | Pignore 862 | Pcompare_ints | Pcompare_floats | Pcompare_bints _ 863 864 (* we don't handle effect or DLS primitives *) 865 | Prunstack | Pperform | Presume | Preperform | Pdls_get 866 867 (* we don't handle atomic primitives *) 868 | Patomic_load 869 870 (* we don't handle array indices as destinations yet *) 871 | (Pmakearray _ | Pduparray _) 872 873 (* we don't handle { foo with x = ...; y = recursive-call } *) 874 | Pduprecord _ 875 876 (* operations returning boxed values could be considered 877 constructions someday *) 878 | Pbintofint _ | Pintofbint _ 879 | Pcvtbint _ 880 | Pnegbint _ 881 | Paddbint _ | Psubbint _ | Pmulbint _ | Pdivbint _ | Pmodbint _ 882 | Pandbint _ | Porbint _ | Pxorbint _ | Plslbint _ | Plsrbint _ | Pasrbint _ 883 | Pbintcomp _ 884 885 (* Lazy blocks should never contain a recursive call directly: 886 either it's a closure (Lazy_tag), or a variable (Forward_tag). 887 The case 'let foo = recursive_call in lazy foo' could be translated to 888 use tmc in the cases where 'foo' might be of type lazy or float, but 889 given the fragility of such a transformation we choose not to. *) 890 | Pmakelazyblock _ 891 892 (* more common cases... *) 893 | Pbigarrayref _ | Pbigarrayset _ 894 | Pbigarraydim _ 895 | Pstring_load_16 _ | Pstring_load_32 _ | Pstring_load_64 _ 896 | Pbytes_load_16 _ | Pbytes_load_32 _ | Pbytes_load_64 _ 897 | Pbytes_set_16 _ | Pbytes_set_32 _ | Pbytes_set_64 _ 898 | Pbigstring_load_16 _ | Pbigstring_load_32 _ | Pbigstring_load_64 _ 899 | Pbigstring_set_16 _ | Pbigstring_set_32 _ | Pbigstring_set_64 _ 900 | Pctconst _ 901 | Pbswap16 902 | Pbbswap _ 903 | Pint_as_pointer 904 | Psequand | Psequor 905 | Ppoll 906 -> 907 let primargs = traverse_list ctx primargs in 908 Choice.lambda (Lprim (prim, primargs, loc)) 909 910 and choice_list ctx ~tail terms = 911 Choice.list (List.map (choice ctx ~tail) terms) 912 and choice_pair ctx ~tail (t1, t2) = 913 Choice.pair (choice ctx ~tail t1, choice ctx ~tail t2) 914 and choice_option ctx ~tail t = 915 Choice.option (Option.map (choice ctx ~tail) t) 916 917 in choice ctx t 918 919and traverse ctx = function 920 | Llet (lk, vk, var, def, body) -> 921 let ctx, bindings = traverse_let ctx var def in 922 let body = traverse ctx body in 923 llets lk vk bindings body 924 | Lletrec (bindings, body) -> 925 let ctx, bindings = traverse_letrec ctx bindings in 926 Lletrec (bindings, traverse ctx body) 927 | lam -> 928 shallow_map (traverse ctx) lam 929 930and traverse_lfunction ctx lfun = 931 map_lfunction (traverse ctx) lfun 932 933and traverse_let outer_ctx var def = 934 let inner_ctx = declare_binding outer_ctx (var, def) in 935 let bindings = 936 traverse_let_binding outer_ctx inner_ctx var def 937 in 938 inner_ctx, bindings 939 940and traverse_letrec ctx bindings = 941 let ctx = 942 List.fold_left (fun ctx { id; def } -> 943 declare_binding ctx (id, Lfunction def) 944 ) ctx bindings 945 in 946 let bindings = 947 List.concat_map (traverse_letrec_binding ctx) bindings 948 in 949 ctx, bindings 950 951and traverse_let_binding outer_ctx inner_ctx var def = 952 match find_candidate def with 953 | None -> [ var, traverse outer_ctx def ] 954 | Some lfun -> 955 let functions = make_dps_variant var inner_ctx outer_ctx lfun in 956 List.map (fun (var, lfun) -> var, Lfunction lfun) functions 957 958and traverse_letrec_binding ctx { id; def } = 959 if def.attr.tmc_candidate 960 then 961 let functions = make_dps_variant id ctx ctx def in 962 List.map (fun (id, def) -> { id; def }) functions 963 else 964 [ { id; def = traverse_lfunction ctx def } ] 965 966and make_dps_variant var inner_ctx outer_ctx (lfun : lfunction) = 967 let special = Ident.Map.find var inner_ctx.specialized in 968 let fun_choice = choice outer_ctx ~tail:true lfun.body in 969 if fun_choice.Choice.tmc_calls = [] then 970 Location.prerr_warning 971 (Debuginfo.Scoped_location.to_location lfun.loc) 972 Warnings.Unused_tmc_attribute; 973 let direct = 974 let { kind; params; return; body = _; attr; loc } = lfun in 975 let body = Choice.direct fun_choice in 976 lfunction' ~kind ~params ~return ~body ~attr ~loc in 977 let dps = 978 let dst_param = { 979 var = Ident.create_local "dst"; 980 offset = Ident.create_local "offset"; 981 loc = lfun.loc; 982 } in 983 let dst = { dst_param with offset = Offset (Lvar dst_param.offset) } in 984 Lambda.duplicate_function @@ lfunction' 985 ~kind: 986 (* Support of Tupled function: see [choice_apply]. *) 987 Curried 988 ~params:(add_dst_params dst_param lfun.params) 989 ~return:lfun.return 990 ~body:(Choice.dps ~tail:true ~dst:dst fun_choice) 991 ~attr:lfun.attr 992 ~loc:lfun.loc 993 in 994 let dps_var = special.dps_id in 995 [var, direct; dps_var, dps] 996 997and traverse_list ctx terms = 998 List.map (traverse ctx) terms 999 1000let rewrite t = 1001 let ctx = { specialized = Ident.Map.empty } in 1002 traverse ctx t 1003 1004module Style = Misc.Style 1005 1006let () = 1007 Location.register_error_of_exn 1008 (function 1009 | Error (loc, 1010 Ambiguous_constructor_arguments 1011 { explicit = false; arguments }) -> 1012 let print_msg ppf = 1013 Format_doc.fprintf ppf 1014 "%a:@ this@ constructor@ application@ may@ be@ \ 1015 TMC-transformed@ in@ several@ different@ ways.@ \ 1016 Please@ disambiguate@ by@ adding@ an@ explicit@ %a \ 1017 attribute@ to@ the@ call@ that@ should@ be@ made@ \ 1018 tail-recursive,@ or@ a@ %a attribute@ on@ \ 1019 calls@ that@ should@ not@ be@ transformed." 1020 Style.inline_code "[@tail_mod_cons]" 1021 Style.inline_code "[@tailcall]" 1022 Style.inline_code "[@tailcall false]" 1023 in 1024 let submgs = 1025 let sub (info : tmc_call_information) = 1026 let loc = Debuginfo.Scoped_location.to_location info.loc in 1027 Location.msg ~loc "This call could be annotated." in 1028 arguments 1029 |> List.map (fun t -> t.tmc_calls) 1030 |> List.flatten 1031 |> List.map sub 1032 in 1033 Some (Location.errorf ~loc ~sub:submgs "%t" print_msg) 1034 | Error (loc, 1035 Ambiguous_constructor_arguments 1036 { explicit = true; arguments }) -> 1037 let print_msg ppf = 1038 Format_doc.fprintf ppf 1039 "%a:@ this@ constructor@ application@ may@ be@ \ 1040 TMC-transformed@ in@ several@ different@ ways.@ Only@ one@ of@ \ 1041 the@ arguments@ may@ become@ a@ TMC@ call,@ but@ several@ \ 1042 arguments@ contain@ calls@ that@ are@ explicitly@ marked@ as@ \ 1043 tail-recursive.@ Please@ fix@ the@ conflict@ by@ reviewing@ \ 1044 and@ fixing@ the@ conflicting@ annotations." 1045 Style.inline_code "[@tail_mod_cons]" 1046 in 1047 let submgs = 1048 let sub (info : tmc_call_information) = 1049 let loc = Debuginfo.Scoped_location.to_location info.loc in 1050 Location.msg ~loc "This call is explicitly annotated." in 1051 arguments 1052 |> List.map (fun t -> t.tmc_calls) 1053 |> List.flatten 1054 |> List.filter (fun (info: tmc_call_information) -> info.explicit) 1055 |> List.map sub 1056 in 1057 Some (Location.errorf ~loc ~sub:submgs "%t" print_msg) 1058 | _ -> 1059 None 1060 )