My working unpac repository
1(**************************************************************************)
2(* *)
3(* OCaml *)
4(* *)
5(* Frédéric Bour *)
6(* Gabriel Scherer, projet Partout, INRIA Saclay *)
7(* Basile Clément, projet Cambium, INRIA Paris *)
8(* *)
9(* Copyright 2020 Institut National de Recherche en Informatique et *)
10(* en Automatique. *)
11(* *)
12(* All rights reserved. This file is distributed under the terms of *)
13(* the GNU Lesser General Public License version 2.1, with the *)
14(* special exception on linking described in the file LICENSE. *)
15(* *)
16(**************************************************************************)
17
18open Lambda
19
20(* Error-reporting information for ambiguous TMC calls *)
21type tmc_call_information = {
22 loc: scoped_location;
23 explicit: bool;
24}
25type subterm_information = {
26 tmc_calls: tmc_call_information list;
27}
28type ambiguous_arguments = {
29 explicit: bool;
30 (** When [explicit = true], we have an ambiguity between
31 arguments containing calls that have been explicitly
32 marked [@tailcall]. Otherwise we have an ambiguity
33 between un-annotated calls. *)
34 arguments: subterm_information list;
35}
36
37type error =
38 | Ambiguous_constructor_arguments of ambiguous_arguments
39
40exception Error of Location.t * error
41
42
43type 'offset destination = {
44 var: Ident.t;
45 offset: 'offset;
46 loc : Debuginfo.Scoped_location.t;
47}
48and offset = Offset of lambda
49(** In the OCaml value model, interior pointers are not allowed. To
50 represent the "placeholder to mutate" in DPS code, we thus use a pair
51 of the block containing the placeholder, and the offset of the
52 placeholder within the block.
53
54 In the common case, this offset is an arbitrary lambda expression, typically
55 a constant integer or a variable. We define ['a destination] as parametrized
56 over the offset type to represent formal destination parameters (where
57 the offset is an Ident.t), and maybe in the future statically-known
58 offsets (where the offset is an integer).
59*)
60
61let offset_code (Offset t) = t
62
63let add_dst_params ({var; offset} : Ident.t destination) params =
64 (var, Pgenval) :: (offset, Pintval) :: params
65
66let add_dst_args ({var; offset} : offset destination) args =
67 Lvar var :: offset_code offset :: args
68
69let assign_to_dst {var; offset; loc} lam =
70 Lprim(Psetfield_computed(Pointer, Heap_initialization),
71 [Lvar var; offset_code offset; lam], loc)
72
73module Constr : sig
74 (** The type [Constr.t] represents a reified constructor with
75 a single hole, which can be either directly applied to a [lambda]
76 term, or be used to create a fresh [lambda destination] with
77 a placeholder. *)
78 type t = {
79 tag : int;
80 flag: Asttypes.mutable_flag;
81 shape : block_shape;
82 before: lambda list;
83 after: lambda list;
84 loc : Debuginfo.Scoped_location.t;
85 }
86
87 (** [apply constr e] plugs the expression [e] in the hole of the
88 constructor [const]. *)
89 val apply : t -> lambda -> lambda
90
91 (** [with_placeholder constr body] binds a placeholder
92 for the constructor [constr] within the scope of [body]. *)
93 val with_placeholder : t -> (offset destination -> lambda) -> lambda
94
95 (** We may want to delay the application of a constructor to a later
96 time. This may move the constructor application below some
97 effectful expressions (for example if we move into a context of
98 the form [foo; bar_with_tmc_inside]), and we want to preserve
99 the evaluation order of the other arguments of the
100 constructor. So we bind them before proceeding, unless they are
101 obviously side-effect free.
102
103 [delay_impure ~block_id constr body] binds all inpure arguments
104 of the constructor [constr] within the scope of [body], which is
105 passed a pure constructor.
106
107 [block_id] is a counter that is used as a suffix in the generated
108 variable names, for readability purposes. *)
109 val delay_impure : block_id:int -> t -> (t -> lambda) -> lambda
110end = struct
111 type t = {
112 tag : int;
113 flag: Asttypes.mutable_flag;
114 shape : block_shape;
115 before: lambda list;
116 after: lambda list;
117 loc : Debuginfo.Scoped_location.t;
118 }
119
120 let apply constr t =
121 let block_args = List.append constr.before @@ t :: constr.after in
122 Lprim (Pmakeblock (constr.tag, constr.flag, constr.shape),
123 block_args, constr.loc)
124
125 let tmc_placeholder =
126 (* we choose a placeholder whose tagged representation will be
127 reconizable. *)
128 Lambda.dummy_constant
129
130 let with_placeholder constr (body : offset destination -> lambda) =
131 let k_with_placeholder =
132 apply { constr with flag = Mutable } tmc_placeholder in
133 let placeholder_pos = List.length constr.before in
134 let placeholder_pos_lam = Lconst (Const_int placeholder_pos) in
135 let block_var = Ident.create_local "block" in
136 Llet (Strict, Pgenval, block_var, k_with_placeholder,
137 body {
138 var = block_var;
139 offset = Offset placeholder_pos_lam ;
140 loc = constr.loc;
141 })
142
143 let delay_impure : block_id:int -> t -> (t -> lambda) -> lambda =
144 let bind_list ~block_id ~arg_offset lambdas k =
145 let can_be_delayed =
146 (* Note that the delayed subterms will be used
147 exactly once in the linear-static subterm. So
148 we are happy to delay constants, which we would
149 not want to duplicate. *)
150 function
151 | Lvar _ | Lconst _ -> true
152 | _ -> false in
153 let bindings, args =
154 lambdas
155 |> List.mapi (fun i lam ->
156 if can_be_delayed lam then (None, lam)
157 else begin
158 let v = Ident.create_local
159 (Printf.sprintf "block%d_arg%d" block_id (arg_offset + i)) in
160 (Some (v, lam), Lvar v)
161 end)
162 |> List.split in
163 let body = k args in
164 List.fold_right (fun binding body ->
165 match binding with
166 | None -> body
167 | Some (v, lam) -> Llet(Strict, Pgenval, v, lam, body)
168 ) bindings body in
169 fun ~block_id constr body ->
170 bind_list ~block_id ~arg_offset:0 constr.before @@ fun vbefore ->
171 let arg_offset = List.length constr.before + 1 in
172 bind_list ~block_id ~arg_offset constr.after @@ fun vafter ->
173 body { constr with before = vbefore; after = vafter }
174end
175
176(** The type ['a Dps.t] (destination-passing-style) represents a
177 version of ['a] that is parametrized over a [lambda destination].
178 A [lambda Dps.t] is a code fragment in destination-passing-style,
179 a [(lambda * lambda) Dps.t] represents two subterms parametrized
180 over the same destination. *)
181module Dps : sig
182 type 'a dps = tail:bool -> dst:offset destination -> 'a
183 (** A term parameterized over a destination. The [tail] argument
184 is passed by the caller to indicate whether the term will be placed
185 in tail-position -- this allows to generate correct @tailcall
186 annotations. *)
187
188 type 'a t
189
190 val make : lambda dps -> lambda t
191 val run : lambda t -> lambda dps
192 val delay_constructor : Constr.t -> lambda t -> lambda t
193
194 val lambda : lambda -> lambda t
195 val map : ('a -> 'b) -> 'a t -> 'b t
196 val pair : 'a t -> 'b t -> ('a * 'b) t
197 val unit : unit t
198end = struct
199 type 'a dps = tail:bool -> dst:offset destination -> 'a
200
201 type 'a t = {
202 code : delayed:Constr.t list -> 'a dps;
203 delayed_use_count : int;
204 }
205 (** We want to optimize nested constructors, for example:
206
207 {[
208 (x () :: y () :: tmc call)
209 ]}
210
211 which would naively generate (in a DPS context parametrized
212 over a location dst.i):
213
214 {[
215 let dstx = x () :: Placeholder in
216 dst.i <- dstx;
217 let dsty = y () :: Placeholder in
218 dstx.1 <- dsty;
219 tmc dsty.1 call
220 ]}
221
222 when we would rather hope for
223
224 {[
225 let vx = x () in
226 let dsty = y () :: Placeholder in
227 dst.i <- vx :: dsty;
228 tmc dsty.1 call
229 ]}
230
231 The idea is that the unoptimized version first creates a
232 destination site [dstx], which is then used by the following
233 code. If we keep track of the current destination:
234
235 {[
236 (* Destination is [dst.i] *)
237 let dstx = x () :: Placeholder in
238 dst.i (* Destination *) <- dstx;
239 (* Destination is [dstx.1] *)
240 let dsty = y () :: Placeholder in
241 dstx.1 (* Destination *) <- dsty;
242 (* Destination is [dsty.1] *)
243 tmc dsty.1 call
244 ]}
245
246 Instead of binding the whole newly-created destination, we can
247 simply let-bind the non-placeholder arguments (in order to
248 preserve execution order), and keep track of a list of blocks to
249 be created along with the current destination. Instead of seeing
250 a DPS fragment as writing to a destination, we see it as a term
251 with shape [dst.i <- C .] where [C .] is a linear context consisting
252 only of constructor applications.
253
254 {[
255 (* Destination is [dst.i <- C .] *)
256 let vx = x () in
257 (* Destination is [dst.i <- C (vx :: .)] *)
258 let vy = y () in
259 (* Destination is [dst.i <- C (vx :: vy :: .)] *)
260 (* Making a call: reify the destination *)
261 let dsty = vy :: Placeholder in
262 dst.i <- vx :: dsty;
263 tmc dsty.1 call
264 ]}
265
266 The [delayed] argument represents the context [C] as a list of
267 reified constructors, to allow both to build the final holey
268 block ([vy :: Placeholder]) at the recursive call site, and
269 the delayed constructor applications ([vx :: dsty]).
270
271 In practice, it is not desirable to perform this simplification
272 when there are multiple TMC calls (e.g. in different branches of
273 an [if] block), because it would cause duplication of the nested
274 constructor applications. The [delayed_use_count] field keeps track
275 of this information, it counts the number of syntactic use sites
276 of the delayed constructors, if any, in the generated code.
277 *)
278
279 let write_to_dst dst delayed t =
280 assign_to_dst dst @@
281 List.fold_left (fun t constr -> Constr.apply constr t) t delayed
282
283 let lambda (v : lambda) : lambda t = {
284 code = (fun ~delayed ~tail:_ ~dst ->
285 write_to_dst dst delayed v
286 );
287 delayed_use_count = 1;
288 }
289 (** Create a new destination-passing-style term which is simply
290 setting the destination with the given [v], hence "returning"
291 it.
292 *)
293
294 let unit : unit t = {
295 code = (fun ~delayed:_ ~tail:_ ~dst:_ ->
296 ()
297 );
298 delayed_use_count = 0;
299 }
300
301 let map (f : 'a -> 'b) (d : 'a t) : 'b t = {
302 code = (fun ~delayed ~tail ~dst ->
303 f @@ d.code ~delayed ~tail ~dst);
304 delayed_use_count = d.delayed_use_count;
305 }
306
307 let pair (da : 'a t) (db : 'b t) : ('a * 'b) t = {
308 code = (fun ~delayed ~tail ~dst ->
309 (da.code ~delayed ~tail ~dst, db.code ~delayed ~tail ~dst));
310 delayed_use_count =
311 da.delayed_use_count + db.delayed_use_count;
312 }
313
314 let run (d : 'a t) : 'a dps =
315 fun ~tail ~dst ->
316 d.code ~tail ~dst ~delayed:[]
317
318 let reify_delay (dps : lambda dps) : lambda t = {
319 code = (fun ~delayed ~tail ~dst ->
320 match delayed with
321 | [] -> dps ~tail ~dst
322 | x :: xs ->
323 Constr.with_placeholder x @@ fun new_dst ->
324 Lsequence (
325 write_to_dst dst xs (Lvar new_dst.var),
326 dps ~tail ~dst:new_dst)
327 );
328 delayed_use_count = 1;
329 }
330
331 let ensures_affine (d : lambda t) : lambda t =
332 if d.delayed_use_count <= 1 then
333 d
334 else
335 reify_delay (run d)
336 (** Ensures that the resulting term does not duplicate delayed
337 constructors by reifying them now if needed.
338 *)
339
340 let make (dps : 'a dps) : 'a t =
341 reify_delay dps
342
343 let delay_constructor constr d =
344 let d = ensures_affine d in {
345 code = (fun ~delayed ~tail ~dst ->
346 let block_id = List.length delayed in
347 Constr.delay_impure ~block_id constr @@ fun constr ->
348 d.code ~tail ~dst ~delayed:(constr :: delayed));
349 delayed_use_count = d.delayed_use_count;
350 }
351end
352
353(** The TMC transformation requires information flows in two opposite
354 directions: the information of which callsites can be rewritten in
355 destination-passing-style flows from the leaves of the code to the
356 root, and the information on whether we remain in tail-position
357 flows from the root to the leaves -- and also the knowledge of
358 which version of the function we currently want to generate, the
359 direct version or a destination-passing-style version.
360
361 To clarify this double flow of information, we split the TMC
362 transform in two steps:
363
364 1. A function [choice t] that takes a term and processes it from
365 leaves to root; it produces a "code choice", a piece of data of
366 type [lambda Choice.t], that contains information on how to transform the
367 input term [t] *parameterized* over the (still missing) contextual
368 information.
369
370 2. Code-production operators that have contextual information
371 to transform a "code choice" into the final code.
372
373 The code-production choices for a single term have type [lambda Choice.t];
374 using a parametrized type ['a Choice.t] is useful to represent
375 simultaneous choices over several subterms; for example
376 [(lambda * lambda) Choice.t] makes a choice for a pair of terms,
377 for example the [then] and [else] cases of a conditional. With
378 this parameter, ['a Choice.t] has an applicative structure, which
379 is useful to write the actual code transformation in the {!choice}
380 function.
381*)
382module Choice = struct
383 type 'a t = {
384 dps : 'a Dps.t;
385 direct : unit -> 'a;
386 tmc_calls : tmc_call_information list;
387 benefits_from_dps: bool;
388 explicit_tailcall_request: bool;
389 }
390 (**
391 An ['a Choice.t] represents code that may be written
392 in destination-passing style if its usage context allows it.
393 More precisely:
394
395 - If the surrounding context is already in destination-passing
396 style, it has a destination available, we should produce the
397 code in [dps] -- a function parametrized over the destination.
398
399 - If the surrounding context is in direct style (no destination
400 is available), we should produce the fallback code from
401 [direct].
402
403 (Note: [direct] is also a function (on [unit]) to ensure that any
404 effects performed during code production will only happen once we
405 do know that we want to produce the direct-style code.)
406
407 - [tmc_calls] tracks the function calls in the subterms that are
408 in tail-modulo-cons position and get rewritten into tailcalls
409 in the [dps] version.
410
411 - [benefits_from_dps] is true when the [dps] calls strictly more
412 TMC functions than the [direct] version. See the
413 {!choice_makeblock} case.
414
415 - [explicit_tailcall_request] is true when the user
416 used a [@tailcall] annotation on the optimizable callsite.
417 When one of several calls could be optimized, we expect that
418 exactly one of them will be annotated by the user, or fail
419 because the situation is ambiguous.
420 *)
421
422 let lambda (v : lambda) : lambda t = {
423 dps = Dps.lambda v;
424 direct = (fun () -> v);
425 tmc_calls = [];
426 benefits_from_dps = false;
427 explicit_tailcall_request = false;
428 }
429
430 let map f s = {
431 dps = Dps.map f s.dps;
432 direct = (fun () -> f (s.direct ()));
433 tmc_calls = s.tmc_calls;
434 benefits_from_dps = s.benefits_from_dps;
435 explicit_tailcall_request = s.explicit_tailcall_request;
436 }
437 (** Apply function [f] to the transformed term. *)
438
439 let direct (c : 'a t) : 'a =
440 c.direct ()
441
442 let dps (c : lambda t) ~tail ~dst =
443 Dps.run c.dps ~tail ~dst
444
445 let pair ((c1, c2) : 'a t * 'b t) : ('a * 'b) t = {
446 dps = Dps.pair c1.dps c2.dps;
447 direct = (fun () -> (c1.direct (), c2.direct ()));
448 tmc_calls =
449 c1.tmc_calls @ c2.tmc_calls;
450 benefits_from_dps =
451 c1.benefits_from_dps || c2.benefits_from_dps;
452 explicit_tailcall_request =
453 c1.explicit_tailcall_request || c2.explicit_tailcall_request;
454 }
455
456 let unit = {
457 dps = Dps.unit;
458 direct = (fun () -> ());
459 tmc_calls = [];
460 benefits_from_dps = false;
461 explicit_tailcall_request = false;
462 }
463 (* Remark: we could define [pure v] as [map (fun () -> v) unit],
464 but we prefer to have the code explicit about using [unit],
465 in particular as it ignores the destination argument. *)
466
467 module Syntax = struct
468 let (let+) a f = map f a
469 let (and+) a1 a2 = pair (a1, a2)
470 end
471 open Syntax
472
473 let option (c : 'a t option) : 'a option t =
474 match c with
475 | None -> let+ () = unit in None
476 | Some c -> let+ v = c in Some v
477
478 let rec list (c : 'a t list) : 'a list t =
479 match c with
480 | [] -> let+ () = unit in []
481 | c :: cs ->
482 let+ v = c
483 and+ vs = list cs
484 in v :: vs
485
486 (** The [find_*] machinery is used to locate a single subterm to
487 optimize among a list of subterms. If there are several possible
488 choices, we require that exactly one of them be annotated with
489 [@tailcall], or we report an ambiguity. *)
490 type 'a tmc_call_search =
491 | No_tmc_call of 'a list
492 | Nonambiguous of 'a zipper
493 | Ambiguous of { explicit: bool; subterms: 'a t list; }
494
495 and 'a zipper = {
496 rev_before : 'a list;
497 choice : 'a t;
498 after: 'a list
499 }
500
501 let find_nonambiguous_tmc_call choices =
502 let has_tmc_calls c = c.tmc_calls <> [] in
503 let is_explicit s = s.explicit_tailcall_request in
504 let nonambiguous ~only_explicit_calls choices =
505 (* here is how we will compute the result once we know that there
506 is an unambiguously-determined tmc call, and whether
507 an explicit request was necessary to disambiguate *)
508 let rec split rev_before : 'a t list -> 'a zipper = function
509 | [] -> assert false (* we know there is at least one choice *)
510 | c :: rest ->
511 if has_tmc_calls c && (not only_explicit_calls || is_explicit c) then
512 { rev_before; choice = c; after = List.map direct rest }
513 else
514 split (direct c :: rev_before) rest
515 in split [] choices
516 in
517 let tmc_call_subterms =
518 List.filter (fun c -> has_tmc_calls c) choices
519 in
520 match tmc_call_subterms with
521 | [] ->
522 No_tmc_call (List.map direct choices)
523 | [ _one ] ->
524 Nonambiguous (nonambiguous ~only_explicit_calls:false choices)
525 | several_subterms ->
526 let explicit_subterms = List.filter is_explicit several_subterms in
527 begin match explicit_subterms with
528 | [] ->
529 Ambiguous {
530 explicit = false;
531 subterms = several_subterms;
532 }
533 | [ _one ] ->
534 Nonambiguous (nonambiguous ~only_explicit_calls:true choices)
535 | several_explicit_subterms ->
536 Ambiguous {
537 explicit = true;
538 subterms = several_explicit_subterms;
539 }
540 end
541end
542
543open Choice.Syntax
544
545type context = {
546 specialized: specialized Ident.Map.t;
547}
548and specialized = {
549 arity: int;
550 dps_id: Ident.t;
551 direct_kind: function_kind;
552}
553
554let llets lk vk bindings body =
555 List.fold_right (fun (var, def) body ->
556 Llet (lk, vk, var, def, body)
557 ) bindings body
558
559let find_candidate = function
560 | Lfunction lfun when lfun.attr.tmc_candidate -> Some lfun
561 | _ -> None
562
563let declare_binding ctx (var, def) =
564 match find_candidate def with
565 | None -> ctx
566 | Some lfun ->
567 let arity = List.length lfun.params in
568 let dps_id = Ident.create_local (Ident.name var ^ "_dps") in
569 let direct_kind = lfun.kind in
570 let cand = { arity; dps_id; direct_kind; } in
571 { specialized = Ident.Map.add var cand ctx.specialized }
572
573let rec choice ctx t =
574 let rec choice ctx ~tail t =
575 match t with
576 | (Lvar _ | Lmutvar _ | Lconst _ | Lfunction _ | Lsend _
577 | Lassign _ | Lfor _ | Lwhile _) ->
578 let t = traverse ctx t in
579 Choice.lambda t
580
581 (* [choice_prim] handles most primitives, but the important case
582 of construction [Lprim(Pmakeblock(...), ...)] is handled by
583 [choice_makeblock] *)
584 | Lprim (prim, primargs, loc) ->
585 choice_prim ctx ~tail prim primargs loc
586
587 (* [choice_apply] handles applications, in particular tail-calls which
588 generate Set choices at the leaves *)
589 | Lapply apply ->
590 choice_apply ctx ~tail apply
591 (* other cases use the [lift] helper that takes the sub-terms in tail
592 position and the context around them, and generates a choice for
593 the whole term from choices for the tail subterms. *)
594 | Lsequence (l1, l2) ->
595 let l1 = traverse ctx l1 in
596 let+ l2 = choice ctx ~tail l2 in
597 Lsequence (l1, l2)
598 | Lifthenelse (l1, l2, l3) ->
599 let l1 = traverse ctx l1 in
600 let+ (l2, l3) = choice_pair ctx ~tail (l2, l3) in
601 Lifthenelse (l1, l2, l3)
602 | Lmutlet (vk, var, def, body) ->
603 (* mutable bindings are not TMC-specialized *)
604 let def = traverse ctx def in
605 let+ body = choice ctx ~tail body in
606 Lmutlet (vk, var, def, body)
607 | Llet (lk, vk, var, def, body) ->
608 let ctx, bindings = traverse_let ctx var def in
609 let+ body = choice ctx ~tail body in
610 llets lk vk bindings body
611 | Lletrec (bindings, body) ->
612 let ctx, bindings = traverse_letrec ctx bindings in
613 let+ body = choice ctx ~tail body in
614 Lletrec(bindings, body)
615 | Lswitch (l1, sw, loc) ->
616 (* decompose *)
617 let consts_lhs, consts_rhs = List.split sw.sw_consts in
618 let blocks_lhs, blocks_rhs = List.split sw.sw_blocks in
619 (* transform *)
620 let l1 = traverse ctx l1 in
621 let+ consts_rhs = choice_list ctx ~tail consts_rhs
622 and+ blocks_rhs = choice_list ctx ~tail blocks_rhs
623 and+ sw_failaction = choice_option ctx ~tail sw.sw_failaction in
624 (* rebuild *)
625 let sw_consts = List.combine consts_lhs consts_rhs in
626 let sw_blocks = List.combine blocks_lhs blocks_rhs in
627 let sw = { sw with sw_consts; sw_blocks; sw_failaction; } in
628 Lswitch (l1, sw, loc)
629 | Lstringswitch (l1, cases, fail, loc) ->
630 (* decompose *)
631 let cases_lhs, cases_rhs = List.split cases in
632 (* transform *)
633 let l1 = traverse ctx l1 in
634 let+ cases_rhs = choice_list ctx ~tail cases_rhs
635 and+ fail = choice_option ctx ~tail fail in
636 (* rebuild *)
637 let cases = List.combine cases_lhs cases_rhs in
638 Lstringswitch (l1, cases, fail, loc)
639 | Lstaticraise (id, ls) ->
640 let ls = traverse_list ctx ls in
641 Choice.lambda (Lstaticraise (id, ls))
642 | Ltrywith (l1, id, l2) ->
643 (* in [try l1 with id -> l2], the term [l1] is
644 not in tail-call position (after it returns
645 we need to remove the exception handler) *)
646 let+ l1 = choice ctx ~tail:false l1
647 and+ l2 = choice ctx ~tail l2 in
648 Ltrywith (l1, id, l2)
649 | Lstaticcatch (l1, ids, l2) ->
650 (* In [static-catch l1 with ids -> l2],
651 the term [l1] is in fact in tail-position *)
652 let+ l1 = choice ctx ~tail l1
653 and+ l2 = choice ctx ~tail l2 in
654 Lstaticcatch (l1, ids, l2)
655 | Levent (lam, lev) ->
656 let+ lam = choice ctx ~tail lam in
657 Levent (lam, lev)
658 | Lifused (x, lam) ->
659 let+ lam = choice ctx ~tail lam in
660 Lifused (x, lam)
661
662 and choice_apply ctx ~tail apply =
663 let exception No_tmc in
664 try
665 let explicit_tailcall_request =
666 match apply.ap_tailcall with
667 | Default_tailcall -> false
668 | Tailcall_expectation true -> true
669 | Tailcall_expectation false -> raise No_tmc
670 in
671 match apply.ap_func with
672 | Lvar f ->
673 let specialized =
674 try Ident.Map.find f ctx.specialized
675 with Not_found ->
676 if tail then
677 Location.prerr_warning
678 (Debuginfo.Scoped_location.to_location apply.ap_loc)
679 Warnings.Tmc_breaks_tailcall;
680 raise No_tmc;
681 in
682 let args =
683 (* Support of tupled functions: the [function_kind] of the
684 direct-style function is identical to the one of the
685 input function, which may be Tupled, but the dps
686 function is always Curried.
687
688 [find_exact_application] is in charge of recovering the
689 "real" argument list of a possibly-tupled call. *)
690 let kind, arity = specialized.direct_kind, specialized.arity in
691 match Lambda.find_exact_application kind ~arity apply.ap_args with
692 | None -> raise No_tmc
693 | Some args -> args
694 in
695 let tailcall tail =
696 (* If we are calling a tmc-specializable function in tail
697 context, then both the direct-style and dps-style calls
698 must be tailcalls. *)
699 if tail
700 then Tailcall_expectation true
701 else Default_tailcall
702 in
703 {
704 Choice.dps = Dps.make (fun ~tail ~dst ->
705 Lapply { apply with
706 ap_func = Lvar specialized.dps_id;
707 ap_args = add_dst_args dst args;
708 ap_tailcall = tailcall tail;
709 });
710 direct = (fun () ->
711 Lapply { apply with ap_tailcall = tailcall tail });
712 explicit_tailcall_request;
713 tmc_calls = [{
714 loc = apply.ap_loc;
715 explicit = explicit_tailcall_request;
716 }];
717 benefits_from_dps = true;
718 }
719 | _nontail -> raise No_tmc
720 with No_tmc ->
721 let apply_no_bailout =
722 (* [@tailcall false] is interpreted as a bailout annotation: "we
723 are (knowingly) leaving the dps calling convention". It only
724 has sense in the DPS version of the generated code, not in
725 direct style. *)
726 let ap_tailcall =
727 match apply.ap_tailcall with
728 | Tailcall_expectation false when tail -> Default_tailcall
729 | other -> other
730 in
731 { apply with ap_tailcall } in
732 { (Choice.lambda (Lapply apply)) with
733 direct = (fun () -> Lapply apply_no_bailout);
734 }
735
736 and choice_makeblock ctx ~tail:_ (tag, flag, shape) blockargs loc =
737 let choices = List.map (choice ctx ~tail:false) blockargs in
738 match Choice.find_nonambiguous_tmc_call choices with
739 | Choice.No_tmc_call args ->
740 Choice.lambda @@ Lprim (Pmakeblock (tag, flag, shape), args, loc)
741 | Choice.Ambiguous { explicit; subterms = ambiguous_subterms } ->
742 (* An ambiguous term should not lead to an error if it not
743 used in TMC position. Consider for example:
744
745 {[
746 type t = ... | K of t * (t * t)
747 let[@tail_mod_cons] rec map f = function
748 | [...]
749 | K (t, (u, v)) -> K ((map[@tailcall]) f t, (map f u, map f v))
750 ]}
751
752 Calling [choice_makeblock] on the K constructor, we need to
753 determine whether its two arguments are ambiguous, which is
754 done by calling [choice] on each argument to see if they
755 would be TMC-able and if they are explicitly annotated.
756
757 These calls give the following results:
758 - there is an explicitly-requested tailcall in the first
759 argument
760 - the second argument is a nested pair whose arguments
761 themselves are ambiguous -- with no explicit annotation.
762
763 This determines that the arguments of K are not ambiguous,
764 as only one of them is annotated. But note that the nested
765 pair, in isolation, is ambiguous. This inner ambiguity is
766 innocuous and should not result in an error, as we never
767 use this inner pair in TMC position, only in direct style.
768
769 This example shows that it would be incorrect to fail with
770 an error whenever [choice] finds an ambiguity. Instead we
771 only error when generating the [dps] version of the
772 corresponding code; requesting the [direct] version is
773 accepted and produces the expected direct code.
774 *)
775 let term_choice =
776 let+ args = Choice.list choices in
777 Lprim (Pmakeblock(tag, flag, shape), args, loc)
778 in
779 { term_choice with
780 Choice.dps = Dps.make (fun ~tail:_ ~dst:_ ->
781 let arguments =
782 let info (t : lambda Choice.t) : subterm_information = {
783 tmc_calls = t.tmc_calls;
784 } in
785 {
786 explicit;
787 arguments = List.map info ambiguous_subterms;
788 }
789 in
790 raise (Error (Debuginfo.Scoped_location.to_location loc,
791 Ambiguous_constructor_arguments arguments))
792 );
793 }
794 | Choice.Nonambiguous { Choice.rev_before; choice; after } ->
795 let constr = Constr.{
796 tag;
797 flag;
798 shape;
799 before = List.rev rev_before;
800 after;
801 loc;
802 } in
803 assert (choice.tmc_calls <> []);
804 {
805 Choice.direct = (fun () ->
806 if not choice.benefits_from_dps then
807 Constr.apply constr (Choice.direct choice)
808 else
809 Constr.with_placeholder constr @@ fun new_dst ->
810 Lsequence(Choice.dps choice ~tail:false ~dst:new_dst,
811 Lvar new_dst.var));
812 benefits_from_dps =
813 (* Whether or not the caller provides a destination,
814 we can always provide a destination to our settable
815 subterm, so the number of TMC sub-calls is identical
816 in the [direct] and [dps] versions. *)
817 false;
818 dps = Dps.delay_constructor constr choice.dps;
819 tmc_calls =
820 choice.tmc_calls;
821 explicit_tailcall_request =
822 choice.explicit_tailcall_request;
823 }
824
825 and choice_prim ctx ~tail prim primargs loc =
826 match prim with
827 (* The important case is the construction case *)
828 | Pmakeblock (tag, flag, shape) ->
829 choice_makeblock ctx ~tail (tag, flag, shape) primargs loc
830
831 (* Some primitives have arguments in tail-position *)
832 | Popaque ->
833 let l1 = match primargs with
834 | [l1] -> l1
835 | _ -> invalid_arg "choice_prim" in
836 let+ l1 = choice ctx ~tail l1 in
837 Lprim (Popaque, [l1], loc)
838
839 (* in common cases we just return *)
840 | Pbytes_to_string | Pbytes_of_string
841 | Pgetglobal _ | Psetglobal _
842 | Pfield _ | Pfield_computed
843 | Psetfield _ | Psetfield_computed _
844 | Pfloatfield _ | Psetfloatfield _
845 | Pccall _
846 | Praise _
847 | Pnot
848 | Pnegint | Paddint | Psubint | Pmulint | Pdivint _ | Pmodint _
849 | Pandint | Porint | Pxorint
850 | Plslint | Plsrint | Pasrint
851 | Pintcomp _
852 | Poffsetint _ | Poffsetref _
853 | Pintoffloat | Pfloatofint
854 | Pnegfloat | Pabsfloat
855 | Paddfloat | Psubfloat | Pmulfloat | Pdivfloat
856 | Pfloatcomp _
857 | Pstringlength | Pstringrefu | Pstringrefs
858 | Pbyteslength | Pbytesrefu | Pbytessetu | Pbytesrefs | Pbytessets
859 | Parraylength _ | Parrayrefu _ | Parraysetu _ | Parrayrefs _ | Parraysets _
860 | Pisint | Pisout
861 | Pignore
862 | Pcompare_ints | Pcompare_floats | Pcompare_bints _
863
864 (* we don't handle effect or DLS primitives *)
865 | Prunstack | Pperform | Presume | Preperform | Pdls_get
866
867 (* we don't handle atomic primitives *)
868 | Patomic_load
869
870 (* we don't handle array indices as destinations yet *)
871 | (Pmakearray _ | Pduparray _)
872
873 (* we don't handle { foo with x = ...; y = recursive-call } *)
874 | Pduprecord _
875
876 (* operations returning boxed values could be considered
877 constructions someday *)
878 | Pbintofint _ | Pintofbint _
879 | Pcvtbint _
880 | Pnegbint _
881 | Paddbint _ | Psubbint _ | Pmulbint _ | Pdivbint _ | Pmodbint _
882 | Pandbint _ | Porbint _ | Pxorbint _ | Plslbint _ | Plsrbint _ | Pasrbint _
883 | Pbintcomp _
884
885 (* Lazy blocks should never contain a recursive call directly:
886 either it's a closure (Lazy_tag), or a variable (Forward_tag).
887 The case 'let foo = recursive_call in lazy foo' could be translated to
888 use tmc in the cases where 'foo' might be of type lazy or float, but
889 given the fragility of such a transformation we choose not to. *)
890 | Pmakelazyblock _
891
892 (* more common cases... *)
893 | Pbigarrayref _ | Pbigarrayset _
894 | Pbigarraydim _
895 | Pstring_load_16 _ | Pstring_load_32 _ | Pstring_load_64 _
896 | Pbytes_load_16 _ | Pbytes_load_32 _ | Pbytes_load_64 _
897 | Pbytes_set_16 _ | Pbytes_set_32 _ | Pbytes_set_64 _
898 | Pbigstring_load_16 _ | Pbigstring_load_32 _ | Pbigstring_load_64 _
899 | Pbigstring_set_16 _ | Pbigstring_set_32 _ | Pbigstring_set_64 _
900 | Pctconst _
901 | Pbswap16
902 | Pbbswap _
903 | Pint_as_pointer
904 | Psequand | Psequor
905 | Ppoll
906 ->
907 let primargs = traverse_list ctx primargs in
908 Choice.lambda (Lprim (prim, primargs, loc))
909
910 and choice_list ctx ~tail terms =
911 Choice.list (List.map (choice ctx ~tail) terms)
912 and choice_pair ctx ~tail (t1, t2) =
913 Choice.pair (choice ctx ~tail t1, choice ctx ~tail t2)
914 and choice_option ctx ~tail t =
915 Choice.option (Option.map (choice ctx ~tail) t)
916
917 in choice ctx t
918
919and traverse ctx = function
920 | Llet (lk, vk, var, def, body) ->
921 let ctx, bindings = traverse_let ctx var def in
922 let body = traverse ctx body in
923 llets lk vk bindings body
924 | Lletrec (bindings, body) ->
925 let ctx, bindings = traverse_letrec ctx bindings in
926 Lletrec (bindings, traverse ctx body)
927 | lam ->
928 shallow_map (traverse ctx) lam
929
930and traverse_lfunction ctx lfun =
931 map_lfunction (traverse ctx) lfun
932
933and traverse_let outer_ctx var def =
934 let inner_ctx = declare_binding outer_ctx (var, def) in
935 let bindings =
936 traverse_let_binding outer_ctx inner_ctx var def
937 in
938 inner_ctx, bindings
939
940and traverse_letrec ctx bindings =
941 let ctx =
942 List.fold_left (fun ctx { id; def } ->
943 declare_binding ctx (id, Lfunction def)
944 ) ctx bindings
945 in
946 let bindings =
947 List.concat_map (traverse_letrec_binding ctx) bindings
948 in
949 ctx, bindings
950
951and traverse_let_binding outer_ctx inner_ctx var def =
952 match find_candidate def with
953 | None -> [ var, traverse outer_ctx def ]
954 | Some lfun ->
955 let functions = make_dps_variant var inner_ctx outer_ctx lfun in
956 List.map (fun (var, lfun) -> var, Lfunction lfun) functions
957
958and traverse_letrec_binding ctx { id; def } =
959 if def.attr.tmc_candidate
960 then
961 let functions = make_dps_variant id ctx ctx def in
962 List.map (fun (id, def) -> { id; def }) functions
963 else
964 [ { id; def = traverse_lfunction ctx def } ]
965
966and make_dps_variant var inner_ctx outer_ctx (lfun : lfunction) =
967 let special = Ident.Map.find var inner_ctx.specialized in
968 let fun_choice = choice outer_ctx ~tail:true lfun.body in
969 if fun_choice.Choice.tmc_calls = [] then
970 Location.prerr_warning
971 (Debuginfo.Scoped_location.to_location lfun.loc)
972 Warnings.Unused_tmc_attribute;
973 let direct =
974 let { kind; params; return; body = _; attr; loc } = lfun in
975 let body = Choice.direct fun_choice in
976 lfunction' ~kind ~params ~return ~body ~attr ~loc in
977 let dps =
978 let dst_param = {
979 var = Ident.create_local "dst";
980 offset = Ident.create_local "offset";
981 loc = lfun.loc;
982 } in
983 let dst = { dst_param with offset = Offset (Lvar dst_param.offset) } in
984 Lambda.duplicate_function @@ lfunction'
985 ~kind:
986 (* Support of Tupled function: see [choice_apply]. *)
987 Curried
988 ~params:(add_dst_params dst_param lfun.params)
989 ~return:lfun.return
990 ~body:(Choice.dps ~tail:true ~dst:dst fun_choice)
991 ~attr:lfun.attr
992 ~loc:lfun.loc
993 in
994 let dps_var = special.dps_id in
995 [var, direct; dps_var, dps]
996
997and traverse_list ctx terms =
998 List.map (traverse ctx) terms
999
1000let rewrite t =
1001 let ctx = { specialized = Ident.Map.empty } in
1002 traverse ctx t
1003
1004module Style = Misc.Style
1005
1006let () =
1007 Location.register_error_of_exn
1008 (function
1009 | Error (loc,
1010 Ambiguous_constructor_arguments
1011 { explicit = false; arguments }) ->
1012 let print_msg ppf =
1013 Format_doc.fprintf ppf
1014 "%a:@ this@ constructor@ application@ may@ be@ \
1015 TMC-transformed@ in@ several@ different@ ways.@ \
1016 Please@ disambiguate@ by@ adding@ an@ explicit@ %a \
1017 attribute@ to@ the@ call@ that@ should@ be@ made@ \
1018 tail-recursive,@ or@ a@ %a attribute@ on@ \
1019 calls@ that@ should@ not@ be@ transformed."
1020 Style.inline_code "[@tail_mod_cons]"
1021 Style.inline_code "[@tailcall]"
1022 Style.inline_code "[@tailcall false]"
1023 in
1024 let submgs =
1025 let sub (info : tmc_call_information) =
1026 let loc = Debuginfo.Scoped_location.to_location info.loc in
1027 Location.msg ~loc "This call could be annotated." in
1028 arguments
1029 |> List.map (fun t -> t.tmc_calls)
1030 |> List.flatten
1031 |> List.map sub
1032 in
1033 Some (Location.errorf ~loc ~sub:submgs "%t" print_msg)
1034 | Error (loc,
1035 Ambiguous_constructor_arguments
1036 { explicit = true; arguments }) ->
1037 let print_msg ppf =
1038 Format_doc.fprintf ppf
1039 "%a:@ this@ constructor@ application@ may@ be@ \
1040 TMC-transformed@ in@ several@ different@ ways.@ Only@ one@ of@ \
1041 the@ arguments@ may@ become@ a@ TMC@ call,@ but@ several@ \
1042 arguments@ contain@ calls@ that@ are@ explicitly@ marked@ as@ \
1043 tail-recursive.@ Please@ fix@ the@ conflict@ by@ reviewing@ \
1044 and@ fixing@ the@ conflicting@ annotations."
1045 Style.inline_code "[@tail_mod_cons]"
1046 in
1047 let submgs =
1048 let sub (info : tmc_call_information) =
1049 let loc = Debuginfo.Scoped_location.to_location info.loc in
1050 Location.msg ~loc "This call is explicitly annotated." in
1051 arguments
1052 |> List.map (fun t -> t.tmc_calls)
1053 |> List.flatten
1054 |> List.filter (fun (info: tmc_call_information) -> info.explicit)
1055 |> List.map sub
1056 in
1057 Some (Location.errorf ~loc ~sub:submgs "%t" print_msg)
1058 | _ ->
1059 None
1060 )