/*
** Snapshot handling.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/

#include "lj_target.h"

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))

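/* Note: both macros expand to expressions that use the local jit_State *J
** of the enclosing function; they do not take the JIT state as a parameter.
*/
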
/* -- Snapshot buffer allocation ------------------------------------------ */

/* Grow snapshot buffer. */
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  if (need > maxsnap)
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  J->cur.snap = J->snapbuf;
}

/* Grow snapshot map buffer. */
void lj_snap_grow_map_(jit_State *J, MSize need)
{
  if (need < 2*J->sizesnapmap)
    need = 2*J->sizesnapmap;
  else if (need < 64)
    need = 64;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
		    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = need;
}

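/* Callers are expected to go through the lj_snap_grow_buf()/lj_snap_grow_map()
** wrapper macros (presumably defined alongside the declarations of these
** functions in lj_snap.h), which only call into the slow paths above when
** the current buffers are too small.
*/
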
/* -- Snapshot generation ------------------------------------------------- */

/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  MSize n = 0;
  BCReg s;
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
#if LJ_FR2
    if (s == 1) {  /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
      if ((tr & TREF_FRAME))
	map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
      continue;
    }
    if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
      cTValue *base = J->L->base - J->baseslot;
      tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
      ref = tref_ref(tr);
    }
#endif
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = &J->cur.ir[ref];
      if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
	  ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
	/*
	** No need to snapshot unmodified non-inherited slots.
	** But always snapshot the function below a frame in LJ_FR2 mode.
	*/
	if (!(ir->op2 & IRSLOAD_INHERIT) &&
	    (!LJ_FR2 || s == 0 || s+1 == nslots ||
	     !(J->slot[s+1] & (TREF_CONT|TREF_FRAME))))
	  continue;
      }
      /* No need to restore readonly slots and unmodified non-parent slots. */
      if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
	  (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
	sn |= SNAP_NORESTORE;
      if (LJ_SOFTFP32 && irt_isnum(ir->t))
	sn |= SNAP_SOFTFPNUM;
      map[n++] = sn;
    }
  }
  return n;
}

/* Add frame links at the end of the snapshot. */
static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
  GCfunc *fn = frame_func(frame);
  cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
#if LJ_FR2
  uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
  lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot");
  memcpy(map, &pcbase, sizeof(uint64_t));
#else
  MSize f = 0;
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
#endif
  lj_assertJ(!J->pt ||
	     (J->pc >= proto_bc(J->pt) &&
	      J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC");
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKPC(frame_pc(frame));
#endif
      frame = frame_prevl(frame);
    } else if (frame_iscont(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
#endif
      frame = frame_prevd(frame);
    } else {
      lj_assertJ(!frame_isc(frame), "broken frame chain");
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
#endif
      frame = frame_prevd(frame);
      continue;
    }
    if (frame + funcproto(frame_func(frame))->framesize > ftop)
      ftop = frame + funcproto(frame_func(frame))->framesize;
  }
  *topslot = (uint8_t)(ftop - lim);
#if LJ_FR2
  lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def");
  return 2;
#else
  lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size");
  return f;
#endif
}

/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent;
  SnapEntry *p;
  /* Conservative estimate. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
  p = &J->cur.snapmap[nsnapmap];
  nent = snapshot_slots(J, p, nslots);
  snap->nent = (uint8_t)nent;
  nent += snapshot_framelinks(J, p + nent, &snap->topslot);
  snap->mapofs = (uint32_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->mcofs = 0;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
  J->cur.nsnapmap = (uint32_t)(nsnapmap + nent);
}

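/* Layout of one snapshot's part of the snap map, as produced above:
** snap->nent slot entries from snapshot_slots(), immediately followed by
** the frame links from snapshot_framelinks() (a packed 64 bit PC/base word
** for LJ_FR2, otherwise the current PC plus one or two entries per frame).
** snap->mapofs is the offset of the first slot entry in J->cur.snapmap.
*/
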
/* Add or merge a snapshot. */
void lj_snap_add(jit_State *J)
{
  MSize nsnap = J->cur.nsnap;
  MSize nsnapmap = J->cur.nsnapmap;
  /* Merge if no ins. in between or if requested and no guard in between. */
  if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
      (J->mergesnap && !irt_isguard(J->guardemit))) {
    if (nsnap == 1) {  /* But preserve snap #0 PC. */
      emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
      goto nomerge;
    }
    nsnapmap = J->cur.snap[--nsnap].mapofs;
  } else {
  nomerge:
    lj_snap_grow_buf(J, nsnap+1);
    J->cur.nsnap = (uint16_t)(nsnap+1);
  }
  J->mergesnap = 0;
  J->guardemit.irt = 0;
  snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
}

/* -- Snapshot modification ----------------------------------------------- */

#define SNAP_USEDEF_SLOTS	(LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
			 const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

#define USE_SLOT(s)		udf[(s)] &= ~1
#define DEF_SLOT(s)		udf[(s)] *= 3

  /* Scan through following bytecode and check for uses/defs. */
  lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
	     "snapshot PC out of range");
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op);
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      else if (op == BC_UCLO) {
	ptrdiff_t delta = bc_j(ins);
	if (delta < 0) return maxslot;  /* Prevent loop. */
	pc += delta;
	break;
      }
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
	goto handle_jump;
      } else if (bc_isret(op)) {
	BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
	for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
	for (; s < top; s++) USE_SLOT(s);
	for (; s < maxslot; s++) DEF_SLOT(s);
	return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
      if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
      break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_ITERN) {
	BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
		    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
	if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
	s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
	for (; s < top; s++) USE_SLOT(s);
	for (; s < maxslot; s++) DEF_SLOT(s);
	if (op == BC_CALLT || op == BC_CALLMT) {
	  for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
	  return 0;
	}
      } else if (op == BC_VARG) {
	return maxslot;  /* NYI: punt. */
      } else if (op == BC_KNIL) {
	for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
	for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc,
	       "use/def analysis PC out of range");
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* unreachable */
}

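/* How the udf[] marks work (derived from the code above, not an upstream
** comment): every slot starts out as 1, i.e. not yet proven live. USE_SLOT
** clears bit 0 and a slot that reaches 0 stays 0, meaning the value in the
** snapshot is still needed. DEF_SLOT multiplies by 3, which leaves 0 at 0
** but turns 1 into 3, so a use that only happens after a redefinition merely
** clears bit 0 (3 -> 2) and does not mark the snapshot value as live.
** Callers below treat udf[s] == 0 as "keep" and anything else as purgeable.
*/
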
/* Mark slots used by upvalues of child prototypes as used. */
static void snap_useuv(GCproto *pt, uint8_t *udf)
{
  /* This is a coarse check, because it's difficult to correlate the lifetime
  ** of slots and closures. But the number of false positives is quite low.
  ** A false positive may cause a slot not to be purged, which is just
  ** a missed optimization.
  */
  if ((pt->flags & PROTO_CHILD)) {
    ptrdiff_t i, j, n = pt->sizekgc;
    GCRef *kr = mref(pt->k, GCRef) - 1;
    for (i = 0; i < n; i++, kr--) {
      GCobj *o = gcref(*kr);
      if (o->gch.gct == ~LJ_TPROTO) {
	for (j = 0; j < gco2pt(o)->sizeuv; j++) {
	  uint32_t v = proto_uv(gco2pt(o))[j];
	  if ((v & PROTO_UV_LOCAL)) {
	    udf[(v & 0xff)] = 0;
	  }
	}
      }
    }
  }
}

/* Purge dead slots before the next snapshot. */
void lj_snap_purge(jit_State *J)
{
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg s, maxslot = J->maxslot;
  if (bc_op(*J->pc) == BC_FUNCV && maxslot > J->pt->numparams)
    maxslot = J->pt->numparams;
  s = snap_usedef(J, udf, J->pc, maxslot);
  if (s < maxslot) {
    snap_useuv(J->pt, udf);
    for (; s < maxslot; s++)
      if (udf[s] != 0)
	J->base[s] = 0;  /* Purge dead slots. */
  }
}

/* Shrink last snapshot. */
void lj_snap_shrink(jit_State *J)
{
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, m, nlim, nent = snap->nent;
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg baseslot = J->baseslot;
  BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
  if (minslot < maxslot) snap_useuv(J->pt, udf);
  maxslot += baseslot;
  minslot += baseslot;
  snap->nslots = (uint8_t)maxslot;
  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    BCReg s = snap_slot(map[n]);
    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
      map[m++] = map[n];  /* Only copy used slots. */
  }
  snap->nent = (uint8_t)m;
  nlim = J->cur.nsnapmap - snap->mapofs - 1;
  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  J->cur.nsnapmap = (uint32_t)(snap->mapofs + m);  /* Free up space in map. */
}

/* -- Snapshot access ----------------------------------------------------- */

/* Initialize a Bloom Filter with all renamed refs.
** There are very few renames (often none), so the filter has
** very few bits set. This makes it suitable for negative filtering.
*/
static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
{
  BloomFilter rfilt = 0;
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op2 <= lim)
      bloomset(rfilt, ir->op1);
  return rfilt;
}

/* Process matching renames to find the original RegSP. */
static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
{
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op1 == ref && ir->op2 <= lim)
      rs = ir->prev;
  return rs;
}

/* Copy RegSP from parent snapshot to the parent links of the IR. */
IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir)
{
  SnapShot *snap = &T->snap[snapno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  MSize n = 0;
  IRRef ref = 0;
  UNUSED(J);
  for ( ; ; ir++) {
    uint32_t rs;
    if (ir->o == IR_SLOAD) {
      if (!(ir->op2 & IRSLOAD_PARENT)) break;
      for ( ; ; n++) {
	lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1);
	if (snap_slot(map[n]) == ir->op1) {
	  ref = snap_ref(map[n++]);
	  break;
	}
      }
    } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
      ref++;
    } else if (ir->o == IR_PVAL) {
      ref = ir->op1 + REF_BIAS;
    } else {
      break;
    }
    rs = T->ir[ref].prev;
    if (bloomtest(rfilt, ref))
      rs = snap_renameref(T, snapno, ref, rs);
    ir->prev = (uint16_t)rs;
    lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS);
  }
  return ir;
}

/* -- Snapshot replay ----------------------------------------------------- */

/* Replay constant from parent trace. */
static TRef snap_replay_const(jit_State *J, IRIns *ir)
{
  /* Only have to deal with constants that can occur in stack slots. */
  switch ((IROp)ir->o) {
  case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  case IR_KINT: return lj_ir_kint(J, ir->i);
  case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  case IR_KNUM: case IR_KINT64:
    return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
  case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
  case IR_KNULL: return lj_ir_knull(J, irt_type(ir->t));
  default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL;
  }
}

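/* Note that each constant is re-interned into the IR of the trace being
** recorded via the lj_ir_k* constructors above; the side trace never
** references constant slots of the parent trace directly.
*/
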
/* De-duplicate parent reference. */
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
{
  MSize j;
  for (j = 0; j < nmax; j++)
    if (snap_ref(map[j]) == ref)
      return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME);
  return 0;
}

/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
		      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}

/* Check whether a sunk store corresponds to an allocation. Slow path. */
static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
{
  if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
      irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
    IRIns *irk = &T->ir[irs->op1];
    if (irk->o == IR_AREF || irk->o == IR_HREFK)
      irk = &T->ir[irk->op1];
    return (&T->ir[irk->op1] == ira);
  }
  return 0;
}

/* Check whether a sunk store corresponds to an allocation. Fast path. */
static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
{
  if (irs->s != 255)
    return (ira + irs->s == irs);  /* Fast check. */
  return snap_sunk_store2(T, ira, irs);
}

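/* The fast check relies on the sink optimization caching the distance from
** a sunk store back to its allocation in the store's spill slot field 's'
** (an assumption about lj_opt_sink.c, which is not visible here); a value
** of 255 means the distance did not fit and the slow path above must
** re-derive the allocation from the store's key chain.
*/
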
/* Replay snapshot state to setup side trace. */
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
      if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
	tr = 0;
      else
	tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      pass23 = 1;
      lj_assertJ(s != 0, "unused slot 0 in snapshot");
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX;
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    /* Same as TREF_* flags. */
    J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME));
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 2;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	uint8_t m;
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
	pass23 = 1;
	lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
		   ir->o == IR_CNEW || ir->o == IR_CNEWI,
		   "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
	m = lj_ir_mode[ir->o];
	if (irm_op1(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op1);
	if (irm_op2(m) == IRMref) snap_pref(J, T, map, nent, seen, ir->op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
	    snap_pref(J, T, map, nent, seen, (ir+1)->op2);
	} else {
	  IRIns *irs;
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	      if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
		snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
	      else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
		       irs+1 < irlast && (irs+1)->o == IR_HIOP)
		snap_pref(J, T, map, nent, seen, (irs+1)->op2);
	    }
	}
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
	lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
		   "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o);
	J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
	TRef op1, op2;
	uint8_t m;
	if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
	  J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
	  continue;
	}
	op1 = ir->op1;
	m = lj_ir_mode[ir->o];
	if (irm_op1(m) == IRMref) op1 = snap_pref(J, T, map, nent, seen, op1);
	op2 = ir->op2;
	if (irm_op2(m) == IRMref) op2 = snap_pref(J, T, map, nent, seen, op2);
	if (LJ_HASFFI && ir->o == IR_CNEWI) {
	  if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
	    lj_needsplit(J);  /* Emit joining HIOP. */
	    op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
			     snap_pref(J, T, map, nent, seen, (ir+1)->op2));
	  }
	  J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
	} else {
	  IRIns *irs;
	  TRef tr = emitir(ir->ot, op1, op2);
	  J->slot[snap_slot(sn)] = tr;
	  for (irs = ir+1; irs < irlast; irs++)
	    if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	      IRIns *irr = &T->ir[irs->op1];
	      TRef val, key = irr->op2, tmp = tr;
	      if (irr->o != IR_FREF) {
		IRIns *irk = &T->ir[key];
		if (irr->o == IR_HREFK)
		  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
				    irk->op2);
		else
		  key = snap_replay_const(J, irk);
		if (irr->o == IR_HREFK || irr->o == IR_AREF) {
		  IRIns *irf = &T->ir[irr->op1];
		  tmp = emitir(irf->ot, tmp, irf->op2);
		} else if (irr->o == IR_NEWREF) {
		  IRRef allocref = tref_ref(tr);
		  IRRef keyref = tref_ref(key);
		  IRRef newref_ref = J->chain[IR_NEWREF];
		  IRIns *newref = &J->cur.ir[newref_ref];
		  lj_assertJ(irref_isk(keyref),
			     "sunk store for parent IR %04d with bad key %04d",
			     refp - REF_BIAS, keyref - REF_BIAS);
		  if (newref_ref > allocref && newref->op2 == keyref) {
		    lj_assertJ(newref->op1 == allocref,
			       "sunk store for parent IR %04d with bad tab %04d",
			       refp - REF_BIAS, allocref - REF_BIAS);
		    tmp = newref_ref;
		    goto skip_newref;
		  }
		}
	      }
	      tmp = emitir(irr->ot, tmp, key);
	    skip_newref:
	      val = snap_pref(J, T, map, nent, seen, irs->op2);
	      if (val == 0) {
		IRIns *irc = &T->ir[irs->op2];
		lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT,
			   "sunk store for parent IR %04d with bad op %d",
			   refp - REF_BIAS, irc->o);
		val = snap_pref(J, T, map, nent, seen, irc->op1);
		val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
	      } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
			 irs+1 < irlast && (irs+1)->o == IR_HIOP) {
		IRType t = IRT_I64;
		if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
		  t = IRT_NUM;
		lj_needsplit(J);
		if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
		  uint64_t k = (uint32_t)T->ir[irs->op2].i +
			       ((uint64_t)T->ir[(irs+1)->op2].i << 32);
		  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
		} else {
		  val = emitir_raw(IRT(IR_HIOP, t), val,
				   snap_pref(J, T, map, nent, seen, (irs+1)->op2));
		}
		tmp = emitir(IRT(irs->o, t), tmp, val);
		continue;
	      }
	      tmp = emitir(irs->ot, tmp, val);
	    } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
	      emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
	    }
	}
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}

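/* Overview of the replay above: pass 1 re-creates each snapshot slot as a
** replayed constant, an inherited SLOAD or a placeholder for a sunk or
** unused parent ref; pass 2 emits the PVALs that sunk allocations and their
** stores depend on; pass 3 re-emits the sunk allocations and sunk stores
** themselves. Passes 2 and 3 only run if pass 1 found such a ref (pass23).
*/
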
/* -- Snapshot restore ---------------------------------------------------- */

static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
			SnapNo snapno, BloomFilter rfilt,
			IRIns *ir, TValue *o);

/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
			    SnapNo snapno, BloomFilter rfilt,
			    IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    if (ir->o == IR_KPTR) {
      o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir);
    } else {
      lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL),
		 "restore of const from IR %04d with bad op %d",
		 ref - REF_BIAS, ir->o);
      lj_ir_kvalue(J->L, o, ir);
    }
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP32
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
#endif
    } else {
      lj_assertJ(!irt_ispri(t), "PRI ref with spill slot");
      setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
		 "restore from IR %04d has no reg", ref - REF_BIAS);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#elif LJ_64  /* && LJ_SOFTFP */
    } else if (irt_isnum(t)) {
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_is64(t)) {
      /* 64 bit values that already have the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
    } else if (irt_ispri(t)) {
      setpriV(o, irt_toitype(t));
    } else {
      setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
    }
  }
}

/* Restore raw data from the trace exit state. */
static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex,
			     SnapNo snapno, BloomFilter rfilt,
			     IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  UNUSED(J);
  if (irref_isk(ref)) {
    if (ir_isk64(ir)) {
      src = (int32_t *)&ir[1];
    } else if (sz == 8) {
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
	tmp = (uint64_t)(uint32_t)*src;
	src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
	/* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
	lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT,
		   "restore from IR %04d has no reg", ref - REF_BIAS);
	snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4);
	*(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
	return;
      }
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
	src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
	if (sz == 4) {  /* PPC FPRs are always doubles. */
	  *(float *)dst = (float)*(double *)src;
	  return;
	}
#else
	if (LJ_BE && sz == 4) src++;
#endif
      } else
#endif
      {
	src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
	if (LJ_64 && LJ_BE && sz == 4) src++;
      }
    }
  }
  lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8,
	     "restore from IR %04d with bad size %d", ref - REF_BIAS, sz);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}

/* Unsink allocation from the trace exit state. Unsink sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
			SnapNo snapno, BloomFilter rfilt,
			IRIns *ir, TValue *o)
{
  lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP ||
	     ir->o == IR_CNEW || ir->o == IR_CNEWI,
	     "sunk allocation with bad op %d", ir->o);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz;
    CTInfo info = lj_ctype_info(cts, id, &sz);
    GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz);
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
	snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2,
			 LJ_LE ? p+4 : p, 4);
	if (LJ_BE) p += 4;
	sz = 4;
      }
      snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
	if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	  IRIns *iro = &T->ir[T->ir[irs->op1].op2];
	  uint8_t *p = (uint8_t *)cd;
	  CTSize szs;
	  lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o);
	  lj_assertJ(T->ir[irs->op1].o == IR_ADD,
		     "sunk store with bad add op %d", T->ir[irs->op1].o);
	  lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64,
		     "sunk store with bad const offset op %d", iro->o);
	  if (irt_is64(irs->t)) szs = 8;
	  else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
	  else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
	  else szs = 4;
	  if (LJ_64 && iro->o == IR_KINT64)
	    p += (int64_t)ir_k64(iro)->u64;
	  else
	    p += iro->i;
	  lj_assertJ(p >= (uint8_t *)cdataptr(cd) &&
		     p + szs <= (uint8_t *)cdataptr(cd) + sz,
		     "sunk store with offset out of range");
	  if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
	    lj_assertJ(szs == 4, "sunk store with bad size %d", szs);
	    snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2,
			     LJ_LE ? p+4 : p, 4);
	    if (LJ_BE) p += 4;
	  }
	  snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs);
	}
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
				  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
	IRIns *irk = &T->ir[irs->op1];
	TValue tmp, *val;
	lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
		   irs->o == IR_FSTORE,
		   "sunk store with bad op %d", irs->o);
	if (irk->o == IR_FREF) {
	  switch (irk->op2) {
	  case IRFL_TAB_META:
	    if (T->ir[irs->op2].o == IR_KNULL) {
	      setgcrefnull(t->metatable);
	    } else {
	      snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
	      /* NOBARRIER: The table is new (marked white). */
	      setgcref(t->metatable, obj2gco(tabV(&tmp)));
	    }
	    break;
	  case IRFL_TAB_NOMM:
	    /* Negative metamethod cache invalidated by lj_tab_set() below. */
	    break;
	  default:
	    lj_assertJ(0, "sunk store with bad field %d", irk->op2);
	    break;
	  }
	} else {
	  irk = &T->ir[irk->op2];
	  if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
	  lj_ir_kvalue(J->L, &tmp, irk);
	  val = lj_tab_set(J->L, t, &tmp);
	  /* NOBARRIER: The table is new (marked white). */
	  snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
	  if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
	    snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
	    val->u32.hi = tmp.u32.lo;
	  }
	}
      }
  }
}

/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
#if !LJ_FR2 || defined(LUA_USE_ASSERT)
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
#endif
#if !LJ_FR2
  ptrdiff_t ftsz0;
#endif
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(&map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages.
  ** But not for returns or tail calls, since pc+1 may be out-of-range.
  */
  setcframe_pc(L->cframe, bc_isret_or_tail(bc_op(*pc)) ? pc : pc+1);
  setcframe_pc(cframe_raw(cframe_prev(L->cframe)), pc);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1-LJ_FR2;
#if !LJ_FR2
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
#endif
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
	MSize j;
	for (j = 0; j < n; j++)
	  if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
	    copyTV(L, o, &frame[snap_slot(map[j])]);
	    goto dupslot;
	  }
	snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
	continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
	TValue tmp;
	snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
	o->u32.hi = tmp.u32.lo;
#if !LJ_FR2
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
	/* Overwrite tag with frame link. */
	setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
	L->base = o+1;
#endif
      } else if ((sn & SNAP_KEYINDEX)) {
	/* An IRT_INT key index slot is restored as a number. Undo this. */
	o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o)));
	o->u32.hi = LJ_KEYINDEX;
      }
    }
  }
#if LJ_FR2
  L->base += (map[nent+LJ_BE] & 0xff);
#endif
  lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot");

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + snap->nslots;