/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/

#include "lj_ircall.h"
#include "lj_carith.h"
#include "lj_strscan.h"
#include "lj_strfmt.h"
/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any right
**   ins left any
**   ins any any
**
** The masked key is used to look up a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding
** is applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD(cond) is a
**   shortcut for cond ? DROPFOLD : FAILFOLD (i.e. drop if true, otherwise
**   fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
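**
** For illustration only -- a sketch of the shape of a rule, not itself one
** of the rules below (the real version appears later as simplify_intadd_k):
** buildvm collects the LJFOLD() lines into the hash table, and LJFOLDF()
** defines the fold function that runs when the key matches.
**
**   LJFOLD(ADD any KINT)
**   LJFOLDF(example_add_zero)
**   {
**     if (fright->i == 0)
**       return LEFTFOLD;
**     return NEXTFOLD;
**   }
**
** Here LEFTFOLD implements i + 0 ==> i by returning the left operand, and
** NEXTFOLD falls through to less specific rules and finally to CSE.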
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
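**
** (Illustrative pitfall: a rule producing a KNUM result that returned
** lj_ir_knumint(J, 2.0) could hand back a KINT instead, silently changing
** the destination type of the folded instruction.)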
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
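**
** A sketch of the safe pattern (illustration only; compare the rules below
** that use the same barrier, e.g. shortcut_leftleft): bail out with
** NEXTFOLD when the left operand is a PHI, and only then rewrite:
**
**   LJFOLDF(example_fold_across_left)
**   {
**     PHIBARRIER(fleft);
**     fins->op1 = fleft->op1;
**     return RETRYFOLD;
**   }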
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** in the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
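
/* An example of a pair of rules that would violate requirement #3: folding
** x*2 ==> x+x together with a hypothetical reverse rule x+x ==> x*2 would
** make FOLD retry forever. Only one direction may exist; this file keeps
** x*2 ==> x+x (see simplify_intmul_k and simplify_nummuldiv_k below).
*/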

/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)     (&J->cur.ir[(ref)])
#define fins        (&J->fold.ins)
#define fleft       (J->fold.left)
#define fright      (J->fold.right)
#define knumleft    (ir_knum(fleft)->n)
#define knumright   (ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)  (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)  static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)  if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there's at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
    J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))

/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(NEG KNUM FLOAD)
LJFOLD(ABS KNUM FLOAD)
LJFOLDF(kfold_numabsneg)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
#if LJ_TARGET_X86ORX64
  return lj_ir_knum(J, ldexp(knumleft, fright->i));

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(CALLN KNUM any)
LJFOLDF(kfold_fpcall1)
{
  const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
  if (CCI_TYPE(ci) == IRT_NUM) {
    double y = ((double (*)(double))ci->func)(knumleft);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_atan2)
LJFOLDF(kfold_fpcall2)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    double a = ir_knum(IR(fleft->op1))->n;
    double b = ir_knum(IR(fleft->op2))->n;
    double y = ((double (*)(double, double))ci->func)(a, b);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(POW KNUM KNUM)
LJFOLDF(kfold_numpow)
{
  return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD));
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}

/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = (int32_t)(~(uint32_t)k1+1u); break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lj_assertX(0, "bad IR op %d", op); break;
  }
  return k1;
}
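
/* For example: kfold_intop(7, 2, IR_BSHL) == 28, and
** kfold_intop(-8, 1, IR_BSHR) == 0x7ffffffc, since BSHR shifts the
** unsigned bit pattern (logical shift), unlike BSAR.
*/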

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
                                 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

  return INTFOLD(~fleft->i);  /* Constant-fold bitwise NOT of a KINT. */

  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));  /* Constant-fold byte swap of a KINT. */

LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
}

LJFOLDF(kfold_intcomp0)
365 /* -- Constant folding for 64 bit integers -------------------------------- */
367 static uint64_t kfold_int64arith(jit_State
*J
, uint64_t k1
, uint64_t k2
,
373 case IR_ADD
: k1
+= k2
; break;
374 case IR_SUB
: k1
-= k2
; break;
375 case IR_MUL
: k1
*= k2
; break;
376 case IR_BAND
: k1
&= k2
; break;
377 case IR_BOR
: k1
|= k2
; break;
378 case IR_BXOR
: k1
^= k2
; break;
379 case IR_BSHL
: k1
<<= (k2
& 63); break;
380 case IR_BSHR
: k1
>>= (k2
& 63); break;
381 case IR_BSAR
: k1
= (uint64_t)((int64_t)k1
>> (k2
& 63)); break;
382 case IR_BROL
: k1
= lj_rol(k1
, (k2
& 63)); break;
383 case IR_BROR
: k1
= lj_ror(k1
, (k2
& 63)); break;
384 default: lj_assertJ(0, "bad IR op %d", op
); break;
387 UNUSED(k2
); UNUSED(op
);
388 lj_assertJ(0, "FFI IR op without FFI");
393 LJFOLD(ADD KINT64 KINT64
)
394 LJFOLD(SUB KINT64 KINT64
)
395 LJFOLD(MUL KINT64 KINT64
)
396 LJFOLD(BAND KINT64 KINT64
)
397 LJFOLD(BOR KINT64 KINT64
)
398 LJFOLD(BXOR KINT64 KINT64
)
399 LJFOLDF(kfold_int64arith
)
401 return INT64FOLD(kfold_int64arith(J
, ir_k64(fleft
)->u64
,
402 ir_k64(fright
)->u64
, (IROp
)fins
->o
));
405 LJFOLD(DIV KINT64 KINT64
)
406 LJFOLD(MOD KINT64 KINT64
)
407 LJFOLD(POW KINT64 KINT64
)
408 LJFOLDF(kfold_int64arith2
)
411 uint64_t k1
= ir_k64(fleft
)->u64
, k2
= ir_k64(fright
)->u64
;
412 if (irt_isi64(fins
->t
)) {
413 k1
= fins
->o
== IR_DIV
? lj_carith_divi64((int64_t)k1
, (int64_t)k2
) :
414 fins
->o
== IR_MOD
? lj_carith_modi64((int64_t)k1
, (int64_t)k2
) :
415 lj_carith_powi64((int64_t)k1
, (int64_t)k2
);
417 k1
= fins
->o
== IR_DIV
? lj_carith_divu64(k1
, k2
) :
418 fins
->o
== IR_MOD
? lj_carith_modu64(k1
, k2
) :
419 lj_carith_powu64(k1
, k2
);
421 return INT64FOLD(k1
);
423 UNUSED(J
); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD
;
427 LJFOLD(BSHL KINT64 KINT
)
428 LJFOLD(BSHR KINT64 KINT
)
429 LJFOLD(BSAR KINT64 KINT
)
430 LJFOLD(BROL KINT64 KINT
)
431 LJFOLD(BROR KINT64 KINT
)
432 LJFOLDF(kfold_int64shift
)
435 uint64_t k
= ir_k64(fleft
)->u64
;
436 int32_t sh
= (fright
->i
& 63);
437 return INT64FOLD(lj_carith_shift64(k
, sh
, fins
->o
- IR_BSHL
));
439 UNUSED(J
); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD
;
444 LJFOLDF(kfold_bnot64
)
447 return INT64FOLD(~ir_k64(fleft
)->u64
);
449 UNUSED(J
); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD
;
454 LJFOLDF(kfold_bswap64
)
457 return INT64FOLD(lj_bswap64(ir_k64(fleft
)->u64
));
459 UNUSED(J
); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD
;
463 LJFOLD(LT KINT64 KINT64
)
464 LJFOLD(GE KINT64 KINT64
)
465 LJFOLD(LE KINT64 KINT64
)
466 LJFOLD(GT KINT64 KINT64
)
467 LJFOLD(ULT KINT64 KINT64
)
468 LJFOLD(UGE KINT64 KINT64
)
469 LJFOLD(ULE KINT64 KINT64
)
470 LJFOLD(UGT KINT64 KINT64
)
471 LJFOLDF(kfold_int64comp
)
474 uint64_t a
= ir_k64(fleft
)->u64
, b
= ir_k64(fright
)->u64
;
475 switch ((IROp
)fins
->o
) {
476 case IR_LT
: return CONDFOLD((int64_t)a
< (int64_t)b
);
477 case IR_GE
: return CONDFOLD((int64_t)a
>= (int64_t)b
);
478 case IR_LE
: return CONDFOLD((int64_t)a
<= (int64_t)b
);
479 case IR_GT
: return CONDFOLD((int64_t)a
> (int64_t)b
);
480 case IR_ULT
: return CONDFOLD(a
< b
);
481 case IR_UGE
: return CONDFOLD(a
>= b
);
482 case IR_ULE
: return CONDFOLD(a
<= b
);
483 case IR_UGT
: return CONDFOLD(a
> b
);
484 default: lj_assertJ(0, "bad IR op %d", fins
->o
); return FAILFOLD
;
487 UNUSED(J
); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD
;
491 LJFOLD(UGE any KINT64
)
492 LJFOLDF(kfold_int64comp0
)
495 if (ir_k64(fright
)->u64
== 0)
499 UNUSED(J
); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD
;
503 /* -- Constant folding for strings ---------------------------------------- */
505 LJFOLD(SNEW KKPTR KINT
)
506 LJFOLDF(kfold_snew_kptr
)
508 GCstr
*s
= lj_str_new(J
->L
, (const char *)ir_kptr(fleft
), (size_t)fright
->i
);
509 return lj_ir_kstr(J
, s
);
512 LJFOLD(SNEW any KINT
)
513 LJFOLD(XSNEW any KINT
)
514 LJFOLDF(kfold_snew_empty
)
517 return lj_ir_kstr(J
, &J2G(J
)->strempty
);
521 LJFOLD(STRREF KGC KINT
)
522 LJFOLDF(kfold_strref
)
524 GCstr
*str
= ir_kstr(fleft
);
525 lj_assertJ((MSize
)fright
->i
<= str
->len
, "bad string ref");
526 return lj_ir_kkptr(J
, (char *)strdata(str
) + fright
->i
);
529 LJFOLD(STRREF SNEW any
)
530 LJFOLDF(kfold_strref_snew
)
533 if (irref_isk(fins
->op2
) && fright
->i
== 0) {
534 return fleft
->op1
; /* strref(snew(ptr, len), 0) ==> ptr */
536 /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
537 IRIns
*ir
= IR(fleft
->op1
);
538 if (ir
->o
== IR_STRREF
) {
539 IRRef1 str
= ir
->op1
; /* IRIns * is not valid across emitir. */
541 fins
->op2
= emitir(IRTI(IR_ADD
), ir
->op2
, fins
->op2
); /* Clobbers fins! */
543 fins
->ot
= IRT(IR_STRREF
, IRT_PGC
);
550 LJFOLD(CALLN CARG IRCALL_lj_str_cmp
)
551 LJFOLDF(kfold_strcmp
)
553 if (irref_isk(fleft
->op1
) && irref_isk(fleft
->op2
)) {
554 GCstr
*a
= ir_kstr(IR(fleft
->op1
));
555 GCstr
*b
= ir_kstr(IR(fleft
->op2
));
556 return INTFOLD(lj_str_cmp(a
, b
));

/* -- Constant folding and forwarding for buffers ------------------------- */

/*
** Buffer ops perform stores, but their effect is limited to the buffer
** itself. Also, buffer ops are chained: a use of an op implies a use of
** all other ops up the chain. Conversely, if an op is unused, all ops
** up the chain can go unused. This largely eliminates the need to treat
** them as stores.
**
** Alas, treating them as normal (IRM_N) ops doesn't work, because they
** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
** or if FOLD is disabled.
**
** The compromise is to declare them as loads, emit them like stores and
** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
** fragments left over from CSE are eliminated by DCE.
**
** The string buffer methods emit a USE instead of a BUFSTR to keep the
** chain alive.
*/
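
/* For orientation, an illustrative chain (refs are made up): recording
** ("a" .. x .. "c") produces something like
**
**   0010 BUFHDR  sbuf  RESET
**   0011 BUFPUT  0010  "a"
**   0012 BUFPUT  0011  x
**   0013 BUFPUT  0012  "c"
**   0014 BUFSTR  0013  0010
**
** Each op's left operand links to the previous buffer op, and BUFSTR's
** right operand points back at the BUFHDR that starts the chain.
*/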
582 LJFOLD(BUFHDR any any
)
583 LJFOLDF(bufhdr_merge
)
585 return fins
->op2
== IRBUFHDR_WRITE
? CSEFOLD
: EMITFOLD
;
588 LJFOLD(BUFPUT any BUFSTR
)
589 LJFOLDF(bufput_bufstr
)
591 if ((J
->flags
& JIT_F_OPT_FWD
)) {
592 IRRef hdr
= fright
->op2
;
    /* New buffer, no other buffer op in between and same buffer? */
594 if (fleft
->o
== IR_BUFHDR
&& fleft
->op2
== IRBUFHDR_RESET
&&
595 fleft
->prev
== hdr
&&
596 fleft
->op1
== IR(hdr
)->op1
&&
597 !(irt_isphi(fright
->t
) && IR(hdr
)->prev
) &&
598 (!LJ_HASBUFFER
|| J
->chain
[IR_CALLA
] < hdr
)) {
599 IRRef ref
= fins
->op1
;
600 IR(ref
)->op2
= IRBUFHDR_APPEND
; /* Modify BUFHDR. */
601 IR(ref
)->op1
= fright
->op1
;
604 /* Replay puts to global temporary buffer. */
605 if (IR(hdr
)->op2
== IRBUFHDR_RESET
&& !irt_isphi(fright
->t
)) {
606 IRIns
*ir
= IR(fright
->op1
);
607 /* For now only handle single string.reverse .lower .upper .rep. */
608 if (ir
->o
== IR_CALLL
&&
609 ir
->op2
>= IRCALL_lj_buf_putstr_reverse
&&
610 ir
->op2
<= IRCALL_lj_buf_putstr_rep
) {
611 IRIns
*carg1
= IR(ir
->op1
);
612 if (ir
->op2
== IRCALL_lj_buf_putstr_rep
) {
613 IRIns
*carg2
= IR(carg1
->op1
);
614 if (carg2
->op1
== hdr
) {
615 return lj_ir_call(J
, ir
->op2
, fins
->op1
, carg2
->op2
, carg1
->op2
);
617 } else if (carg1
->op1
== hdr
) {
618 return lj_ir_call(J
, ir
->op2
, fins
->op1
, carg1
->op2
);
623 return EMITFOLD
; /* Always emit, CSE later. */
626 LJFOLD(BUFPUT any any
)
629 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
) && fright
->o
== IR_KGC
) {
630 GCstr
*s2
= ir_kstr(fright
);
631 if (s2
->len
== 0) { /* Empty string? */
634 if (fleft
->o
== IR_BUFPUT
&& irref_isk(fleft
->op2
) &&
635 !irt_isphi(fleft
->t
)) { /* Join two constant string puts in a row. */
636 GCstr
*s1
= ir_kstr(IR(fleft
->op2
));
637 IRRef kref
= lj_ir_kstr(J
, lj_buf_cat2str(J
->L
, s1
, s2
));
638 /* lj_ir_kstr() may realloc the IR and invalidates any IRIns *. */
639 IR(fins
->op1
)->op2
= kref
; /* Modify previous BUFPUT. */
644 return EMITFOLD
; /* Always emit, CSE later. */
647 LJFOLD(BUFSTR any any
)
648 LJFOLDF(bufstr_kfold_cse
)
650 lj_assertJ(fleft
->o
== IR_BUFHDR
|| fleft
->o
== IR_BUFPUT
||
651 fleft
->o
== IR_CALLL
,
652 "bad buffer constructor IR op %d", fleft
->o
);
653 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_FOLD
)) {
654 if (fleft
->o
== IR_BUFHDR
) { /* No put operations? */
655 if (fleft
->op2
== IRBUFHDR_RESET
) /* Empty buffer? */
656 return lj_ir_kstr(J
, &J2G(J
)->strempty
);
657 fins
->op1
= fleft
->op1
;
658 fins
->op2
= fleft
->prev
; /* Relies on checks in bufput_append. */
660 } else if (fleft
->o
== IR_BUFPUT
) {
661 IRIns
*irb
= IR(fleft
->op1
);
662 if (irb
->o
== IR_BUFHDR
&& irb
->op2
== IRBUFHDR_RESET
)
663 return fleft
->op2
; /* Shortcut for a single put operation. */
666 /* Try to CSE the whole chain. */
667 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_CSE
)) {
668 IRRef ref
= J
->chain
[IR_BUFSTR
];
670 IRIns
*irs
= IR(ref
), *ira
= fleft
, *irb
= IR(irs
->op1
);
671 while (ira
->o
== irb
->o
&& ira
->op2
== irb
->op2
) {
672 lj_assertJ(ira
->o
== IR_BUFHDR
|| ira
->o
== IR_BUFPUT
||
673 ira
->o
== IR_CALLL
|| ira
->o
== IR_CARG
,
674 "bad buffer constructor IR op %d", ira
->o
);
675 if (ira
->o
== IR_BUFHDR
&& ira
->op2
== IRBUFHDR_RESET
)
676 return ref
; /* CSE succeeded. */
677 if (ira
->o
== IR_CALLL
&& ira
->op2
== IRCALL_lj_buf_puttab
)
685 return EMITFOLD
; /* No CSE possible. */
688 LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse
)
689 LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper
)
690 LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower
)
691 LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted
)
692 LJFOLDF(bufput_kfold_op
)
694 if (irref_isk(fleft
->op2
)) {
695 const CCallInfo
*ci
= &lj_ir_callinfo
[fins
->op2
];
696 SBuf
*sb
= lj_buf_tmp_(J
->L
);
697 sb
= ((SBuf
* (LJ_FASTCALL
*)(SBuf
*, GCstr
*))ci
->func
)(sb
,
698 ir_kstr(IR(fleft
->op2
)));
700 fins
->op1
= fleft
->op1
;
701 fins
->op2
= lj_ir_kstr(J
, lj_buf_tostr(sb
));
704 return EMITFOLD
; /* Always emit, CSE later. */
707 LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep
)
708 LJFOLDF(bufput_kfold_rep
)
710 if (irref_isk(fleft
->op2
)) {
711 IRIns
*irc
= IR(fleft
->op1
);
712 if (irref_isk(irc
->op2
)) {
713 SBuf
*sb
= lj_buf_tmp_(J
->L
);
714 sb
= lj_buf_putstr_rep(sb
, ir_kstr(IR(irc
->op2
)), IR(fleft
->op2
)->i
);
716 fins
->op1
= irc
->op1
;
717 fins
->op2
= lj_ir_kstr(J
, lj_buf_tostr(sb
));
721 return EMITFOLD
; /* Always emit, CSE later. */
724 LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint
)
725 LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int
)
726 LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint
)
727 LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum
)
728 LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr
)
729 LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar
)
730 LJFOLDF(bufput_kfold_fmt
)
732 IRIns
*irc
= IR(fleft
->op1
);
733 lj_assertJ(irref_isk(irc
->op2
), "SFormat must be const");
734 if (irref_isk(fleft
->op2
)) {
735 SFormat sf
= (SFormat
)IR(irc
->op2
)->i
;
736 IRIns
*ira
= IR(fleft
->op2
);
737 SBuf
*sb
= lj_buf_tmp_(J
->L
);
739 case IRCALL_lj_strfmt_putfxint
:
740 sb
= lj_strfmt_putfxint(sb
, sf
, ir_k64(ira
)->u64
);
742 case IRCALL_lj_strfmt_putfstr
:
743 sb
= lj_strfmt_putfstr(sb
, sf
, ir_kstr(ira
));
745 case IRCALL_lj_strfmt_putfchar
:
746 sb
= lj_strfmt_putfchar(sb
, sf
, ira
->i
);
748 case IRCALL_lj_strfmt_putfnum_int
:
749 case IRCALL_lj_strfmt_putfnum_uint
:
750 case IRCALL_lj_strfmt_putfnum
:
752 const CCallInfo
*ci
= &lj_ir_callinfo
[fins
->op2
];
753 sb
= ((SBuf
* (*)(SBuf
*, SFormat
, lua_Number
))ci
->func
)(sb
, sf
,
759 fins
->op1
= irc
->op1
;
760 fins
->op2
= lj_ir_kstr(J
, lj_buf_tostr(sb
));
763 return EMITFOLD
; /* Always emit, CSE later. */
766 /* -- Constant folding of pointer arithmetic ------------------------------ */
769 LJFOLD(ADD KGC KINT64
)
770 LJFOLDF(kfold_add_kgc
)
772 GCobj
*o
= ir_kgc(fleft
);
774 ptrdiff_t ofs
= (ptrdiff_t)ir_kint64(fright
)->u64
;
776 ptrdiff_t ofs
= fright
->i
;
779 if (irt_iscdata(fleft
->t
)) {
780 CType
*ct
= ctype_raw(ctype_ctsG(J2G(J
)), gco2cd(o
)->ctypeid
);
781 if (ctype_isnum(ct
->info
) || ctype_isenum(ct
->info
) ||
782 ctype_isptr(ct
->info
) || ctype_isfunc(ct
->info
) ||
783 ctype_iscomplex(ct
->info
) || ctype_isvector(ct
->info
))
784 return lj_ir_kkptr(J
, (char *)o
+ ofs
);
787 return lj_ir_kptr(J
, (char *)o
+ ofs
);
790 LJFOLD(ADD KPTR KINT
)
791 LJFOLD(ADD KPTR KINT64
)
792 LJFOLD(ADD KKPTR KINT
)
793 LJFOLD(ADD KKPTR KINT64
)
794 LJFOLDF(kfold_add_kptr
)
796 void *p
= ir_kptr(fleft
);
798 ptrdiff_t ofs
= (ptrdiff_t)ir_kint64(fright
)->u64
;
800 ptrdiff_t ofs
= fright
->i
;
802 return lj_ir_kptr_(J
, fleft
->o
, (char *)p
+ ofs
);
807 LJFOLD(ADD any KKPTR
)
808 LJFOLDF(kfold_add_kright
)
810 if (fleft
->o
== IR_KINT
|| fleft
->o
== IR_KINT64
) {
811 IRRef1 tmp
= fins
->op1
; fins
->op1
= fins
->op2
; fins
->op2
= tmp
;
817 /* -- Constant folding of conversions ------------------------------------- */
819 LJFOLD(TOBIT KNUM KNUM
)
822 return INTFOLD(lj_num2bit(knumleft
));
825 LJFOLD(CONV KINT IRCONV_NUM_INT
)
826 LJFOLDF(kfold_conv_kint_num
)
828 return lj_ir_knum(J
, (lua_Number
)fleft
->i
);
831 LJFOLD(CONV KINT IRCONV_NUM_U32
)
832 LJFOLDF(kfold_conv_kintu32_num
)
834 return lj_ir_knum(J
, (lua_Number
)(uint32_t)fleft
->i
);
837 LJFOLD(CONV KINT IRCONV_INT_I8
)
838 LJFOLD(CONV KINT IRCONV_INT_U8
)
839 LJFOLD(CONV KINT IRCONV_INT_I16
)
840 LJFOLD(CONV KINT IRCONV_INT_U16
)
841 LJFOLDF(kfold_conv_kint_ext
)
843 int32_t k
= fleft
->i
;
844 if ((fins
->op2
& IRCONV_SRCMASK
) == IRT_I8
) k
= (int8_t)k
;
845 else if ((fins
->op2
& IRCONV_SRCMASK
) == IRT_U8
) k
= (uint8_t)k
;
846 else if ((fins
->op2
& IRCONV_SRCMASK
) == IRT_I16
) k
= (int16_t)k
;
847 else k
= (uint16_t)k
;
851 LJFOLD(CONV KINT IRCONV_I64_INT
)
852 LJFOLD(CONV KINT IRCONV_U64_INT
)
853 LJFOLD(CONV KINT IRCONV_I64_U32
)
854 LJFOLD(CONV KINT IRCONV_U64_U32
)
855 LJFOLDF(kfold_conv_kint_i64
)
857 if ((fins
->op2
& IRCONV_SEXT
))
858 return INT64FOLD((uint64_t)(int64_t)fleft
->i
);
860 return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft
->i
);
863 LJFOLD(CONV KINT64 IRCONV_NUM_I64
)
864 LJFOLDF(kfold_conv_kint64_num_i64
)
866 return lj_ir_knum(J
, (lua_Number
)(int64_t)ir_kint64(fleft
)->u64
);
869 LJFOLD(CONV KINT64 IRCONV_NUM_U64
)
870 LJFOLDF(kfold_conv_kint64_num_u64
)
872 return lj_ir_knum(J
, (lua_Number
)ir_kint64(fleft
)->u64
);
875 LJFOLD(CONV KINT64 IRCONV_INT_I64
)
876 LJFOLD(CONV KINT64 IRCONV_U32_I64
)
877 LJFOLDF(kfold_conv_kint64_int_i64
)
879 return INTFOLD((int32_t)ir_kint64(fleft
)->u64
);
882 LJFOLD(CONV KNUM IRCONV_INT_NUM
)
883 LJFOLDF(kfold_conv_knum_int_num
)
885 lua_Number n
= knumleft
;
886 int32_t k
= lj_num2int(n
);
887 if (irt_isguard(fins
->t
) && n
!= (lua_Number
)k
) {
    /* We're about to create a guard which always fails, like CONV +1.5.
    ** Some pathological loops cause this during LICM, e.g.:
    **   local x,k,t = 0,1.5,{1,[1.5]=2}
    **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
    */
899 LJFOLD(CONV KNUM IRCONV_U32_NUM
)
900 LJFOLDF(kfold_conv_knum_u32_num
)
903 { /* Workaround for MSVC bug. */
904 volatile uint32_t u
= (uint32_t)knumleft
;
905 return INTFOLD((int32_t)u
);
908 return INTFOLD((int32_t)(uint32_t)knumleft
);
912 LJFOLD(CONV KNUM IRCONV_I64_NUM
)
913 LJFOLDF(kfold_conv_knum_i64_num
)
915 return INT64FOLD((uint64_t)(int64_t)knumleft
);
918 LJFOLD(CONV KNUM IRCONV_U64_NUM
)
919 LJFOLDF(kfold_conv_knum_u64_num
)
921 return INT64FOLD(lj_num2u64(knumleft
));
924 LJFOLD(TOSTR KNUM any
)
925 LJFOLDF(kfold_tostr_knum
)
927 return lj_ir_kstr(J
, lj_strfmt_num(J
->L
, ir_knum(fleft
)));
930 LJFOLD(TOSTR KINT any
)
931 LJFOLDF(kfold_tostr_kint
)
933 return lj_ir_kstr(J
, fins
->op2
== IRTOSTR_INT
?
934 lj_strfmt_int(J
->L
, fleft
->i
) :
935 lj_strfmt_char(J
->L
, fleft
->i
));
942 if (lj_strscan_num(ir_kstr(fleft
), &n
))
943 return lj_ir_knum(J
, numV(&n
));
947 /* -- Constant folding of equality checks --------------------------------- */
949 /* Don't constant-fold away FLOAD checks against KNULL. */
950 LJFOLD(EQ FLOAD KNULL
)
951 LJFOLD(NE FLOAD KNULL
)
954 /* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
959 LJFOLD(EQ KINT KINT
) /* Constants are unique, so same refs <==> same value. */
961 LJFOLD(EQ KINT64 KINT64
)
962 LJFOLD(NE KINT64 KINT64
)
967 return CONDFOLD((fins
->op1
== fins
->op2
) ^ (fins
->o
== IR_NE
));
970 /* -- Algebraic shortcuts ------------------------------------------------- */
972 LJFOLD(FPMATH FPMATH IRFPM_FLOOR
)
973 LJFOLD(FPMATH FPMATH IRFPM_CEIL
)
974 LJFOLD(FPMATH FPMATH IRFPM_TRUNC
)
975 LJFOLDF(shortcut_round
)
977 IRFPMathOp op
= (IRFPMathOp
)fleft
->op2
;
978 if (op
== IRFPM_FLOOR
|| op
== IRFPM_CEIL
|| op
== IRFPM_TRUNC
)
979 return LEFTFOLD
; /* round(round_left(x)) = round_left(x) */
983 LJFOLD(ABS ABS FLOAD
)
984 LJFOLDF(shortcut_left
)
986 return LEFTFOLD
; /* f(g(x)) ==> g(x) */
989 LJFOLD(ABS NEG FLOAD
)
990 LJFOLDF(shortcut_dropleft
)
993 fins
->op1
= fleft
->op1
; /* abs(neg(x)) ==> abs(x) */
997 /* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
1001 LJFOLDF(shortcut_leftleft
)
1003 PHIBARRIER(fleft
); /* See above. Fold would be ok, but not beneficial. */
1004 return fleft
->op1
; /* f(g(x)) ==> x */

/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/
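
/* A standalone sketch (not part of this file) that demonstrates the first
** pitfall; any C compiler will do:
*/
#if 0
#include <stdio.h>
int main(void)
{
  volatile double x = -0.0;
  /* IEEE 754: -0.0 + 0.0 rounds to +0.0, so folding x+0 ==> x flips the
  ** sign of zero, which is observable e.g. through division.
  */
  printf("x     = %g, 1/x     = %g\n", x, 1.0/x);               /* -0, -inf */
  printf("x+0.0 = %g, 1/(x+0) = %g\n", x + 0.0, 1.0/(x + 0.0)); /*  0,  inf */
  return 0;
}
#endif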
1017 LJFOLDF(simplify_numadd_negx
)
1020 fins
->o
= IR_SUB
; /* (-a) + b ==> b - a */
1021 fins
->op1
= fins
->op2
;
1022 fins
->op2
= fleft
->op1
;
1027 LJFOLDF(simplify_numadd_xneg
)
1030 fins
->o
= IR_SUB
; /* a + (-b) ==> a - b */
1031 fins
->op2
= fright
->op1
;
1035 LJFOLD(SUB any KNUM
)
1036 LJFOLDF(simplify_numsub_k
)
1038 if (ir_knum(fright
)->u64
== 0) /* x - (+0) ==> x */
1043 LJFOLD(SUB NEG KNUM
)
1044 LJFOLDF(simplify_numsub_negk
)
1047 fins
->op2
= fleft
->op1
; /* (-x) - k ==> (-k) - x */
1048 fins
->op1
= (IRRef1
)lj_ir_knum(J
, -knumright
);
1053 LJFOLDF(simplify_numsub_xneg
)
1056 fins
->o
= IR_ADD
; /* a - (-b) ==> a + b */
1057 fins
->op2
= fright
->op1
;
1061 LJFOLD(MUL any KNUM
)
1062 LJFOLD(DIV any KNUM
)
1063 LJFOLDF(simplify_nummuldiv_k
)
1065 lua_Number n
= knumright
;
1066 if (n
== 1.0) { /* x o 1 ==> x */
1068 } else if (n
== -1.0) { /* x o -1 ==> -x */
1069 IRRef op1
= fins
->op1
;
1070 fins
->op2
= (IRRef1
)lj_ir_ksimd(J
, LJ_KSIMD_NEG
); /* Modifies fins. */
1074 } else if (fins
->o
== IR_MUL
&& n
== 2.0) { /* x * 2 ==> x + x */
1076 fins
->op2
= fins
->op1
;
1078 } else if (fins
->o
== IR_DIV
) { /* x / 2^k ==> x * 2^-k */
1079 uint64_t u
= ir_knum(fright
)->u64
;
1080 uint32_t ex
= ((uint32_t)(u
>> 52) & 0x7ff);
1081 if ((u
& U64x(000fffff
,ffffffff
)) == 0 && ex
- 1 < 0x7fd) {
1082 u
= (u
& ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex
) << 52);
1083 fins
->o
= IR_MUL
; /* Multiply by exact reciprocal. */
1084 fins
->op2
= lj_ir_knum_u64(J
, u
);
1091 LJFOLD(MUL NEG KNUM
)
1092 LJFOLD(DIV NEG KNUM
)
1093 LJFOLDF(simplify_nummuldiv_negk
)
1096 fins
->op1
= fleft
->op1
; /* (-a) o k ==> a o (-k) */
1097 fins
->op2
= (IRRef1
)lj_ir_knum(J
, -knumright
);
1103 LJFOLDF(simplify_nummuldiv_negneg
)
1107 fins
->op1
= fleft
->op1
; /* (-a) o (-b) ==> a o b */
1108 fins
->op2
= fright
->op1
;
1112 LJFOLD(POW any KNUM
)
1113 LJFOLDF(simplify_numpow_k
)
1115 if (knumright
== 0.0) /* x ^ 0 ==> 1 */
1116 return lj_ir_knum_one(J
); /* Result must be a number, not an int. */
1117 else if (knumright
== 1.0) /* x ^ 1 ==> x */
1119 else if (knumright
== 2.0) /* x ^ 2 ==> x * x */
1120 return emitir(IRTN(IR_MUL
), fins
->op1
, fins
->op1
);
1125 /* -- Simplify conversions ------------------------------------------------ */
1127 LJFOLD(CONV CONV IRCONV_NUM_INT
) /* _NUM */
1128 LJFOLDF(shortcut_conv_num_int
)
1131 /* Only safe with a guarded conversion to int. */
1132 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_NUM
&& irt_isguard(fleft
->t
))
1133 return fleft
->op1
; /* f(g(x)) ==> x */
1137 LJFOLD(CONV CONV IRCONV_INT_NUM
) /* _INT */
1138 LJFOLD(CONV CONV IRCONV_U32_NUM
) /* _U32 */
1139 LJFOLDF(simplify_conv_int_num
)
1141 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
1142 if ((fleft
->op2
& IRCONV_SRCMASK
) ==
1143 ((fins
->op2
& IRCONV_DSTMASK
) >> IRCONV_DSH
))
1148 LJFOLD(CONV CONV IRCONV_I64_NUM
) /* _INT or _U32 */
1149 LJFOLD(CONV CONV IRCONV_U64_NUM
) /* _INT or _U32 */
1150 LJFOLDF(simplify_conv_i64_num
)
1153 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_INT
) {
1154 /* Reduce to a sign-extension. */
1155 fins
->op1
= fleft
->op1
;
1156 fins
->op2
= ((IRT_I64
<<5)|IRT_INT
|IRCONV_SEXT
);
1158 } else if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_U32
) {
1162 /* Reduce to a zero-extension. */
1163 fins
->op1
= fleft
->op1
;
1164 fins
->op2
= (IRT_I64
<<5)|IRT_U32
;
1171 LJFOLD(CONV CONV IRCONV_INT_I64
) /* _INT or _U32 */
1172 LJFOLD(CONV CONV IRCONV_INT_U64
) /* _INT or _U32 */
1173 LJFOLD(CONV CONV IRCONV_INT_U32
) /* _INT or _U32 */
1174 LJFOLD(CONV CONV IRCONV_U32_I64
) /* _INT or _U32 */
1175 LJFOLD(CONV CONV IRCONV_U32_U64
) /* _INT or _U32 */
1176 LJFOLD(CONV CONV IRCONV_U32_INT
) /* _INT or _U32 */
1177 LJFOLDF(simplify_conv_int_i64
)
1181 src
= (fleft
->op2
& IRCONV_SRCMASK
);
1182 if (src
== IRT_INT
|| src
== IRT_U32
) {
1183 if (src
== ((fins
->op2
& IRCONV_DSTMASK
) >> IRCONV_DSH
)) {
1186 fins
->op2
= ((fins
->op2
& IRCONV_DSTMASK
) | src
);
1187 fins
->op1
= fleft
->op1
;
1194 LJFOLD(CONV CONV IRCONV_FLOAT_NUM
) /* _FLOAT */
1195 LJFOLDF(simplify_conv_flt_num
)
1198 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_FLOAT
)
1203 /* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
1204 LJFOLD(TOBIT CONV KNUM
)
1205 LJFOLDF(simplify_tobit_conv
)
1207 /* Fold even across PHI to avoid expensive num->int conversions in loop. */
1208 if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_INT
) {
1209 lj_assertJ(irt_isnum(fleft
->t
), "expected TOBIT number arg");
1211 } else if ((fleft
->op2
& IRCONV_SRCMASK
) == IRT_U32
) {
1212 lj_assertJ(irt_isnum(fleft
->t
), "expected TOBIT number arg");
1214 fins
->op1
= fleft
->op1
;
1215 fins
->op2
= (IRT_INT
<<5)|IRT_U32
;
1221 /* Shortcut floor/ceil/trunc + IRT_NUM <- integer conversion. */
1222 LJFOLD(FPMATH CONV IRFPM_FLOOR
)
1223 LJFOLD(FPMATH CONV IRFPM_CEIL
)
1224 LJFOLD(FPMATH CONV IRFPM_TRUNC
)
1225 LJFOLDF(simplify_floor_conv
)
1227 if ((uint32_t)(fleft
->op2
& IRCONV_SRCMASK
) - (uint32_t)IRT_I8
<= (uint32_t)(IRT_U64
- IRT_U8
))
1232 /* Strength reduction of widening. */
1233 LJFOLD(CONV any IRCONV_I64_INT
)
1234 LJFOLD(CONV any IRCONV_U64_INT
)
1235 LJFOLDF(simplify_conv_sext
)
1237 IRRef ref
= fins
->op1
;
1239 if (!(fins
->op2
& IRCONV_SEXT
))
1242 if (fleft
->o
== IR_XLOAD
&& (irt_isu8(fleft
->t
) || irt_isu16(fleft
->t
)))
1244 if (fleft
->o
== IR_ADD
&& irref_isk(fleft
->op2
)) {
1245 ofs
= (int64_t)IR(fleft
->op2
)->i
;
1248 /* Use scalar evolution analysis results to strength-reduce sign-extension. */
1249 if (ref
== J
->scev
.idx
) {
1250 IRRef lo
= J
->scev
.dir
? J
->scev
.start
: J
->scev
.stop
;
1251 lj_assertJ(irt_isint(J
->scev
.t
), "only int SCEV supported");
1252 if (lo
&& IR(lo
)->o
== IR_KINT
&& IR(lo
)->i
+ ofs
>= 0) {
1255 /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
1258 /* Reduce to a (cheaper) zero-extension. */
1259 fins
->op2
&= ~IRCONV_SEXT
;
1267 /* Strength reduction of narrowing. */
1268 LJFOLD(CONV ADD IRCONV_INT_I64
)
1269 LJFOLD(CONV SUB IRCONV_INT_I64
)
1270 LJFOLD(CONV MUL IRCONV_INT_I64
)
1271 LJFOLD(CONV ADD IRCONV_INT_U64
)
1272 LJFOLD(CONV SUB IRCONV_INT_U64
)
1273 LJFOLD(CONV MUL IRCONV_INT_U64
)
1274 LJFOLD(CONV ADD IRCONV_U32_I64
)
1275 LJFOLD(CONV SUB IRCONV_U32_I64
)
1276 LJFOLD(CONV MUL IRCONV_U32_I64
)
1277 LJFOLD(CONV ADD IRCONV_U32_U64
)
1278 LJFOLD(CONV SUB IRCONV_U32_U64
)
1279 LJFOLD(CONV MUL IRCONV_U32_U64
)
1280 LJFOLDF(simplify_conv_narrow
)
1286 IROp op
= (IROp
)fleft
->o
;
1287 IRType t
= irt_type(fins
->t
);
1288 IRRef op1
= fleft
->op1
, op2
= fleft
->op2
, mode
= fins
->op2
;
1290 op1
= emitir(IRT(IR_CONV
, t
), op1
, mode
);
1291 op2
= emitir(IRT(IR_CONV
, t
), op2
, mode
);
1292 fins
->ot
= IRT(op
, t
);
1299 /* Special CSE rule for CONV. */
1300 LJFOLD(CONV any any
)
1303 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_CSE
)) {
1304 IRRef op1
= fins
->op1
, op2
= (fins
->op2
& IRCONV_MODEMASK
);
1305 uint8_t guard
= irt_isguard(fins
->t
);
1306 IRRef ref
= J
->chain
[IR_CONV
];
1308 IRIns
*ir
= IR(ref
);
1309 /* Commoning with stronger checks is ok. */
1310 if (ir
->op1
== op1
&& (ir
->op2
& IRCONV_MODEMASK
) == op2
&&
1311 irt_isguard(ir
->t
) >= guard
)
1316 return EMITFOLD
; /* No fallthrough to regular CSE. */
1319 /* FP conversion narrowing. */
1320 LJFOLD(TOBIT ADD KNUM
)
1321 LJFOLD(TOBIT SUB KNUM
)
1322 LJFOLD(CONV ADD IRCONV_INT_NUM
)
1323 LJFOLD(CONV SUB IRCONV_INT_NUM
)
1324 LJFOLD(CONV ADD IRCONV_I64_NUM
)
1325 LJFOLD(CONV SUB IRCONV_I64_NUM
)
1326 LJFOLDF(narrow_convert
)
1329 /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
1330 if (J
->chain
[IR_LOOP
])
1332 lj_assertJ(fins
->o
!= IR_CONV
|| (fins
->op2
&IRCONV_CONVMASK
) != IRCONV_TOBIT
,
1333 "unexpected CONV TOBIT");
1334 return lj_opt_narrow_convert(J
);
1337 /* -- Integer algebraic simplifications ----------------------------------- */
1339 LJFOLD(ADD any KINT
)
1340 LJFOLD(ADDOV any KINT
)
1341 LJFOLD(SUBOV any KINT
)
1342 LJFOLDF(simplify_intadd_k
)
1344 if (fright
->i
== 0) /* i o 0 ==> i */
1349 LJFOLD(MULOV any KINT
)
1350 LJFOLDF(simplify_intmul_k
)
1352 if (fright
->i
== 0) /* i * 0 ==> 0 */
1354 if (fright
->i
== 1) /* i * 1 ==> i */
1356 if (fright
->i
== 2) { /* i * 2 ==> i + i */
1358 fins
->op2
= fins
->op1
;
1364 LJFOLD(SUB any KINT
)
1365 LJFOLDF(simplify_intsub_k
)
1367 if (fright
->i
== 0) /* i - 0 ==> i */
1369 fins
->o
= IR_ADD
; /* i - k ==> i + (-k) */
1370 fins
->op2
= (IRRef1
)lj_ir_kint(J
, (int32_t)(~(uint32_t)fright
->i
+1u)); /* Overflow for -2^31 ok. */
1374 LJFOLD(SUB KINT any
)
1375 LJFOLD(SUB KINT64 any
)
1376 LJFOLDF(simplify_intsub_kleft
)
1378 if (fleft
->o
== IR_KINT
? (fleft
->i
== 0) : (ir_kint64(fleft
)->u64
== 0)) {
1379 fins
->o
= IR_NEG
; /* 0 - i ==> -i */
1380 fins
->op1
= fins
->op2
;
1386 LJFOLD(ADD any KINT64
)
1387 LJFOLDF(simplify_intadd_k64
)
1389 if (ir_kint64(fright
)->u64
== 0) /* i + 0 ==> i */
1394 LJFOLD(SUB any KINT64
)
1395 LJFOLDF(simplify_intsub_k64
)
1397 uint64_t k
= ir_kint64(fright
)->u64
;
1398 if (k
== 0) /* i - 0 ==> i */
1400 fins
->o
= IR_ADD
; /* i - k ==> i + (-k) */
1401 fins
->op2
= (IRRef1
)lj_ir_kint64(J
, ~k
+1u);

static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
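  /* E.g. i * 8 ==> i << 3 below; a non-power-of-two like i * 10 is kept
  ** as a multiply for the backend to handle.
  */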
1411 if (k
== 0) { /* i * 0 ==> 0 */
1413 } else if (k
== 1) { /* i * 1 ==> i */
1415 } else if ((k
& (k
-1)) == 0) { /* i * 2^k ==> i << k */
1417 fins
->op2
= lj_ir_kint(J
, lj_fls((uint32_t)k
));
1423 LJFOLD(MUL any KINT
)
1424 LJFOLDF(simplify_intmul_k32
)
1427 return simplify_intmul_k(J
, fright
->i
);
1431 LJFOLD(MUL any KINT64
)
1432 LJFOLDF(simplify_intmul_k64
)
1435 if (ir_kint64(fright
)->u64
< 0x80000000u
)
1436 return simplify_intmul_k(J
, (int32_t)ir_kint64(fright
)->u64
);
1439 UNUSED(J
); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD
;
1443 LJFOLD(MOD any KINT
)
1444 LJFOLDF(simplify_intmod_k
)
1446 int32_t k
= fright
->i
;
1447 lj_assertJ(k
!= 0, "integer mod 0");
1448 if (k
> 0 && (k
& (k
-1)) == 0) { /* i % (2^k) ==> i & (2^k-1) */
1450 fins
->op2
= lj_ir_kint(J
, k
-1);
1456 LJFOLD(MOD KINT any
)
1457 LJFOLDF(simplify_intmod_kleft
)
1465 LJFOLD(SUBOV any any
)
1466 LJFOLDF(simplify_intsub
)
1468 if (fins
->op1
== fins
->op2
&& !irt_isnum(fins
->t
)) /* i - i ==> 0 */
1469 return irt_is64(fins
->t
) ? INT64FOLD(0) : INTFOLD(0);
1474 LJFOLDF(simplify_intsubadd_leftcancel
)
1476 if (!irt_isnum(fins
->t
)) {
1478 if (fins
->op2
== fleft
->op1
) /* (i + j) - i ==> j */
1480 if (fins
->op2
== fleft
->op2
) /* (i + j) - j ==> i */
1487 LJFOLDF(simplify_intsubsub_leftcancel
)
1489 if (!irt_isnum(fins
->t
)) {
1491 if (fins
->op2
== fleft
->op1
) { /* (i - j) - i ==> 0 - j */
1492 fins
->op1
= (IRRef1
)lj_ir_kint(J
, 0);
1493 fins
->op2
= fleft
->op2
;
1501 LJFOLDF(simplify_intsubsub_rightcancel
)
1503 if (!irt_isnum(fins
->t
)) {
1505 if (fins
->op1
== fright
->op1
) /* i - (i - j) ==> j */
1512 LJFOLDF(simplify_intsubadd_rightcancel
)
1514 if (!irt_isnum(fins
->t
)) {
1516 if (fins
->op1
== fright
->op1
) { /* i - (i + j) ==> 0 - j */
1517 fins
->op2
= fright
->op2
;
1518 fins
->op1
= (IRRef1
)lj_ir_kint(J
, 0);
1521 if (fins
->op1
== fright
->op2
) { /* i - (j + i) ==> 0 - j */
1522 fins
->op2
= fright
->op1
;
1523 fins
->op1
= (IRRef1
)lj_ir_kint(J
, 0);
1531 LJFOLDF(simplify_intsubaddadd_cancel
)
1533 if (!irt_isnum(fins
->t
)) {
1536 if (fleft
->op1
== fright
->op1
) { /* (i + j1) - (i + j2) ==> j1 - j2 */
1537 fins
->op1
= fleft
->op2
;
1538 fins
->op2
= fright
->op2
;
1541 if (fleft
->op1
== fright
->op2
) { /* (i + j1) - (j2 + i) ==> j1 - j2 */
1542 fins
->op1
= fleft
->op2
;
1543 fins
->op2
= fright
->op1
;
1546 if (fleft
->op2
== fright
->op1
) { /* (j1 + i) - (i + j2) ==> j1 - j2 */
1547 fins
->op1
= fleft
->op1
;
1548 fins
->op2
= fright
->op2
;
1551 if (fleft
->op2
== fright
->op2
) { /* (j1 + i) - (j2 + i) ==> j1 - j2 */
1552 fins
->op1
= fleft
->op1
;
1553 fins
->op2
= fright
->op1
;
1560 LJFOLD(BAND any KINT
)
1561 LJFOLD(BAND any KINT64
)
1562 LJFOLDF(simplify_band_k
)
1564 int64_t k
= fright
->o
== IR_KINT
? (int64_t)fright
->i
:
1565 (int64_t)ir_k64(fright
)->u64
;
1566 if (k
== 0) /* i & 0 ==> 0 */
1568 if (k
== -1) /* i & -1 ==> i */
1573 LJFOLD(BOR any KINT
)
1574 LJFOLD(BOR any KINT64
)
1575 LJFOLDF(simplify_bor_k
)
1577 int64_t k
= fright
->o
== IR_KINT
? (int64_t)fright
->i
:
1578 (int64_t)ir_k64(fright
)->u64
;
1579 if (k
== 0) /* i | 0 ==> i */
1581 if (k
== -1) /* i | -1 ==> -1 */
1586 LJFOLD(BXOR any KINT
)
1587 LJFOLD(BXOR any KINT64
)
1588 LJFOLDF(simplify_bxor_k
)
1590 int64_t k
= fright
->o
== IR_KINT
? (int64_t)fright
->i
:
1591 (int64_t)ir_k64(fright
)->u64
;
1592 if (k
== 0) /* i xor 0 ==> i */
1594 if (k
== -1) { /* i xor -1 ==> ~i */
1602 LJFOLD(BSHL any KINT
)
1603 LJFOLD(BSHR any KINT
)
1604 LJFOLD(BSAR any KINT
)
1605 LJFOLD(BROL any KINT
)
1606 LJFOLD(BROR any KINT
)
1607 LJFOLDF(simplify_shift_ik
)
1609 int32_t mask
= irt_is64(fins
->t
) ? 63 : 31;
1610 int32_t k
= (fright
->i
& mask
);
1611 if (k
== 0) /* i o 0 ==> i */
1613 if (k
== 1 && fins
->o
== IR_BSHL
) { /* i << 1 ==> i + i */
1615 fins
->op2
= fins
->op1
;
1618 if (k
!= fright
->i
) { /* i o k ==> i o (k & mask) */
1619 fins
->op2
= (IRRef1
)lj_ir_kint(J
, k
);
1622 #ifndef LJ_TARGET_UNIFYROT
1623 if (fins
->o
== IR_BROR
) { /* bror(i, k) ==> brol(i, (-k)&mask) */
1625 fins
->op2
= (IRRef1
)lj_ir_kint(J
, (-k
)&mask
);
1632 LJFOLD(BSHL any BAND
)
1633 LJFOLD(BSHR any BAND
)
1634 LJFOLD(BSAR any BAND
)
1635 LJFOLD(BROL any BAND
)
1636 LJFOLD(BROR any BAND
)
1637 LJFOLDF(simplify_shift_andk
)
1639 IRIns
*irk
= IR(fright
->op2
);
1641 if ((fins
->o
< IR_BROL
? LJ_TARGET_MASKSHIFT
: LJ_TARGET_MASKROT
) &&
1642 irk
->o
== IR_KINT
) { /* i o (j & mask) ==> i o j */
1643 int32_t mask
= irt_is64(fins
->t
) ? 63 : 31;
1644 int32_t k
= irk
->i
& mask
;
1646 fins
->op2
= fright
->op1
;
1653 LJFOLD(BSHL KINT any
)
1654 LJFOLD(BSHR KINT any
)
1655 LJFOLD(BSHL KINT64 any
)
1656 LJFOLD(BSHR KINT64 any
)
1657 LJFOLDF(simplify_shift1_ki
)
1659 int64_t k
= fleft
->o
== IR_KINT
? (int64_t)fleft
->i
:
1660 (int64_t)ir_k64(fleft
)->u64
;
1661 if (k
== 0) /* 0 o i ==> 0 */
1666 LJFOLD(BSAR KINT any
)
1667 LJFOLD(BROL KINT any
)
1668 LJFOLD(BROR KINT any
)
1669 LJFOLD(BSAR KINT64 any
)
1670 LJFOLD(BROL KINT64 any
)
1671 LJFOLD(BROR KINT64 any
)
1672 LJFOLDF(simplify_shift2_ki
)
1674 int64_t k
= fleft
->o
== IR_KINT
? (int64_t)fleft
->i
:
1675 (int64_t)ir_k64(fleft
)->u64
;
1676 if (k
== 0 || k
== -1) /* 0 o i ==> 0; -1 o i ==> -1 */
1681 LJFOLD(BSHL BAND KINT
)
1682 LJFOLD(BSHR BAND KINT
)
1683 LJFOLD(BROL BAND KINT
)
1684 LJFOLD(BROR BAND KINT
)
1685 LJFOLDF(simplify_shiftk_andk
)
1687 IRIns
*irk
= IR(fleft
->op2
);
1689 if (irk
->o
== IR_KINT
) { /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
1690 int32_t k
= kfold_intop(irk
->i
, fright
->i
, (IROp
)fins
->o
);
1691 fins
->op1
= fleft
->op1
;
1692 fins
->op1
= (IRRef1
)lj_opt_fold(J
);
1693 fins
->op2
= (IRRef1
)lj_ir_kint(J
, k
);
1694 fins
->ot
= IRTI(IR_BAND
);
1696 } else if (irk
->o
== IR_KINT64
) {
1697 uint64_t k
= kfold_int64arith(J
, ir_k64(irk
)->u64
, fright
->i
,
1699 IROpT ot
= fleft
->ot
;
1700 fins
->op1
= fleft
->op1
;
1701 fins
->op1
= (IRRef1
)lj_opt_fold(J
);
1702 fins
->op2
= (IRRef1
)lj_ir_kint64(J
, k
);
1709 LJFOLD(BAND BSHL KINT
)
1710 LJFOLD(BAND BSHR KINT
)
1711 LJFOLDF(simplify_andk_shiftk
)
1713 IRIns
*irk
= IR(fleft
->op2
);
1714 if (irk
->o
== IR_KINT
&&
1715 kfold_intop(-1, irk
->i
, (IROp
)fleft
->o
) == fright
->i
)
1716 return LEFTFOLD
; /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
1720 LJFOLD(BAND BOR KINT
)
1721 LJFOLD(BOR BAND KINT
)
1722 LJFOLDF(simplify_andor_k
)
1724 IRIns
*irk
= IR(fleft
->op2
);
1726 if (irk
->o
== IR_KINT
) {
1727 int32_t k
= kfold_intop(irk
->i
, fright
->i
, (IROp
)fins
->o
);
1728 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1729 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1730 if (k
== (fins
->o
== IR_BAND
? 0 : -1)) {
1731 fins
->op1
= fleft
->op1
;
1738 LJFOLD(BAND BOR KINT64
)
1739 LJFOLD(BOR BAND KINT64
)
1740 LJFOLDF(simplify_andor_k64
)
1743 IRIns
*irk
= IR(fleft
->op2
);
1745 if (irk
->o
== IR_KINT64
) {
1746 uint64_t k
= kfold_int64arith(J
, ir_k64(irk
)->u64
, ir_k64(fright
)->u64
,
1748 /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
1749 /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
1750 if (k
== (fins
->o
== IR_BAND
? (uint64_t)0 : ~(uint64_t)0)) {
1751 fins
->op1
= fleft
->op1
;
1757 UNUSED(J
); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD
;
1761 /* -- Reassociation ------------------------------------------------------- */
1763 LJFOLD(ADD ADD KINT
)
1764 LJFOLD(MUL MUL KINT
)
1765 LJFOLD(BAND BAND KINT
)
1766 LJFOLD(BOR BOR KINT
)
1767 LJFOLD(BXOR BXOR KINT
)
1768 LJFOLDF(reassoc_intarith_k
)
1770 IRIns
*irk
= IR(fleft
->op2
);
1771 if (irk
->o
== IR_KINT
) {
1772 int32_t k
= kfold_intop(irk
->i
, fright
->i
, (IROp
)fins
->o
);
1773 if (k
== irk
->i
) /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
1776 fins
->op1
= fleft
->op1
;
1777 fins
->op2
= (IRRef1
)lj_ir_kint(J
, k
);
1778 return RETRYFOLD
; /* (i o k1) o k2 ==> i o (k1 o k2) */
1783 LJFOLD(ADD ADD KINT64
)
1784 LJFOLD(MUL MUL KINT64
)
1785 LJFOLD(BAND BAND KINT64
)
1786 LJFOLD(BOR BOR KINT64
)
1787 LJFOLD(BXOR BXOR KINT64
)
1788 LJFOLDF(reassoc_intarith_k64
)
1791 IRIns
*irk
= IR(fleft
->op2
);
1792 if (irk
->o
== IR_KINT64
) {
1793 uint64_t k
= kfold_int64arith(J
, ir_k64(irk
)->u64
, ir_k64(fright
)->u64
,
1796 fins
->op1
= fleft
->op1
;
1797 fins
->op2
= (IRRef1
)lj_ir_kint64(J
, k
);
1798 return RETRYFOLD
; /* (i o k1) o k2 ==> i o (k1 o k2) */
1802 UNUSED(J
); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD
;
1806 LJFOLD(BAND BAND any
)
1808 LJFOLDF(reassoc_dup
)
1810 if (fins
->op2
== fleft
->op1
|| fins
->op2
== fleft
->op2
)
1811 return LEFTFOLD
; /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
1817 LJFOLDF(reassoc_dup_minmax
)
1819 if (fins
->op2
== fleft
->op2
)
1820 return LEFTFOLD
; /* (a o b) o b ==> a o b */
1824 LJFOLD(BXOR BXOR any
)
1825 LJFOLDF(reassoc_bxor
)
1828 if (fins
->op2
== fleft
->op1
) /* (a xor b) xor a ==> b */
1830 if (fins
->op2
== fleft
->op2
) /* (a xor b) xor b ==> a */
1835 LJFOLD(BSHL BSHL KINT
)
1836 LJFOLD(BSHR BSHR KINT
)
1837 LJFOLD(BSAR BSAR KINT
)
1838 LJFOLD(BROL BROL KINT
)
1839 LJFOLD(BROR BROR KINT
)
1840 LJFOLDF(reassoc_shift
)
1842 IRIns
*irk
= IR(fleft
->op2
);
1843 PHIBARRIER(fleft
); /* The (shift any KINT) rule covers k2 == 0 and more. */
1844 if (irk
->o
== IR_KINT
) { /* (i o k1) o k2 ==> i o (k1 + k2) */
1845 int32_t mask
= irt_is64(fins
->t
) ? 63 : 31;
1846 int32_t k
= (irk
->i
& mask
) + (fright
->i
& mask
);
1847 if (k
> mask
) { /* Combined shift too wide? */
1848 if (fins
->o
== IR_BSHL
|| fins
->o
== IR_BSHR
)
1849 return mask
== 31 ? INTFOLD(0) : INT64FOLD(0);
1850 else if (fins
->o
== IR_BSAR
)
1855 fins
->op1
= fleft
->op1
;
1856 fins
->op2
= (IRRef1
)lj_ir_kint(J
, k
);
1862 LJFOLD(MIN MIN KINT
)
1863 LJFOLD(MAX MAX KINT
)
1864 LJFOLDF(reassoc_minmax_k
)
1866 IRIns
*irk
= IR(fleft
->op2
);
1867 if (irk
->o
== IR_KINT
) {
1869 int32_t y
= kfold_intop(a
, fright
->i
, fins
->o
);
1870 if (a
== y
) /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
1873 fins
->op1
= fleft
->op1
;
1874 fins
->op2
= (IRRef1
)lj_ir_kint(J
, y
);
1875 return RETRYFOLD
; /* (x o k1) o k2 ==> x o (k1 o k2) */

/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
1889 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_ABC
)) {
1890 if (irref_isk(fright
->op2
)) {
1891 IRIns
*add2
= IR(fright
->op1
);
1892 if (add2
->o
== IR_ADD
&& irref_isk(add2
->op2
) &&
1893 IR(fright
->op2
)->i
== -IR(add2
->op2
)->i
) {
1894 IRRef ref
= J
->chain
[IR_ABC
];
1895 IRRef lim
= add2
->op1
;
1896 if (fins
->op1
> lim
) lim
= fins
->op1
;
1898 IRIns
*ir
= IR(ref
);
1899 if (ir
->op1
== fins
->op1
&& ir
->op2
== add2
->op1
)
/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2)).
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
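
/* E.g. after ABC(asize, 8), a later ABC(asize, 5) is dropped outright,
** while a later ABC(asize, 13) patches the existing check up to 13 and is
** then dropped itself (constants chosen for illustration only).
*/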
1913 LJFOLD(ABC any KINT
)
1917 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_ABC
)) {
1918 IRRef ref
= J
->chain
[IR_ABC
];
1919 IRRef asize
= fins
->op1
;
1920 while (ref
> asize
) {
1921 IRIns
*ir
= IR(ref
);
1922 if (ir
->op1
== asize
&& irref_isk(ir
->op2
)) {
1923 uint32_t k
= (uint32_t)IR(ir
->op2
)->i
;
1924 if ((uint32_t)fright
->i
> k
)
1925 ir
->op2
= fins
->op2
;
1930 return EMITFOLD
; /* Already performed CSE. */
1935 /* Eliminate invariant ABC inside loop. */
1939 /* Invariant ABC marked as P32 or U32. Drop if op1 is invariant too. */
1940 if (!irt_isint(fins
->t
) && fins
->op1
< J
->chain
[IR_LOOP
] &&
1941 (irt_isu32(fins
->t
) ||
1942 (!irref_isk(fins
->op1
) && !irt_isphi(IR(fins
->op1
)->t
))))

/* -- Commutativity -------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/
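
/* E.g. constants are interned at low refs, so an ADD with a KINT left
** operand and a variable right operand gets its operands swapped before
** matching. Only (ADD any KINT) rules are needed, and CSE sees a single
** canonical form for a+b and b+a.
*/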
1959 LJFOLD(ADDOV any any
)
1960 LJFOLD(MULOV any any
)
1963 if (fins
->op1
< fins
->op2
) { /* Move lower ref to the right. */
1964 IRRef1 tmp
= fins
->op1
;
1965 fins
->op1
= fins
->op2
;
1976 /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
1977 if (fins
->op1
== fins
->op2
&&
1978 (!irt_isnum(fins
->t
) ||
1979 (fleft
->o
== IR_CONV
&& /* Converted integers cannot be NaN. */
1980 (uint32_t)(fleft
->op2
& IRCONV_SRCMASK
) - (uint32_t)IRT_I8
<= (uint32_t)(IRT_U64
- IRT_U8
))))
1981 return CONDFOLD(fins
->o
== IR_EQ
);
1982 return fold_comm_swap(J
);
1995 /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
1996 if (fins
->op1
== fins
->op2
&& !irt_isnum(fins
->t
))
1997 return CONDFOLD((fins
->o
^ (fins
->o
>> 1)) & 1);
1998 if (fins
->op1
< fins
->op2
) { /* Move lower ref to the right. */
1999 IRRef1 tmp
= fins
->op1
;
2000 fins
->op1
= fins
->op2
;
2002 fins
->o
^= 3; /* GT <-> LT, GE <-> LE, does not affect U */
2008 LJFOLD(BAND any any
)
2012 if (fins
->op1
== fins
->op2
) /* x o x ==> x */
2014 return fold_comm_swap(J
);
2019 LJFOLDF(comm_dup_minmax
)
2021 if (fins
->op1
== fins
->op2
) /* x o x ==> x */
2026 LJFOLD(BXOR any any
)
2029 if (fins
->op1
== fins
->op2
) /* i xor i ==> 0 */
2030 return irt_is64(fins
->t
) ? INT64FOLD(0) : INTFOLD(0);
2031 return fold_comm_swap(J
);
2034 /* -- Simplification of compound expressions ------------------------------ */
2036 static TRef
kfold_xload(jit_State
*J
, IRIns
*ir
, const void *p
)
2039 switch (irt_type(ir
->t
)) {
2040 case IRT_NUM
: return lj_ir_knum_u64(J
, *(uint64_t *)p
);
2041 case IRT_I8
: k
= (int32_t)*(int8_t *)p
; break;
2042 case IRT_U8
: k
= (int32_t)*(uint8_t *)p
; break;
2043 case IRT_I16
: k
= (int32_t)(int16_t)lj_getu16(p
); break;
2044 case IRT_U16
: k
= (int32_t)(uint16_t)lj_getu16(p
); break;
2045 case IRT_INT
: case IRT_U32
: k
= (int32_t)lj_getu32(p
); break;
2046 case IRT_I64
: case IRT_U64
: return lj_ir_kint64(J
, *(uint64_t *)p
);
2049 return lj_ir_kint(J
, k
);
/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
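
/* E.g. with a two-byte constant, string.sub(s, i, i+1) == "ab" turns into
** one unaligned 16 bit XLOAD of STRREF(s, i) compared against the 16 bit
** value loaded from "ab", plus a length guard; see FOLD_SNEW_MAX_LEN below.
*/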
2058 LJFOLDF(merge_eqne_snew_kgc
)
2060 GCstr
*kstr
= ir_kstr(fright
);
2061 int32_t len
= (int32_t)kstr
->len
;
2062 lj_assertJ(irt_isstr(fins
->t
), "bad equality IR type");
2064 #if LJ_TARGET_UNALIGNED
2065 #define FOLD_SNEW_MAX_LEN 4 /* Handle string lengths 0, 1, 2, 3, 4. */
2066 #define FOLD_SNEW_TYPE8 IRT_I8 /* Creates shorter immediates. */
2068 #define FOLD_SNEW_MAX_LEN 1 /* Handle string lengths 0 or 1. */
2069 #define FOLD_SNEW_TYPE8 IRT_U8 /* Prefer unsigned loads. */
2073 if (len
<= FOLD_SNEW_MAX_LEN
) {
2074 IROp op
= (IROp
)fins
->o
;
2075 IRRef strref
= fleft
->op1
;
2076 if (IR(strref
)->o
!= IR_STRREF
)
2079 emitir(IRTGI(IR_EQ
), fleft
->op2
, lj_ir_kint(J
, len
));
2080 /* Caveat: fins/fleft/fright is no longer valid after emitir. */
2082 /* NE is not expanded since this would need an OR of two conds. */
2083 if (!irref_isk(fleft
->op2
)) /* Only handle the constant length case. */
2085 if (IR(fleft
->op2
)->i
!= len
)
2089 /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
2090 uint16_t ot
= (uint16_t)(len
== 1 ? IRT(IR_XLOAD
, FOLD_SNEW_TYPE8
) :
2091 len
== 2 ? IRT(IR_XLOAD
, IRT_U16
) :
2093 TRef tmp
= emitir(ot
, strref
,
2094 IRXLOAD_READONLY
| (len
> 1 ? IRXLOAD_UNALIGNED
: 0));
2095 TRef val
= kfold_xload(J
, IR(tref_ref(tmp
)), strdata(kstr
));
2097 tmp
= emitir(IRTI(IR_BAND
), tmp
,
2098 lj_ir_kint(J
, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
2099 fins
->op1
= (IRRef1
)tmp
;
2100 fins
->op2
= (IRRef1
)val
;
2101 fins
->ot
= (IROpT
)IRTGI(op
);

/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/
2119 LJFOLDX(lj_opt_fwd_aload
)
2121 /* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
2123 LJFOLDF(kfold_hload_kkptr
)
2126 lj_assertJ(ir_kptr(fleft
) == niltvg(J2G(J
)), "expected niltv");
2131 LJFOLDX(lj_opt_fwd_hload
)
2134 LJFOLDX(lj_opt_fwd_uload
)
2136 LJFOLD(ALEN any any
)
2137 LJFOLDX(lj_opt_fwd_alen
)
2139 /* Try to merge UREFO/UREFC into referenced instruction. */
2140 static TRef
merge_uref(jit_State
*J
, IRRef ref
, IRIns
* ir
)
2142 if (ir
->o
== IR_UREFO
&& irt_isguard(ir
->t
)) {
    /* Might be pointing to some other coroutine's stack.
    ** And GC might shrink said stack, thereby repointing the upvalue.
    ** GC might even collect said coroutine, thereby closing the upvalue.
    */
2147 if (gcstep_barrier(J
, ref
))
2148 return EMITFOLD
; /* So cannot merge. */
2149 /* Current fins wants a check, but ir doesn't have one. */
2150 if ((irt_t(fins
->t
) & (IRT_GUARD
|IRT_TYPE
)) == (IRT_GUARD
|IRT_PGC
) &&
2151 irt_type(ir
->t
) == IRT_IGC
)
2152 ir
->t
.irt
+= IRT_PGC
-IRT_IGC
; /* So install a check. */
2154 return ref
; /* Not a TRef, but the caller doesn't care. */

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for guarded UREFO across a GC step.
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
2163 LJFOLD(UREFO KGC any
)
2164 LJFOLD(UREFC KGC any
)
2167 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_CSE
)) {
2168 IRRef ref
= J
->chain
[fins
->o
];
2169 GCfunc
*fn
= ir_kfunc(fleft
);
2170 GCupval
*uv
= gco2uv(gcref(fn
->l
.uvptr
[(fins
->op2
>> 8)]));
2172 IRIns
*ir
= IR(ref
);
2173 if (irref_isk(ir
->op1
)) {
2174 GCfunc
*fn2
= ir_kfunc(IR(ir
->op1
));
2175 if (gco2uv(gcref(fn2
->l
.uvptr
[(ir
->op2
>> 8)])) == uv
) {
2176 return merge_uref(J
, ref
, ir
);
2185 /* Custom CSE for UREFO. */
2186 LJFOLD(UREFO any any
)
2189 if (LJ_LIKELY(J
->flags
& JIT_F_OPT_CSE
)) {
2190 IRRef ref
= J
->chain
[IR_UREFO
];
2191 IRRef lim
= fins
->op1
;
2192 IRRef2 op12
= (IRRef2
)fins
->op1
+ ((IRRef2
)fins
->op2
<< 16);
2194 IRIns
*ir
= IR(ref
);
2195 if (ir
->op12
== op12
)
2196 return merge_uref(J
, ref
, ir
);
LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)

LJFOLD(HREF TNEW any)
LJFOLDF(fwd_href_tnew)
{
  if (lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  cTValue *val;
  lj_ir_kvalue(J->L, &keyv, fright);
  val = lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv);
  /* Check for either nil or the nil value marker in the template table. */
  if ((tvisnil(val) || tvistab(val)) && lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}
/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But we need to check for any NEWREF with
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
** LREF is constant (per trace) since coroutine switches are not inlined.
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}
LJFOLD(HREF any any)
LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
}
/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
LJFOLDF(fload_str_len_tostr)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
    return INTFOLD(1);
  return NEXTFOLD;
}
LJFOLD(FLOAD any IRFL_SBUF_W)
LJFOLD(FLOAD any IRFL_SBUF_E)
LJFOLD(FLOAD any IRFL_SBUF_B)
LJFOLD(FLOAD any IRFL_SBUF_L)
LJFOLD(FLOAD any IRFL_SBUF_REF)
LJFOLD(FLOAD any IRFL_SBUF_R)
LJFOLDF(fload_sbuf)
{
  TRef tr = lj_opt_fwd_fload(J);
  return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
}
/* The fast function ID of function objects is immutable. */
LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
LJFOLDF(fload_func_ffid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
  return NEXTFOLD;
}

/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
  return NEXTFOLD;
}
/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}
LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}
LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_FUNC_ENV)
LJFOLD(FLOAD any IRFL_THREAD_ENV)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)
/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
    return J->slot[fins->op1];
  }
}
/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)
/* -- Frame handling ------------------------------------------------------ */

/* Prevent CSE of a REF_BASE operand across IR_RETF. */
LJFOLD(SUB any BASE)
LJFOLD(SUB BASE any)
LJFOLD(EQ any BASE)
LJFOLDF(fold_base)
{
  return lj_opt_cselim(J, J->chain[IR_RETF]);
}
/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}
/* -- Profiling ----------------------------------------------------------- */

LJFOLD(PROF any any)
LJFOLDF(prof)
{
  IRRef ref = J->chain[IR_PROF];
  if (ref+1 == J->cur.nins)  /* Drop neighbouring IR_PROF. */
    return ref;
  return EMITFOLD;
}
/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(TMPREF any any)
LJFOLD(CALLA any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLS any any)
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)
/* -- Miscellaneous ------------------------------------------------------- */

LJFOLD(CARG any any)
LJFOLDF(cse_carg)
{
  TRef tr = lj_opt_cse(J);
  if (tref_ref(tr) < J->chain[IR_LOOP])  /* CSE across loop? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}
/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
**   xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 8 bit right instruction opcode or 10 bits from literal field
*/
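
/* Worked example with made-up numbers (not an actual generated entry):
** the entry 0x0546440A decomposes as fold_func index 0x05, folded opcode
** 0x23 (bits 17-23), left opcode 0x11 (bits 10-16) and right opcode or
** literal bits 0x00A (bits 0-9), i.e. lj_opt_fold() calls fold_func[0x05]
** whenever its masked key equals 0x46440A.
*/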

#include "lj_folddef.h"
/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
               "bad JIT_F_OPT_DEFAULT");
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* No FOLD, forwarding or CSE? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
                    (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* No FOLD or DSE? Emit raw IR for stores. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_DSE)) !=
                    (JIT_F_OPT_FOLD|JIT_F_OPT_DSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
    if (fins->op1 < REF_TRUE)
      fleft[1] = IR(fins->op1)[1];
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
    if (fins->op2 < REF_TRUE)
      fright[1] = IR(fins->op2)[1];
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }
  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }
  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lj_assertJ(ref == DROPFOLD, "bad fold result");
  return REF_DROP;
}
/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}
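
/* Illustrative sketch of the search above (assumes the usual SSA ordering of
** IR references): for fins = ADD t1 t2, J->chain[IR_ADD] holds the most
** recent ADD and each ->prev link leads to the previous one. The walk may
** stop at lim = max(op1, op2), because any instruction at or below that
** reference was emitted before one of the operands existed and thus cannot
** reference the same operand pair.
*/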
/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */