/*
** FOLD: Constant Folding, Algebraic Simplifications and Reassociation.
** ABCelim: Array Bounds Check Elimination.
** CSE: Common-Subexpression Elimination.
** Copyright (C) 2005-2025 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_opt_fold_c
#define LUA_CORE

#include <math.h>

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_buf.h"
#include "lj_str.h"
#include "lj_tab.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_ircall.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_carith.h"
#endif
#include "lj_vm.h"
#include "lj_strscan.h"
#include "lj_strfmt.h"
/* Here's a short description of how the FOLD engine processes instructions:
**
** The FOLD engine receives a single instruction stored in fins (J->fold.ins).
** The instruction and its operands are used to select matching fold rules.
** These are applied iteratively until a fixed point is reached.
**
** The 8 bit opcode of the instruction itself plus the opcodes of the
** two instructions referenced by its operands form a 24 bit key
** 'ins left right' (unused operands -> 0, literals -> lowest 8 bits).
**
** This key is used for partial matching against the fold rules. The
** left/right operand fields of the key are successively masked with
** the 'any' wildcard, from most specific to least specific:
**
**   ins left right
**   ins any  right
**   ins left any
**   ins any  any
**
** The masked key is used to lookup a matching fold rule in a semi-perfect
** hash table. If a matching rule is found, the related fold function is run.
** Multiple rules can share the same fold function. A fold rule may return
** one of several special values:
**
** - NEXTFOLD means no folding was applied, because an additional test
**   inside the fold function failed. Matching continues against less
**   specific fold rules. Finally the instruction is passed on to CSE.
**
** - RETRYFOLD means the instruction was modified in-place. Folding is
**   retried as if this instruction had just been received.
**
** All other return values are terminal actions -- no further folding is
** applied:
**
** - INTFOLD(i) returns a reference to the integer constant i.
**
** - LEFTFOLD and RIGHTFOLD return the left/right operand reference
**   without emitting an instruction.
**
** - CSEFOLD and EMITFOLD pass the instruction directly to CSE or emit
**   it without passing through any further optimizations.
**
** - FAILFOLD, DROPFOLD and CONDFOLD only apply to instructions which have
**   no result (e.g. guarded assertions): FAILFOLD means the guard would
**   always fail, i.e. the current trace is pointless. DROPFOLD means
**   the guard is always true and has been eliminated. CONDFOLD is a
**   shortcut for FAILFOLD + cond (i.e. drop if true, otherwise fail).
**
** - Any other return value is interpreted as an IRRef or TRef. This
**   can be a reference to an existing or a newly created instruction.
**   Only the least-significant 16 bits (IRRef1) are used to form a TRef
**   which is finally returned to the caller.
**
** The FOLD engine receives instructions both from the trace recorder and
** substituted instructions from LOOP unrolling. This means all types
** of instructions may end up here, even though the recorder bypasses
** FOLD in some cases. Thus all loads, stores and allocations must have
** an any/any rule to avoid being passed on to CSE.
**
** Carefully read the following requirements before adding or modifying
** any fold rules:
**
** Requirement #1: All fold rules must preserve their destination type.
**
** Consistently use INTFOLD() (KINT result) or lj_ir_knum() (KNUM result).
** Never use lj_ir_knumint() which can have either a KINT or KNUM result.
**
** Requirement #2: Fold rules should not create *new* instructions which
** reference operands *across* PHIs.
**
** E.g. a RETRYFOLD with 'fins->op1 = fleft->op1' is invalid if the
** left operand is a PHI. Then fleft->op1 would point across the PHI
** frontier to an invariant instruction. Adding a PHI for this instruction
** would be counterproductive. The solution is to add a barrier which
** prevents folding across PHIs, i.e. 'PHIBARRIER(fleft)' in this case.
** The only exception is for recurrences with high latencies like
** repeated int->num->int conversions.
**
** One could relax this condition a bit if the referenced instruction is
** a PHI, too. But this often leads to worse code due to excessive
** register shuffling.
**
** Note: returning *existing* instructions (e.g. LEFTFOLD) is ok, though.
** Even returning fleft->op1 would be ok, because a new PHI will be added,
** if needed. But again, this leads to excessive register shuffling and
** should be avoided.
**
** Requirement #3: The set of all fold rules must be monotonic to guarantee
** termination.
**
** The goal is optimization, so one primarily wants to add strength-reducing
** rules. This means eliminating an instruction or replacing an instruction
** with one or more simpler instructions. Don't add fold rules which point
** in the other direction.
**
** Some rules (like commutativity) do not directly reduce the strength of
** an instruction, but enable other fold rules (e.g. by moving constants
** to the right operand). These rules must be made unidirectional to avoid
** cycles.
**
** Rule of thumb: the trace recorder expands the IR and FOLD shrinks it.
*/
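
/* A compact restatement of the matching scheme above (illustrative sketch
** only, not part of the engine): fold_rule_lookup() is hypothetical; the
** real rule table and its semi-perfect hash are generated by buildvm from
** the LJFOLD() declarations below, and the real driver also handles
** RETRYFOLD and the other special return values.
*/
#if 0
static TRef fold_engine_sketch(jit_State *J)
{
  /* 24 bit key 'ins left right' (unused operands -> 0). */
  uint32_t key = ((uint32_t)J->fold.ins.o << 16) |
                 ((uint32_t)J->fold.left->o << 8) |
                 (uint32_t)J->fold.right->o;
  /* Probe masks, from most specific to least specific operand pattern. */
  static const uint32_t mask[4] = {
    0xffffff,  /* ins left right */
    0xff00ff,  /* ins any  right */
    0xffff00,  /* ins left any   */
    0xff0000   /* ins any  any   */
  };
  int i;
  for (i = 0; i < 4; i++) {
    FoldFunc ff = fold_rule_lookup(key & mask[i]);  /* Hypothetical lookup. */
    if (ff) {
      IRRef ref = ff(J);
      if (ref != NEXTFOLD)
        return (TRef)ref;  /* Terminal action or modified instruction. */
    }
  }
  return lj_opt_cse(J);  /* No rule applied: pass on to CSE. */
}
#endif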
/* Some local macros to save typing. Undef'd at the end. */
#define IR(ref)		(&J->cur.ir[(ref)])
#define fins		(&J->fold.ins)
#define fleft		(J->fold.left)
#define fright		(J->fold.right)
#define knumleft	(ir_knum(fleft)->n)
#define knumright	(ir_knum(fright)->n)

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)	(lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Fold function type. Fastcall on x86 significantly reduces their size. */
typedef IRRef (LJ_FASTCALL *FoldFunc)(jit_State *J);

/* Macros for the fold specs, so buildvm can recognize them. */
#define LJFOLD(x)
#define LJFOLDX(x)
#define LJFOLDF(name)	static TRef LJ_FASTCALL fold_##name(jit_State *J)
/* Note: They must be at the start of a line or buildvm ignores them! */

/* Barrier to prevent using operands across PHIs. */
#define PHIBARRIER(ir)	if (irt_isphi((ir)->t)) return NEXTFOLD

/* Barrier to prevent folding across a GC step.
** GC steps can only happen at the head of a trace and at LOOP.
** And the GC is only driven forward if there's at least one allocation.
*/
#define gcstep_barrier(J, ref) \
  ((ref) < J->chain[IR_LOOP] && \
   (J->chain[IR_SNEW] || J->chain[IR_XSNEW] || \
    J->chain[IR_TNEW] || J->chain[IR_TDUP] || \
    J->chain[IR_CNEW] || J->chain[IR_CNEWI] || \
    J->chain[IR_BUFSTR] || J->chain[IR_TOSTR] || J->chain[IR_CALLA]))
/* -- Constant folding for FP numbers ------------------------------------- */

LJFOLD(ADD KNUM KNUM)
LJFOLD(SUB KNUM KNUM)
LJFOLD(MUL KNUM KNUM)
LJFOLD(DIV KNUM KNUM)
LJFOLD(LDEXP KNUM KNUM)
LJFOLD(MIN KNUM KNUM)
LJFOLD(MAX KNUM KNUM)
LJFOLDF(kfold_numarith)
{
  lua_Number a = knumleft;
  lua_Number b = knumright;
  lua_Number y = lj_vm_foldarith(a, b, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(NEG KNUM FLOAD)
LJFOLD(ABS KNUM FLOAD)
LJFOLDF(kfold_numabsneg)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldarith(a, a, fins->o - IR_ADD);
  return lj_ir_knum(J, y);
}

LJFOLD(LDEXP KNUM KINT)
LJFOLDF(kfold_ldexp)
{
#if LJ_TARGET_X86ORX64
  UNUSED(J);
  return NEXTFOLD;
#else
  return lj_ir_knum(J, ldexp(knumleft, fright->i));
#endif
}

LJFOLD(FPMATH KNUM any)
LJFOLDF(kfold_fpmath)
{
  lua_Number a = knumleft;
  lua_Number y = lj_vm_foldfpm(a, fins->op2);
  return lj_ir_knum(J, y);
}

LJFOLD(CALLN KNUM any)
LJFOLDF(kfold_fpcall1)
{
  const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
  if (CCI_TYPE(ci) == IRT_NUM) {
    double y = ((double (*)(double))ci->func)(knumleft);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_atan2)
LJFOLDF(kfold_fpcall2)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    double a = ir_knum(IR(fleft->op1))->n;
    double b = ir_knum(IR(fleft->op2))->n;
    double y = ((double (*)(double, double))ci->func)(a, b);
    return lj_ir_knum(J, y);
  }
  return NEXTFOLD;
}

LJFOLD(POW KNUM KNUM)
LJFOLDF(kfold_numpow)
{
  return lj_ir_knum(J, lj_vm_foldarith(knumleft, knumright, IR_POW - IR_ADD));
}

/* Must not use kfold_kref for numbers (could be NaN). */
LJFOLD(EQ KNUM KNUM)
LJFOLD(NE KNUM KNUM)
LJFOLD(LT KNUM KNUM)
LJFOLD(GE KNUM KNUM)
LJFOLD(LE KNUM KNUM)
LJFOLD(GT KNUM KNUM)
LJFOLD(ULT KNUM KNUM)
LJFOLD(UGE KNUM KNUM)
LJFOLD(ULE KNUM KNUM)
LJFOLD(UGT KNUM KNUM)
LJFOLDF(kfold_numcomp)
{
  return CONDFOLD(lj_ir_numcmp(knumleft, knumright, (IROp)fins->o));
}
/* -- Constant folding for 32 bit integers -------------------------------- */

static int32_t kfold_intop(int32_t k1, int32_t k2, IROp op)
{
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_MOD: k1 = lj_vm_modi(k1, k2); break;
  case IR_NEG: k1 = (int32_t)(~(uint32_t)k1+1u); break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 31); break;
  case IR_BSHR: k1 = (int32_t)((uint32_t)k1 >> (k2 & 31)); break;
  case IR_BSAR: k1 >>= (k2 & 31); break;
  case IR_BROL: k1 = (int32_t)lj_rol((uint32_t)k1, (k2 & 31)); break;
  case IR_BROR: k1 = (int32_t)lj_ror((uint32_t)k1, (k2 & 31)); break;
  case IR_MIN: k1 = k1 < k2 ? k1 : k2; break;
  case IR_MAX: k1 = k1 > k2 ? k1 : k2; break;
  default: lj_assertX(0, "bad IR op %d", op); break;
  }
  return k1;
}

LJFOLD(ADD KINT KINT)
LJFOLD(SUB KINT KINT)
LJFOLD(MUL KINT KINT)
LJFOLD(MOD KINT KINT)
LJFOLD(NEG KINT KINT)
LJFOLD(BAND KINT KINT)
LJFOLD(BOR KINT KINT)
LJFOLD(BXOR KINT KINT)
LJFOLD(BSHL KINT KINT)
LJFOLD(BSHR KINT KINT)
LJFOLD(BSAR KINT KINT)
LJFOLD(BROL KINT KINT)
LJFOLD(BROR KINT KINT)
LJFOLD(MIN KINT KINT)
LJFOLD(MAX KINT KINT)
LJFOLDF(kfold_intarith)
{
  return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
}

LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith)
{
  lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i,
				 fins->o - IR_ADDOV);
  int32_t k = lj_num2int(n);
  if (n != (lua_Number)k)
    return FAILFOLD;
  return INTFOLD(k);
}

LJFOLD(BNOT KINT)
LJFOLDF(kfold_bnot)
{
  return INTFOLD(~fleft->i);
}

LJFOLD(BSWAP KINT)
LJFOLDF(kfold_bswap)
{
  return INTFOLD((int32_t)lj_bswap((uint32_t)fleft->i));
}

LJFOLD(LT KINT KINT)
LJFOLD(GE KINT KINT)
LJFOLD(LE KINT KINT)
LJFOLD(GT KINT KINT)
LJFOLD(ULT KINT KINT)
LJFOLD(UGE KINT KINT)
LJFOLD(ULE KINT KINT)
LJFOLD(UGT KINT KINT)
LJFOLD(ABC KINT KINT)
LJFOLDF(kfold_intcomp)
{
  int32_t a = fleft->i, b = fright->i;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD(a < b);
  case IR_GE: return CONDFOLD(a >= b);
  case IR_LE: return CONDFOLD(a <= b);
  case IR_GT: return CONDFOLD(a > b);
  case IR_ULT: return CONDFOLD((uint32_t)a < (uint32_t)b);
  case IR_UGE: return CONDFOLD((uint32_t)a >= (uint32_t)b);
  case IR_ULE: return CONDFOLD((uint32_t)a <= (uint32_t)b);
  case IR_ABC:
  case IR_UGT: return CONDFOLD((uint32_t)a > (uint32_t)b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
}

LJFOLD(UGE any KINT)
LJFOLDF(kfold_intcomp0)
{
  if (fright->i == 0)
    return DROPFOLD;
  return NEXTFOLD;
}
/* -- Constant folding for 64 bit integers -------------------------------- */

static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
				 IROp op)
{
  UNUSED(J);
#if LJ_HASFFI
  switch (op) {
  case IR_ADD: k1 += k2; break;
  case IR_SUB: k1 -= k2; break;
  case IR_MUL: k1 *= k2; break;
  case IR_BAND: k1 &= k2; break;
  case IR_BOR: k1 |= k2; break;
  case IR_BXOR: k1 ^= k2; break;
  case IR_BSHL: k1 <<= (k2 & 63); break;
  case IR_BSHR: k1 >>= (k2 & 63); break;
  case IR_BSAR: k1 = (uint64_t)((int64_t)k1 >> (k2 & 63)); break;
  case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
  case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
  default: lj_assertJ(0, "bad IR op %d", op); break;
  }
#else
  UNUSED(k2); UNUSED(op);
  lj_assertJ(0, "FFI IR op without FFI");
#endif
  return k1;
}

LJFOLD(ADD KINT64 KINT64)
LJFOLD(SUB KINT64 KINT64)
LJFOLD(MUL KINT64 KINT64)
LJFOLD(BAND KINT64 KINT64)
LJFOLD(BOR KINT64 KINT64)
LJFOLD(BXOR KINT64 KINT64)
LJFOLDF(kfold_int64arith)
{
  return INT64FOLD(kfold_int64arith(J, ir_k64(fleft)->u64,
				    ir_k64(fright)->u64, (IROp)fins->o));
}

LJFOLD(DIV KINT64 KINT64)
LJFOLD(MOD KINT64 KINT64)
LJFOLD(POW KINT64 KINT64)
LJFOLDF(kfold_int64arith2)
{
#if LJ_HASFFI
  uint64_t k1 = ir_k64(fleft)->u64, k2 = ir_k64(fright)->u64;
  if (irt_isi64(fins->t)) {
    k1 = fins->o == IR_DIV ? lj_carith_divi64((int64_t)k1, (int64_t)k2) :
	 fins->o == IR_MOD ? lj_carith_modi64((int64_t)k1, (int64_t)k2) :
			     lj_carith_powi64((int64_t)k1, (int64_t)k2);
  } else {
    k1 = fins->o == IR_DIV ? lj_carith_divu64(k1, k2) :
	 fins->o == IR_MOD ? lj_carith_modu64(k1, k2) :
			     lj_carith_powu64(k1, k2);
  }
  return INT64FOLD(k1);
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BSHL KINT64 KINT)
LJFOLD(BSHR KINT64 KINT)
LJFOLD(BSAR KINT64 KINT)
LJFOLD(BROL KINT64 KINT)
LJFOLD(BROR KINT64 KINT)
LJFOLDF(kfold_int64shift)
{
#if LJ_HASFFI
  uint64_t k = ir_k64(fleft)->u64;
  int32_t sh = (fright->i & 63);
  return INT64FOLD(lj_carith_shift64(k, sh, fins->o - IR_BSHL));
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BNOT KINT64)
LJFOLDF(kfold_bnot64)
{
#if LJ_HASFFI
  return INT64FOLD(~ir_k64(fleft)->u64);
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BSWAP KINT64)
LJFOLDF(kfold_bswap64)
{
#if LJ_HASFFI
  return INT64FOLD(lj_bswap64(ir_k64(fleft)->u64));
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(LT KINT64 KINT64)
LJFOLD(GE KINT64 KINT64)
LJFOLD(LE KINT64 KINT64)
LJFOLD(GT KINT64 KINT64)
LJFOLD(ULT KINT64 KINT64)
LJFOLD(UGE KINT64 KINT64)
LJFOLD(ULE KINT64 KINT64)
LJFOLD(UGT KINT64 KINT64)
LJFOLDF(kfold_int64comp)
{
#if LJ_HASFFI
  uint64_t a = ir_k64(fleft)->u64, b = ir_k64(fright)->u64;
  switch ((IROp)fins->o) {
  case IR_LT: return CONDFOLD((int64_t)a < (int64_t)b);
  case IR_GE: return CONDFOLD((int64_t)a >= (int64_t)b);
  case IR_LE: return CONDFOLD((int64_t)a <= (int64_t)b);
  case IR_GT: return CONDFOLD((int64_t)a > (int64_t)b);
  case IR_ULT: return CONDFOLD(a < b);
  case IR_UGE: return CONDFOLD(a >= b);
  case IR_ULE: return CONDFOLD(a <= b);
  case IR_UGT: return CONDFOLD(a > b);
  default: lj_assertJ(0, "bad IR op %d", fins->o); return FAILFOLD;
  }
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(UGE any KINT64)
LJFOLDF(kfold_int64comp0)
{
#if LJ_HASFFI
  if (ir_k64(fright)->u64 == 0)
    return DROPFOLD;
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
/* -- Constant folding for strings ---------------------------------------- */

LJFOLD(SNEW KKPTR KINT)
LJFOLDF(kfold_snew_kptr)
{
  GCstr *s = lj_str_new(J->L, (const char *)ir_kptr(fleft), (size_t)fright->i);
  return lj_ir_kstr(J, s);
}

LJFOLD(SNEW any KINT)
LJFOLD(XSNEW any KINT)
LJFOLDF(kfold_snew_empty)
{
  if (fright->i == 0)
    return lj_ir_kstr(J, &J2G(J)->strempty);
  return NEXTFOLD;
}

LJFOLD(STRREF KGC KINT)
LJFOLDF(kfold_strref)
{
  GCstr *str = ir_kstr(fleft);
  lj_assertJ((MSize)fright->i <= str->len, "bad string ref");
  return lj_ir_kkptr(J, (char *)strdata(str) + fright->i);
}

LJFOLD(STRREF SNEW any)
LJFOLDF(kfold_strref_snew)
{
  PHIBARRIER(fleft);
  if (irref_isk(fins->op2) && fright->i == 0) {
    return fleft->op1;  /* strref(snew(ptr, len), 0) ==> ptr */
  } else {
    /* Reassociate: strref(snew(strref(str, a), len), b) ==> strref(str, a+b) */
    IRIns *ir = IR(fleft->op1);
    if (ir->o == IR_STRREF) {
      IRRef1 str = ir->op1;  /* IRIns * is not valid across emitir. */
      PHIBARRIER(ir);
      fins->op2 = emitir(IRTI(IR_ADD), ir->op2, fins->op2);  /* Clobbers fins! */
      fins->op1 = str;
      fins->ot = IRT(IR_STRREF, IRT_PGC);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CALLN CARG IRCALL_lj_str_cmp)
LJFOLDF(kfold_strcmp)
{
  if (irref_isk(fleft->op1) && irref_isk(fleft->op2)) {
    GCstr *a = ir_kstr(IR(fleft->op1));
    GCstr *b = ir_kstr(IR(fleft->op2));
    return INTFOLD(lj_str_cmp(a, b));
  }
  return NEXTFOLD;
}
/* -- Constant folding and forwarding for buffers ------------------------- */

/*
** Buffer ops perform stores, but their effect is limited to the buffer
** itself. Also, buffer ops are chained: a use of an op implies a use of
** all other ops up the chain. Conversely, if an op is unused, all ops
** up the chain can go unused. This largely eliminates the need to treat
** them as stores.
**
** Alas, treating them as normal (IRM_N) ops doesn't work, because they
** cannot be CSEd in isolation. CSE for IRM_N is implicitly done in LOOP
** or if FOLD is disabled.
**
** The compromise is to declare them as loads, emit them like stores and
** CSE whole chains manually when the BUFSTR is to be emitted. Any chain
** fragments left over from CSE are eliminated by DCE.
**
** The string buffer methods emit a USE instead of a BUFSTR to keep the
** chain alive.
*/
LJFOLD(BUFHDR any any)
LJFOLDF(bufhdr_merge)
{
  return fins->op2 == IRBUFHDR_WRITE ? CSEFOLD : EMITFOLD;
}

LJFOLD(BUFPUT any BUFSTR)
LJFOLDF(bufput_bufstr)
{
  if ((J->flags & JIT_F_OPT_FWD)) {
    IRRef hdr = fright->op2;
    /* New buffer, no other buffer op in between and same buffer? */
    if (fleft->o == IR_BUFHDR && fleft->op2 == IRBUFHDR_RESET &&
	fleft->prev == hdr &&
	fleft->op1 == IR(hdr)->op1 &&
	!(irt_isphi(fright->t) && IR(hdr)->prev) &&
	(!LJ_HASBUFFER || J->chain[IR_CALLA] < hdr)) {
      IRRef ref = fins->op1;
      IR(ref)->op2 = IRBUFHDR_APPEND;  /* Modify BUFHDR. */
      IR(ref)->op1 = fright->op1;
      return ref;
    }
    /* Replay puts to global temporary buffer. */
    if (IR(hdr)->op2 == IRBUFHDR_RESET && !irt_isphi(fright->t)) {
      IRIns *ir = IR(fright->op1);
      /* For now only handle single string.reverse .lower .upper .rep. */
      if (ir->o == IR_CALLL &&
	  ir->op2 >= IRCALL_lj_buf_putstr_reverse &&
	  ir->op2 <= IRCALL_lj_buf_putstr_rep) {
	IRIns *carg1 = IR(ir->op1);
	if (ir->op2 == IRCALL_lj_buf_putstr_rep) {
	  IRIns *carg2 = IR(carg1->op1);
	  if (carg2->op1 == hdr) {
	    return lj_ir_call(J, ir->op2, fins->op1, carg2->op2, carg1->op2);
	  }
	} else if (carg1->op1 == hdr) {
	  return lj_ir_call(J, ir->op2, fins->op1, carg1->op2);
	}
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
LJFOLD(BUFPUT any any)
LJFOLDF(bufput_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fright->o == IR_KGC) {
    GCstr *s2 = ir_kstr(fright);
    if (s2->len == 0) {  /* Empty string? */
      return LEFTFOLD;
    } else {
      if (fleft->o == IR_BUFPUT && irref_isk(fleft->op2) &&
	  !irt_isphi(fleft->t)) {  /* Join two constant string puts in a row. */
	GCstr *s1 = ir_kstr(IR(fleft->op2));
	IRRef kref = lj_ir_kstr(J, lj_buf_cat2str(J->L, s1, s2));
	/* lj_ir_kstr() may realloc the IR and invalidate any IRIns *. */
	IR(fins->op1)->op2 = kref;  /* Modify previous BUFPUT. */
	return fins->op1;
      }
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
LJFOLD(BUFSTR any any)
LJFOLDF(bufstr_kfold_cse)
{
  lj_assertJ(fleft->o == IR_BUFHDR || fleft->o == IR_BUFPUT ||
	     fleft->o == IR_CALLL,
	     "bad buffer constructor IR op %d", fleft->o);
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    if (fleft->o == IR_BUFHDR) {  /* No put operations? */
      if (fleft->op2 == IRBUFHDR_RESET)  /* Empty buffer? */
	return lj_ir_kstr(J, &J2G(J)->strempty);
      fins->op1 = fleft->op1;
      fins->op2 = fleft->prev;  /* Relies on checks in bufput_append. */
      return CSEFOLD;
    } else if (fleft->o == IR_BUFPUT) {
      IRIns *irb = IR(fleft->op1);
      if (irb->o == IR_BUFHDR && irb->op2 == IRBUFHDR_RESET)
	return fleft->op2;  /* Shortcut for a single put operation. */
    }
  }
  /* Try to CSE the whole chain. */
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[IR_BUFSTR];
    while (ref) {
      IRIns *irs = IR(ref), *ira = fleft, *irb = IR(irs->op1);
      while (ira->o == irb->o && ira->op2 == irb->op2) {
	lj_assertJ(ira->o == IR_BUFHDR || ira->o == IR_BUFPUT ||
		   ira->o == IR_CALLL || ira->o == IR_CARG,
		   "bad buffer constructor IR op %d", ira->o);
	if (ira->o == IR_BUFHDR && ira->op2 == IRBUFHDR_RESET)
	  return ref;  /* CSE succeeded. */
	if (ira->o == IR_CALLL && ira->op2 == IRCALL_lj_buf_puttab)
	  break;
	ira = IR(ira->op1);
	irb = IR(irb->op1);
      }
      ref = irs->prev;
    }
  }
  return EMITFOLD;  /* No CSE possible. */
}
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_reverse)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_upper)
LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_lower)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putquoted)
LJFOLDF(bufput_kfold_op)
{
  if (irref_isk(fleft->op2)) {
    const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
    SBuf *sb = lj_buf_tmp_(J->L);
    sb = ((SBuf * (LJ_FASTCALL *)(SBuf *, GCstr *))ci->func)(sb,
						     ir_kstr(IR(fleft->op2)));
    fins->o = IR_BUFPUT;
    fins->op1 = fleft->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}

LJFOLD(CALLL CARG IRCALL_lj_buf_putstr_rep)
LJFOLDF(bufput_kfold_rep)
{
  if (irref_isk(fleft->op2)) {
    IRIns *irc = IR(fleft->op1);
    if (irref_isk(irc->op2)) {
      SBuf *sb = lj_buf_tmp_(J->L);
      sb = lj_buf_putstr_rep(sb, ir_kstr(IR(irc->op2)), IR(fleft->op2)->i);
      fins->o = IR_BUFPUT;
      fins->op1 = irc->op1;
      fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
      return RETRYFOLD;
    }
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfxint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_int)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum_uint)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfnum)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfstr)
LJFOLD(CALLL CARG IRCALL_lj_strfmt_putfchar)
LJFOLDF(bufput_kfold_fmt)
{
  IRIns *irc = IR(fleft->op1);
  lj_assertJ(irref_isk(irc->op2), "SFormat must be const");
  if (irref_isk(fleft->op2)) {
    SFormat sf = (SFormat)IR(irc->op2)->i;
    IRIns *ira = IR(fleft->op2);
    SBuf *sb = lj_buf_tmp_(J->L);
    switch (fins->op2) {
    case IRCALL_lj_strfmt_putfxint:
      sb = lj_strfmt_putfxint(sb, sf, ir_k64(ira)->u64);
      break;
    case IRCALL_lj_strfmt_putfstr:
      sb = lj_strfmt_putfstr(sb, sf, ir_kstr(ira));
      break;
    case IRCALL_lj_strfmt_putfchar:
      sb = lj_strfmt_putfchar(sb, sf, ira->i);
      break;
    case IRCALL_lj_strfmt_putfnum_int:
    case IRCALL_lj_strfmt_putfnum_uint:
    case IRCALL_lj_strfmt_putfnum:
    default: {
      const CCallInfo *ci = &lj_ir_callinfo[fins->op2];
      sb = ((SBuf * (*)(SBuf *, SFormat, lua_Number))ci->func)(sb, sf,
							       ir_knum(ira)->n);
      break;
      }
    }
    fins->o = IR_BUFPUT;
    fins->op1 = irc->op1;
    fins->op2 = lj_ir_kstr(J, lj_buf_tostr(sb));
    return RETRYFOLD;
  }
  return EMITFOLD;  /* Always emit, CSE later. */
}
/* -- Constant folding of pointer arithmetic ------------------------------ */

LJFOLD(ADD KGC KINT)
LJFOLD(ADD KGC KINT64)
LJFOLDF(kfold_add_kgc)
{
  GCobj *o = ir_kgc(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
#if LJ_HASFFI
  if (irt_iscdata(fleft->t)) {
    CType *ct = ctype_raw(ctype_ctsG(J2G(J)), gco2cd(o)->ctypeid);
    if (ctype_isnum(ct->info) || ctype_isenum(ct->info) ||
	ctype_isptr(ct->info) || ctype_isfunc(ct->info) ||
	ctype_iscomplex(ct->info) || ctype_isvector(ct->info))
      return lj_ir_kkptr(J, (char *)o + ofs);
  }
#endif
  return lj_ir_kptr(J, (char *)o + ofs);
}

LJFOLD(ADD KPTR KINT)
LJFOLD(ADD KPTR KINT64)
LJFOLD(ADD KKPTR KINT)
LJFOLD(ADD KKPTR KINT64)
LJFOLDF(kfold_add_kptr)
{
  void *p = ir_kptr(fleft);
#if LJ_64
  ptrdiff_t ofs = (ptrdiff_t)ir_kint64(fright)->u64;
#else
  ptrdiff_t ofs = fright->i;
#endif
  return lj_ir_kptr_(J, fleft->o, (char *)p + ofs);
}

LJFOLD(ADD any KGC)
LJFOLD(ADD any KPTR)
LJFOLD(ADD any KKPTR)
LJFOLDF(kfold_add_kright)
{
  if (fleft->o == IR_KINT || fleft->o == IR_KINT64) {
    IRRef1 tmp = fins->op1; fins->op1 = fins->op2; fins->op2 = tmp;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
/* -- Constant folding of conversions ------------------------------------- */

LJFOLD(TOBIT KNUM KNUM)
LJFOLDF(kfold_tobit)
{
  return INTFOLD(lj_num2bit(knumleft));
}

LJFOLD(CONV KINT IRCONV_NUM_INT)
LJFOLDF(kfold_conv_kint_num)
{
  return lj_ir_knum(J, (lua_Number)fleft->i);
}

LJFOLD(CONV KINT IRCONV_NUM_U32)
LJFOLDF(kfold_conv_kintu32_num)
{
  return lj_ir_knum(J, (lua_Number)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT IRCONV_INT_I8)
LJFOLD(CONV KINT IRCONV_INT_U8)
LJFOLD(CONV KINT IRCONV_INT_I16)
LJFOLD(CONV KINT IRCONV_INT_U16)
LJFOLDF(kfold_conv_kint_ext)
{
  int32_t k = fleft->i;
  if ((fins->op2 & IRCONV_SRCMASK) == IRT_I8) k = (int8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_U8) k = (uint8_t)k;
  else if ((fins->op2 & IRCONV_SRCMASK) == IRT_I16) k = (int16_t)k;
  else k = (uint16_t)k;
  return INTFOLD(k);
}

LJFOLD(CONV KINT IRCONV_I64_INT)
LJFOLD(CONV KINT IRCONV_U64_INT)
LJFOLD(CONV KINT IRCONV_I64_U32)
LJFOLD(CONV KINT IRCONV_U64_U32)
LJFOLDF(kfold_conv_kint_i64)
{
  if ((fins->op2 & IRCONV_SEXT))
    return INT64FOLD((uint64_t)(int64_t)fleft->i);
  else
    return INT64FOLD((uint64_t)(int64_t)(uint32_t)fleft->i);
}

LJFOLD(CONV KINT64 IRCONV_NUM_I64)
LJFOLDF(kfold_conv_kint64_num_i64)
{
  return lj_ir_knum(J, (lua_Number)(int64_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_NUM_U64)
LJFOLDF(kfold_conv_kint64_num_u64)
{
  return lj_ir_knum(J, (lua_Number)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KINT64 IRCONV_INT_I64)
LJFOLD(CONV KINT64 IRCONV_U32_I64)
LJFOLDF(kfold_conv_kint64_int_i64)
{
  return INTFOLD((int32_t)ir_kint64(fleft)->u64);
}

LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num)
{
  lua_Number n = knumleft;
  int32_t k = lj_num2int(n);
  if (irt_isguard(fins->t) && n != (lua_Number)k) {
    /* We're about to create a guard which always fails, like CONV +1.5.
    ** Some pathological loops cause this during LICM, e.g.:
    **   local x,k,t = 0,1.5,{1,[1.5]=2}
    **   for i=1,200 do x = x + t[k]; k = k == 1 and 1.5 or 1 end
    **   assert(x == 300)
    */
    return FAILFOLD;
  }
  return INTFOLD(k);
}

LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
#ifdef _MSC_VER
  {  /* Workaround for MSVC bug. */
    volatile uint32_t u = (uint32_t)knumleft;
    return INTFOLD((int32_t)u);
  }
#else
  return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
}

LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num)
{
  return INT64FOLD((uint64_t)(int64_t)knumleft);
}

LJFOLD(CONV KNUM IRCONV_U64_NUM)
LJFOLDF(kfold_conv_knum_u64_num)
{
  return INT64FOLD(lj_num2u64(knumleft));
}

LJFOLD(TOSTR KNUM any)
LJFOLDF(kfold_tostr_knum)
{
  return lj_ir_kstr(J, lj_strfmt_num(J->L, ir_knum(fleft)));
}

LJFOLD(TOSTR KINT any)
LJFOLDF(kfold_tostr_kint)
{
  return lj_ir_kstr(J, fins->op2 == IRTOSTR_INT ?
		       lj_strfmt_int(J->L, fleft->i) :
		       lj_strfmt_char(J->L, fleft->i));
}

LJFOLD(STRTO KGC)
LJFOLDF(kfold_strto)
{
  TValue n;
  if (lj_strscan_num(ir_kstr(fleft), &n))
    return lj_ir_knum(J, numV(&n));
  return FAILFOLD;
}
/* -- Constant folding of equality checks --------------------------------- */

/* Don't constant-fold away FLOAD checks against KNULL. */
LJFOLD(EQ FLOAD KNULL)
LJFOLD(NE FLOAD KNULL)
LJFOLDX(lj_opt_cse)

/* But fold all other KNULL compares, since only KNULL is equal to KNULL. */
LJFOLD(EQ any KNULL)
LJFOLD(NE any KNULL)
LJFOLD(EQ KNULL any)
LJFOLD(NE KNULL any)
LJFOLD(EQ KINT KINT)  /* Constants are unique, so same refs <==> same value. */
LJFOLD(NE KINT KINT)
LJFOLD(EQ KINT64 KINT64)
LJFOLD(NE KINT64 KINT64)
LJFOLD(EQ KGC KGC)
LJFOLD(NE KGC KGC)
LJFOLDF(kfold_kref)
{
  return CONDFOLD((fins->op1 == fins->op2) ^ (fins->o == IR_NE));
}
/* -- Algebraic shortcuts ------------------------------------------------- */

LJFOLD(FPMATH FPMATH IRFPM_FLOOR)
LJFOLD(FPMATH FPMATH IRFPM_CEIL)
LJFOLD(FPMATH FPMATH IRFPM_TRUNC)
LJFOLDF(shortcut_round)
{
  IRFPMathOp op = (IRFPMathOp)fleft->op2;
  if (op == IRFPM_FLOOR || op == IRFPM_CEIL || op == IRFPM_TRUNC)
    return LEFTFOLD;  /* round(round_left(x)) = round_left(x) */
  return NEXTFOLD;
}

LJFOLD(ABS ABS FLOAD)
LJFOLDF(shortcut_left)
{
  return LEFTFOLD;  /* f(g(x)) ==> g(x) */
}

LJFOLD(ABS NEG FLOAD)
LJFOLDF(shortcut_dropleft)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* abs(neg(x)) ==> abs(x) */
  return RETRYFOLD;
}

/* Note: no safe shortcuts with STRTO and TOSTR ("1e2" ==> +100 ==> "100"). */
LJFOLD(NEG NEG any)
LJFOLD(BNOT BNOT)
LJFOLD(BSWAP BSWAP)
LJFOLDF(shortcut_leftleft)
{
  PHIBARRIER(fleft);  /* See above. Fold would be ok, but not beneficial. */
  return fleft->op1;  /* f(g(x)) ==> x */
}
/* -- FP algebraic simplifications ---------------------------------------- */

/* FP arithmetic is tricky -- there's not much to simplify.
** Please note the following common pitfalls before sending "improvements":
**   x+0 ==> x  is INVALID for x=-0
**   0-x ==> -x is INVALID for x=+0
**   x*0 ==> 0  is INVALID for x=-0, x=+-Inf or x=NaN
*/
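
/* A quick C-level check of the first pitfall (illustrative sketch, not part
** of the engine): with IEEE-754 doubles (-0.0)+0.0 is +0.0, so folding
** x+0 ==> x would silently flip the sign of a -0 result. The two zeros
** compare equal, but 1/x tells them apart.
*/
#if 0
#include <stdio.h>
int main(void)
{
  double x = -0.0;
  printf("%g vs %g\n", 1.0/(x + 0.0), 1.0/x);  /* Prints: inf vs -inf */
  return 0;
}
#endif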
LJFOLD(ADD NEG any)
LJFOLDF(simplify_numadd_negx)
{
  PHIBARRIER(fleft);
  fins->o = IR_SUB;  /* (-a) + b ==> b - a */
  fins->op1 = fins->op2;
  fins->op2 = fleft->op1;
  return RETRYFOLD;
}

LJFOLD(ADD any NEG)
LJFOLDF(simplify_numadd_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_SUB;  /* a + (-b) ==> a - b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(SUB any KNUM)
LJFOLDF(simplify_numsub_k)
{
  if (ir_knum(fright)->u64 == 0)  /* x - (+0) ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB NEG KNUM)
LJFOLDF(simplify_numsub_negk)
{
  PHIBARRIER(fleft);
  fins->op2 = fleft->op1;  /* (-x) - k ==> (-k) - x */
  fins->op1 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(SUB any NEG)
LJFOLDF(simplify_numsub_xneg)
{
  PHIBARRIER(fright);
  fins->o = IR_ADD;  /* a - (-b) ==> a + b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(MUL any KNUM)
LJFOLD(DIV any KNUM)
LJFOLDF(simplify_nummuldiv_k)
{
  lua_Number n = knumright;
  if (n == 1.0) {  /* x o 1 ==> x */
    return LEFTFOLD;
  } else if (n == -1.0) {  /* x o -1 ==> -x */
    IRRef op1 = fins->op1;
    fins->op2 = (IRRef1)lj_ir_ksimd(J, LJ_KSIMD_NEG);  /* Modifies fins. */
    fins->op1 = op1;
    fins->o = IR_NEG;
    return RETRYFOLD;
  } else if (fins->o == IR_MUL && n == 2.0) {  /* x * 2 ==> x + x */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  } else if (fins->o == IR_DIV) {  /* x / 2^k ==> x * 2^-k */
    uint64_t u = ir_knum(fright)->u64;
    uint32_t ex = ((uint32_t)(u >> 52) & 0x7ff);
    if ((u & U64x(000fffff,ffffffff)) == 0 && ex - 1 < 0x7fd) {
      u = (u & ((uint64_t)1 << 63)) | ((uint64_t)(0x7fe - ex) << 52);
      fins->o = IR_MUL;  /* Multiply by exact reciprocal. */
      fins->op2 = lj_ir_knum_u64(J, u);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
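
/* Why the x / 2^k ==> x * 2^-k rewrite above is exact (illustrative sketch,
** not part of the engine): a power of two has an all-zero mantissa, so its
** reciprocal is exactly representable unless the negated exponent over- or
** underflows -- which is precisely what the 'ex - 1 < 0x7fd' guard rules out.
*/
#if 0
#include <assert.h>
int main(void)
{
  double x = 3.141592653589793;
  assert(x / 8.0 == x * 0.125);  /* 1/8 == 2^-3 is exact. */
  assert(x / 0.25 == x * 4.0);   /* Also exact for negative exponents. */
  return 0;
}
#endif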
LJFOLD(MUL NEG KNUM)
LJFOLD(DIV NEG KNUM)
LJFOLDF(simplify_nummuldiv_negk)
{
  PHIBARRIER(fleft);
  fins->op1 = fleft->op1;  /* (-a) o k ==> a o (-k) */
  fins->op2 = (IRRef1)lj_ir_knum(J, -knumright);
  return RETRYFOLD;
}

LJFOLD(MUL NEG NEG)
LJFOLD(DIV NEG NEG)
LJFOLDF(simplify_nummuldiv_negneg)
{
  PHIBARRIER(fleft);
  PHIBARRIER(fright);
  fins->op1 = fleft->op1;  /* (-a) o (-b) ==> a o b */
  fins->op2 = fright->op1;
  return RETRYFOLD;
}

LJFOLD(POW any KNUM)
LJFOLDF(simplify_numpow_k)
{
  if (knumright == 0.0)  /* x ^ 0 ==> 1 */
    return lj_ir_knum_one(J);  /* Result must be a number, not an int. */
  else if (knumright == 1.0)  /* x ^ 1 ==> x */
    return LEFTFOLD;
  else if (knumright == 2.0)  /* x ^ 2 ==> x * x */
    return emitir(IRTN(IR_MUL), fins->op1, fins->op1);
  else
    return NEXTFOLD;
}
/* -- Simplify conversions ------------------------------------------------ */

LJFOLD(CONV CONV IRCONV_NUM_INT)  /* _NUM */
LJFOLDF(shortcut_conv_num_int)
{
  PHIBARRIER(fleft);
  /* Only safe with a guarded conversion to int. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_NUM && irt_isguard(fleft->t))
    return fleft->op1;  /* f(g(x)) ==> x */
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_NUM)  /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM)  /* _U32 */
LJFOLDF(simplify_conv_int_num)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) ==
      ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH))
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_I64_NUM)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U64_NUM)  /* _INT or _U32 */
LJFOLDF(simplify_conv_i64_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    /* Reduce to a sign-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = ((IRT_I64<<5)|IRT_INT|IRCONV_SEXT);
    return RETRYFOLD;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
#if LJ_TARGET_X64
    return fleft->op1;
#else
    /* Reduce to a zero-extension. */
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_I64<<5)|IRT_U32;
    return RETRYFOLD;
#endif
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_INT_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_INT_U32)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_I64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_U64)  /* _INT or _U32 */
LJFOLD(CONV CONV IRCONV_U32_INT)  /* _INT or _U32 */
LJFOLDF(simplify_conv_int_i64)
{
  int src;
  PHIBARRIER(fleft);
  src = (fleft->op2 & IRCONV_SRCMASK);
  if (src == IRT_INT || src == IRT_U32) {
    if (src == ((fins->op2 & IRCONV_DSTMASK) >> IRCONV_DSH)) {
      return fleft->op1;
    } else {
      fins->op2 = ((fins->op2 & IRCONV_DSTMASK) | src);
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(CONV CONV IRCONV_FLOAT_NUM)  /* _FLOAT */
LJFOLDF(simplify_conv_flt_num)
{
  PHIBARRIER(fleft);
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_FLOAT)
    return fleft->op1;
  return NEXTFOLD;
}

/* Shortcut TOBIT + IRT_NUM <- IRT_INT/IRT_U32 conversion. */
LJFOLD(TOBIT CONV KNUM)
LJFOLDF(simplify_tobit_conv)
{
  /* Fold even across PHI to avoid expensive num->int conversions in loop. */
  if ((fleft->op2 & IRCONV_SRCMASK) == IRT_INT) {
    lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
    return fleft->op1;
  } else if ((fleft->op2 & IRCONV_SRCMASK) == IRT_U32) {
    lj_assertJ(irt_isnum(fleft->t), "expected TOBIT number arg");
    fins->o = IR_CONV;
    fins->op1 = fleft->op1;
    fins->op2 = (IRT_INT<<5)|IRT_U32;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

/* Shortcut floor/ceil/trunc + IRT_NUM <- integer conversion. */
LJFOLD(FPMATH CONV IRFPM_FLOOR)
LJFOLD(FPMATH CONV IRFPM_CEIL)
LJFOLD(FPMATH CONV IRFPM_TRUNC)
LJFOLDF(simplify_floor_conv)
{
  if ((uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <=
      (uint32_t)(IRT_U64 - IRT_U8))
    return LEFTFOLD;
  return NEXTFOLD;
}
/* Strength reduction of widening. */
LJFOLD(CONV any IRCONV_I64_INT)
LJFOLD(CONV any IRCONV_U64_INT)
LJFOLDF(simplify_conv_sext)
{
  IRRef ref = fins->op1;
  int64_t ofs = 0;
  if (!(fins->op2 & IRCONV_SEXT))
    return NEXTFOLD;
  PHIBARRIER(fleft);
  if (fleft->o == IR_XLOAD && (irt_isu8(fleft->t) || irt_isu16(fleft->t)))
    goto ok_reduce;
  if (fleft->o == IR_ADD && irref_isk(fleft->op2)) {
    ofs = (int64_t)IR(fleft->op2)->i;
    ref = fleft->op1;
  }
  /* Use scalar evolution analysis results to strength-reduce sign-extension. */
  if (ref == J->scev.idx) {
    IRRef lo = J->scev.dir ? J->scev.start : J->scev.stop;
    lj_assertJ(irt_isint(J->scev.t), "only int SCEV supported");
    if (lo && IR(lo)->o == IR_KINT && IR(lo)->i + ofs >= 0) {
    ok_reduce:
#if LJ_TARGET_X64
      /* Eliminate widening. All 32 bit ops do an implicit zero-extension. */
      return LEFTFOLD;
#else
      /* Reduce to a (cheaper) zero-extension. */
      fins->op2 &= ~IRCONV_SEXT;
      return RETRYFOLD;
#endif
    }
  }
  return NEXTFOLD;
}

/* Strength reduction of narrowing. */
LJFOLD(CONV ADD IRCONV_INT_I64)
LJFOLD(CONV SUB IRCONV_INT_I64)
LJFOLD(CONV MUL IRCONV_INT_I64)
LJFOLD(CONV ADD IRCONV_INT_U64)
LJFOLD(CONV SUB IRCONV_INT_U64)
LJFOLD(CONV MUL IRCONV_INT_U64)
LJFOLD(CONV ADD IRCONV_U32_I64)
LJFOLD(CONV SUB IRCONV_U32_I64)
LJFOLD(CONV MUL IRCONV_U32_I64)
LJFOLD(CONV ADD IRCONV_U32_U64)
LJFOLD(CONV SUB IRCONV_U32_U64)
LJFOLD(CONV MUL IRCONV_U32_U64)
LJFOLDF(simplify_conv_narrow)
{
#if LJ_64
  UNUSED(J);
  return NEXTFOLD;
#else
  IROp op = (IROp)fleft->o;
  IRType t = irt_type(fins->t);
  IRRef op1 = fleft->op1, op2 = fleft->op2, mode = fins->op2;
  PHIBARRIER(fleft);
  op1 = emitir(IRT(IR_CONV, t), op1, mode);
  op2 = emitir(IRT(IR_CONV, t), op2, mode);
  fins->ot = IRT(op, t);
  fins->op1 = op1;
  fins->op2 = op2;
  return RETRYFOLD;
#endif
}

/* Special CSE rule for CONV. */
LJFOLD(CONV any any)
LJFOLDF(cse_conv)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef op1 = fins->op1, op2 = (fins->op2 & IRCONV_MODEMASK);
    uint8_t guard = irt_isguard(fins->t);
    IRRef ref = J->chain[IR_CONV];
    while (ref > op1) {
      IRIns *ir = IR(ref);
      /* Commoning with stronger checks is ok. */
      if (ir->op1 == op1 && (ir->op2 & IRCONV_MODEMASK) == op2 &&
	  irt_isguard(ir->t) >= guard)
	return ref;
      ref = ir->prev;
    }
  }
  return EMITFOLD;  /* No fallthrough to regular CSE. */
}

/* FP conversion narrowing. */
LJFOLD(TOBIT ADD KNUM)
LJFOLD(TOBIT SUB KNUM)
LJFOLD(CONV ADD IRCONV_INT_NUM)
LJFOLD(CONV SUB IRCONV_INT_NUM)
LJFOLD(CONV ADD IRCONV_I64_NUM)
LJFOLD(CONV SUB IRCONV_I64_NUM)
LJFOLDF(narrow_convert)
{
  PHIBARRIER(fleft);
  /* Narrowing ignores PHIs and repeating it inside the loop is not useful. */
  if (J->chain[IR_LOOP])
    return NEXTFOLD;
  lj_assertJ(fins->o != IR_CONV ||
	     (fins->op2 & IRCONV_CONVMASK) != IRCONV_TOBIT,
	     "unexpected CONV TOBIT");
  return lj_opt_narrow_convert(J);
}
/* -- Integer algebraic simplifications ----------------------------------- */

LJFOLD(ADD any KINT)
LJFOLD(ADDOV any KINT)
LJFOLD(SUBOV any KINT)
LJFOLDF(simplify_intadd_k)
{
  if (fright->i == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(MULOV any KINT)
LJFOLDF(simplify_intmul_k)
{
  if (fright->i == 0)  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  if (fright->i == 1)  /* i * 1 ==> i */
    return LEFTFOLD;
  if (fright->i == 2) {  /* i * 2 ==> i + i */
    fins->o = IR_ADDOV;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any KINT)
LJFOLDF(simplify_intsub_k)
{
  if (fright->i == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint(J, (int32_t)(~(uint32_t)fright->i+1u));  /* Overflow for -2^31 ok. */
  return RETRYFOLD;
}

LJFOLD(SUB KINT any)
LJFOLD(SUB KINT64 any)
LJFOLDF(simplify_intsub_kleft)
{
  if (fleft->o == IR_KINT ? (fleft->i == 0) : (ir_kint64(fleft)->u64 == 0)) {
    fins->o = IR_NEG;  /* 0 - i ==> -i */
    fins->op1 = fins->op2;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(ADD any KINT64)
LJFOLDF(simplify_intadd_k64)
{
  if (ir_kint64(fright)->u64 == 0)  /* i + 0 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(SUB any KINT64)
LJFOLDF(simplify_intsub_k64)
{
  uint64_t k = ir_kint64(fright)->u64;
  if (k == 0)  /* i - 0 ==> i */
    return LEFTFOLD;
  fins->o = IR_ADD;  /* i - k ==> i + (-k) */
  fins->op2 = (IRRef1)lj_ir_kint64(J, ~k+1u);
  return RETRYFOLD;
}
static TRef simplify_intmul_k(jit_State *J, int32_t k)
{
  /* Note: many more simplifications are possible, e.g. 2^k1 +- 2^k2.
  ** But this is mainly intended for simple address arithmetic.
  ** Also it's easier for the backend to optimize the original multiplies.
  */
  if (k == 0) {  /* i * 0 ==> 0 */
    return RIGHTFOLD;
  } else if (k == 1) {  /* i * 1 ==> i */
    return LEFTFOLD;
  } else if ((k & (k-1)) == 0) {  /* i * 2^k ==> i << k */
    fins->o = IR_BSHL;
    fins->op2 = lj_ir_kint(J, lj_fls((uint32_t)k));
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
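
/* The power-of-two test and shift amount used above (illustrative sketch,
** not part of the engine): k & (k-1) clears the lowest set bit, so it is
** zero iff k has exactly one bit set; lj_fls() (find last set) then yields
** the bit index, i.e. log2(k).
*/
#if 0
#include <assert.h>
int main(void)
{
  unsigned int k = 8;
  assert((k & (k-1)) == 0);      /* 8 is a power of two. */
  assert((12u & (12u-1)) != 0);  /* 12 is not. */
  assert((5 << 3) == 5 * 8);     /* i * 2^n == i << n */
  return 0;
}
#endif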
LJFOLD(MUL any KINT)
LJFOLDF(simplify_intmul_k32)
{
  if (fright->i >= 0)
    return simplify_intmul_k(J, fright->i);
  return NEXTFOLD;
}

LJFOLD(MUL any KINT64)
LJFOLDF(simplify_intmul_k64)
{
#if LJ_HASFFI
  if (ir_kint64(fright)->u64 < 0x80000000u)
    return simplify_intmul_k(J, (int32_t)ir_kint64(fright)->u64);
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(MOD any KINT)
LJFOLDF(simplify_intmod_k)
{
  int32_t k = fright->i;
  lj_assertJ(k != 0, "integer mod 0");
  if (k > 0 && (k & (k-1)) == 0) {  /* i % (2^k) ==> i & (2^k-1) */
    fins->o = IR_BAND;
    fins->op2 = lj_ir_kint(J, k-1);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
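
/* The mask rewrite above is valid for negative dividends, too, because
** IR_MOD implements Lua's *floored* modulo (lj_vm_modi): for a positive
** power-of-two divisor the floored remainder is non-negative and equals
** the low bits of the two's complement representation. A C-level check
** (illustrative sketch, assumes two's complement ints):
*/
#if 0
#include <assert.h>
#include <math.h>
static int floored_mod(int i, int m)
{
  return i - m * (int)floor((double)i / m);  /* Lua-style modulo. */
}
int main(void)
{
  assert(floored_mod(-3, 8) == (-3 & 7));  /* Both are 5. */
  assert(floored_mod(21, 8) == (21 & 7));  /* Both are 5. */
  return 0;
}
#endif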
LJFOLD(MOD KINT any)
LJFOLDF(simplify_intmod_kleft)
{
  if (fleft->i == 0)
    return INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB any any)
LJFOLD(SUBOV any any)
LJFOLDF(simplify_intsub)
{
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))  /* i - i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return NEXTFOLD;
}

LJFOLD(SUB ADD any)
LJFOLDF(simplify_intsubadd_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1)  /* (i + j) - i ==> j */
      return fleft->op2;
    if (fins->op2 == fleft->op2)  /* (i + j) - j ==> i */
      return fleft->op1;
  }
  return NEXTFOLD;
}

LJFOLD(SUB SUB any)
LJFOLDF(simplify_intsubsub_leftcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    if (fins->op2 == fleft->op1) {  /* (i - j) - i ==> 0 - j */
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      fins->op2 = fleft->op2;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB any SUB)
LJFOLDF(simplify_intsubsub_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1)  /* i - (i - j) ==> j */
      return fright->op2;
  }
  return NEXTFOLD;
}

LJFOLD(SUB any ADD)
LJFOLDF(simplify_intsubadd_rightcancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fright);
    if (fins->op1 == fright->op1) {  /* i - (i + j) ==> 0 - j */
      fins->op2 = fright->op2;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
    if (fins->op1 == fright->op2) {  /* i - (j + i) ==> 0 - j */
      fins->op2 = fright->op1;
      fins->op1 = (IRRef1)lj_ir_kint(J, 0);
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(SUB ADD ADD)
LJFOLDF(simplify_intsubaddadd_cancel)
{
  if (!irt_isnum(fins->t)) {
    PHIBARRIER(fleft);
    PHIBARRIER(fright);
    if (fleft->op1 == fright->op1) {  /* (i + j1) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op1 == fright->op2) {  /* (i + j1) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op2;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op1) {  /* (j1 + i) - (i + j2) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op2;
      return RETRYFOLD;
    }
    if (fleft->op2 == fright->op2) {  /* (j1 + i) - (j2 + i) ==> j1 - j2 */
      fins->op1 = fleft->op1;
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}
LJFOLD(BAND any KINT)
LJFOLD(BAND any KINT64)
LJFOLDF(simplify_band_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i & 0 ==> 0 */
    return RIGHTFOLD;
  if (k == -1)  /* i & -1 ==> i */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BOR any KINT)
LJFOLD(BOR any KINT64)
LJFOLDF(simplify_bor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i | 0 ==> i */
    return LEFTFOLD;
  if (k == -1)  /* i | -1 ==> -1 */
    return RIGHTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any KINT)
LJFOLD(BXOR any KINT64)
LJFOLDF(simplify_bxor_k)
{
  int64_t k = fright->o == IR_KINT ? (int64_t)fright->i :
				     (int64_t)ir_k64(fright)->u64;
  if (k == 0)  /* i xor 0 ==> i */
    return LEFTFOLD;
  if (k == -1) {  /* i xor -1 ==> ~i */
    fins->o = IR_BNOT;
    fins->op2 = 0;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BSHL any KINT)
LJFOLD(BSHR any KINT)
LJFOLD(BSAR any KINT)
LJFOLD(BROL any KINT)
LJFOLD(BROR any KINT)
LJFOLDF(simplify_shift_ik)
{
  int32_t mask = irt_is64(fins->t) ? 63 : 31;
  int32_t k = (fright->i & mask);
  if (k == 0)  /* i o 0 ==> i */
    return LEFTFOLD;
  if (k == 1 && fins->o == IR_BSHL) {  /* i << 1 ==> i + i */
    fins->o = IR_ADD;
    fins->op2 = fins->op1;
    return RETRYFOLD;
  }
  if (k != fright->i) {  /* i o k ==> i o (k & mask) */
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
#ifndef LJ_TARGET_UNIFYROT
  if (fins->o == IR_BROR) {  /* bror(i, k) ==> brol(i, (-k)&mask) */
    fins->o = IR_BROL;
    fins->op2 = (IRRef1)lj_ir_kint(J, (-k)&mask);
    return RETRYFOLD;
  }
#endif
  return NEXTFOLD;
}
LJFOLD(BSHL any BAND)
LJFOLD(BSHR any BAND)
LJFOLD(BSAR any BAND)
LJFOLD(BROL any BAND)
LJFOLD(BROR any BAND)
LJFOLDF(simplify_shift_andk)
{
  IRIns *irk = IR(fright->op2);
  PHIBARRIER(fright);
  if ((fins->o < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
      irk->o == IR_KINT) {  /* i o (j & mask) ==> i o j */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = irk->i & mask;
    if (k == mask) {
      fins->op2 = fright->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BSHL KINT any)
LJFOLD(BSHR KINT any)
LJFOLD(BSHL KINT64 any)
LJFOLD(BSHR KINT64 any)
LJFOLDF(simplify_shift1_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0)  /* 0 o i ==> 0 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSAR KINT any)
LJFOLD(BROL KINT any)
LJFOLD(BROR KINT any)
LJFOLD(BSAR KINT64 any)
LJFOLD(BROL KINT64 any)
LJFOLD(BROR KINT64 any)
LJFOLDF(simplify_shift2_ki)
{
  int64_t k = fleft->o == IR_KINT ? (int64_t)fleft->i :
				    (int64_t)ir_k64(fleft)->u64;
  if (k == 0 || k == -1)  /* 0 o i ==> 0; -1 o i ==> -1 */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BSHL BAND KINT)
LJFOLD(BSHR BAND KINT)
LJFOLD(BROL BAND KINT)
LJFOLD(BROR BAND KINT)
LJFOLDF(simplify_shiftk_andk)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {  /* (i & k1) o k2 ==> (i o k2) & (k1 o k2) */
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    fins->ot = IRTI(IR_BAND);
    return RETRYFOLD;
  } else if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, fright->i,
				  (IROp)fins->o);
    IROpT ot = fleft->ot;
    fins->op1 = fleft->op1;
    fins->op1 = (IRRef1)lj_opt_fold(J);
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    fins->ot = ot;
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(BAND BSHL KINT)
LJFOLD(BAND BSHR KINT)
LJFOLDF(simplify_andk_shiftk)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT &&
      kfold_intop(-1, irk->i, (IROp)fleft->o) == fright->i)
    return LEFTFOLD;  /* (i o k1) & k2 ==> i, if (-1 o k1) == k2 */
  return NEXTFOLD;
}

LJFOLD(BAND BOR KINT)
LJFOLD(BOR BAND KINT)
LJFOLDF(simplify_andor_k)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
    /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
    if (k == (fins->o == IR_BAND ? 0 : -1)) {
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
}

LJFOLD(BAND BOR KINT64)
LJFOLD(BOR BAND KINT64)
LJFOLDF(simplify_andor_k64)
{
#if LJ_HASFFI
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
				  (IROp)fins->o);
    /* (i | k1) & k2 ==> i & k2, if (k1 & k2) == 0. */
    /* (i & k1) | k2 ==> i | k2, if (k1 | k2) == -1. */
    if (k == (fins->o == IR_BAND ? (uint64_t)0 : ~(uint64_t)0)) {
      fins->op1 = fleft->op1;
      return RETRYFOLD;
    }
  }
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}
/* -- Reassociation ------------------------------------------------------- */

LJFOLD(ADD ADD KINT)
LJFOLD(MUL MUL KINT)
LJFOLD(BAND BAND KINT)
LJFOLD(BOR BOR KINT)
LJFOLD(BXOR BXOR KINT)
LJFOLDF(reassoc_intarith_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t k = kfold_intop(irk->i, fright->i, (IROp)fins->o);
    if (k == irk->i)  /* (i o k1) o k2 ==> i o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
}

LJFOLD(ADD ADD KINT64)
LJFOLD(MUL MUL KINT64)
LJFOLD(BAND BAND KINT64)
LJFOLD(BOR BOR KINT64)
LJFOLD(BXOR BXOR KINT64)
LJFOLDF(reassoc_intarith_k64)
{
#if LJ_HASFFI
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT64) {
    uint64_t k = kfold_int64arith(J, ir_k64(irk)->u64, ir_k64(fright)->u64,
				  (IROp)fins->o);
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint64(J, k);
    return RETRYFOLD;  /* (i o k1) o k2 ==> i o (k1 o k2) */
  }
  return NEXTFOLD;
#else
  UNUSED(J); lj_assertJ(0, "FFI IR op without FFI"); return FAILFOLD;
#endif
}

LJFOLD(BAND BAND any)
LJFOLD(BOR BOR any)
LJFOLDF(reassoc_dup)
{
  if (fins->op2 == fleft->op1 || fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o a ==> a o b; (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(MIN MIN any)
LJFOLD(MAX MAX any)
LJFOLDF(reassoc_dup_minmax)
{
  if (fins->op2 == fleft->op2)
    return LEFTFOLD;  /* (a o b) o b ==> a o b */
  return NEXTFOLD;
}

LJFOLD(BXOR BXOR any)
LJFOLDF(reassoc_bxor)
{
  PHIBARRIER(fleft);
  if (fins->op2 == fleft->op1)  /* (a xor b) xor a ==> b */
    return fleft->op2;
  if (fins->op2 == fleft->op2)  /* (a xor b) xor b ==> a */
    return fleft->op1;
  return NEXTFOLD;
}

LJFOLD(BSHL BSHL KINT)
LJFOLD(BSHR BSHR KINT)
LJFOLD(BSAR BSAR KINT)
LJFOLD(BROL BROL KINT)
LJFOLD(BROR BROR KINT)
LJFOLDF(reassoc_shift)
{
  IRIns *irk = IR(fleft->op2);
  PHIBARRIER(fleft);  /* The (shift any KINT) rule covers k2 == 0 and more. */
  if (irk->o == IR_KINT) {  /* (i o k1) o k2 ==> i o (k1 + k2) */
    int32_t mask = irt_is64(fins->t) ? 63 : 31;
    int32_t k = (irk->i & mask) + (fright->i & mask);
    if (k > mask) {  /* Combined shift too wide? */
      if (fins->o == IR_BSHL || fins->o == IR_BSHR)
	return mask == 31 ? INTFOLD(0) : INT64FOLD(0);
      else if (fins->o == IR_BSAR)
	k = mask;
      else
	k &= mask;
    }
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, k);
    return RETRYFOLD;
  }
  return NEXTFOLD;
}

LJFOLD(MIN MIN KINT)
LJFOLD(MAX MAX KINT)
LJFOLDF(reassoc_minmax_k)
{
  IRIns *irk = IR(fleft->op2);
  if (irk->o == IR_KINT) {
    int32_t a = irk->i;
    int32_t y = kfold_intop(a, fright->i, fins->o);
    if (a == y)  /* (x o k1) o k2 ==> x o k1, if (k1 o k2) == k1. */
      return LEFTFOLD;
    PHIBARRIER(fleft);
    fins->op1 = fleft->op1;
    fins->op2 = (IRRef1)lj_ir_kint(J, y);
    return RETRYFOLD;  /* (x o k1) o k2 ==> x o (k1 o k2) */
  }
  return NEXTFOLD;
}
/* -- Array bounds check elimination -------------------------------------- */

/* Eliminate ABC across PHIs to handle t[i-1] forwarding case.
** ABC(asize, (i+k)+(-k)) ==> ABC(asize, i), but only if it already exists.
** Could be generalized to (i+k1)+k2 ==> i+(k1+k2), but needs better disambig.
*/
LJFOLD(ABC any ADD)
LJFOLDF(abc_fwd)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    if (irref_isk(fright->op2)) {
      IRIns *add2 = IR(fright->op1);
      if (add2->o == IR_ADD && irref_isk(add2->op2) &&
	  IR(fright->op2)->i == -IR(add2->op2)->i) {
	IRRef ref = J->chain[IR_ABC];
	IRRef lim = add2->op1;
	if (fins->op1 > lim) lim = fins->op1;
	while (ref > lim) {
	  IRIns *ir = IR(ref);
	  if (ir->op1 == fins->op1 && ir->op2 == add2->op1)
	    return DROPFOLD;
	  ref = ir->prev;
	}
      }
    }
  }
  return NEXTFOLD;
}

/* Eliminate ABC for constants.
** ABC(asize, k1), ABC(asize, k2) ==> ABC(asize, max(k1, k2))
** Drop second ABC if k2 is lower. Otherwise patch first ABC with k2.
*/
LJFOLD(ABC any KINT)
LJFOLDF(abc_k)
{
  PHIBARRIER(fleft);
  if (LJ_LIKELY(J->flags & JIT_F_OPT_ABC)) {
    IRRef ref = J->chain[IR_ABC];
    IRRef asize = fins->op1;
    while (ref > asize) {
      IRIns *ir = IR(ref);
      if (ir->op1 == asize && irref_isk(ir->op2)) {
	uint32_t k = (uint32_t)IR(ir->op2)->i;
	if ((uint32_t)fright->i > k)
	  ir->op2 = fins->op2;
	return DROPFOLD;
      }
      ref = ir->prev;
    }
    return EMITFOLD;  /* Already performed CSE. */
  }
  return NEXTFOLD;
}

/* Eliminate invariant ABC inside loop. */
LJFOLD(ABC any any)
LJFOLDF(abc_invar)
{
  /* Invariant ABC marked as P32 or U32. Drop if op1 is invariant, too. */
  if (!irt_isint(fins->t) && fins->op1 < J->chain[IR_LOOP] &&
      (irt_isu32(fins->t) ||
       (!irref_isk(fins->op1) && !irt_isphi(IR(fins->op1)->t))))
    return DROPFOLD;
  return NEXTFOLD;
}
/* -- Commutativity ------------------------------------------------------- */

/* The refs of commutative ops are canonicalized. Lower refs go to the right.
** Rationale behind this:
** - It (also) moves constants to the right.
** - It reduces the number of FOLD rules (e.g. (BOR any KINT) suffices).
** - It helps CSE to find more matches.
** - The assembler generates better code with constants at the right.
*/
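
/* Why moving the lower ref right also moves constants right (illustrative
** sketch of the invariant, not part of the engine): constant refs are
** interned below REF_BIAS and grow downwards, while instruction refs grow
** upwards from it, so a constant operand always has the lower ref.
*/
#if 0
static void assert_canonical(jit_State *J, IRIns *ir)
{
  /* After comm_swap below, a constant never remains as the left operand
  ** of a commutative op unless both operands are constants.
  */
  lj_assertJ(!(irref_isk(ir->op1) && !irref_isk(ir->op2)),
	     "commutative op not canonicalized");
}
#endif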
1957 LJFOLD(ADD any any)
1958 LJFOLD(MUL any any)
1959 LJFOLD(ADDOV any any)
1960 LJFOLD(MULOV any any)
1961 LJFOLDF(comm_swap)
1963 if (fins->op1 < fins->op2) { /* Move lower ref to the right. */
1964 IRRef1 tmp = fins->op1;
1965 fins->op1 = fins->op2;
1966 fins->op2 = tmp;
1967 return RETRYFOLD;
1969 return NEXTFOLD;

LJFOLD(EQ any any)
LJFOLD(NE any any)
LJFOLDF(comm_equal)
{
  /* For non-numbers only: x == x ==> drop; x ~= x ==> fail */
  if (fins->op1 == fins->op2 &&
      (!irt_isnum(fins->t) ||
       (fleft->o == IR_CONV &&  /* Converted integers cannot be NaN. */
        (uint32_t)(fleft->op2 & IRCONV_SRCMASK) - (uint32_t)IRT_I8 <= (uint32_t)(IRT_U64 - IRT_U8))))
    return CONDFOLD(fins->o == IR_EQ);
  return fold_comm_swap(J);
}

LJFOLD(LT any any)
LJFOLD(GE any any)
LJFOLD(LE any any)
LJFOLD(GT any any)
LJFOLD(ULT any any)
LJFOLD(UGE any any)
LJFOLD(ULE any any)
LJFOLD(UGT any any)
LJFOLDF(comm_comp)
{
  /* For non-numbers only: x <=> x ==> drop; x <> x ==> fail */
  if (fins->op1 == fins->op2 && !irt_isnum(fins->t))
    return CONDFOLD((fins->o ^ (fins->o >> 1)) & 1);
  if (fins->op1 < fins->op2) {  /* Move lower ref to the right. */
    IRRef1 tmp = fins->op1;
    fins->op1 = fins->op2;
    fins->op2 = tmp;
    fins->o ^= 3;  /* GT <-> LT, GE <-> LE, does not affect U */
    return RETRYFOLD;
  }
  return NEXTFOLD;
}
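
/* A sketch of the two opcode bit tricks above, assuming the comparison
** opcodes are laid out LT, GE, LE, GT (then ULT, UGE, ULE, UGT) starting
** at a multiple of four, as in lj_ir.h. Bit 0 selects the negated
** comparison and bit 1 the operand-swapped one, so o ^ 3 swaps LT <-> GT
** and GE <-> LE while leaving the unsigned bit alone, and the reflexive
** test below is 1 exactly for GE/LE (x >= x and x <= x hold) and 0 for
** LT/GT (x < x and x > x fail). Hypothetical helper, compiled out.
*/
#if 0
static int comm_comp_demo(uint32_t o)
{
  /* Relative opcode 0:LT 1:GE 2:LE 3:GT -> reflexive? 0 1 1 0 */
  return (o ^ (o >> 1)) & 1;
}
#endif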

LJFOLD(BAND any any)
LJFOLD(BOR any any)
LJFOLDF(comm_dup)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return fold_comm_swap(J);
}

LJFOLD(MIN any any)
LJFOLD(MAX any any)
LJFOLDF(comm_dup_minmax)
{
  if (fins->op1 == fins->op2)  /* x o x ==> x */
    return LEFTFOLD;
  return NEXTFOLD;
}

LJFOLD(BXOR any any)
LJFOLDF(comm_bxor)
{
  if (fins->op1 == fins->op2)  /* i xor i ==> 0 */
    return irt_is64(fins->t) ? INT64FOLD(0) : INTFOLD(0);
  return fold_comm_swap(J);
}

/* -- Simplification of compound expressions ------------------------------ */

static TRef kfold_xload(jit_State *J, IRIns *ir, const void *p)
{
  int32_t k;
  switch (irt_type(ir->t)) {
  case IRT_NUM: return lj_ir_knum_u64(J, *(uint64_t *)p);
  case IRT_I8: k = (int32_t)*(int8_t *)p; break;
  case IRT_U8: k = (int32_t)*(uint8_t *)p; break;
  case IRT_I16: k = (int32_t)(int16_t)lj_getu16(p); break;
  case IRT_U16: k = (int32_t)(uint16_t)lj_getu16(p); break;
  case IRT_INT: case IRT_U32: k = (int32_t)lj_getu32(p); break;
  case IRT_I64: case IRT_U64: return lj_ir_kint64(J, *(uint64_t *)p);
  default: return 0;
  }
  return lj_ir_kint(J, k);
}

/* Turn: string.sub(str, a, b) == kstr
** into: string.byte(str, a) == string.byte(kstr, 1) etc.
** Note: this creates unaligned XLOADs on x86/x64.
*/
LJFOLD(EQ SNEW KGC)
LJFOLD(NE SNEW KGC)
LJFOLDF(merge_eqne_snew_kgc)
{
  GCstr *kstr = ir_kstr(fright);
  int32_t len = (int32_t)kstr->len;
  lj_assertJ(irt_isstr(fins->t), "bad equality IR type");

#if LJ_TARGET_UNALIGNED
#define FOLD_SNEW_MAX_LEN	4  /* Handle string lengths 0, 1, 2, 3, 4. */
#define FOLD_SNEW_TYPE8		IRT_I8  /* Creates shorter immediates. */
#else
#define FOLD_SNEW_MAX_LEN	1  /* Handle string lengths 0 or 1. */
#define FOLD_SNEW_TYPE8		IRT_U8  /* Prefer unsigned loads. */
#endif

  PHIBARRIER(fleft);
  if (len <= FOLD_SNEW_MAX_LEN) {
    IROp op = (IROp)fins->o;
    IRRef strref = fleft->op1;
    if (IR(strref)->o != IR_STRREF)
      return NEXTFOLD;
    if (op == IR_EQ) {
      emitir(IRTGI(IR_EQ), fleft->op2, lj_ir_kint(J, len));
      /* Caveat: fins/fleft/fright is no longer valid after emitir. */
    } else {
      /* NE is not expanded since this would need an OR of two conds. */
      if (!irref_isk(fleft->op2))  /* Only handle the constant length case. */
        return NEXTFOLD;
      if (IR(fleft->op2)->i != len)
        return DROPFOLD;
    }
    if (len > 0) {
      /* A 4 byte load for length 3 is ok -- all strings have an extra NUL. */
      uint16_t ot = (uint16_t)(len == 1 ? IRT(IR_XLOAD, FOLD_SNEW_TYPE8) :
                               len == 2 ? IRT(IR_XLOAD, IRT_U16) :
                               IRTI(IR_XLOAD));
      TRef tmp = emitir(ot, strref,
                        IRXLOAD_READONLY | (len > 1 ? IRXLOAD_UNALIGNED : 0));
      TRef val = kfold_xload(J, IR(tref_ref(tmp)), strdata(kstr));
      if (len == 3)
        tmp = emitir(IRTI(IR_BAND), tmp,
                     lj_ir_kint(J, LJ_ENDIAN_SELECT(0x00ffffff, 0xffffff00)));
      fins->op1 = (IRRef1)tmp;
      fins->op2 = (IRRef1)val;
      fins->ot = (IROpT)IRTGI(op);
      return RETRYFOLD;
    } else {
      return DROPFOLD;
    }
  }
  return NEXTFOLD;
}
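
/* A sketch (hypothetical, compiled out, little-endian case) of why the
** 4 byte load plus BAND is safe for a length-3 comparison: all strings
** carry a trailing NUL, so byte 3 is readable, and the mask clears it
** before the compare. memcpy stands in for the unaligned XLOAD.
*/
#if 0
static int fold_streq3_demo(const char *s, const char *k)
{
  uint32_t a, b;
  memcpy(&a, s, 4);  /* Loads the 3 chars plus the NUL byte. */
  memcpy(&b, k, 4);
  return (a & 0x00ffffffu) == (b & 0x00ffffffu);  /* Compare 3 bytes. */
}
#endif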

/* -- Loads --------------------------------------------------------------- */

/* Loads cannot be folded or passed on to CSE in general.
** Alias analysis is needed to check for forwarding opportunities.
**
** Caveat: *all* loads must be listed here or they end up at CSE!
*/

LJFOLD(ALOAD any)
LJFOLDX(lj_opt_fwd_aload)

/* From HREF fwd (see below). Must eliminate, not supported by fwd/backend. */
LJFOLD(HLOAD KKPTR)
LJFOLDF(kfold_hload_kkptr)
{
  UNUSED(J);
  lj_assertJ(ir_kptr(fleft) == niltvg(J2G(J)), "expected niltv");
  return TREF_NIL;
}

LJFOLD(HLOAD any)
LJFOLDX(lj_opt_fwd_hload)

LJFOLD(ULOAD any)
LJFOLDX(lj_opt_fwd_uload)

LJFOLD(ALEN any any)
LJFOLDX(lj_opt_fwd_alen)

/* Try to merge UREFO/UREFC into referenced instruction. */
static TRef merge_uref(jit_State *J, IRRef ref, IRIns* ir)
{
  if (ir->o == IR_UREFO && irt_isguard(ir->t)) {
    /* Might be pointing to some other coroutine's stack.
    ** And GC might shrink said stack, thereby repointing the upvalue.
    ** GC might even collect said coroutine, thereby closing the upvalue.
    */
    if (gcstep_barrier(J, ref))
      return EMITFOLD;  /* So cannot merge. */
    /* Current fins wants a check, but ir doesn't have one. */
    if ((irt_t(fins->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC) &&
        irt_type(ir->t) == IRT_IGC)
      ir->t.irt += IRT_PGC-IRT_IGC;  /* So install a check. */
  }
  return ref;  /* Not a TRef, but the caller doesn't care. */
}

/* Upvalue refs are really loads, but there are no corresponding stores.
** So CSE is ok for them, except for guarded UREFO across a GC step.
** If the referenced function is const, its upvalue addresses are const, too.
** This can be used to improve CSE by looking for the same address,
** even if the upvalues originate from a different function.
*/
LJFOLD(UREFO KGC any)
LJFOLD(UREFC KGC any)
LJFOLDF(cse_uref)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[fins->o];
    GCfunc *fn = ir_kfunc(fleft);
    GCupval *uv = gco2uv(gcref(fn->l.uvptr[(fins->op2 >> 8)]));
    while (ref > 0) {
      IRIns *ir = IR(ref);
      if (irref_isk(ir->op1)) {
        GCfunc *fn2 = ir_kfunc(IR(ir->op1));
        if (gco2uv(gcref(fn2->l.uvptr[(ir->op2 >> 8)])) == uv) {
          return merge_uref(J, ref, ir);
        }
      }
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

/* Custom CSE for UREFO. */
LJFOLD(UREFO any any)
LJFOLDF(cse_urefo)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    IRRef ref = J->chain[IR_UREFO];
    IRRef lim = fins->op1;
    IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
    while (ref > lim) {
      IRIns *ir = IR(ref);
      if (ir->op12 == op12)
        return merge_uref(J, ref, ir);
      ref = ir->prev;
    }
  }
  return EMITFOLD;
}

LJFOLD(HREFK any any)
LJFOLDX(lj_opt_fwd_hrefk)

LJFOLD(HREF TNEW any)
LJFOLDF(fwd_href_tnew)
{
  if (lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

LJFOLD(HREF TDUP KPRI)
LJFOLD(HREF TDUP KGC)
LJFOLD(HREF TDUP KNUM)
LJFOLDF(fwd_href_tdup)
{
  TValue keyv;
  cTValue *val;
  lj_ir_kvalue(J->L, &keyv, fright);
  val = lj_tab_get(J->L, ir_ktab(IR(fleft->op1)), &keyv);
  /* Check for either nil or the nil value marker in the template table. */
  if ((tvisnil(val) || tvistab(val)) && lj_opt_fwd_href_nokey(J))
    return lj_ir_kkptr(J, niltvg(J2G(J)));
  return NEXTFOLD;
}

/* We can safely FOLD/CSE array/hash refs and field loads, since there
** are no corresponding stores. But we need to check for any NEWREF with
** an aliased table, as it may invalidate all of the pointers and fields.
** Only HREF needs the NEWREF check -- AREF and HREFK already depend on
** FLOADs. And NEWREF itself is treated like a store (see below).
** LREF is constant (per trace) since coroutine switches are not inlined.
*/
LJFOLD(FLOAD TNEW IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tnew_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD(fleft->op1);
  return NEXTFOLD;
}

LJFOLD(FLOAD TNEW IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tnew_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((1 << fleft->op2)-1);
  return NEXTFOLD;
}
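
/* A minimal sketch (hypothetical, compiled out): TNEW carries the array
** size directly in op1 and the hash size as a log2 value in op2, so both
** folds above yield constants, e.g. op2 == 3 gives hmask == 7.
*/
#if 0
static int32_t tnew_hmask_demo(int32_t log2size)
{
  return (1 << log2size) - 1;  /* 0 -> 0, 3 -> 7, 4 -> 15. */
}
#endif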

LJFOLD(FLOAD TDUP IRFL_TAB_ASIZE)
LJFOLDF(fload_tab_tdup_asize)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->asize);
  return NEXTFOLD;
}

LJFOLD(FLOAD TDUP IRFL_TAB_HMASK)
LJFOLDF(fload_tab_tdup_hmask)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && lj_opt_fwd_tptr(J, fins->op1))
    return INTFOLD((int32_t)ir_ktab(IR(fleft->op1))->hmask);
  return NEXTFOLD;
}

LJFOLD(HREF any any)
LJFOLD(FLOAD any IRFL_TAB_ARRAY)
LJFOLD(FLOAD any IRFL_TAB_NODE)
LJFOLD(FLOAD any IRFL_TAB_ASIZE)
LJFOLD(FLOAD any IRFL_TAB_HMASK)
LJFOLDF(fload_tab_ah)
{
  TRef tr = lj_opt_cse(J);
  return lj_opt_fwd_tptr(J, tref_ref(tr)) ? tr : EMITFOLD;
}

/* Strings are immutable, so we can safely FOLD/CSE the related FLOAD. */
LJFOLD(FLOAD KGC IRFL_STR_LEN)
LJFOLDF(fload_str_len_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kstr(fleft)->len);
  return NEXTFOLD;
}

LJFOLD(FLOAD SNEW IRFL_STR_LEN)
LJFOLDF(fload_str_len_snew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    PHIBARRIER(fleft);
    return fleft->op2;
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD TOSTR IRFL_STR_LEN)
LJFOLDF(fload_str_len_tostr)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD) && fleft->op2 == IRTOSTR_CHAR)
    return INTFOLD(1);
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_SBUF_W)
LJFOLD(FLOAD any IRFL_SBUF_E)
LJFOLD(FLOAD any IRFL_SBUF_B)
LJFOLD(FLOAD any IRFL_SBUF_L)
LJFOLD(FLOAD any IRFL_SBUF_REF)
LJFOLD(FLOAD any IRFL_SBUF_R)
LJFOLDF(fload_sbuf)
{
  TRef tr = lj_opt_fwd_fload(J);
  return lj_opt_fwd_sbuf(J, tref_ref(tr)) ? tr : EMITFOLD;
}

/* The fast function ID of function objects is immutable. */
LJFOLD(FLOAD KGC IRFL_FUNC_FFID)
LJFOLDF(fload_func_ffid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kfunc(fleft)->c.ffid);
  return NEXTFOLD;
}

/* The C type ID of cdata objects is immutable. */
LJFOLD(FLOAD KGC IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return INTFOLD((int32_t)ir_kcdata(fleft)->ctypeid);
  return NEXTFOLD;
}

/* Get the contents of immutable cdata objects. */
LJFOLD(FLOAD KGC IRFL_CDATA_PTR)
LJFOLD(FLOAD KGC IRFL_CDATA_INT)
LJFOLD(FLOAD KGC IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_int64_kgc)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD)) {
    void *p = cdataptr(ir_kcdata(fleft));
    if (irt_is64(fins->t))
      return INT64FOLD(*(uint64_t *)p);
    else
      return INTFOLD(*(int32_t *)p);
  }
  return NEXTFOLD;
}

LJFOLD(FLOAD CNEW IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD CNEWI IRFL_CDATA_CTYPEID)
LJFOLDF(fload_cdata_typeid_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op1;  /* No PHI barrier needed. CNEW/CNEWI op1 is const. */
  return NEXTFOLD;
}

/* Pointer, int and int64 cdata objects are immutable. */
LJFOLD(FLOAD CNEWI IRFL_CDATA_PTR)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT)
LJFOLD(FLOAD CNEWI IRFL_CDATA_INT64)
LJFOLDF(fload_cdata_ptr_int64_cnew)
{
  if (LJ_LIKELY(J->flags & JIT_F_OPT_FOLD))
    return fleft->op2;  /* Fold even across PHI to avoid allocations. */
  return NEXTFOLD;
}

LJFOLD(FLOAD any IRFL_STR_LEN)
LJFOLD(FLOAD any IRFL_FUNC_ENV)
LJFOLD(FLOAD any IRFL_THREAD_ENV)
LJFOLD(FLOAD any IRFL_CDATA_CTYPEID)
LJFOLD(FLOAD any IRFL_CDATA_PTR)
LJFOLD(FLOAD any IRFL_CDATA_INT)
LJFOLD(FLOAD any IRFL_CDATA_INT64)
LJFOLD(VLOAD any any)  /* Vararg loads have no corresponding stores. */
LJFOLDX(lj_opt_cse)

/* All other field loads need alias analysis. */
LJFOLD(FLOAD any any)
LJFOLDX(lj_opt_fwd_fload)

/* This is for LOOP only. Recording handles SLOADs internally. */
LJFOLD(SLOAD any any)
LJFOLDF(fwd_sload)
{
  if ((fins->op2 & IRSLOAD_FRAME)) {
    TRef tr = lj_opt_cse(J);
    return tref_ref(tr) < J->chain[IR_RETF] ? EMITFOLD : tr;
  } else {
    lj_assertJ(J->slot[fins->op1] != 0, "uninitialized slot accessed");
    return J->slot[fins->op1];
  }
}

/* Only fold for KKPTR. The pointer _and_ the contents must be const. */
LJFOLD(XLOAD KKPTR any)
LJFOLDF(xload_kptr)
{
  TRef tr = kfold_xload(J, fins, ir_kptr(fleft));
  return tr ? tr : NEXTFOLD;
}

LJFOLD(XLOAD any any)
LJFOLDX(lj_opt_fwd_xload)

/* -- Frame handling ------------------------------------------------------ */

/* Prevent CSE of a REF_BASE operand across IR_RETF. */
LJFOLD(SUB any BASE)
LJFOLD(SUB BASE any)
LJFOLD(EQ any BASE)
LJFOLDF(fold_base)
{
  return lj_opt_cselim(J, J->chain[IR_RETF]);
}

/* -- Write barriers ------------------------------------------------------ */

/* Write barriers are amenable to CSE, but not across any incremental
** GC steps.
*/
LJFOLD(TBAR any)
LJFOLD(OBAR any any)
LJFOLDF(barrier_tab)
{
  TRef tr = lj_opt_cse(J);
  if (gcstep_barrier(J, tref_ref(tr)))  /* CSE across GC step? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

LJFOLD(TBAR TNEW)
LJFOLD(TBAR TDUP)
LJFOLDF(barrier_tnew_tdup)
{
  /* New tables are always white and never need a barrier. */
  if (fins->op1 < J->chain[IR_LOOP])  /* Except across a GC step. */
    return NEXTFOLD;
  return DROPFOLD;
}

/* -- Profiling ----------------------------------------------------------- */

LJFOLD(PROF any any)
LJFOLDF(prof)
{
  IRRef ref = J->chain[IR_PROF];
  if (ref+1 == J->cur.nins)  /* Drop neighbouring IR_PROF. */
    return ref;
  return EMITFOLD;
}

/* -- Stores and allocations ---------------------------------------------- */

/* Stores and allocations cannot be folded or passed on to CSE in general.
** But some stores can be eliminated with dead-store elimination (DSE).
**
** Caveat: *all* stores and allocs must be listed here or they end up at CSE!
*/

LJFOLD(ASTORE any any)
LJFOLD(HSTORE any any)
LJFOLDX(lj_opt_dse_ahstore)

LJFOLD(USTORE any any)
LJFOLDX(lj_opt_dse_ustore)

LJFOLD(FSTORE any any)
LJFOLDX(lj_opt_dse_fstore)

LJFOLD(XSTORE any any)
LJFOLDX(lj_opt_dse_xstore)

LJFOLD(NEWREF any any)  /* Treated like a store. */
LJFOLD(TMPREF any any)
LJFOLD(CALLA any any)
LJFOLD(CALLL any any)  /* Safeguard fallback. */
LJFOLD(CALLS any any)
LJFOLD(CALLXS any any)
LJFOLD(XBAR)
LJFOLD(RETF any any)  /* Modifies BASE. */
LJFOLD(TNEW any any)
LJFOLD(TDUP any)
LJFOLD(CNEW any any)
LJFOLD(XSNEW any any)
LJFOLDX(lj_ir_emit)

/* -- Miscellaneous ------------------------------------------------------- */

LJFOLD(CARG any any)
LJFOLDF(cse_carg)
{
  TRef tr = lj_opt_cse(J);
  if (tref_ref(tr) < J->chain[IR_LOOP])  /* CSE across loop? */
    return EMITFOLD;  /* Raw emit. Assumes fins is left intact by CSE. */
  return tr;
}

/* ------------------------------------------------------------------------ */

/* Every entry in the generated hash table is a 32 bit pattern:
**
** xxxxxxxx iiiiiii lllllll rrrrrrrrrr
**
**   xxxxxxxx = 8 bit index into fold function table
**    iiiiiii = 7 bit folded instruction opcode
**    lllllll = 7 bit left instruction opcode
** rrrrrrrrrr = 10 bit field: right instruction opcode (8 bits) or literal
*/
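
/* A minimal sketch (hypothetical, compiled out) of how lj_opt_fold()
** below packs the lower 24 bits of this pattern from the instruction
** and its operand opcodes; the upper 8 bits come from the generated
** fold function table.
*/
#if 0
static uint32_t fold_key_demo(uint32_t ins, uint32_t left, uint32_t right)
{
  return (ins << 17) + (left << 10) + right;  /* iiiiiii lllllll rrrrrrrrrr */
}
#endif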

#include "lj_folddef.h"

/* ------------------------------------------------------------------------ */

/* Fold IR instruction. */
TRef LJ_FASTCALL lj_opt_fold(jit_State *J)
{
  uint32_t key, any;
  IRRef ref;

  if (LJ_UNLIKELY((J->flags & JIT_F_OPT_MASK) != JIT_F_OPT_DEFAULT)) {
    lj_assertJ(((JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE|JIT_F_OPT_DSE) |
                JIT_F_OPT_DEFAULT) == JIT_F_OPT_DEFAULT,
               "bad JIT_F_OPT_DEFAULT");
    /* Folding disabled? Chain to CSE, but not for loads/stores/allocs. */
    if (!(J->flags & JIT_F_OPT_FOLD) && irm_kind(lj_ir_mode[fins->o]) == IRM_N)
      return lj_opt_cse(J);

    /* No FOLD, forwarding or CSE? Emit raw IR for loads, except for SLOAD. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE)) !=
                    (JIT_F_OPT_FOLD|JIT_F_OPT_FWD|JIT_F_OPT_CSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_L && fins->o != IR_SLOAD)
      return lj_ir_emit(J);

    /* No FOLD or DSE? Emit raw IR for stores. */
    if ((J->flags & (JIT_F_OPT_FOLD|JIT_F_OPT_DSE)) !=
                    (JIT_F_OPT_FOLD|JIT_F_OPT_DSE) &&
        irm_kind(lj_ir_mode[fins->o]) == IRM_S)
      return lj_ir_emit(J);
  }

  /* Fold engine start/retry point. */
retry:
  /* Construct key from opcode and operand opcodes (unless literal/none). */
  key = ((uint32_t)fins->o << 17);
  if (fins->op1 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op1)->o << 10;
    *fleft = *IR(fins->op1);
    if (fins->op1 < REF_TRUE)
      fleft[1] = IR(fins->op1)[1];
  }
  if (fins->op2 >= J->cur.nk) {
    key += (uint32_t)IR(fins->op2)->o;
    *fright = *IR(fins->op2);
    if (fins->op2 < REF_TRUE)
      fright[1] = IR(fins->op2)[1];
  } else {
    key += (fins->op2 & 0x3ffu);  /* Literal mask. Must include IRCONV_*MASK. */
  }

  /* Check for a match in order from most specific to least specific. */
  any = 0;
  for (;;) {
    uint32_t k = key | (any & 0x1ffff);
    uint32_t h = fold_hashkey(k);
    uint32_t fh = fold_hash[h];  /* Lookup key in semi-perfect hash table. */
    if ((fh & 0xffffff) == k || (fh = fold_hash[h+1], (fh & 0xffffff) == k)) {
      ref = (IRRef)tref_ref(fold_func[fh >> 24](J));
      if (ref != NEXTFOLD)
        break;
    }
    if (any == 0xfffff)  /* Exhausted folding. Pass on to CSE. */
      return lj_opt_cse(J);
    any = (any | (any >> 10)) ^ 0xffc00;
  }

  /* Return value processing, ordered by frequency. */
  if (LJ_LIKELY(ref >= MAX_FOLD))
    return TREF(ref, irt_t(IR(ref)->t));
  if (ref == RETRYFOLD)
    goto retry;
  if (ref == KINTFOLD)
    return lj_ir_kint(J, fins->i);
  if (ref == FAILFOLD)
    lj_trace_err(J, LJ_TRERR_GFAIL);
  lj_assertJ(ref == DROPFOLD, "bad fold result");
  return REF_DROP;
}
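
/* A minimal sketch (hypothetical, compiled out) of the wildcard update in
** the loop above. Starting from any == 0 (exact match), the iterations
** yield 0xffc00 (left operand wildcarded), 0x003ff (right wildcarded) and
** 0xfffff (both wildcarded), matching the most-specific-first order
** described at the top of this file; after that the engine falls back to
** CSE.
*/
#if 0
static void fold_any_demo(void)
{
  uint32_t any = 0;
  int i;
  for (i = 0; i < 3; i++) {
    any = (any | (any >> 10)) ^ 0xffc00;
    /* i == 0: any == 0xffc00, i == 1: any == 0x003ff, i == 2: 0xfffff. */
  }
}
#endif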

/* -- Common-Subexpression Elimination ------------------------------------ */

/* CSE an IR instruction. This is very fast due to the skip-list chains. */
TRef LJ_FASTCALL lj_opt_cse(jit_State *J)
{
  /* Avoid narrow to wide store-to-load forwarding stall */
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  IROp op = fins->o;
  if (LJ_LIKELY(J->flags & JIT_F_OPT_CSE)) {
    /* Limited search for same operands in per-opcode chain. */
    IRRef ref = J->chain[op];
    IRRef lim = fins->op1;
    if (fins->op2 > lim) lim = fins->op2;  /* Relies on lit < REF_BIAS. */
    while (ref > lim) {
      if (IR(ref)->op12 == op12)
        return TREF(ref, irt_t(IR(ref)->t));  /* Common subexpression found. */
      ref = IR(ref)->prev;
    }
  }
  /* Otherwise emit IR (inlined for speed). */
  {
    IRRef ref = lj_ir_nextins(J);
    IRIns *ir = IR(ref);
    ir->prev = J->chain[op];
    ir->op12 = op12;
    J->chain[op] = (IRRef1)ref;
    ir->o = fins->o;
    J->guardemit.irt |= fins->t.irt;
    return TREF(ref, irt_t((ir->t = fins->t)));
  }
}
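
/* A minimal sketch (hypothetical, compiled out) of the op12 packing used
** above: both 16 bit operand refs live in one 32 bit word, so the chain
** search compares both operands with a single load and compare.
*/
#if 0
static uint32_t op12_pack_demo(uint16_t op1, uint16_t op2)
{
  return (uint32_t)op1 + ((uint32_t)op2 << 16);  /* op2 in the high half. */
}
#endif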

/* CSE with explicit search limit. */
TRef LJ_FASTCALL lj_opt_cselim(jit_State *J, IRRef lim)
{
  IRRef ref = J->chain[fins->o];
  IRRef2 op12 = (IRRef2)fins->op1 + ((IRRef2)fins->op2 << 16);
  while (ref > lim) {
    if (IR(ref)->op12 == op12)
      return ref;
    ref = IR(ref)->prev;
  }
  return lj_ir_emit(J);
}

/* ------------------------------------------------------------------------ */

#undef IR
#undef fins
#undef fleft
#undef fright
#undef knumleft
#undef knumright
#undef emitir

#endif