Fast forward to sync public repo.
Compile math.sinh(), math.cosh(), math.tanh() and math.random(). Compile various io.*() functions. Drive the GC forward on string allocations in the parser. Improve KNUM fuse vs. load heuristics. Add abstract C call handling to IR.
This commit is contained in:
598
src/lj_asm.c
598
src/lj_asm.c
@@ -13,6 +13,7 @@
|
||||
#include "lj_gc.h"
|
||||
#include "lj_str.h"
|
||||
#include "lj_tab.h"
|
||||
#include "lj_frame.h"
|
||||
#include "lj_ir.h"
|
||||
#include "lj_jit.h"
|
||||
#include "lj_iropt.h"
|
||||
@@ -81,6 +82,10 @@ typedef struct ASMState {
|
||||
|
||||
#define IR(ref) (&as->ir[(ref)])
|
||||
|
||||
#define ASMREF_TMP1 REF_TRUE /* Temp. register. */
|
||||
#define ASMREF_TMP2 REF_FALSE /* Temp. register. */
|
||||
#define ASMREF_L REF_NIL /* Stores register for L. */
|
||||
|
||||
/* Check for variant to invariant references. */
|
||||
#define iscrossref(as, ref) ((ref) < as->sectref)
|
||||
|
||||
@@ -115,9 +120,11 @@ static LJ_NORET LJ_NOINLINE void asm_mclimit(ASMState *as)
|
||||
{ MCode rex = 0x40 + (((rr)>>1)&4) + (((rb)>>3)&1); \
|
||||
if (rex != 0x40) *--(p) = rex; }
|
||||
#define FORCE_REX 0x200
|
||||
#define REX_64 (FORCE_REX|0x080000)
|
||||
#else
|
||||
#define REXRB(p, rr, rb) ((void)0)
|
||||
#define FORCE_REX 0
|
||||
#define REX_64 0
|
||||
#endif
|
||||
|
||||
#define emit_i8(as, i) (*--as->mcp = (MCode)(i))
|
||||
@@ -144,6 +151,7 @@ static LJ_AINLINE MCode *emit_op(x86Op xo, Reg rr, Reg rb, Reg rx,
|
||||
{
|
||||
uint32_t rex = 0x40 + ((rr>>1)&(4+(FORCE_REX>>1)))+((rx>>2)&2)+((rb>>3)&1);
|
||||
if (rex != 0x40) {
|
||||
rex |= (rr >> 16);
|
||||
if (n == -4) { *p = (MCode)rex; rex = (MCode)(xo >> 8); }
|
||||
*--p = (MCode)rex;
|
||||
}
|
||||
@@ -451,14 +459,6 @@ static void emit_call_(ASMState *as, MCode *target)
|
||||
|
||||
#define emit_call(as, f) emit_call_(as, (MCode *)(void *)(f))
|
||||
|
||||
/* Argument setup for C calls. Up to 3 args need no stack adjustment. */
|
||||
#define emit_setargr(as, narg, r) \
|
||||
emit_movtomro(as, (r), RID_ESP, ((narg)-1)*4);
|
||||
#define emit_setargi(as, narg, imm) \
|
||||
emit_movmroi(as, RID_ESP, ((narg)-1)*4, (imm))
|
||||
#define emit_setargp(as, narg, ptr) \
|
||||
emit_setargi(as, (narg), ptr2addr((ptr)))
|
||||
|
||||
/* -- Register allocator debugging ---------------------------------------- */
|
||||
|
||||
/* #define LUAJIT_DEBUG_RA */
|
||||
@@ -578,10 +578,6 @@ static void ra_setup(ASMState *as)
|
||||
memset(as->phireg, 0, sizeof(as->phireg));
|
||||
memset(as->cost, 0, sizeof(as->cost));
|
||||
as->cost[RID_ESP] = REGCOST(~0u, 0u);
|
||||
|
||||
/* Start slots for spill slot allocation. */
|
||||
as->evenspill = (SPS_FIRST+1)&~1;
|
||||
as->oddspill = (SPS_FIRST&1) ? SPS_FIRST : 0;
|
||||
}
|
||||
|
||||
/* Rematerialize constants. */
|
||||
@@ -598,6 +594,9 @@ static Reg ra_rematk(ASMState *as, IRIns *ir)
|
||||
} else if (ir->o == IR_BASE) {
|
||||
ra_sethint(ir->r, RID_BASE); /* Restore BASE register hint. */
|
||||
emit_getgl(as, r, jit_base);
|
||||
} else if (ir->o == IR_KPRI) { /* REF_NIL stores ASMREF_L register. */
|
||||
lua_assert(irt_isnil(ir->t));
|
||||
emit_getgl(as, r, jit_L);
|
||||
} else {
|
||||
lua_assert(ir->o == IR_KINT || ir->o == IR_KGC ||
|
||||
ir->o == IR_KPTR || ir->o == IR_KNULL);
|
||||
@@ -629,6 +628,18 @@ static int32_t ra_spill(ASMState *as, IRIns *ir)
|
||||
return sps_scale(slot);
|
||||
}
|
||||
|
||||
/* Release the temporarily allocated register in ASMREF_TMP1/ASMREF_TMP2. */
|
||||
static Reg ra_releasetmp(ASMState *as, IRRef ref)
|
||||
{
|
||||
IRIns *ir = IR(ref);
|
||||
Reg r = ir->r;
|
||||
lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
|
||||
ra_free(as, r);
|
||||
ra_modified(as, r);
|
||||
ir->r = RID_INIT;
|
||||
return r;
|
||||
}
|
||||
|
||||
/* Restore a register (marked as free). Rematerialize or force a spill. */
|
||||
static Reg ra_restore(ASMState *as, IRRef ref)
|
||||
{
|
||||
@@ -1008,7 +1019,7 @@ static void asm_guardcc(ASMState *as, int cc)
|
||||
|
||||
/* Arch-specific field offsets. */
|
||||
static const uint8_t field_ofs[IRFL__MAX+1] = {
|
||||
#define FLOFS(name, type, field) (uint8_t)offsetof(type, field),
|
||||
#define FLOFS(name, ofs) (uint8_t)(ofs),
|
||||
IRFLDEF(FLOFS)
|
||||
#undef FLOFS
|
||||
0
|
||||
@@ -1129,7 +1140,7 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
|
||||
{
|
||||
IRIns *irr;
|
||||
lua_assert(ir->o == IR_STRREF);
|
||||
as->mrm.idx = as->mrm.base = RID_NONE;
|
||||
as->mrm.base = as->mrm.idx = RID_NONE;
|
||||
as->mrm.scale = XM_SCALE1;
|
||||
as->mrm.ofs = sizeof(GCstr);
|
||||
if (irref_isk(ir->op1)) {
|
||||
@@ -1158,6 +1169,17 @@ static void asm_fusestrref(ASMState *as, IRIns *ir, RegSet allow)
|
||||
}
|
||||
}
|
||||
|
||||
static void asm_fusexref(ASMState *as, IRIns *ir, RegSet allow)
|
||||
{
|
||||
if (ir->o == IR_KPTR) {
|
||||
as->mrm.ofs = ir->i;
|
||||
as->mrm.base = as->mrm.idx = RID_NONE;
|
||||
} else {
|
||||
lua_assert(ir->o == IR_STRREF);
|
||||
asm_fusestrref(as, ir, allow);
|
||||
}
|
||||
}
|
||||
|
||||
/* Fuse load into memory operand. */
|
||||
static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
||||
{
|
||||
@@ -1172,8 +1194,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
||||
return RID_MRM;
|
||||
}
|
||||
if (ir->o == IR_KNUM) {
|
||||
RegSet avail = as->freeset & ~as->modset & RSET_FPR;
|
||||
lua_assert(allow != RSET_EMPTY);
|
||||
if (!(as->freeset & ~as->modset & RSET_FPR)) {
|
||||
if (!(avail & (avail-1))) { /* Fuse if less than two regs available. */
|
||||
as->mrm.ofs = ptr2addr(ir_knum(ir));
|
||||
as->mrm.base = as->mrm.idx = RID_NONE;
|
||||
return RID_MRM;
|
||||
@@ -1188,8 +1211,9 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
||||
return RID_MRM;
|
||||
}
|
||||
} else if (ir->o == IR_FLOAD) {
|
||||
/* Generic fusion is only ok for IRT_INT operand (but see asm_comp). */
|
||||
if (irt_isint(ir->t) && noconflict(as, ref, IR_FSTORE)) {
|
||||
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
|
||||
if ((irt_isint(ir->t) || irt_isaddr(ir->t)) &&
|
||||
noconflict(as, ref, IR_FSTORE)) {
|
||||
asm_fusefref(as, ir, xallow);
|
||||
return RID_MRM;
|
||||
}
|
||||
@@ -1199,11 +1223,11 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
||||
return RID_MRM;
|
||||
}
|
||||
} else if (ir->o == IR_XLOAD) {
|
||||
/* Generic fusion is only ok for IRT_INT operand (but see asm_comp).
|
||||
/* Generic fusion is only ok for 32 bit operand (but see asm_comp).
|
||||
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
|
||||
*/
|
||||
if (irt_isint(ir->t)) {
|
||||
asm_fusestrref(as, IR(ir->op1), xallow);
|
||||
if (irt_isint(ir->t) || irt_isaddr(ir->t)) {
|
||||
asm_fusexref(as, IR(ir->op1), xallow);
|
||||
return RID_MRM;
|
||||
}
|
||||
}
|
||||
@@ -1214,6 +1238,137 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
|
||||
return ra_allocref(as, ref, allow);
|
||||
}
|
||||
|
||||
/* -- Calls --------------------------------------------------------------- */
|
||||
|
||||
/* Generate a call to a C function. */
|
||||
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
|
||||
{
|
||||
RegSet allow = RSET_ALL;
|
||||
uint32_t n, nargs = CCI_NARGS(ci);
|
||||
int32_t ofs = 0;
|
||||
lua_assert(!(nargs > 2 && (ci->flags&CCI_FASTCALL))); /* Avoid stack adj. */
|
||||
emit_call(as, ci->func);
|
||||
for (n = 0; n < nargs; n++) { /* Setup args. */
|
||||
#if LJ_64
|
||||
#error "NYI: 64 bit mode call argument setup"
|
||||
#endif
|
||||
IRIns *ir = IR(args[n]);
|
||||
if (irt_isnum(ir->t)) {
|
||||
if ((ofs & 4) && irref_isk(args[n])) {
|
||||
/* Split stores for unaligned FP consts. */
|
||||
emit_movmroi(as, RID_ESP, ofs, (int32_t)ir_knum(ir)->u32.lo);
|
||||
emit_movmroi(as, RID_ESP, ofs+4, (int32_t)ir_knum(ir)->u32.hi);
|
||||
} else {
|
||||
Reg r;
|
||||
if ((allow & RSET_FPR) == RSET_EMPTY)
|
||||
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
|
||||
r = ra_alloc1(as, args[n], allow & RSET_FPR);
|
||||
allow &= ~RID2RSET(r);
|
||||
emit_rmro(as, XO_MOVSDto, r, RID_ESP, ofs);
|
||||
}
|
||||
ofs += 8;
|
||||
} else {
|
||||
if ((ci->flags & CCI_FASTCALL) && n < 2) {
|
||||
Reg r = n == 0 ? RID_ECX : RID_EDX;
|
||||
if (args[n] < ASMREF_TMP1) {
|
||||
emit_loadi(as, r, ir->i);
|
||||
} else {
|
||||
lua_assert(rset_test(as->freeset, r)); /* Must have been evicted. */
|
||||
allow &= ~RID2RSET(r);
|
||||
if (ra_hasreg(ir->r))
|
||||
emit_movrr(as, r, ir->r);
|
||||
else
|
||||
ra_allocref(as, args[n], RID2RSET(r));
|
||||
}
|
||||
} else {
|
||||
if (args[n] < ASMREF_TMP1) {
|
||||
emit_movmroi(as, RID_ESP, ofs, ir->i);
|
||||
} else {
|
||||
Reg r;
|
||||
if ((allow & RSET_GPR) == RSET_EMPTY)
|
||||
lj_trace_err(as->J, LJ_TRERR_NYICOAL);
|
||||
r = ra_alloc1(as, args[n], allow & RSET_GPR);
|
||||
allow &= ~RID2RSET(r);
|
||||
emit_movtomro(as, r, RID_ESP, ofs);
|
||||
}
|
||||
ofs += 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Setup result reg/sp for call. Evict scratch regs. */
|
||||
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
|
||||
{
|
||||
RegSet drop = RSET_SCRATCH;
|
||||
if ((ci->flags & CCI_NOFPRCLOBBER))
|
||||
drop &= ~RSET_FPR;
|
||||
if (ra_hasreg(ir->r))
|
||||
rset_clear(drop, ir->r); /* Dest reg handled below. */
|
||||
ra_evictset(as, drop); /* Evictions must be performed first. */
|
||||
if (ra_used(ir)) {
|
||||
if (irt_isnum(ir->t)) {
|
||||
int32_t ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
|
||||
#if LJ_64
|
||||
if ((ci->flags & CCI_CASTU64)) {
|
||||
Reg dest = ir->r;
|
||||
if (ra_hasreg(dest)) {
|
||||
ra_free(as, dest);
|
||||
ra_modified(as, dest);
|
||||
emit_rr(as, XO_MOVD, dest|REX_64, RID_RET); /* Really MOVQ. */
|
||||
} else {
|
||||
emit_movrmro(as, RID_RET, RID_ESP, ofs);
|
||||
}
|
||||
} else {
|
||||
ra_destreg(as, ir, RID_FPRET);
|
||||
}
|
||||
#else
|
||||
/* Number result is in x87 st0 for x86 calling convention. */
|
||||
Reg dest = ir->r;
|
||||
if (ra_hasreg(dest)) {
|
||||
ra_free(as, dest);
|
||||
ra_modified(as, dest);
|
||||
emit_rmro(as, XMM_MOVRM(as), dest, RID_ESP, ofs);
|
||||
}
|
||||
if ((ci->flags & CCI_CASTU64)) {
|
||||
emit_movtomro(as, RID_RET, RID_ESP, ofs);
|
||||
emit_movtomro(as, RID_RETHI, RID_ESP, ofs+4);
|
||||
} else {
|
||||
emit_rmro(as, XO_FSTPq, XOg_FSTPq, RID_ESP, ofs);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
lua_assert(!irt_ispri(ir->t));
|
||||
ra_destreg(as, ir, RID_RET);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Collect arguments from CALL* and ARG instructions. */
|
||||
static void asm_collectargs(ASMState *as, IRIns *ir,
|
||||
const CCallInfo *ci, IRRef *args)
|
||||
{
|
||||
uint32_t n = CCI_NARGS(ci);
|
||||
lua_assert(n <= CCI_NARGS_MAX);
|
||||
if ((ci->flags & CCI_L)) { *args++ = ASMREF_L; n--; }
|
||||
while (n-- > 1) {
|
||||
ir = IR(ir->op1);
|
||||
lua_assert(ir->o == IR_CARG);
|
||||
args[n] = ir->op2;
|
||||
}
|
||||
args[0] = ir->op1;
|
||||
lua_assert(IR(ir->op1)->o != IR_CARG);
|
||||
}
|
||||
|
||||
static void asm_call(ASMState *as, IRIns *ir)
|
||||
{
|
||||
IRRef args[CCI_NARGS_MAX];
|
||||
const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
|
||||
asm_collectargs(as, ir, ci, args);
|
||||
asm_setupresult(as, ir, ci);
|
||||
asm_gencall(as, ci, args);
|
||||
}
|
||||
|
||||
/* -- Type conversions ---------------------------------------------------- */
|
||||
|
||||
static void asm_tonum(ASMState *as, IRIns *ir)
|
||||
@@ -1260,48 +1415,41 @@ static void asm_tobit(ASMState *as, IRIns *ir)
|
||||
|
||||
static void asm_strto(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg str;
|
||||
int32_t ofs;
|
||||
RegSet drop = RSET_SCRATCH;
|
||||
/* Force a spill slot for the destination register (if any). */
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_tonum];
|
||||
IRRef args[2];
|
||||
RegSet drop = RSET_SCRATCH;
|
||||
if ((drop & RSET_FPR) != RSET_FPR && ra_hasreg(ir->r))
|
||||
rset_set(drop, ir->r); /* WIN64 doesn't spill all FPRs. */
|
||||
ra_evictset(as, drop);
|
||||
asm_guardcc(as, CC_E);
|
||||
emit_rr(as, XO_TEST, RID_RET, RID_RET);
|
||||
/* int lj_str_numconv(const char *s, TValue *n) */
|
||||
emit_call(as, lj_str_numconv);
|
||||
ofs = sps_scale(ir->s); /* Use spill slot or slots SPS_TEMP1/2. */
|
||||
if (ofs == 0) {
|
||||
emit_setargr(as, 2, RID_ESP);
|
||||
} else {
|
||||
emit_setargr(as, 2, RID_RET);
|
||||
emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ofs);
|
||||
}
|
||||
emit_setargr(as, 1, RID_RET);
|
||||
str = ra_alloc1(as, ir->op1, RSET_GPR);
|
||||
emit_rmro(as, XO_LEA, RID_RET, str, sizeof(GCstr));
|
||||
args[0] = ir->op1;
|
||||
args[1] = ASMREF_TMP1;
|
||||
asm_gencall(as, ci, args);
|
||||
/* Store the result to the spill slot or slots SPS_TEMP1/2. */
|
||||
emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
|
||||
RID_ESP, sps_scale(ir->s));
|
||||
}
|
||||
|
||||
static void asm_tostr(ASMState *as, IRIns *ir)
|
||||
{
|
||||
IRIns *irl = IR(ir->op1);
|
||||
ra_destreg(as, ir, RID_RET);
|
||||
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
|
||||
IRRef args[2];
|
||||
args[0] = ASMREF_L;
|
||||
as->gcsteps++;
|
||||
if (irt_isnum(irl->t)) {
|
||||
/* GCstr *lj_str_fromnum(lua_State *L, const lua_Number *np) */
|
||||
emit_call(as, lj_str_fromnum);
|
||||
emit_setargr(as, 1, RID_RET);
|
||||
emit_getgl(as, RID_RET, jit_L);
|
||||
emit_setargr(as, 2, RID_RET);
|
||||
emit_rmro(as, XO_LEA, RID_RET, RID_ESP, ra_spill(as, irl));
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
|
||||
args[1] = ASMREF_TMP1;
|
||||
asm_setupresult(as, ir, ci);
|
||||
asm_gencall(as, ci, args);
|
||||
emit_rmro(as, XO_LEA, ra_releasetmp(as, ASMREF_TMP1),
|
||||
RID_ESP, ra_spill(as, irl));
|
||||
} else {
|
||||
/* GCstr *lj_str_fromint(lua_State *L, int32_t k) */
|
||||
emit_call(as, lj_str_fromint);
|
||||
emit_setargr(as, 1, RID_RET);
|
||||
emit_getgl(as, RID_RET, jit_L);
|
||||
emit_setargr(as, 2, ra_alloc1(as, ir->op1, RSET_GPR));
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
|
||||
args[1] = ir->op1;
|
||||
asm_setupresult(as, ir, ci);
|
||||
asm_gencall(as, ci, args);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1330,7 +1478,7 @@ static uint32_t ir_khash(IRIns *ir)
|
||||
lua_assert(!irt_isnil(ir->t));
|
||||
return irt_type(ir->t)-IRT_FALSE;
|
||||
} else {
|
||||
lua_assert(irt_isaddr(ir->t));
|
||||
lua_assert(irt_isgcv(ir->t));
|
||||
lo = u32ptr(ir_kgc(ir));
|
||||
hi = lo - 0x04c11db7;
|
||||
}
|
||||
@@ -1517,33 +1665,27 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
|
||||
|
||||
static void asm_newref(ASMState *as, IRIns *ir)
|
||||
{
|
||||
IRRef keyref = ir->op2;
|
||||
IRIns *irkey = IR(keyref);
|
||||
RegSet allow = RSET_GPR;
|
||||
Reg tab, tmp;
|
||||
ra_destreg(as, ir, RID_RET);
|
||||
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
|
||||
tab = ra_alloc1(as, ir->op1, allow);
|
||||
tmp = ra_scratch(as, rset_clear(allow, tab));
|
||||
/* TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key) */
|
||||
emit_call(as, lj_tab_newkey);
|
||||
emit_setargr(as, 1, tmp);
|
||||
emit_setargr(as, 2, tab);
|
||||
emit_getgl(as, tmp, jit_L);
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
|
||||
IRRef args[3];
|
||||
IRIns *irkey;
|
||||
Reg tmp;
|
||||
args[0] = ASMREF_L;
|
||||
args[1] = ir->op1;
|
||||
args[2] = ASMREF_TMP1;
|
||||
asm_setupresult(as, ir, ci);
|
||||
asm_gencall(as, ci, args);
|
||||
tmp = ra_releasetmp(as, ASMREF_TMP1);
|
||||
irkey = IR(ir->op2);
|
||||
if (irt_isnum(irkey->t)) {
|
||||
/* For numbers use the constant itself or a spill slot as a TValue. */
|
||||
if (irref_isk(keyref)) {
|
||||
emit_setargp(as, 3, ir_knum(irkey));
|
||||
} else {
|
||||
emit_setargr(as, 3, tmp);
|
||||
if (irref_isk(ir->op2))
|
||||
emit_loada(as, tmp, ir_knum(irkey));
|
||||
else
|
||||
emit_rmro(as, XO_LEA, tmp, RID_ESP, ra_spill(as, irkey));
|
||||
}
|
||||
} else {
|
||||
/* Otherwise use g->tmptv to hold the TValue. */
|
||||
lua_assert(irt_ispri(irkey->t) || irt_isaddr(irkey->t));
|
||||
emit_setargr(as, 3, tmp);
|
||||
if (!irref_isk(keyref)) {
|
||||
Reg src = ra_alloc1(as, keyref, rset_exclude(allow, tmp));
|
||||
if (!irref_isk(ir->op2)) {
|
||||
Reg src = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, tmp));
|
||||
emit_movtomro(as, src, tmp, 0);
|
||||
} else if (!irt_ispri(irkey->t)) {
|
||||
emit_movmroi(as, tmp, 0, irkey->i);
|
||||
@@ -1600,11 +1742,15 @@ static void asm_strref(ASMState *as, IRIns *ir)
|
||||
|
||||
/* -- Loads and stores ---------------------------------------------------- */
|
||||
|
||||
static void asm_fload(ASMState *as, IRIns *ir)
|
||||
static void asm_fxload(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
x86Op xo;
|
||||
asm_fusefref(as, ir, RSET_GPR);
|
||||
if (ir->o == IR_FLOAD)
|
||||
asm_fusefref(as, ir, RSET_GPR);
|
||||
else
|
||||
asm_fusexref(as, IR(ir->op1), RSET_GPR);
|
||||
/* ir->op2 is ignored -- unaligned loads are ok on x86. */
|
||||
switch (irt_type(ir->t)) {
|
||||
case IRT_I8: xo = XO_MOVSXb; break;
|
||||
case IRT_U8: xo = XO_MOVZXb; break;
|
||||
@@ -1731,96 +1877,44 @@ static void asm_sload(ASMState *as, IRIns *ir)
|
||||
}
|
||||
}
|
||||
|
||||
static void asm_xload(ASMState *as, IRIns *ir)
|
||||
{
|
||||
Reg dest = ra_dest(as, ir, RSET_GPR);
|
||||
x86Op xo;
|
||||
asm_fusestrref(as, IR(ir->op1), RSET_GPR); /* For now only support STRREF. */
|
||||
/* ir->op2 is ignored -- unaligned loads are ok on x86. */
|
||||
switch (irt_type(ir->t)) {
|
||||
case IRT_I8: xo = XO_MOVSXb; break;
|
||||
case IRT_U8: xo = XO_MOVZXb; break;
|
||||
case IRT_I16: xo = XO_MOVSXw; break;
|
||||
case IRT_U16: xo = XO_MOVZXw; break;
|
||||
default: lua_assert(irt_isint(ir->t)); xo = XO_MOV; break;
|
||||
}
|
||||
emit_mrm(as, xo, dest, RID_MRM);
|
||||
}
|
||||
|
||||
/* -- String ops ---------------------------------------------------------- */
|
||||
/* -- Allocations --------------------------------------------------------- */
|
||||
|
||||
static void asm_snew(ASMState *as, IRIns *ir)
|
||||
{
|
||||
RegSet allow = RSET_GPR;
|
||||
Reg left, right;
|
||||
IRIns *irl;
|
||||
ra_destreg(as, ir, RID_RET);
|
||||
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
|
||||
irl = IR(ir->op1);
|
||||
left = irl->r;
|
||||
right = IR(ir->op2)->r;
|
||||
if (ra_noreg(left)) {
|
||||
lua_assert(irl->o == IR_STRREF);
|
||||
/* Get register only for non-const STRREF. */
|
||||
if (!(irref_isk(irl->op1) && irref_isk(irl->op2))) {
|
||||
if (ra_hasreg(right)) rset_clear(allow, right);
|
||||
left = ra_allocref(as, ir->op1, allow);
|
||||
}
|
||||
}
|
||||
if (ra_noreg(right) && !irref_isk(ir->op2)) {
|
||||
if (ra_hasreg(left)) rset_clear(allow, left);
|
||||
right = ra_allocref(as, ir->op2, allow);
|
||||
}
|
||||
/* GCstr *lj_str_new(lua_State *L, const char *str, size_t len) */
|
||||
emit_call(as, lj_str_new);
|
||||
emit_setargr(as, 1, RID_RET);
|
||||
emit_getgl(as, RID_RET, jit_L);
|
||||
if (ra_noreg(left)) /* Use immediate for const STRREF. */
|
||||
emit_setargi(as, 2, IR(irl->op1)->i + IR(irl->op2)->i +
|
||||
(int32_t)sizeof(GCstr));
|
||||
else
|
||||
emit_setargr(as, 2, left);
|
||||
if (ra_noreg(right))
|
||||
emit_setargi(as, 3, IR(ir->op2)->i);
|
||||
else
|
||||
emit_setargr(as, 3, right);
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_new];
|
||||
IRRef args[3];
|
||||
args[0] = ASMREF_L;
|
||||
args[1] = ir->op1;
|
||||
args[2] = ir->op2;
|
||||
as->gcsteps++;
|
||||
asm_setupresult(as, ir, ci);
|
||||
asm_gencall(as, ci, args);
|
||||
}
|
||||
|
||||
/* -- Table ops ----------------------------------------------------------- */
|
||||
|
||||
static void asm_tnew(ASMState *as, IRIns *ir)
|
||||
{
|
||||
ra_destreg(as, ir, RID_RET);
|
||||
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
|
||||
/* GCtab *lj_tab_new(lua_State *L, int32_t asize, uint32_t hbits) */
|
||||
emit_call(as, lj_tab_new);
|
||||
emit_setargr(as, 1, RID_RET);
|
||||
emit_setargi(as, 2, ir->op1);
|
||||
emit_setargi(as, 3, ir->op2);
|
||||
emit_getgl(as, RID_RET, jit_L);
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_new1];
|
||||
IRRef args[2];
|
||||
args[0] = ASMREF_L;
|
||||
args[1] = ASMREF_TMP1;
|
||||
as->gcsteps++;
|
||||
asm_setupresult(as, ir, ci);
|
||||
asm_gencall(as, ci, args);
|
||||
emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1 | (ir->op2 << 24));
|
||||
}
|
||||
|
||||
static void asm_tdup(ASMState *as, IRIns *ir)
|
||||
{
|
||||
ra_destreg(as, ir, RID_RET);
|
||||
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
|
||||
/* GCtab *lj_tab_dup(lua_State *L, const GCtab *kt) */
|
||||
emit_call(as, lj_tab_dup);
|
||||
emit_setargr(as, 1, RID_RET);
|
||||
emit_setargp(as, 2, ir_kgc(IR(ir->op1)));
|
||||
emit_getgl(as, RID_RET, jit_L);
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_dup];
|
||||
IRRef args[2];
|
||||
args[0] = ASMREF_L;
|
||||
args[1] = ir->op1;
|
||||
as->gcsteps++;
|
||||
asm_setupresult(as, ir, ci);
|
||||
asm_gencall(as, ci, args);
|
||||
}
|
||||
|
||||
static void asm_tlen(ASMState *as, IRIns *ir)
|
||||
{
|
||||
ra_destreg(as, ir, RID_RET);
|
||||
ra_evictset(as, rset_exclude(RSET_SCRATCH, RID_RET));
|
||||
emit_call(as, lj_tab_len); /* MSize lj_tab_len(GCtab *t) */
|
||||
emit_setargr(as, 1, ra_alloc1(as, ir->op1, RSET_GPR));
|
||||
}
|
||||
/* -- Write barriers ------------------------------------------------------ */
|
||||
|
||||
static void asm_tbar(ASMState *as, IRIns *ir)
|
||||
{
|
||||
@@ -1839,51 +1933,31 @@ static void asm_tbar(ASMState *as, IRIns *ir)
|
||||
|
||||
static void asm_obar(ASMState *as, IRIns *ir)
|
||||
{
|
||||
RegSet allow = RSET_GPR;
|
||||
Reg obj, val;
|
||||
GCobj *valp;
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
|
||||
IRRef args[2];
|
||||
MCLabel l_end;
|
||||
int32_t ofs;
|
||||
ra_evictset(as, RSET_SCRATCH);
|
||||
if (irref_isk(ir->op2)) {
|
||||
valp = ir_kgc(IR(ir->op2));
|
||||
val = RID_NONE;
|
||||
} else {
|
||||
valp = NULL;
|
||||
val = ra_alloc1(as, ir->op2, allow);
|
||||
rset_clear(allow, val);
|
||||
}
|
||||
obj = ra_alloc1(as, ir->op1, allow);
|
||||
l_end = emit_label(as);
|
||||
Reg obj;
|
||||
/* No need for other object barriers (yet). */
|
||||
lua_assert(IR(ir->op1)->o == IR_UREFC);
|
||||
ofs = -(int32_t)offsetof(GCupval, tv);
|
||||
/* void lj_gc_barrieruv(global_State *g, GCobj *o, GCobj *v) */
|
||||
emit_call(as, lj_gc_barrieruv);
|
||||
if (ofs == 0) {
|
||||
emit_setargr(as, 2, obj);
|
||||
} else if (rset_test(RSET_SCRATCH, obj) && !(as->flags & JIT_F_LEA_AGU)) {
|
||||
emit_setargr(as, 2, obj);
|
||||
emit_gri(as, XG_ARITHi(XOg_ADD), obj, ofs);
|
||||
} else {
|
||||
emit_setargr(as, 2, RID_RET);
|
||||
emit_rmro(as, XO_LEA, RID_RET, obj, ofs);
|
||||
}
|
||||
emit_setargp(as, 1, J2G(as->J));
|
||||
if (valp)
|
||||
emit_setargp(as, 3, valp);
|
||||
else
|
||||
emit_setargr(as, 3, val);
|
||||
l_end = emit_label(as);
|
||||
args[0] = ASMREF_TMP1;
|
||||
args[1] = ir->op1;
|
||||
asm_gencall(as, ci, args);
|
||||
emit_loada(as, ra_releasetmp(as, ASMREF_TMP1), J2G(as->J));
|
||||
obj = IR(ir->op1)->r;
|
||||
emit_sjcc(as, CC_Z, l_end);
|
||||
emit_i8(as, LJ_GC_WHITES);
|
||||
if (valp)
|
||||
emit_rma(as, XO_GROUP3b, XOg_TEST, &valp->gch.marked);
|
||||
else
|
||||
if (irref_isk(ir->op2)) {
|
||||
GCobj *vp = ir_kgc(IR(ir->op2));
|
||||
emit_rma(as, XO_GROUP3b, XOg_TEST, &vp->gch.marked);
|
||||
} else {
|
||||
Reg val = ra_alloc1(as, ir->op2, rset_exclude(RSET_SCRATCH&RSET_GPR, obj));
|
||||
emit_rmro(as, XO_GROUP3b, XOg_TEST, val, (int32_t)offsetof(GChead, marked));
|
||||
}
|
||||
emit_sjcc(as, CC_Z, l_end);
|
||||
emit_i8(as, LJ_GC_BLACK);
|
||||
emit_rmro(as, XO_GROUP3b, XOg_TEST, obj,
|
||||
ofs + (int32_t)offsetof(GChead, marked));
|
||||
(int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
|
||||
}
|
||||
|
||||
/* -- FP/int arithmetic and logic operations ------------------------------ */
|
||||
@@ -2260,10 +2334,10 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
|
||||
}
|
||||
}
|
||||
emit_mrm(as, XO_UCOMISD, left, right);
|
||||
} else if (!(irt_isstr(ir->t) && (cc & 0xe) != CC_E)) {
|
||||
} else {
|
||||
IRRef lref = ir->op1, rref = ir->op2;
|
||||
IROp leftop = (IROp)(IR(lref)->o);
|
||||
lua_assert(irt_isint(ir->t) || irt_isaddr(ir->t));
|
||||
lua_assert(irt_isint(ir->t) || (irt_isaddr(ir->t) && (cc & 0xe) == CC_E));
|
||||
/* Swap constants (only for ABC) and fusable loads to the right. */
|
||||
if (irref_isk(lref) || (!irref_isk(rref) && opisfusableload(leftop))) {
|
||||
if ((cc & 0xc) == 0xc) cc ^= 3; /* L <-> G, LE <-> GE */
|
||||
@@ -2294,11 +2368,15 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
|
||||
} else {
|
||||
Reg left;
|
||||
if (opisfusableload((IROp)irl->o) &&
|
||||
((irt_isi8(irl->t) && checki8(imm)) ||
|
||||
(irt_isu8(irl->t) && checku8(imm)))) {
|
||||
/* Only the IRT_INT case is fused by asm_fuseload. The IRT_I8/IRT_U8
|
||||
** loads are handled here. The IRT_I16/IRT_U16 loads should never be
|
||||
** fused, since cmp word [mem], imm16 has a length-changing prefix.
|
||||
((irt_isu8(irl->t) && checku8(imm)) ||
|
||||
((irt_isi8(irl->t) || irt_isi16(irl->t)) && checki8(imm)) ||
|
||||
(irt_isu16(irl->t) && checku16(imm) && checki8((int16_t)imm)))) {
|
||||
/* Only the IRT_INT case is fused by asm_fuseload.
|
||||
** The IRT_I8/IRT_U8 loads and some IRT_I16/IRT_U16 loads
|
||||
** are handled here.
|
||||
** Note that cmp word [mem], imm16 should not be generated,
|
||||
** since it has a length-changing prefix. Compares of a word
|
||||
** against a sign-extended imm8 are ok, however.
|
||||
*/
|
||||
IRType1 origt = irl->t; /* Temporarily flip types. */
|
||||
irl->t.irt = (irl->t.irt & ~IRT_TYPE) | IRT_INT;
|
||||
@@ -2307,7 +2385,8 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
|
||||
if (left == RID_MRM) { /* Fusion succeeded? */
|
||||
asm_guardcc(as, cc);
|
||||
emit_i8(as, imm);
|
||||
emit_mrm(as, XO_ARITHib, XOg_CMP, RID_MRM);
|
||||
emit_mrm(as, (irt_isi8(origt) || irt_isu8(origt)) ?
|
||||
XO_ARITHib : XO_ARITHiw8, XOg_CMP, RID_MRM);
|
||||
return;
|
||||
} /* Otherwise handle register case as usual. */
|
||||
} else {
|
||||
@@ -2337,26 +2416,6 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
|
||||
asm_guardcc(as, cc);
|
||||
emit_mrm(as, XO_CMP, left, right);
|
||||
}
|
||||
} else { /* Handle ordered string compares. */
|
||||
RegSet allow = RSET_GPR;
|
||||
/* This assumes lj_str_cmp never uses any SSE registers. */
|
||||
ra_evictset(as, (RSET_SCRATCH & RSET_GPR));
|
||||
asm_guardcc(as, cc);
|
||||
emit_rr(as, XO_TEST, RID_RET, RID_RET);
|
||||
emit_call(as, lj_str_cmp); /* int32_t lj_str_cmp(GCstr *a, GCstr *b) */
|
||||
if (irref_isk(ir->op1)) {
|
||||
emit_setargi(as, 1, IR(ir->op1)->i);
|
||||
} else {
|
||||
Reg left = ra_alloc1(as, ir->op1, allow);
|
||||
rset_clear(allow, left);
|
||||
emit_setargr(as, 1, left);
|
||||
}
|
||||
if (irref_isk(ir->op2)) {
|
||||
emit_setargi(as, 2, IR(ir->op2)->i);
|
||||
} else {
|
||||
Reg right = ra_alloc1(as, ir->op2, allow);
|
||||
emit_setargr(as, 2, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2366,8 +2425,14 @@ static void asm_comp_(ASMState *as, IRIns *ir, int cc)
|
||||
/* -- GC handling --------------------------------------------------------- */
|
||||
|
||||
/* Sync all live GC values to Lua stack slots. */
|
||||
static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
|
||||
static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base)
|
||||
{
|
||||
/* Some care must be taken when allocating registers here, since this is
|
||||
** not part of the fast path. All scratch registers are evicted in the
|
||||
** fast path, so it's easiest to force allocation from scratch registers
|
||||
** only. This avoids register allocation state unification.
|
||||
*/
|
||||
RegSet allow = rset_exclude(RSET_SCRATCH & RSET_GPR, base);
|
||||
IRRef2 *map = &as->T->snapmap[snap->mapofs];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
for (s = 0; s < nslots; s++) {
|
||||
@@ -2392,27 +2457,36 @@ static void asm_gc_sync(ASMState *as, SnapShot *snap, Reg base, RegSet allow)
|
||||
/* Check GC threshold and do one or more GC steps. */
|
||||
static void asm_gc_check(ASMState *as, SnapShot *snap)
|
||||
{
|
||||
const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
|
||||
IRRef args[2];
|
||||
MCLabel l_end;
|
||||
const BCIns *pc;
|
||||
Reg tmp, base;
|
||||
Reg base, lstate, tmp;
|
||||
RegSet drop = RSET_SCRATCH;
|
||||
/* Must evict BASE because the stack may be reallocated by the GC. */
|
||||
if (ra_hasreg(IR(REF_BASE)->r))
|
||||
drop |= RID2RSET(IR(REF_BASE)->r);
|
||||
if (ra_hasreg(IR(REF_BASE)->r)) /* Stack may be reallocated by the GC. */
|
||||
drop |= RID2RSET(IR(REF_BASE)->r); /* Need to evict BASE, too. */
|
||||
ra_evictset(as, drop);
|
||||
base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_GPR, RID_RET));
|
||||
l_end = emit_label(as);
|
||||
/* void lj_gc_step_jit(lua_State *L, const BCIns *pc, MSize steps) */
|
||||
emit_call(as, lj_gc_step_jit);
|
||||
emit_movtomro(as, base, RID_RET, offsetof(lua_State, base));
|
||||
emit_setargr(as, 1, RID_RET);
|
||||
emit_setargi(as, 3, (int32_t)as->gcsteps);
|
||||
emit_getgl(as, RID_RET, jit_L);
|
||||
pc = (const BCIns *)(uintptr_t)as->T->snapmap[snap->mapofs+snap->nslots];
|
||||
emit_setargp(as, 2, pc);
|
||||
asm_gc_sync(as, snap, base, rset_exclude(RSET_SCRATCH & RSET_GPR, base));
|
||||
if (as->curins == as->loopref) /* BASE gets restored by LOOP anyway. */
|
||||
ra_restore(as, REF_BASE); /* Better do it inside the slow path. */
|
||||
args[0] = ASMREF_L;
|
||||
args[1] = ASMREF_TMP1;
|
||||
asm_gencall(as, ci, args);
|
||||
tmp = ra_releasetmp(as, ASMREF_TMP1);
|
||||
emit_loadi(as, tmp, (int32_t)as->gcsteps);
|
||||
/* We don't know spadj yet, so get the C frame from L->cframe. */
|
||||
emit_movmroi(as, tmp, CFRAME_OFS_PC,
|
||||
(int32_t)as->T->snapmap[snap->mapofs+snap->nslots]);
|
||||
emit_gri(as, XG_ARITHi(XOg_AND), tmp, CFRAME_RAWMASK);
|
||||
lstate = IR(ASMREF_L)->r;
|
||||
emit_movrmro(as, tmp, lstate, offsetof(lua_State, cframe));
|
||||
/* It's ok if lstate is already in a non-scratch reg. But all allocations
|
||||
** in the non-fast path must use a scratch reg. See comment above.
|
||||
*/
|
||||
base = ra_alloc1(as, REF_BASE, rset_exclude(RSET_SCRATCH & RSET_GPR, lstate));
|
||||
emit_movtomro(as, base, lstate, offsetof(lua_State, base));
|
||||
asm_gc_sync(as, snap, base);
|
||||
/* BASE/L get restored anyway, better do it inside the slow path. */
|
||||
if (as->parent || as->curins == as->loopref) ra_restore(as, REF_BASE);
|
||||
if (rset_test(RSET_SCRATCH, lstate) && ra_hasreg(IR(ASMREF_L)->r))
|
||||
ra_restore(as, ASMREF_L);
|
||||
/* Jump around GC step if GC total < GC threshold. */
|
||||
tmp = ra_scratch(as, RSET_SCRATCH & RSET_GPR);
|
||||
emit_sjcc(as, CC_B, l_end);
|
||||
@@ -2666,7 +2740,7 @@ static void asm_head_root(ASMState *as)
|
||||
{
|
||||
int32_t spadj;
|
||||
emit_setgli(as, vmstate, (int32_t)as->J->curtrace);
|
||||
spadj = sps_adjust(as);
|
||||
spadj = sps_adjust(as->evenspill);
|
||||
as->T->spadjust = (uint16_t)spadj;
|
||||
emit_addptr(as, RID_ESP, -spadj);
|
||||
}
|
||||
@@ -2676,11 +2750,13 @@ static void asm_head_base(ASMState *as)
|
||||
{
|
||||
IRIns *ir = IR(REF_BASE);
|
||||
Reg r = ir->r;
|
||||
lua_assert(ra_hasreg(r) && !ra_hasspill(ir->s));
|
||||
ra_free(as, r);
|
||||
if (r != RID_BASE) {
|
||||
ra_scratch(as, RID2RSET(RID_BASE));
|
||||
emit_rr(as, XO_MOV, r, RID_BASE);
|
||||
lua_assert(!ra_hasspill(ir->s));
|
||||
if (ra_hasreg(r)) {
|
||||
ra_free(as, r);
|
||||
if (r != RID_BASE) {
|
||||
ra_scratch(as, RID2RSET(RID_BASE));
|
||||
emit_rr(as, XO_MOV, r, RID_BASE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2749,7 +2825,7 @@ static void asm_head_side(ASMState *as)
|
||||
}
|
||||
|
||||
/* Calculate stack frame adjustment. */
|
||||
spadj = sps_adjust(as);
|
||||
spadj = sps_adjust(as->evenspill);
|
||||
spdelta = spadj - (int32_t)as->parent->spadjust;
|
||||
if (spdelta < 0) { /* Don't shrink the stack frame. */
|
||||
spadj = (int32_t)as->parent->spadjust;
|
||||
@@ -2877,9 +2953,11 @@ static void asm_tail_sync(ASMState *as)
|
||||
GCfunc *fn = ir_kfunc(IR(ir->op2));
|
||||
if (isluafunc(fn)) {
|
||||
BCReg fs = s + funcproto(fn)->framesize;
|
||||
newbase = s;
|
||||
if (secondbase == ~(BCReg)0) secondbase = s;
|
||||
if (fs > topslot) topslot = fs;
|
||||
if (s != 0) {
|
||||
newbase = s;
|
||||
if (secondbase == ~(BCReg)0) secondbase = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3063,20 +3141,18 @@ static void asm_ir(ASMState *as, IRIns *ir)
|
||||
|
||||
/* Loads and stores. */
|
||||
case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: asm_ahuload(as, ir); break;
|
||||
case IR_FLOAD: asm_fload(as, ir); break;
|
||||
case IR_FLOAD: case IR_XLOAD: asm_fxload(as, ir); break;
|
||||
case IR_SLOAD: asm_sload(as, ir); break;
|
||||
case IR_XLOAD: asm_xload(as, ir); break;
|
||||
|
||||
case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
|
||||
case IR_FSTORE: asm_fstore(as, ir); break;
|
||||
|
||||
/* String ops. */
|
||||
/* Allocations. */
|
||||
case IR_SNEW: asm_snew(as, ir); break;
|
||||
|
||||
/* Table ops. */
|
||||
case IR_TNEW: asm_tnew(as, ir); break;
|
||||
case IR_TDUP: asm_tdup(as, ir); break;
|
||||
case IR_TLEN: asm_tlen(as, ir); break;
|
||||
|
||||
/* Write barriers. */
|
||||
case IR_TBAR: asm_tbar(as, ir); break;
|
||||
case IR_OBAR: asm_obar(as, ir); break;
|
||||
|
||||
@@ -3092,6 +3168,10 @@ static void asm_ir(ASMState *as, IRIns *ir)
|
||||
case IR_TOSTR: asm_tostr(as, ir); break;
|
||||
case IR_STRTO: asm_strto(as, ir); break;
|
||||
|
||||
/* Calls. */
|
||||
case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
|
||||
case IR_CARG: break;
|
||||
|
||||
default:
|
||||
setintV(&as->J->errinfo, ir->o);
|
||||
lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
|
||||
@@ -3123,6 +3203,8 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
|
||||
IRRef i, nins;
|
||||
int inloop;
|
||||
|
||||
ra_setup(as);
|
||||
|
||||
/* Clear reg/sp for constants. */
|
||||
for (i = T->nk; i < REF_BIAS; i++)
|
||||
IR(i)->prev = REGSP_INIT;
|
||||
@@ -3144,6 +3226,7 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
|
||||
as->curins = nins;
|
||||
|
||||
inloop = 0;
|
||||
as->evenspill = SPS_FIRST;
|
||||
for (i = REF_FIRST; i < nins; i++) {
|
||||
IRIns *ir = IR(i);
|
||||
switch (ir->o) {
|
||||
@@ -3166,8 +3249,23 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
|
||||
if (i == as->stopins+1 && ir->op1 == ir->op2)
|
||||
as->stopins++;
|
||||
break;
|
||||
case IR_CALLN: case IR_CALLL: case IR_CALLS: {
|
||||
const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
|
||||
/* NYI: not fastcall-aware, but doesn't matter (yet). */
|
||||
if (CCI_NARGS(ci) > (uint32_t)as->evenspill) /* Leave room for args. */
|
||||
as->evenspill = (int32_t)CCI_NARGS(ci);
|
||||
#if LJ_64
|
||||
ir->prev = REGSP_HINT(irt_isnum(ir->t) ? RID_FPRET : RID_RET);
|
||||
#else
|
||||
ir->prev = REGSP_HINT(RID_RET);
|
||||
#endif
|
||||
if (inloop)
|
||||
as->modset |= (ci->flags & CCI_NOFPRCLOBBER) ?
|
||||
(RSET_SCRATCH & ~RSET_FPR) : RSET_SCRATCH;
|
||||
continue;
|
||||
}
|
||||
/* C calls evict all scratch regs and return results in RID_RET. */
|
||||
case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TLEN: case IR_TOSTR:
|
||||
case IR_SNEW: case IR_TNEW: case IR_TDUP: case IR_TOSTR:
|
||||
case IR_NEWREF:
|
||||
ir->prev = REGSP_HINT(RID_RET);
|
||||
if (inloop)
|
||||
@@ -3177,11 +3275,6 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
|
||||
if (inloop)
|
||||
as->modset = RSET_SCRATCH;
|
||||
break;
|
||||
/* Ordered string compares evict all integer scratch registers. */
|
||||
case IR_LT: case IR_GE: case IR_LE: case IR_GT:
|
||||
if (irt_isstr(ir->t) && inloop)
|
||||
as->modset |= (RSET_SCRATCH & RSET_GPR);
|
||||
break;
|
||||
/* Non-constant shift counts need to be in RID_ECX. */
|
||||
case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
|
||||
if (!irref_isk(ir->op2) && !ra_hashint(IR(ir->op2)->r))
|
||||
@@ -3200,6 +3293,10 @@ static void asm_setup_regsp(ASMState *as, Trace *T)
|
||||
}
|
||||
ir->prev = REGSP_INIT;
|
||||
}
|
||||
if ((as->evenspill & 1))
|
||||
as->oddspill = as->evenspill++;
|
||||
else
|
||||
as->oddspill = 0;
|
||||
}
|
||||
|
||||
/* -- Assembler core ------------------------------------------------------ */
|
||||
@@ -3263,7 +3360,6 @@ void lj_asm_trace(jit_State *J, Trace *T)
|
||||
as->fuseref = (as->flags & JIT_F_OPT_FUSE) ? as->loopref : FUSE_DISABLED;
|
||||
|
||||
/* Setup register allocation. */
|
||||
ra_setup(as);
|
||||
asm_setup_regsp(as, T);
|
||||
|
||||
if (!as->loopref) {
|
||||
|
||||
Reference in New Issue
Block a user