Split up FP IR instructions with SPLIT pass for soft-float targets.
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
|
||||
#include "lj_obj.h"
|
||||
|
||||
#if LJ_HASJIT && LJ_HASFFI && LJ_32
|
||||
#if LJ_HASJIT && (LJ_SOFTFP || (LJ_32 && LJ_HASFFI))
|
||||
|
||||
#include "lj_err.h"
|
||||
#include "lj_str.h"
|
||||
@@ -21,9 +21,9 @@
|
||||
/* SPLIT pass:
|
||||
**
|
||||
** This pass splits up 64 bit IR instructions into multiple 32 bit IR
|
||||
** instructions. It's only active for 32 bit CPUs which lack native 64 bit
|
||||
** operations. The FFI is currently the only emitter for 64 bit
|
||||
** instructions, so this pass is disabled if the FFI is disabled.
|
||||
** instructions. It's only active for soft-float targets or for 32 bit CPUs
|
||||
** which lack native 64 bit integer operations (the FFI is currently the
|
||||
** only emitter for 64 bit integer instructions).
|
||||
**
|
||||
** Splitting the IR in a separate pass keeps each 32 bit IR assembler
|
||||
** backend simple. Only a small amount of extra functionality needs to be
|
||||
@@ -41,14 +41,19 @@
|
||||
** The operands of HIOP hold the hiword input references. The output of HIOP
|
||||
** is the hiword output reference, which is also used to hold the hiword
|
||||
** register or spill slot information. The register allocator treats this
|
||||
** instruction independent of any other instruction, which improves code
|
||||
** instruction independently of any other instruction, which improves code
|
||||
** quality compared to using fixed register pairs.
|
||||
**
|
||||
** It's easier to split up some instructions into two regular 32 bit
|
||||
** instructions. E.g. XLOAD is split up into two XLOADs with two different
|
||||
** addresses. Obviously 64 bit constants need to be split up into two 32 bit
|
||||
** constants, too. Some hiword instructions can be entirely omitted, e.g.
|
||||
** when zero-extending a 32 bit value to 64 bits.
|
||||
** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
|
||||
** are split up into two 32 bit arguments each.
|
||||
**
|
||||
** On soft-float targets, floating-point instructions are directly converted
|
||||
** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
|
||||
** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
|
||||
**
|
||||
** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
|
||||
** two int64_t fields:
|
||||
@@ -101,9 +106,42 @@ static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
|
||||
return nref;
|
||||
}
|
||||
|
||||
/* Emit a CALLN with two split 64 bit arguments. */
|
||||
static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir,
|
||||
#if LJ_SOFTFP
|
||||
/* Emit a CALLN with one split 64 bit argument. */
|
||||
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
|
||||
IRIns *ir, IRCallID id)
|
||||
{
|
||||
IRRef tmp, op1 = ir->op1;
|
||||
J->cur.nins--;
|
||||
#if LJ_LE
|
||||
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
|
||||
#else
|
||||
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
|
||||
#endif
|
||||
ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
|
||||
return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
|
||||
}
|
||||
|
||||
/* Emit a CALLN with one split 64 bit argument and a 32 bit argument. */
|
||||
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
|
||||
IRIns *ir, IRCallID id)
|
||||
{
|
||||
IRRef tmp, op1 = ir->op1, op2 = ir->op2;
|
||||
J->cur.nins--;
|
||||
#if LJ_LE
|
||||
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
|
||||
#else
|
||||
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
|
||||
#endif
|
||||
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
|
||||
ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
|
||||
return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Emit a CALLN with two split 64 bit arguments. */
|
||||
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
|
||||
IRIns *ir, IRCallID id)
|
||||
{
|
||||
IRRef tmp, op1 = ir->op1, op2 = ir->op2;
|
||||
J->cur.nins--;
|
||||
@@ -117,7 +155,9 @@ static IRRef split_call64(jit_State *J, IRRef1 *hisubst, IRIns *oir,
|
||||
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
|
||||
#endif
|
||||
ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
|
||||
return split_emit(J, IRTI(IR_HIOP), tmp, tmp);
|
||||
return split_emit(J,
|
||||
IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
|
||||
tmp, tmp);
|
||||
}
|
||||
|
||||
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword). */
|
||||
@@ -155,7 +195,8 @@ static void split_ir(jit_State *J)
|
||||
/* Process constants and fixed references. */
|
||||
for (ref = nk; ref <= REF_BASE; ref++) {
|
||||
IRIns *ir = &oir[ref];
|
||||
if (ir->o == IR_KINT64) { /* Split up 64 bit constant. */
|
||||
if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
|
||||
/* Split up 64 bit constant. */
|
||||
TValue tv = *ir_k64(ir);
|
||||
ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
|
||||
hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
|
||||
@@ -181,6 +222,106 @@ static void split_ir(jit_State *J)
|
||||
hisubst[ref] = 0;
|
||||
|
||||
/* Split 64 bit instructions. */
|
||||
#if LJ_SOFTFP
|
||||
if (irt_isnum(ir->t)) {
|
||||
nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
|
||||
/* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
|
||||
switch (ir->o) {
|
||||
case IR_ADD:
|
||||
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
|
||||
break;
|
||||
case IR_SUB:
|
||||
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
|
||||
break;
|
||||
case IR_MUL:
|
||||
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
|
||||
break;
|
||||
case IR_DIV:
|
||||
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
|
||||
break;
|
||||
case IR_POW:
|
||||
hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
|
||||
break;
|
||||
case IR_FPMATH:
|
||||
hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
|
||||
break;
|
||||
case IR_ATAN2:
|
||||
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
|
||||
break;
|
||||
case IR_LDEXP:
|
||||
hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
|
||||
break;
|
||||
case IR_NEG: case IR_ABS:
|
||||
nir->o = IR_CONV; /* Pass through loword. */
|
||||
nir->op2 = (IRT_INT << 5) | IRT_INT;
|
||||
hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
|
||||
hisubst[ir->op1], hisubst[ir->op2]);
|
||||
break;
|
||||
case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
|
||||
case IR_MIN: case IR_MAX:
|
||||
hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
|
||||
break;
|
||||
case IR_XLOAD:
|
||||
hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP),
|
||||
split_ptr(J, nir->op1), ir->op2);
|
||||
#if LJ_BE
|
||||
ir->prev = hi; hi = nref;
|
||||
#endif
|
||||
break;
|
||||
case IR_ASTORE: case IR_HSTORE: case IR_USTORE:
|
||||
split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
|
||||
break;
|
||||
case IR_XSTORE: {
|
||||
#if LJ_LE
|
||||
IRRef hiref = hisubst[ir->op2];
|
||||
#else
|
||||
IRRef hiref = nir->op2; nir->op2 = hisubst[ir->op2];
|
||||
#endif
|
||||
split_emit(J, IRT(IR_XSTORE, IRT_SOFTFP),
|
||||
split_ptr(J, nir->op1), hiref);
|
||||
break;
|
||||
}
|
||||
case IR_CONV: { /* Conversion to number. Others handled below. */
|
||||
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
|
||||
#if LJ_32 && LJ_HASFFI
|
||||
if (st == IRT_I64 || st == IRT_U64) {
|
||||
hi = split_call_l(J, hisubst, oir, ir,
|
||||
st == IRT_I64 ? IRCALL_softfp_l2d : IRCALL_softfp_ul2d);
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
lua_assert(st == IRT_INT ||
|
||||
(LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
|
||||
nir->o = IR_CALLN;
|
||||
#if LJ_32 && LJ_HASFFI
|
||||
nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
|
||||
st == IRT_FLOAT ? IRCALL_softfp_f2d :
|
||||
IRCALL_softfp_ui2d;
|
||||
#else
|
||||
nir->op2 = IRCALL_softfp_i2d;
|
||||
#endif
|
||||
hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
|
||||
break;
|
||||
}
|
||||
case IR_CALLS:
|
||||
case IR_CALLXS:
|
||||
goto split_call;
|
||||
case IR_PHI:
|
||||
if (nir->op1 == nir->op2)
|
||||
J->cur.nins--; /* Drop useless PHIs. */
|
||||
if (hisubst[ir->op1] != hisubst[ir->op2])
|
||||
split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
|
||||
hisubst[ir->op1], hisubst[ir->op2]);
|
||||
break;
|
||||
default:
|
||||
lua_assert(ir->o <= IR_NE);
|
||||
split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
|
||||
hisubst[ir->op1], hisubst[ir->op2]);
|
||||
break;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
#if LJ_32 && LJ_HASFFI
|
||||
if (irt_isint64(ir->t)) {
|
||||
IRRef hiref = hisubst[ir->op1];
|
||||
nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD); /* Turn into INT op. */
|
||||
@@ -199,22 +340,22 @@ static void split_ir(jit_State *J)
|
||||
hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
|
||||
break;
|
||||
case IR_MUL:
|
||||
hi = split_call64(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
|
||||
hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
|
||||
break;
|
||||
case IR_DIV:
|
||||
hi = split_call64(J, hisubst, oir, ir,
|
||||
irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
|
||||
IRCALL_lj_carith_divu64);
|
||||
hi = split_call_ll(J, hisubst, oir, ir,
|
||||
irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
|
||||
IRCALL_lj_carith_divu64);
|
||||
break;
|
||||
case IR_MOD:
|
||||
hi = split_call64(J, hisubst, oir, ir,
|
||||
irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
|
||||
IRCALL_lj_carith_modu64);
|
||||
hi = split_call_ll(J, hisubst, oir, ir,
|
||||
irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
|
||||
IRCALL_lj_carith_modu64);
|
||||
break;
|
||||
case IR_POW:
|
||||
hi = split_call64(J, hisubst, oir, ir,
|
||||
irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
|
||||
IRCALL_lj_carith_powu64);
|
||||
hi = split_call_ll(J, hisubst, oir, ir,
|
||||
irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
|
||||
IRCALL_lj_carith_powu64);
|
||||
break;
|
||||
case IR_FLOAD:
|
||||
lua_assert(ir->op2 == IRFL_CDATA_INT64);
|
||||
@@ -239,9 +380,21 @@ static void split_ir(jit_State *J)
|
||||
break;
|
||||
case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
|
||||
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
|
||||
#if LJ_SOFTFP
|
||||
if (st == IRT_NUM) { /* NUM to 64 bit int conv. */
|
||||
split_call_l(J, hisubst, oir, ir,
|
||||
irt_isi64(ir->t) ? IRCALL_softfp_d2l : IRCALL_softfp_d2ul);
|
||||
} else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */
|
||||
nir->o = IR_CALLN;
|
||||
nir->op2 = irt_isi64(ir->t) ? IRCALL_softfp_f2l : IRCALL_softfp_f2ul;
|
||||
hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
|
||||
}
|
||||
#else
|
||||
if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
|
||||
hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
|
||||
} else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */
|
||||
}
|
||||
#endif
|
||||
else if (st == IRT_I64 || st == IRT_U64) { /* 64/64 bit cast. */
|
||||
/* Drop cast, since assembler doesn't care. */
|
||||
goto fwdlo;
|
||||
} else if ((ir->op2 & IRCONV_SEXT)) { /* Sign-extend to 64 bit. */
|
||||
@@ -274,13 +427,37 @@ static void split_ir(jit_State *J)
|
||||
split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
|
||||
break;
|
||||
}
|
||||
} else if (ir->o == IR_CONV) { /* See above, too. */
|
||||
} else
|
||||
#endif
|
||||
#if LJ_SOFTFP
|
||||
if (ir->o == IR_TOBIT) {
|
||||
IRRef tmp, op1 = ir->op1;
|
||||
J->cur.nins--;
|
||||
#if LJ_LE
|
||||
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
|
||||
#else
|
||||
tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
|
||||
#endif
|
||||
ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
|
||||
} else
|
||||
#endif
|
||||
if (ir->o == IR_CONV) { /* See above, too. */
|
||||
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
|
||||
#if LJ_32 && LJ_HASFFI
|
||||
if (st == IRT_I64 || st == IRT_U64) { /* Conversion from 64 bit int. */
|
||||
#if LJ_SOFTFP
|
||||
if (irt_isfloat(ir->t)) {
|
||||
split_call_l(J, hisubst, oir, ir,
|
||||
st == IRT_I64 ? IRCALL_softfp_l2f : IRCALL_softfp_ul2f);
|
||||
J->cur.nins--; /* Drop unused HIOP. */
|
||||
}
|
||||
#else
|
||||
if (irt_isfp(ir->t)) { /* 64 bit integer to FP conversion. */
|
||||
ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
|
||||
hisubst[ir->op1], nref);
|
||||
} else { /* Truncate to lower 32 bits. */
|
||||
}
|
||||
#endif
|
||||
else { /* Truncate to lower 32 bits. */
|
||||
fwdlo:
|
||||
ir->prev = nir->op1; /* Forward loword. */
|
||||
/* Replace with NOP to avoid messing up the snapshot logic. */
|
||||
@@ -288,6 +465,36 @@ static void split_ir(jit_State *J)
|
||||
nir->op1 = nir->op2 = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
|
||||
else if (irt_isfloat(ir->t)) {
|
||||
if (st == IRT_NUM) {
|
||||
split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
|
||||
J->cur.nins--; /* Drop unused HIOP. */
|
||||
} else {
|
||||
nir->o = IR_CALLN;
|
||||
nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
|
||||
}
|
||||
} else if (st == IRT_FLOAT) {
|
||||
nir->o = IR_CALLN;
|
||||
nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
|
||||
} else
|
||||
#endif
|
||||
#if LJ_SOFTFP
|
||||
if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
|
||||
if (irt_isguard(ir->t)) {
|
||||
lua_assert(0); /* NYI: missing check. */
|
||||
}
|
||||
split_call_l(J, hisubst, oir, ir,
|
||||
#if LJ_32 && LJ_HASFFI
|
||||
st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i
|
||||
#else
|
||||
IRCALL_softfp_d2i
|
||||
#endif
|
||||
);
|
||||
J->cur.nins--; /* Drop unused HIOP. */
|
||||
}
|
||||
#endif
|
||||
} else if (ir->o == IR_CALLXS) {
|
||||
IRRef hiref;
|
||||
split_call:
|
||||
@@ -303,8 +510,10 @@ static void split_ir(jit_State *J)
|
||||
#endif
|
||||
ir->prev = nref = split_emit(J, ot, nref, op2);
|
||||
}
|
||||
if (irt_isint64(ir->t))
|
||||
hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
|
||||
if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
|
||||
hi = split_emit(J,
|
||||
IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
|
||||
nref, nref);
|
||||
} else if (ir->o == IR_CARG) {
|
||||
IRRef hiref = hisubst[ir->op1];
|
||||
if (hiref) {
|
||||
@@ -367,17 +576,18 @@ static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef LUA_USE_ASSERT
|
||||
#if defined(LUA_USE_ASSERT) || LJ_SOFTFP
|
||||
/* Slow, but sure way to check whether a SPLIT pass is needed. */
|
||||
static int split_needsplit(jit_State *J)
|
||||
{
|
||||
IRIns *ir, *irend;
|
||||
IRRef ref;
|
||||
for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
|
||||
if (irt_isint64(ir->t))
|
||||
if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
|
||||
return 1;
|
||||
for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev)
|
||||
if ((IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 ||
|
||||
if ((LJ_SOFTFP && (IR(ref)->op2 & IRCONV_SRCMASK) == IRT_NUM) ||
|
||||
(IR(ref)->op2 & IRCONV_SRCMASK) == IRT_I64 ||
|
||||
(IR(ref)->op2 & IRCONV_SRCMASK) == IRT_U64)
|
||||
return 1;
|
||||
return 0; /* Nope. */
|
||||
@@ -387,7 +597,12 @@ static int split_needsplit(jit_State *J)
|
||||
/* SPLIT pass. */
|
||||
void lj_opt_split(jit_State *J)
|
||||
{
|
||||
#if LJ_SOFTFP
|
||||
if (!J->needsplit)
|
||||
J->needsplit = split_needsplit(J);
|
||||
#else
|
||||
lua_assert(J->needsplit >= split_needsplit(J)); /* Verify flag. */
|
||||
#endif
|
||||
if (J->needsplit) {
|
||||
int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
|
||||
if (errcode) {
|
||||
|
||||
Reference in New Issue
Block a user