Unify Lua number to FFI integer conversions.

Phew. #1411
This commit is contained in:
Mike Pall
2025-11-27 17:45:17 +01:00
parent 3215838aa7
commit f80b349d54
41 changed files with 1070 additions and 434 deletions

View File

@@ -338,42 +338,44 @@ pointer or type compatibility:
<tr class="odd"> <tr class="odd">
<td class="convin">Integer</td><td class="convop">&rarr;<sup>round</sup></td><td class="convout"><tt>double</tt>, <tt>float</tt></td></tr> <td class="convin">Integer</td><td class="convop">&rarr;<sup>round</sup></td><td class="convout"><tt>double</tt>, <tt>float</tt></td></tr>
<tr class="even"> <tr class="even">
<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup> <tt>int32_t</tt> &rarr;<sup>narrow</sup></td><td class="convout"><tt>(u)int8_t</tt>, <tt>(u)int16_t</tt></td></tr> <td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup> <tt>int64_t</tt> &rarr;<sup>narrow</sup> <sup>*</sup></td><td class="convout"><tt>(u)int8_t</tt>, <tt>(u)int16_t</tt>, <tt>(u)int32_t</tt></td></tr>
<tr class="odd"> <tr class="odd">
<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup></td><td class="convout"><tt>(u)int32_t</tt>, <tt>(u)int64_t</tt></td></tr> <td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup></td><td class="convout"><tt>int64_t</tt></td></tr>
<tr class="even"> <tr class="even">
<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>trunc</sup> uint64_t &cup; int64_t &rarr;<sup>reinterpret</sup> <sup>*</sup></td><td class="convout"><tt>uint64_t</tt></td></tr>
<tr class="odd">
<td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>round</sup></td><td class="convout"><tt>float</tt>, <tt>double</tt></td></tr> <td class="convin"><tt>double</tt>, <tt>float</tt></td><td class="convop">&rarr;<sup>round</sup></td><td class="convout"><tt>float</tt>, <tt>double</tt></td></tr>
<tr class="odd separate"> <tr class="even separate">
<td class="convin">Number</td><td class="convop">n == 0 &rarr; 0, otherwise 1</td><td class="convout"><tt>bool</tt></td></tr> <td class="convin">Number</td><td class="convop">n == 0 &rarr; 0, otherwise 1</td><td class="convout"><tt>bool</tt></td></tr>
<tr class="even"> <tr class="odd">
<td class="convin"><tt>bool</tt></td><td class="convop"><tt>false</tt> &rarr; 0, <tt>true</tt> &rarr; 1</td><td class="convout">Number</td></tr> <td class="convin"><tt>bool</tt></td><td class="convop"><tt>false</tt> &rarr; 0, <tt>true</tt> &rarr; 1</td><td class="convout">Number</td></tr>
<tr class="odd separate"> <tr class="even separate">
<td class="convin">Complex number</td><td class="convop">convert real part</td><td class="convout">Number</td></tr> <td class="convin">Complex number</td><td class="convop">convert real part</td><td class="convout">Number</td></tr>
<tr class="even"> <tr class="odd">
<td class="convin">Number</td><td class="convop">convert real part, imag = 0</td><td class="convout">Complex number</td></tr> <td class="convin">Number</td><td class="convop">convert real part, imag = 0</td><td class="convout">Complex number</td></tr>
<tr class="odd"> <tr class="even">
<td class="convin">Complex number</td><td class="convop">convert real and imag part</td><td class="convout">Complex number</td></tr> <td class="convin">Complex number</td><td class="convop">convert real and imag part</td><td class="convout">Complex number</td></tr>
<tr class="even separate"> <tr class="odd separate">
<td class="convin">Number</td><td class="convop">convert scalar and replicate</td><td class="convout">Vector</td></tr> <td class="convin">Number</td><td class="convop">convert scalar and replicate</td><td class="convout">Vector</td></tr>
<tr class="odd"> <tr class="even">
<td class="convin">Vector</td><td class="convop">copy (same size)</td><td class="convout">Vector</td></tr> <td class="convin">Vector</td><td class="convop">copy (same size)</td><td class="convout">Vector</td></tr>
<tr class="even separate"> <tr class="odd separate">
<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr> <td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr>
<tr class="odd"> <tr class="even">
<td class="convin">Array</td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr> <td class="convin">Array</td><td class="convop">take base address (compat)</td><td class="convout">Pointer</td></tr>
<tr class="even">
<td class="convin">Function</td><td class="convop">take function address</td><td class="convout">Function pointer</td></tr>
<tr class="odd separate">
<td class="convin">Number</td><td class="convop">convert via <tt>uintptr_t</tt> (cast)</td><td class="convout">Pointer</td></tr>
<tr class="even">
<td class="convin">Pointer</td><td class="convop">convert address (compat/cast)</td><td class="convout">Pointer</td></tr>
<tr class="odd"> <tr class="odd">
<td class="convin">Function</td><td class="convop">take function address</td><td class="convout">Function pointer</td></tr>
<tr class="even separate">
<td class="convin">Number</td><td class="convop">convert via <tt>uintptr_t</tt> (cast)</td><td class="convout">Pointer</td></tr>
<tr class="odd">
<td class="convin">Pointer</td><td class="convop">convert address (compat/cast)</td><td class="convout">Pointer</td></tr>
<tr class="even">
<td class="convin">Pointer</td><td class="convop">convert address (cast)</td><td class="convout">Integer</td></tr> <td class="convin">Pointer</td><td class="convop">convert address (cast)</td><td class="convout">Integer</td></tr>
<tr class="even"> <tr class="odd">
<td class="convin">Array</td><td class="convop">convert base address (cast)</td><td class="convout">Integer</td></tr> <td class="convin">Array</td><td class="convop">convert base address (cast)</td><td class="convout">Integer</td></tr>
<tr class="odd separate"> <tr class="even separate">
<td class="convin">Array</td><td class="convop">copy (compat)</td><td class="convout">Array</td></tr> <td class="convin">Array</td><td class="convop">copy (compat)</td><td class="convout">Array</td></tr>
<tr class="even"> <tr class="odd">
<td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">copy (identical type)</td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr> <td class="convin"><tt>struct</tt>/<tt>union</tt></td><td class="convop">copy (identical type)</td><td class="convout"><tt>struct</tt>/<tt>union</tt></td></tr>
</table> </table>
<p> <p>
@@ -384,6 +386,24 @@ type.
Conversions not listed above will raise an error. E.g. it's not Conversions not listed above will raise an error. E.g. it's not
possible to convert a pointer to a complex number or vice versa. possible to convert a pointer to a complex number or vice versa.
</p> </p>
<p>
* Some conversions from <tt>double</tt> have a larger defined range to
allow for mixed-signedness conversions, which are common in C code.
E.g. initializing an <tt>int32_t</tt> field with <tt>0xffffffff</tt>
or initializing a <tt>uint32_t</tt> or <tt>uint64_t</tt> field with
<tt>-1</tt>. Under strict conversion rules, these assignments would
give undefined results, since Lua numbers are doubles. The extended
ranges make these conversions defined. Lua numbers even outside
that extended range give an architecture-specific result.
</p>
<p>
Please note that doubles do not have the precision to represent the
whole signed or unsigned 64 bit integer range. Beware of large hex
constants in particular: e.g. <tt>0xffffffffffffffff</tt> is a double
rounded up to <tt>0x1p64</tt> during parsing. This will <em>not</em>
convert to a defined 64 bit integer value. Use the 64 bit literal
syntax instead, i.e. <tt>0xffffffffffffffffULL</tt>.
</p>
<h3 id="convert_vararg">Conversions for vararg C&nbsp;function arguments</h3> <h3 id="convert_vararg">Conversions for vararg C&nbsp;function arguments</h3>
<p> <p>

View File

@@ -127,8 +127,9 @@ static int io_file_readnum(lua_State *L, FILE *fp)
lua_Number d; lua_Number d;
if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) { if (fscanf(fp, LUA_NUMBER_SCAN, &d) == 1) {
if (LJ_DUALNUM) { if (LJ_DUALNUM) {
int32_t i = lj_num2int(d); int64_t i64;
if (d == (lua_Number)i && !tvismzero((cTValue *)&d)) { int32_t i;
if (lj_num2int_check(d, i64, i) && !tvismzero((cTValue *)&d)) {
setintV(L->top++, i); setintV(L->top++, i);
return 1; return 1;
} }
@@ -335,7 +336,7 @@ LJLIB_CF(io_method_seek)
if (tvisint(o)) if (tvisint(o))
ofs = (int64_t)intV(o); ofs = (int64_t)intV(o);
else if (tvisnum(o)) else if (tvisnum(o))
ofs = (int64_t)numV(o); ofs = lj_num2i64(numV(o));
else if (!tvisnil(o)) else if (!tvisnil(o))
lj_err_argt(L, 3, LUA_TNUMBER); lj_err_argt(L, 3, LUA_TNUMBER);
} }

View File

@@ -171,7 +171,8 @@ static int getfield(lua_State *L, const char *key, int d)
LJLIB_CF(os_date) LJLIB_CF(os_date)
{ {
const char *s = luaL_optstring(L, 1, "%c"); const char *s = luaL_optstring(L, 1, "%c");
time_t t = luaL_opt(L, (time_t)luaL_checknumber, 2, time(NULL)); time_t t = lua_isnoneornil(L, 2) ? time(NULL) :
lj_num2int_type(luaL_checknumber(L, 2), time_t);
struct tm *stm; struct tm *stm;
#if LJ_TARGET_POSIX #if LJ_TARGET_POSIX
struct tm rtm; struct tm rtm;
@@ -253,8 +254,9 @@ LJLIB_CF(os_time)
LJLIB_CF(os_difftime) LJLIB_CF(os_difftime)
{ {
lua_pushnumber(L, difftime((time_t)(luaL_checknumber(L, 1)), lua_pushnumber(L,
(time_t)(luaL_optnumber(L, 2, (lua_Number)0)))); difftime(lj_num2int_type(luaL_checknumber(L, 1), time_t),
lj_num2int_type(luaL_optnumber(L, 2, (lua_Number)0), time_t)));
return 1; return 1;
} }

View File

@@ -416,11 +416,7 @@ LUA_API lua_Integer lua_tointeger(lua_State *L, int idx)
return intV(&tmp); return intV(&tmp);
n = numV(&tmp); n = numV(&tmp);
} }
#if LJ_64 return lj_num2int_type(n, lua_Integer);
return (lua_Integer)n;
#else
return lj_num2int(n);
#endif
} }
LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok) LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
@@ -445,11 +441,7 @@ LUA_API lua_Integer lua_tointegerx(lua_State *L, int idx, int *ok)
n = numV(&tmp); n = numV(&tmp);
} }
if (ok) *ok = 1; if (ok) *ok = 1;
#if LJ_64 return lj_num2int_type(n, lua_Integer);
return (lua_Integer)n;
#else
return lj_num2int(n);
#endif
} }
LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx) LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
@@ -468,11 +460,7 @@ LUALIB_API lua_Integer luaL_checkinteger(lua_State *L, int idx)
return (lua_Integer)intV(&tmp); return (lua_Integer)intV(&tmp);
n = numV(&tmp); n = numV(&tmp);
} }
#if LJ_64 return lj_num2int_type(n, lua_Integer);
return (lua_Integer)n;
#else
return lj_num2int(n);
#endif
} }
LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def) LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
@@ -493,11 +481,7 @@ LUALIB_API lua_Integer luaL_optinteger(lua_State *L, int idx, lua_Integer def)
return (lua_Integer)intV(&tmp); return (lua_Integer)intV(&tmp);
n = numV(&tmp); n = numV(&tmp);
} }
#if LJ_64 return lj_num2int_type(n, lua_Integer);
return (lua_Integer)n;
#else
return lj_num2int(n);
#endif
} }
LUA_API int lua_toboolean(lua_State *L, int idx) LUA_API int lua_toboolean(lua_State *L, int idx)

View File

@@ -1329,27 +1329,32 @@ static void asm_conv64(ASMState *as, IRIns *ir)
IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
IRCallID id; IRCallID id;
const CCallInfo *ci;
#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
CCallInfo cim;
#endif
IRRef args[2]; IRRef args[2];
lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP, lj_assertA((ir-1)->o == IR_CONV && ir->o == IR_HIOP,
"not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS); "not a CONV/HIOP pair at IR %04d", (int)(ir - as->ir) - REF_BIAS);
args[LJ_BE] = (ir-1)->op1; args[LJ_BE] = (ir-1)->op1;
args[LJ_LE] = ir->op1; args[LJ_LE] = ir->op1;
if (st == IRT_NUM || st == IRT_FLOAT) { lj_assertA(st != IRT_FLOAT, "bad CONV *64.float emitted");
id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64); if (st == IRT_NUM) {
id = IRCALL_lj_vm_num2u64;
ir--; ir--;
ci = &lj_ir_callinfo[id];
} else { } else {
id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64); id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
}
{
#if LJ_TARGET_ARM && !LJ_ABI_SOFTFP #if LJ_TARGET_ARM && !LJ_ABI_SOFTFP
CCallInfo cim = lj_ir_callinfo[id], *ci = &cim; cim = lj_ir_callinfo[id];
cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */ cim.flags |= CCI_VARARG; /* These calls don't use the hard-float ABI! */
ci = &cim;
#else #else
const CCallInfo *ci = &lj_ir_callinfo[id]; ci = &lj_ir_callinfo[id];
#endif #endif
asm_setupresult(as, ir, ci);
asm_gencall(as, ci, args);
} }
asm_setupresult(as, ir, ci);
asm_gencall(as, ci, args);
} }
#endif #endif

View File

@@ -624,10 +624,9 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
ARMIns ai; ARMIns ai;
lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15)); emit_dn(as, ARMI_VMOV_R_S, dest, (tmp & 15));
ai = irt_isint(ir->t) ? ai = st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32;
(st == IRT_NUM ? ARMI_VCVT_S32_F64 : ARMI_VCVT_S32_F32) :
(st == IRT_NUM ? ARMI_VCVT_U32_F64 : ARMI_VCVT_U32_F32);
emit_dm(as, ai, (tmp & 15), (left & 15)); emit_dm(as, ai, (tmp & 15), (left & 15));
} }
} else } else

View File

@@ -648,14 +648,18 @@ static void asm_conv(ASMState *as, IRIns *ir)
} else { } else {
Reg left = ra_alloc1(as, lref, RSET_FPR); Reg left = ra_alloc1(as, lref, RSET_FPR);
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
A64Ins ai = irt_is64(ir->t) ? lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
(st == IRT_NUM ? if (irt_isu64(ir->t)) {
(irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) : emit_dnm(as, A64I_CSELx | A64F_CC(CC_VC), dest, dest, RID_TMP);
(irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) : emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), dest);
(st == IRT_NUM ? emit_dn(as, st == IRT_NUM ? A64I_FCVT_U64_F64 : A64I_FCVT_U64_F32, RID_TMP, (left & 31));
(irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) : emit_dn(as, st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32, dest, (left & 31));
(irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32)); } else {
emit_dn(as, ai, dest, (left & 31)); A64Ins ai = irt_is64(ir->t) ?
(st == IRT_NUM ? A64I_FCVT_S64_F64 : A64I_FCVT_S64_F32) :
(st == IRT_NUM ? A64I_FCVT_S32_F64 : A64I_FCVT_S32_F32);
emit_dn(as, ai, dest, (left & 31));
}
} }
} else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);

View File

@@ -635,64 +635,38 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, lref, RSET_FPR); Reg left = ra_alloc1(as, lref, RSET_FPR);
Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
if (irt_isu32(ir->t)) { /* FP to U32 conversion. */ lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
/* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
emit_tg(as, MIPSI_MFC1, dest, tmp);
emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D,
tmp, tmp);
emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D,
tmp, left, tmp);
if (st == IRT_FLOAT)
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
else
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)&as->J->k64[LJ_K64_2P31], RSET_GPR);
#if LJ_64 #if LJ_64
} else if (irt_isu64(ir->t)) { /* FP to U64 conversion. */ if (irt_isu64(ir->t)) { /* FP to U64 conversion. */
MCLabel l_end; MCLabel l_end = emit_label(as);
emit_tg(as, MIPSI_DMFC1, dest, tmp); emit_tg(as, MIPSI_DMFC1, dest, tmp);
l_end = emit_label(as); /* For result == INT64_MAX add -2^64 and convert again. */
/* For inputs >= 2^63 add -2^64 and convert again. */
if (st == IRT_NUM) { if (st == IRT_NUM) {
emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp); emit_fg(as, MIPSI_TRUNC_L_D, tmp, tmp);
emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp); emit_fgh(as, MIPSI_ADD_D, tmp, left, tmp);
emit_lsptr(as, MIPSI_LDC1, (tmp & 31), emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)&as->J->k64[LJ_K64_M2P64], (void *)&as->J->k64[LJ_K64_M2P64],
rset_exclude(RSET_GPR, dest)); rset_exclude(RSET_GPR, dest)); /* Delay slot. */
emit_fg(as, MIPSI_TRUNC_L_D, tmp, left); /* Delay slot. */ emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */
#if !LJ_TARGET_MIPSR6 emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1);
emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_ti(as, MIPSI_LI, RID_TMP, -1);
emit_fgh(as, MIPSI_C_OLT_D, 0, left, tmp); emit_tg(as, MIPSI_DMFC1, dest, tmp);
#else emit_fg(as, MIPSI_TRUNC_L_D, tmp, left);
emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_D, tmp, left, tmp);
#endif
emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
(void *)&as->J->k64[LJ_K64_2P63],
rset_exclude(RSET_GPR, dest));
} else { } else {
emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp); emit_fg(as, MIPSI_TRUNC_L_S, tmp, tmp);
emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp); emit_fgh(as, MIPSI_ADD_S, tmp, left, tmp);
emit_lsptr(as, MIPSI_LWC1, (tmp & 31), emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)&as->J->k32[LJ_K32_M2P64], (void *)&as->J->k32[LJ_K32_M2P64],
rset_exclude(RSET_GPR, dest)); rset_exclude(RSET_GPR, dest)); /* Delay slot. */
emit_fg(as, MIPSI_TRUNC_L_S, tmp, left); /* Delay slot. */ emit_branch(as, MIPSI_BNE, RID_TMP, dest, l_end); /* != INT64_MAX? */
#if !LJ_TARGET_MIPSR6 emit_dta(as, MIPSI_DSRL, RID_TMP, RID_TMP, 1);
emit_branch(as, MIPSI_BC1T, 0, 0, l_end); emit_ti(as, MIPSI_LI, RID_TMP, -1);
emit_fgh(as, MIPSI_C_OLT_S, 0, left, tmp); emit_tg(as, MIPSI_DMFC1, dest, tmp);
#else emit_fg(as, MIPSI_TRUNC_L_S, tmp, left);
emit_branch(as, MIPSI_BC1NEZ, 0, (tmp&31), l_end);
emit_fgh(as, MIPSI_CMP_LT_S, tmp, left, tmp);
#endif
emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
(void *)&as->J->k32[LJ_K32_2P63],
rset_exclude(RSET_GPR, dest));
} }
} else
#endif #endif
} else { {
#if LJ_32 #if LJ_32
emit_tg(as, MIPSI_MFC1, dest, tmp); emit_tg(as, MIPSI_MFC1, dest, tmp);
emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D, emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
@@ -733,13 +707,11 @@ static void asm_conv(ASMState *as, IRIns *ir)
"bad type for checked CONV"); "bad type for checked CONV");
asm_tointg(as, ir, RID_NONE); asm_tointg(as, ir, RID_NONE);
} else { } else {
IRCallID cid = irt_is64(ir->t) ? IRCallID cid;
((st == IRT_NUM) ? lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
(irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul) : lj_assertA(!(irt_is64(ir->t) && st != IRT_NUM), "bad CONV *64.float emitted");
(irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul)) : cid = irt_is64(ir->t) ? IRCALL_lj_vm_num2u64 :
((st == IRT_NUM) ? (st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i);
(irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
(irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui));
asm_callid(as, ir, cid); asm_callid(as, ir, cid);
} }
} else } else
@@ -780,7 +752,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
} }
} }
} else { } else {
if (st64 && !(ir->op2 & IRCONV_NONE)) { if (!irt_isu32(ir->t)) { /* Implicit sign extension. */
Reg left = ra_alloc1(as, lref, RSET_GPR);
emit_dta(as, MIPSI_SLL, dest, left, 0);
} else if (st64 && !(ir->op2 & IRCONV_NONE)) {
/* This is either a 32 bit reg/reg mov which zeroes the hiword /* This is either a 32 bit reg/reg mov which zeroes the hiword
** or a load of the loword from a 64 bit address. ** or a load of the loword from a 64 bit address.
*/ */

View File

@@ -512,29 +512,10 @@ static void asm_conv(ASMState *as, IRIns *ir)
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
Reg left = ra_alloc1(as, lref, RSET_FPR); Reg left = ra_alloc1(as, lref, RSET_FPR);
Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
if (irt_isu32(ir->t)) { lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
/* Convert both x and x-2^31 to int and merge results. */ emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest)); emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. */ emit_fb(as, PPCI_FCTIWZ, tmp, left);
emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP);
emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP);
emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */
emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */
emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
emit_tai(as, PPCI_LWZ, dest,
RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */
emit_fb(as, PPCI_FCTIWZ, tmp, left);
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
emit_fb(as, PPCI_FCTIWZ, tmp, tmp);
emit_fab(as, PPCI_FSUB, tmp, left, tmp);
emit_lsptr(as, PPCI_LFS, (tmp & 31),
(void *)&as->J->k32[LJ_K32_2P31], RSET_GPR);
} else {
emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
emit_fb(as, PPCI_FCTIWZ, tmp, left);
}
} }
} else } else
#endif #endif

View File

@@ -905,29 +905,28 @@ static void asm_conv(ASMState *as, IRIns *ir)
} else { } else {
Reg dest = ra_dest(as, ir, RSET_GPR); Reg dest = ra_dest(as, ir, RSET_GPR);
x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI; x86Op op = st == IRT_NUM ? XO_CVTTSD2SI : XO_CVTTSS2SI;
if (LJ_64 ? irt_isu64(ir->t) : irt_isu32(ir->t)) { lj_assertA(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
/* LJ_64: For inputs >= 2^63 add -2^64, convert again. */ #if LJ_64
/* LJ_32: For inputs >= 2^31 add -2^31, convert again and add 2^31. */ if (irt_isu64(ir->t)) {
/* For the indefinite result -2^63, add -2^64 and convert again. */
Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) : Reg tmp = ra_noreg(IR(lref)->r) ? ra_alloc1(as, lref, RSET_FPR) :
ra_scratch(as, RSET_FPR); ra_scratch(as, RSET_FPR);
MCLabel l_end = emit_label(as); MCLabel l_end = emit_label(as);
if (LJ_32)
emit_gri(as, XG_ARITHi(XOg_ADD), dest, (int32_t)0x80000000);
emit_rr(as, op, dest|REX_64, tmp); emit_rr(as, op, dest|REX_64, tmp);
if (st == IRT_NUM) if (st == IRT_NUM)
emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64_31]); emit_rma(as, XO_ADDSD, tmp, &as->J->k64[LJ_K64_M2P64]);
else else
emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64_31]); emit_rma(as, XO_ADDSS, tmp, &as->J->k32[LJ_K32_M2P64]);
emit_sjcc(as, CC_NS, l_end); emit_sjcc(as, CC_NO, l_end);
emit_rr(as, XO_TEST, dest|REX_64, dest); /* Check if dest negative. */ emit_gmrmi(as, XG_ARITHi(XOg_CMP), dest|REX_64, 1);
emit_rr(as, op, dest|REX_64, tmp); emit_rr(as, op, dest|REX_64, tmp);
ra_left(as, tmp, lref); ra_left(as, tmp, lref);
} else {
if (LJ_64 && irt_isu32(ir->t)) } else
emit_rr(as, XO_MOV, dest, dest); /* Zero hiword. */ #endif
{
emit_mrm(as, op, emit_mrm(as, op,
dest|((LJ_64 && dest|((LJ_64 && irt_is64(ir->t)) ? REX_64 : 0),
(irt_is64(ir->t) || irt_isu32(ir->t))) ? REX_64 : 0),
asm_fuseload(as, lref, RSET_FPR)); asm_fuseload(as, lref, RSET_FPR));
} }
} }
@@ -1020,6 +1019,7 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK); IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH); IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
Reg lo, hi; Reg lo, hi;
int usehi = ra_used(ir);
lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV"); lj_assertA(st == IRT_NUM || st == IRT_FLOAT, "bad type for CONV");
lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV"); lj_assertA(dt == IRT_I64 || dt == IRT_U64, "bad type for CONV");
hi = ra_dest(as, ir, RSET_GPR); hi = ra_dest(as, ir, RSET_GPR);
@@ -1032,21 +1032,24 @@ static void asm_conv_int64_fp(ASMState *as, IRIns *ir)
emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff); emit_gri(as, XG_ARITHi(XOg_AND), lo, 0xf3ff);
} }
if (dt == IRT_U64) { if (dt == IRT_U64) {
/* For inputs in [2^63,2^64-1] add -2^64 and convert again. */ /* For the indefinite result -2^63, add -2^64 and convert again. */
MCLabel l_pop, l_end = emit_label(as); MCLabel l_pop, l_end = emit_label(as);
emit_x87op(as, XI_FPOP); emit_x87op(as, XI_FPOP);
l_pop = emit_label(as); l_pop = emit_label(as);
emit_sjmp(as, l_end); emit_sjmp(as, l_end);
emit_rmro(as, XO_MOV, hi, RID_ESP, 4); if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
if ((as->flags & JIT_F_SSE3)) if ((as->flags & JIT_F_SSE3))
emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
else else
emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0); emit_rmro(as, XO_FISTPq, XOg_FISTPq, RID_ESP, 0);
emit_rma(as, XO_FADDq, XOg_FADDq, &as->J->k64[LJ_K64_M2P64]); emit_rma(as, XO_FADDd, XOg_FADDd, &as->J->k32[LJ_K32_M2P64]);
emit_sjcc(as, CC_NS, l_pop); emit_sjcc(as, CC_NE, l_pop);
emit_rr(as, XO_TEST, hi, hi); /* Check if out-of-range (2^63). */ emit_gmroi(as, XG_ARITHi(XOg_CMP), RID_ESP, 0, 0);
emit_sjcc(as, CC_NO, l_pop);
emit_gmrmi(as, XG_ARITHi(XOg_CMP), hi, 1);
usehi = 1;
} }
emit_rmro(as, XO_MOV, hi, RID_ESP, 4); if (usehi) emit_rmro(as, XO_MOV, hi, RID_ESP, 4);
if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */ if ((as->flags & JIT_F_SSE3)) { /* Truncation is easy with SSE3. */
emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0); emit_rmro(as, XO_FISTTPq, XOg_FISTTPq, RID_ESP, 0);
} else { /* Otherwise set FPU rounding mode to truncate before the store. */ } else { /* Otherwise set FPU rounding mode to truncate before the store. */

View File

@@ -59,9 +59,9 @@ static void bcwrite_ktabk(BCWriteCtx *ctx, cTValue *o, int narrow)
p = lj_strfmt_wuleb128(p, intV(o)); p = lj_strfmt_wuleb128(p, intV(o));
} else if (tvisnum(o)) { } else if (tvisnum(o)) {
if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */ if (!LJ_DUALNUM && narrow) { /* Narrow number constants to integers. */
lua_Number num = numV(o); int64_t i64;
int32_t k = lj_num2int(num); int32_t k;
if (num == (lua_Number)k) { /* -0 is never a constant. */ if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */
*p++ = BCDUMP_KTAB_INT; *p++ = BCDUMP_KTAB_INT;
p = lj_strfmt_wuleb128(p, k); p = lj_strfmt_wuleb128(p, k);
ctx->sb.w = p; ctx->sb.w = p;
@@ -270,9 +270,8 @@ static void bcwrite_knum(BCWriteCtx *ctx, GCproto *pt)
/* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */ /* Write a 33 bit ULEB128 for the int (lsb=0) or loword (lsb=1). */
if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) { if (!LJ_DUALNUM && o->u32.hi != LJ_KEYINDEX) {
/* Narrow number constants to integers. */ /* Narrow number constants to integers. */
lua_Number num = numV(o); int64_t i64;
k = lj_num2int(num); if (lj_num2int_check(numV(o), i64, k)) { /* -0 is never a constant. */
if (num == (lua_Number)k) { /* -0 is never a constant. */
save_int: save_int:
p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u)); p = lj_strfmt_wuleb128(p, 2*(uint32_t)k | ((uint32_t)k&0x80000000u));
if (k < 0) if (k < 0)

View File

@@ -197,18 +197,16 @@ void lj_cconv_ct_ct(CTState *cts, CType *d, CType *s,
else goto err_conv; /* NYI: long double. */ else goto err_conv; /* NYI: long double. */
/* Then convert double to integer. */ /* Then convert double to integer. */
/* The conversion must exactly match the semantics of JIT-compiled code! */ /* The conversion must exactly match the semantics of JIT-compiled code! */
if (dsize < 4 || (dsize == 4 && !(dinfo & CTF_UNSIGNED))) { if (dsize < 8) {
int32_t i = (int32_t)n; int64_t i = lj_num2i64(n); /* Always convert via int64_t. */
if (dsize == 4) *(int32_t *)dp = i; if (dsize == 4) *(int32_t *)dp = i;
else if (dsize == 2) *(int16_t *)dp = (int16_t)i; else if (dsize == 2) *(int16_t *)dp = (int16_t)i;
else *(int8_t *)dp = (int8_t)i; else *(int8_t *)dp = (int8_t)i;
} else if (dsize == 4) {
*(uint32_t *)dp = (uint32_t)n;
} else if (dsize == 8) { } else if (dsize == 8) {
if (!(dinfo & CTF_UNSIGNED)) if ((dinfo & CTF_UNSIGNED))
*(int64_t *)dp = (int64_t)n;
else
*(uint64_t *)dp = lj_num2u64(n); *(uint64_t *)dp = lj_num2u64(n);
else
*(int64_t *)dp = lj_num2i64(n);
} else { } else {
goto err_conv; /* NYI: conversion to >64 bit integers. */ goto err_conv; /* NYI: conversion to >64 bit integers. */
} }

View File

@@ -133,12 +133,7 @@ collect_attrib:
idx = (ptrdiff_t)intV(key); idx = (ptrdiff_t)intV(key);
goto integer_key; goto integer_key;
} else if (tvisnum(key)) { /* Numeric key. */ } else if (tvisnum(key)) { /* Numeric key. */
#ifdef _MSC_VER idx = lj_num2int_type(numV(key), ptrdiff_t);
/* Workaround for MSVC bug. */
volatile
#endif
lua_Number n = numV(key);
idx = LJ_64 ? (ptrdiff_t)n : (ptrdiff_t)lj_num2int(n);
integer_key: integer_key:
if (ctype_ispointer(ct->info)) { if (ctype_ispointer(ct->info)) {
CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */ CTSize sz = lj_ctype_size(cts, ctype_cid(ct->info)); /* Element size. */

View File

@@ -445,7 +445,20 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
/* fallthrough */ /* fallthrough */
case CCX(I, F): case CCX(I, F):
if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi; if (dt == IRT_CDATA || st == IRT_CDATA) goto err_nyi;
sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, st, IRCONV_ANY); conv_I_F:
#if LJ_SOFTFP || LJ_32
if (st == IRT_FLOAT) { /* Uncommon. Simplify split backends. */
sp = emitconv(sp, IRT_NUM, IRT_FLOAT, 0);
st = IRT_NUM;
}
#endif
if (dsize < 8) {
lj_needsplit(J);
sp = emitconv(sp, IRT_I64, st, IRCONV_ANY);
sp = emitconv(sp, dsize < 4 ? IRT_INT : dt, IRT_I64, 0);
} else {
sp = emitconv(sp, dt, st, IRCONV_ANY);
}
goto xstore; goto xstore;
case CCX(I, P): case CCX(I, P):
case CCX(I, A): case CCX(I, A):
@@ -523,10 +536,9 @@ static TRef crec_ct_ct(jit_State *J, CType *d, CType *s, TRef dp, TRef sp,
goto xstore; goto xstore;
case CCX(P, F): case CCX(P, F):
if (st == IRT_CDATA) goto err_nyi; if (st == IRT_CDATA) goto err_nyi;
/* The signed conversion is cheaper. x64 really has 47 bit pointers. */ /* The signed 64 bit conversion is cheaper. */
sp = emitconv(sp, (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32, dt = (LJ_64 && dsize == 8) ? IRT_I64 : IRT_U32;
st, IRCONV_ANY); goto conv_I_F;
goto xstore;
/* Destination is an array. */ /* Destination is an array. */
case CCX(A, A): case CCX(A, A):
@@ -1878,7 +1890,7 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
if (J->base[0] && tref_iscdata(J->base[1])) { if (J->base[0] && tref_iscdata(J->base[1])) {
tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64), tsh = crec_bit64_arg(J, ctype_get(cts, CTID_INT64),
J->base[1], &rd->argv[1]); J->base[1], &rd->argv[1]);
if (!tref_isinteger(tsh)) if (LJ_32 && !tref_isinteger(tsh))
tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0); tsh = emitconv(tsh, IRT_INT, tref_type(tsh), 0);
J->base[1] = tsh; J->base[1] = tsh;
} }
@@ -1886,15 +1898,17 @@ int LJ_FASTCALL recff_bit64_shift(jit_State *J, RecordFFData *rd)
if (id) { if (id) {
TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]); TRef tr = crec_bit64_arg(J, ctype_get(cts, id), J->base[0], &rd->argv[0]);
uint32_t op = rd->data; uint32_t op = rd->data;
IRType t;
if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]); if (!tsh) tsh = lj_opt_narrow_tobit(J, J->base[1]);
t = tref_isinteger(tsh) ? IRT_INT : tref_type(tsh);
if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) && if (!(op < IR_BROL ? LJ_TARGET_MASKSHIFT : LJ_TARGET_MASKROT) &&
!tref_isk(tsh)) !tref_isk(tsh))
tsh = emitir(IRTI(IR_BAND), tsh, lj_ir_kint(J, 63)); tsh = emitir(IRT(IR_BAND, t), tsh, lj_ir_kint(J, 63));
#ifdef LJ_TARGET_UNIFYROT #ifdef LJ_TARGET_UNIFYROT
if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) { if (op == (LJ_TARGET_UNIFYROT == 1 ? IR_BROR : IR_BROL)) {
op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR; op = LJ_TARGET_UNIFYROT == 1 ? IR_BROL : IR_BROR;
tsh = emitir(IRTI(IR_NEG), tsh, tsh); tsh = emitir(IRT(IR_NEG, t), tsh, tsh);
} }
#endif #endif
tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh); tr = emitir(IRT(op, id-CTID_INT64+IRT_I64), tr, tsh);
J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr); J->base[0] = emitir(IRTG(IR_CNEWI, IRT_CDATA), lj_ir_kint(J, id), tr);

View File

@@ -127,6 +127,7 @@ typedef uintptr_t BloomFilter;
#define LJ_INLINE inline #define LJ_INLINE inline
#define LJ_AINLINE inline __attribute__((always_inline)) #define LJ_AINLINE inline __attribute__((always_inline))
#define LJ_NOINLINE __attribute__((noinline)) #define LJ_NOINLINE __attribute__((noinline))
#define LJ_CONSTF __attribute__((nothrow,const))
#if defined(__ELF__) || defined(__MACH__) || defined(__psp2__) #if defined(__ELF__) || defined(__MACH__) || defined(__psp2__)
#if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__)) #if !((defined(__sun__) && defined(__svr4__)) || defined(__CELLOS_LV2__))
@@ -245,6 +246,7 @@ static LJ_AINLINE uint32_t lj_getu32(const void *p)
#define LJ_INLINE __inline #define LJ_INLINE __inline
#define LJ_AINLINE __forceinline #define LJ_AINLINE __forceinline
#define LJ_NOINLINE __declspec(noinline) #define LJ_NOINLINE __declspec(noinline)
#define LJ_CONSTF __declspec(nothrow noalias)
#if defined(_M_IX86) #if defined(_M_IX86)
#define LJ_FASTCALL __fastcall #define LJ_FASTCALL __fastcall
#endif #endif

View File

@@ -70,7 +70,7 @@ static int32_t argv2int(jit_State *J, TValue *o)
{ {
if (!lj_strscan_numberobj(o)) if (!lj_strscan_numberobj(o))
lj_trace_err(J, LJ_TRERR_BADTYPE); lj_trace_err(J, LJ_TRERR_BADTYPE);
return tvisint(o) ? intV(o) : lj_num2int(numV(o)); return numberVint(o);
} }
/* Get runtime value of string argument. */ /* Get runtime value of string argument. */
@@ -586,7 +586,7 @@ static void LJ_FASTCALL recff_math_round(jit_State *J, RecordFFData *rd)
/* Result is integral (or NaN/Inf), but may not fit an int32_t. */ /* Result is integral (or NaN/Inf), but may not fit an int32_t. */
if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */ if (LJ_DUALNUM) { /* Try to narrow using a guarded conversion to int. */
lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data); lua_Number n = lj_vm_foldfpm(numberVnum(&rd->argv[0]), rd->data);
if (n == (lua_Number)lj_num2int(n)) if (lj_num2int_ok(n))
tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK); tr = emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_CHECK);
} }
J->base[0] = tr; J->base[0] = tr;

View File

@@ -248,28 +248,15 @@ TRef lj_ir_kint64(jit_State *J, uint64_t u64)
return lj_ir_k64(J, IR_KINT64, u64); return lj_ir_k64(J, IR_KINT64, u64);
} }
/* Check whether a number is int and return it. -0 is NOT considered an int. */
static int numistrueint(lua_Number n, int32_t *kp)
{
int32_t k = lj_num2int(n);
if (n == (lua_Number)k) {
if (kp) *kp = k;
if (k == 0) { /* Special check for -0. */
TValue tv;
setnumV(&tv, n);
if (tv.u32.hi != 0)
return 0;
}
return 1;
}
return 0;
}
/* Intern number as int32_t constant if possible, otherwise as FP constant. */ /* Intern number as int32_t constant if possible, otherwise as FP constant. */
TRef lj_ir_knumint(jit_State *J, lua_Number n) TRef lj_ir_knumint(jit_State *J, lua_Number n)
{ {
int64_t i64;
int32_t k; int32_t k;
if (numistrueint(n, &k)) TValue tv;
setnumV(&tv, n);
/* -0 is NOT considered an int. */
if (lj_num2int_check(n, i64, k) && !tvismzero(&tv))
return lj_ir_kint(J, k); return lj_ir_kint(J, k);
else else
return lj_ir_knum(J, n); return lj_ir_knum(J, n);

View File

@@ -233,20 +233,15 @@ typedef struct CCallInfo {
_(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \ _(SOFTFP_MIPS64, lj_vm_tointg, 1, N, INT, 0) \
_(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \ _(SOFTFP_FFI, softfp_ui2d, 1, N, NUM, 0) \
_(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \ _(SOFTFP_FFI, softfp_f2d, 1, N, NUM, 0) \
_(SOFTFP_FFI, softfp_d2ui, 1, N, INT, XA_FP32) \
_(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \ _(SOFTFP_FFI, softfp_d2f, 1, N, FLOAT, XA_FP32) \
_(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \ _(SOFTFP_FFI, softfp_i2f, 1, N, FLOAT, 0) \
_(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \ _(SOFTFP_FFI, softfp_ui2f, 1, N, FLOAT, 0) \
_(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \ _(SOFTFP_FFI, softfp_f2i, 1, N, INT, 0) \
_(SOFTFP_FFI, softfp_f2ui, 1, N, INT, 0) \
_(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \ _(FP64_FFI, fp64_l2d, 1, N, NUM, XA_64) \
_(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \ _(FP64_FFI, fp64_ul2d, 1, N, NUM, XA_64) \
_(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \ _(FP64_FFI, fp64_l2f, 1, N, FLOAT, XA_64) \
_(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \ _(FP64_FFI, fp64_ul2f, 1, N, FLOAT, XA_64) \
_(FP64_FFI, fp64_d2l, 1, N, I64, XA_FP) \ _(FP64_FFI, lj_vm_num2u64, 1, N, U64, XA_FP) \
_(FP64_FFI, fp64_d2ul, 1, N, U64, XA_FP) \
_(FP64_FFI, fp64_f2l, 1, N, I64, 0) \
_(FP64_FFI, fp64_f2ul, 1, N, U64, 0) \
_(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ _(FFI, lj_carith_divi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
_(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \ _(FFI, lj_carith_divu64, 2, N, U64, XA2_64|CCI_NOFPRCLOBBER) \
_(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \ _(FFI, lj_carith_modi64, 2, N, I64, XA2_64|CCI_NOFPRCLOBBER) \
@@ -291,27 +286,14 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
#define softfp_d2i __aeabi_d2iz #define softfp_d2i __aeabi_d2iz
#define softfp_ui2d __aeabi_ui2d #define softfp_ui2d __aeabi_ui2d
#define softfp_f2d __aeabi_f2d #define softfp_f2d __aeabi_f2d
#define softfp_d2ui __aeabi_d2uiz
#define softfp_d2f __aeabi_d2f #define softfp_d2f __aeabi_d2f
#define softfp_i2f __aeabi_i2f #define softfp_i2f __aeabi_i2f
#define softfp_ui2f __aeabi_ui2f #define softfp_ui2f __aeabi_ui2f
#define softfp_f2i __aeabi_f2iz #define softfp_f2i __aeabi_f2iz
#define softfp_f2ui __aeabi_f2uiz
#define fp64_l2d __aeabi_l2d #define fp64_l2d __aeabi_l2d
#define fp64_ul2d __aeabi_ul2d #define fp64_ul2d __aeabi_ul2d
#define fp64_l2f __aeabi_l2f #define fp64_l2f __aeabi_l2f
#define fp64_ul2f __aeabi_ul2f #define fp64_ul2f __aeabi_ul2f
#if LJ_TARGET_IOS
#define fp64_d2l __fixdfdi
#define fp64_d2ul __fixunsdfdi
#define fp64_f2l __fixsfdi
#define fp64_f2ul __fixunssfdi
#else
#define fp64_d2l __aeabi_d2lz
#define fp64_d2ul __aeabi_d2ulz
#define fp64_f2l __aeabi_f2lz
#define fp64_f2ul __aeabi_f2ulz
#endif
#elif LJ_TARGET_MIPS || LJ_TARGET_PPC #elif LJ_TARGET_MIPS || LJ_TARGET_PPC
#define softfp_add __adddf3 #define softfp_add __adddf3
#define softfp_sub __subdf3 #define softfp_sub __subdf3
@@ -322,12 +304,10 @@ LJ_DATA const CCallInfo lj_ir_callinfo[IRCALL__MAX+1];
#define softfp_d2i __fixdfsi #define softfp_d2i __fixdfsi
#define softfp_ui2d __floatunsidf #define softfp_ui2d __floatunsidf
#define softfp_f2d __extendsfdf2 #define softfp_f2d __extendsfdf2
#define softfp_d2ui __fixunsdfsi
#define softfp_d2f __truncdfsf2 #define softfp_d2f __truncdfsf2
#define softfp_i2f __floatsisf #define softfp_i2f __floatsisf
#define softfp_ui2f __floatunsisf #define softfp_ui2f __floatunsisf
#define softfp_f2i __fixsfsi #define softfp_f2i __fixsfsi
#define softfp_f2ui __fixunssfsi
#else #else
#error "Missing soft-float definitions for target architecture" #error "Missing soft-float definitions for target architecture"
#endif #endif
@@ -341,12 +321,10 @@ extern int32_t softfp_d2i(double a);
#if LJ_HASFFI #if LJ_HASFFI
extern double softfp_ui2d(uint32_t a); extern double softfp_ui2d(uint32_t a);
extern double softfp_f2d(float a); extern double softfp_f2d(float a);
extern uint32_t softfp_d2ui(double a);
extern float softfp_d2f(double a); extern float softfp_d2f(double a);
extern float softfp_i2f(int32_t a); extern float softfp_i2f(int32_t a);
extern float softfp_ui2f(uint32_t a); extern float softfp_ui2f(uint32_t a);
extern int32_t softfp_f2i(float a); extern int32_t softfp_f2i(float a);
extern uint32_t softfp_f2ui(float a);
#endif #endif
#if LJ_TARGET_MIPS #if LJ_TARGET_MIPS
extern double lj_vm_sfmin(double a, double b); extern double lj_vm_sfmin(double a, double b);
@@ -360,10 +338,6 @@ extern double lj_vm_sfmax(double a, double b);
#define fp64_ul2d __floatundidf #define fp64_ul2d __floatundidf
#define fp64_l2f __floatdisf #define fp64_l2f __floatdisf
#define fp64_ul2f __floatundisf #define fp64_ul2f __floatundisf
#define fp64_d2l __fixdfdi
#define fp64_d2ul __fixunsdfdi
#define fp64_f2l __fixsfdi
#define fp64_f2ul __fixunssfdi
#else #else
#error "Missing fp64 helper definitions for this compiler" #error "Missing fp64 helper definitions for this compiler"
#endif #endif
@@ -374,10 +348,6 @@ extern double fp64_l2d(int64_t a);
extern double fp64_ul2d(uint64_t a); extern double fp64_ul2d(uint64_t a);
extern float fp64_l2f(int64_t a); extern float fp64_l2f(int64_t a);
extern float fp64_ul2f(uint64_t a); extern float fp64_ul2f(uint64_t a);
extern int64_t fp64_d2l(double a);
extern uint64_t fp64_d2ul(double a);
extern int64_t fp64_f2l(float a);
extern uint64_t fp64_f2ul(float a);
#endif #endif
#endif #endif

View File

@@ -350,22 +350,18 @@ enum {
}; };
enum { enum {
#if LJ_TARGET_X64 || LJ_TARGET_MIPS64
LJ_K64_M2P64, /* -2^64 */
#endif
#if LJ_TARGET_X86ORX64 #if LJ_TARGET_X86ORX64
LJ_K64_TOBIT, /* 2^52 + 2^51 */ LJ_K64_TOBIT, /* 2^52 + 2^51 */
LJ_K64_2P64, /* 2^64 */ LJ_K64_2P64, /* 2^64 */
LJ_K64_M2P64, /* -2^64 */
#if LJ_32
LJ_K64_M2P64_31, /* -2^64 or -2^31 */
#else
LJ_K64_M2P64_31 = LJ_K64_M2P64,
#endif #endif
#if LJ_TARGET_MIPS64
LJ_K64_2P63, /* 2^63 */
#endif #endif
#if LJ_TARGET_MIPS #if LJ_TARGET_MIPS
LJ_K64_2P31, /* 2^31 */ LJ_K64_2P31, /* 2^31 */
#if LJ_64
LJ_K64_2P63, /* 2^63 */
LJ_K64_M2P64, /* -2^64 */
#endif
#endif #endif
#if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64 #if LJ_TARGET_ARM64 || LJ_TARGET_MIPS64
LJ_K64_VM_EXIT_HANDLER, LJ_K64_VM_EXIT_HANDLER,
@@ -376,20 +372,19 @@ enum {
#define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS) #define LJ_K64__USED (LJ_TARGET_X86ORX64 || LJ_TARGET_ARM64 || LJ_TARGET_MIPS)
enum { enum {
#if LJ_TARGET_X86ORX64 #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
LJ_K32_M2P64_31, /* -2^64 or -2^31 */ LJ_K32_M2P64, /* -2^64 */
#endif
#if LJ_TARGET_MIPS64
LJ_K32_2P63, /* 2^63 */
#endif #endif
#if LJ_TARGET_PPC #if LJ_TARGET_PPC
LJ_K32_2P52_2P31, /* 2^52 + 2^31 */ LJ_K32_2P52_2P31, /* 2^52 + 2^31 */
LJ_K32_2P52, /* 2^52 */ LJ_K32_2P52, /* 2^52 */
#endif #endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS #if LJ_TARGET_PPC
LJ_K32_2P31, /* 2^31 */ LJ_K32_2P31, /* 2^31 */
#endif #endif
#if LJ_TARGET_MIPS64
LJ_K32_2P63, /* 2^63 */
LJ_K32_M2P64, /* -2^64 */
#endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 #if LJ_TARGET_PPC || LJ_TARGET_MIPS32
LJ_K32_VM_EXIT_HANDLER, LJ_K32_VM_EXIT_HANDLER,
LJ_K32_VM_EXIT_INTERP, LJ_K32_VM_EXIT_INTERP,

View File

@@ -349,7 +349,7 @@ int32_t lj_lib_checkintrange(lua_State *L, int narg, int32_t a, int32_t b)
** integer overflow. Overflow detection still works, since all FPUs ** integer overflow. Overflow detection still works, since all FPUs
** return either MININT or MAXINT, which is then out of range. ** return either MININT or MAXINT, which is then out of range.
*/ */
int32_t i = (int32_t)numV(o); int32_t i = lj_num2int(numV(o));
if (i >= a && i <= b) return i; if (i >= a && i <= b) return i;
#if LJ_HASFFI #if LJ_HASFFI
} else if (tviscdata(o)) { } else if (tviscdata(o)) {

View File

@@ -465,7 +465,8 @@ void LJ_FASTCALL lj_meta_for(lua_State *L, TValue *o)
if (tvisint(o+i)) { if (tvisint(o+i)) {
k[i] = intV(o+i); nint++; k[i] = intV(o+i); nint++;
} else { } else {
k[i] = lj_num2int(numV(o+i)); nint += ((lua_Number)k[i] == numV(o+i)); int64_t i64;
if (lj_num2int_check(numV(o+i), i64, k[i])) nint++;
} }
} }
if (nint == 3) { /* Narrow to integers. */ if (nint == 3) { /* Narrow to integers. */

View File

@@ -981,43 +981,68 @@ static LJ_AINLINE void copyTV(lua_State *L, TValue *o1, const TValue *o2)
/* -- Number to integer conversion ---------------------------------------- */ /* -- Number to integer conversion ---------------------------------------- */
#if LJ_SOFTFP
LJ_ASMF int32_t lj_vm_tobit(double x);
#if LJ_TARGET_MIPS64
LJ_ASMF int32_t lj_vm_tointg(double x);
#endif
#endif
static LJ_AINLINE int32_t lj_num2bit(lua_Number n)
{
#if LJ_SOFTFP
return lj_vm_tobit(n);
#else
TValue o;
o.n = n + 6755399441055744.0; /* 2^52 + 2^51 */
return (int32_t)o.u32.lo;
#endif
}
#define lj_num2int(n) ((int32_t)(n))
/* /*
** This must match the JIT backend behavior. In particular for archs ** The C standard leaves many aspects of FP to integer conversions as
** that don't have a common hardware instruction for this conversion. ** undefined behavior. Portability is a mess, hardware support varies,
** Note that signed FP to unsigned int conversions have an undefined ** and modern C compilers are like a box of chocolates -- you never know
** result and should never be relied upon in portable FFI code. ** what you're gonna get.
** See also: C99 or C11 standard, 6.3.1.4, footnote of (1). **
** However, we need 100% matching behavior between the interpreter (asm + C),
** optimizations (C) and the code generated by the JIT compiler (asm).
** Mixing Lua numbers with FFI numbers creates some extra requirements.
**
** These conversions have been moved to assembler code, even if they seem
** trivial, to foil unanticipated C compiler 'optimizations' with the
** surrounding code. Only the unchecked double to int32_t conversion
** is still in C, because it ought to be pretty safe -- we'll see.
**
** These macros also serve to document all places where FP to integer
** conversions happen.
*/ */
static LJ_AINLINE uint64_t lj_num2u64(lua_Number n)
{ /* Unchecked double to int32_t conversion. */
#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS #define lj_num2int(n) ((int32_t)(n))
int64_t i = (int64_t)n;
if (i < 0) i = (int64_t)(n - 18446744073709551616.0); /* Unchecked double to arch/os-dependent signed integer type conversion.
return (uint64_t)i; ** This assumes the 32/64-bit signed conversions are NOT range-extended.
#else */
return (uint64_t)n; #define lj_num2int_type(n, tp) ((tp)(n))
#endif
} /* Convert a double to int32_t and check for exact conversion.
** Returns the zero-extended int32_t on success. -0 is OK, too.
** Returns 0x8000000080000000LL on failure (simplifies range checks).
*/
LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x);
/* Check for exact conversion only, without storing the result. */
#define lj_num2int_ok(x) (lj_vm_num2int_check((x)) >= 0)
/* Check for exact conversion and conditionally store result.
** Note: conditions that fail for 0x80000000 may check only the lower
** 32 bits. This generates good code for both 32 and 64 bit archs.
*/
#define lj_num2int_cond(x, i64, i, cond) \
(i64 = lj_vm_num2int_check((x)), cond ? (i = (int32_t)i64, 1) : 0)
/* This is the generic check for a full-range int32_t result. */
#define lj_num2int_check(x, i64, i) \
lj_num2int_cond((x), i64, i, i64 >= 0)
/* Predictable conversion from double to int64_t or uint64_t.
** Truncates towards zero. Out-of-range values, NaN and +-Inf return
** an arch-dependent result, but do not cause C undefined behavior.
** The uint64_t conversion accepts the union of the unsigned + signed range.
*/
LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x);
LJ_ASMF LJ_CONSTF int64_t lj_vm_num2u64(double x);
#define lj_num2i64(x) (lj_vm_num2i64((x)))
#define lj_num2u64(x) (lj_vm_num2u64((x)))
/* Lua BitOp conversion semantics use the 2^52 + 2^51 trick. */
LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x);
#define lj_num2bit(x) lj_vm_tobit((x))
static LJ_AINLINE int32_t numberVint(cTValue *o) static LJ_AINLINE int32_t numberVint(cTValue *o)
{ {

View File

@@ -303,17 +303,18 @@ LJFOLDF(kfold_intarith)
return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o)); return INTFOLD(kfold_intop(fleft->i, fright->i, (IROp)fins->o));
} }
/* Forward declaration. */
static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
IROp op);
LJFOLD(ADDOV KINT KINT) LJFOLD(ADDOV KINT KINT)
LJFOLD(SUBOV KINT KINT) LJFOLD(SUBOV KINT KINT)
LJFOLD(MULOV KINT KINT) LJFOLD(MULOV KINT KINT)
LJFOLDF(kfold_intovarith) LJFOLDF(kfold_intovarith)
{ {
lua_Number n = lj_vm_foldarith((lua_Number)fleft->i, (lua_Number)fright->i, int64_t k = kfold_int64arith(J, (int64_t)fleft->i, (int64_t)fright->i,
fins->o - IR_ADDOV); (IROp)((int)fins->o - (int)IR_ADDOV + (int)IR_ADD));
int32_t k = lj_num2int(n); return checki32(k) ? INTFOLD(k) : FAILFOLD;
if (n != (lua_Number)k)
return FAILFOLD;
return INTFOLD(k);
} }
LJFOLD(BNOT KINT) LJFOLD(BNOT KINT)
@@ -368,11 +369,11 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
IROp op) IROp op)
{ {
UNUSED(J); UNUSED(J);
#if LJ_HASFFI
switch (op) { switch (op) {
case IR_ADD: k1 += k2; break; case IR_ADD: k1 += k2; break;
case IR_SUB: k1 -= k2; break; case IR_SUB: k1 -= k2; break;
case IR_MUL: k1 *= k2; break; case IR_MUL: k1 *= k2; break;
#if LJ_HASFFI
case IR_BAND: k1 &= k2; break; case IR_BAND: k1 &= k2; break;
case IR_BOR: k1 |= k2; break; case IR_BOR: k1 |= k2; break;
case IR_BXOR: k1 ^= k2; break; case IR_BXOR: k1 ^= k2; break;
@@ -382,11 +383,8 @@ static uint64_t kfold_int64arith(jit_State *J, uint64_t k1, uint64_t k2,
case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break; case IR_BROL: k1 = lj_rol(k1, (k2 & 63)); break;
case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break; case IR_BROR: k1 = lj_ror(k1, (k2 & 63)); break;
default: lj_assertJ(0, "bad IR op %d", op); break; default: lj_assertJ(0, "bad IR op %d", op); break;
}
#else
UNUSED(k2); UNUSED(op);
lj_assertJ(0, "FFI IR op without FFI");
#endif #endif
}
return k1; return k1;
} }
@@ -883,8 +881,11 @@ LJFOLD(CONV KNUM IRCONV_INT_NUM)
LJFOLDF(kfold_conv_knum_int_num) LJFOLDF(kfold_conv_knum_int_num)
{ {
lua_Number n = knumleft; lua_Number n = knumleft;
int32_t k = lj_num2int(n); if (irt_isguard(fins->t)) {
if (irt_isguard(fins->t) && n != (lua_Number)k) { int64_t i64;
int32_t k;
if (lj_num2int_check(n, i64, k))
return INTFOLD(k);
/* We're about to create a guard which always fails, like CONV +1.5. /* We're about to create a guard which always fails, like CONV +1.5.
** Some pathological loops cause this during LICM, e.g.: ** Some pathological loops cause this during LICM, e.g.:
** local x,k,t = 0,1.5,{1,[1.5]=2} ** local x,k,t = 0,1.5,{1,[1.5]=2}
@@ -892,27 +893,15 @@ LJFOLDF(kfold_conv_knum_int_num)
** assert(x == 300) ** assert(x == 300)
*/ */
return FAILFOLD; return FAILFOLD;
} else {
return INTFOLD(lj_num2int(n));
} }
return INTFOLD(k);
}
LJFOLD(CONV KNUM IRCONV_U32_NUM)
LJFOLDF(kfold_conv_knum_u32_num)
{
#ifdef _MSC_VER
{ /* Workaround for MSVC bug. */
volatile uint32_t u = (uint32_t)knumleft;
return INTFOLD((int32_t)u);
}
#else
return INTFOLD((int32_t)(uint32_t)knumleft);
#endif
} }
LJFOLD(CONV KNUM IRCONV_I64_NUM) LJFOLD(CONV KNUM IRCONV_I64_NUM)
LJFOLDF(kfold_conv_knum_i64_num) LJFOLDF(kfold_conv_knum_i64_num)
{ {
return INT64FOLD((uint64_t)(int64_t)knumleft); return INT64FOLD((uint64_t)lj_num2i64(knumleft));
} }
LJFOLD(CONV KNUM IRCONV_U64_NUM) LJFOLD(CONV KNUM IRCONV_U64_NUM)
@@ -1135,7 +1124,6 @@ LJFOLDF(shortcut_conv_num_int)
} }
LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */ LJFOLD(CONV CONV IRCONV_INT_NUM) /* _INT */
LJFOLD(CONV CONV IRCONV_U32_NUM) /* _U32 */
LJFOLDF(simplify_conv_int_num) LJFOLDF(simplify_conv_int_num)
{ {
/* Fold even across PHI to avoid expensive num->int conversions in loop. */ /* Fold even across PHI to avoid expensive num->int conversions in loop. */
@@ -1334,6 +1322,24 @@ LJFOLDF(narrow_convert)
return lj_opt_narrow_convert(J); return lj_opt_narrow_convert(J);
} }
LJFOLD(XSTORE any CONV)
LJFOLDF(xstore_conv)
{
#if LJ_64
PHIBARRIER(fright);
if (!irt_is64(fins->t) &&
irt_type(fins->t) == (IRType)((fright->op2&IRCONV_DSTMASK)>>IRCONV_DSH) &&
((fright->op2&IRCONV_SRCMASK) == IRT_I64 ||
(fright->op2&IRCONV_SRCMASK) == IRT_U64)) {
fins->op2 = fright->op1;
return RETRYFOLD;
}
#else
UNUSED(J);
#endif
return NEXTFOLD;
}
/* -- Integer algebraic simplifications ----------------------------------- */ /* -- Integer algebraic simplifications ----------------------------------- */
LJFOLD(ADD any KINT) LJFOLD(ADD any KINT)

View File

@@ -281,22 +281,20 @@ static int narrow_conv_backprop(NarrowConv *nc, IRRef ref, int depth)
return 0; return 0;
} else if (ir->o == IR_KNUM) { /* Narrow FP constant. */ } else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
lua_Number n = ir_knum(ir)->n; lua_Number n = ir_knum(ir)->n;
int64_t i64;
int32_t k;
if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) { if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
/* Allows a wider range of constants. */ /* Allows a wider range of constants, if const doesn't lose precision. */
int64_t k64 = (int64_t)n; if (lj_num2int_check(n, i64, k)) {
if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */
*nc->sp++ = NARROWINS(NARROW_INT, 0);
*nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */
return 0;
}
} else {
int32_t k = lj_num2int(n);
/* Only if constant is a small integer. */
if (checki16(k) && n == (lua_Number)k) {
*nc->sp++ = NARROWINS(NARROW_INT, 0); *nc->sp++ = NARROWINS(NARROW_INT, 0);
*nc->sp++ = (NarrowIns)k; *nc->sp++ = (NarrowIns)k;
return 0; return 0;
} }
} else if (lj_num2int_cond(n, i64, k, checki16((int32_t)i64))) {
/* Only if constant is a small integer. */
*nc->sp++ = NARROWINS(NARROW_INT, 0);
*nc->sp++ = (NarrowIns)k;
return 0;
} }
return 10; /* Never narrow other FP constants (this is rare). */ return 10; /* Never narrow other FP constants (this is rare). */
} }
@@ -512,12 +510,6 @@ TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
/* -- Narrowing of arithmetic operators ----------------------------------- */ /* -- Narrowing of arithmetic operators ----------------------------------- */
/* Check whether a number fits into an int32_t (-0 is ok, too). */
static int numisint(lua_Number n)
{
return (n == (lua_Number)lj_num2int(n));
}
/* Convert string to number. Error out for non-numeric string values. */ /* Convert string to number. Error out for non-numeric string values. */
static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o) static TRef conv_str_tonum(jit_State *J, TRef tr, TValue *o)
{ {
@@ -539,8 +531,8 @@ TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
/* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */ /* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) && if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
tref_isinteger(rb) && tref_isinteger(rc) && tref_isinteger(rb) && tref_isinteger(rc) &&
numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc), lj_num2int_ok(lj_vm_foldarith(numberVnum(vb), numberVnum(vc),
(int)op - (int)IR_ADD))) (int)op - (int)IR_ADD)))
return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc); return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc);
if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT); if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT);
if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT); if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
@@ -591,7 +583,7 @@ TRef lj_opt_narrow_mod(jit_State *J, TRef rb, TRef rc, TValue *vb, TValue *vc)
static int narrow_forl(jit_State *J, cTValue *o) static int narrow_forl(jit_State *J, cTValue *o)
{ {
if (tvisint(o)) return 1; if (tvisint(o)) return 1;
if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o)); if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return lj_num2int_ok(numV(o));
return 0; return 0;
} }

View File

@@ -573,13 +573,9 @@ static void split_ir(jit_State *J)
case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */ case IR_CONV: { /* Conversion to 64 bit integer. Others handled below. */
IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP #if LJ_SOFTFP
lj_assertJ(st != IRT_FLOAT, "bad CONV *64.float emitted");
if (st == IRT_NUM) { /* NUM to 64 bit int conv. */ if (st == IRT_NUM) { /* NUM to 64 bit int conv. */
hi = split_call_l(J, hisubst, oir, ir, hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_num2u64);
irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
} else if (st == IRT_FLOAT) { /* FLOAT to 64 bit int conv. */
nir->o = IR_CALLN;
nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
} }
#else #else
if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */ if (st == IRT_NUM || st == IRT_FLOAT) { /* FP to 64 bit int conv. */
@@ -692,8 +688,9 @@ static void split_ir(jit_State *J)
nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f; nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
} }
} else if (st == IRT_FLOAT) { } else if (st == IRT_FLOAT) {
lj_assertJ(!irt_isu32(ir->t), "bad CONV u32.fp emitted");
nir->o = IR_CALLN; nir->o = IR_CALLN;
nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui; nir->op2 = IRCALL_softfp_f2i;
} else } else
#endif #endif
#if LJ_SOFTFP #if LJ_SOFTFP
@@ -705,9 +702,7 @@ static void split_ir(jit_State *J)
} else { } else {
split_call_l(J, hisubst, oir, ir, split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI #if LJ_32 && LJ_HASFFI
st == IRT_NUM ? st == IRT_NUM ? IRCALL_softfp_d2i : IRCALL_softfp_f2i
(irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
(irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else #else
IRCALL_softfp_d2i IRCALL_softfp_d2i
#endif #endif

View File

@@ -522,9 +522,9 @@ static void expr_toreg_nobranch(FuncState *fs, ExpDesc *e, BCReg reg)
ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv)); ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)intV(tv));
else else
#else #else
lua_Number n = expr_numberV(e); int64_t i64;
int32_t k = lj_num2int(n); int32_t k;
if (checki16(k) && n == (lua_Number)k) if (lj_num2int_cond(expr_numberV(e), i64, k, checki16((int32_t)i64)))
ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k); ins = BCINS_AD(BC_KSHORT, reg, (BCReg)(uint16_t)k);
else else
#endif #endif
@@ -782,8 +782,9 @@ static int foldarith(BinOpr opr, ExpDesc *e1, ExpDesc *e2)
setnumV(&o, n); setnumV(&o, n);
if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */ if (tvisnan(&o) || tvismzero(&o)) return 0; /* Avoid NaN and -0 as consts. */
if (LJ_DUALNUM) { if (LJ_DUALNUM) {
int32_t k = lj_num2int(n); int64_t i64;
if ((lua_Number)k == n) { int32_t k;
if (lj_num2int_check(n, i64, k)) {
setintV(&e1->u.nval, k); setintV(&e1->u.nval, k);
return 1; return 1;
} }
@@ -1386,10 +1387,10 @@ static void fs_fixup_k(FuncState *fs, GCproto *pt, void *kptr)
if (tvisnum(&n->key)) { if (tvisnum(&n->key)) {
TValue *tv = &((TValue *)kptr)[kidx]; TValue *tv = &((TValue *)kptr)[kidx];
if (LJ_DUALNUM) { if (LJ_DUALNUM) {
lua_Number nn = numV(&n->key); int64_t i64;
int32_t k = lj_num2int(nn); int32_t k;
lj_assertFS(!tvismzero(&n->key), "unexpected -0 key"); lj_assertFS(!tvismzero(&n->key), "unexpected -0 key");
if ((lua_Number)k == nn) if (lj_num2int_check(numV(&n->key), i64, k))
setintV(tv, k); setintV(tv, k);
else else
*tv = n->key; *tv = n->key;
@@ -1656,9 +1657,9 @@ static void expr_index(FuncState *fs, ExpDesc *t, ExpDesc *e)
} }
} }
#else #else
lua_Number n = expr_numberV(e); int64_t i64;
int32_t k = lj_num2int(n); int32_t k;
if (checku8(k) && n == (lua_Number)k) { if (lj_num2int_cond(expr_numberV(e), i64, k, checku8((int32_t)i64))) {
t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */ t->u.s.aux = BCMAX_C+1+(uint32_t)k; /* 256..511: const byte key */
return; return;
} }

View File

@@ -351,9 +351,14 @@ static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
} else { } else {
cTValue *tv = proto_knumtv(J->pt, bc_d(ins)); cTValue *tv = proto_knumtv(J->pt, bc_d(ins));
if (t == IRT_INT) { if (t == IRT_INT) {
int32_t k = numberVint(tv); if (tvisint(tv)) {
if (tvisint(tv) || numV(tv) == (lua_Number)k) /* -0 is ok here. */ return lj_ir_kint(J, intV(tv));
return lj_ir_kint(J, k); } else {
int64_t i64;
int32_t k;
if (lj_num2int_check(numV(tv), i64, k)) /* -0 is ok here. */
return lj_ir_kint(J, k);
}
return 0; /* Type mismatch. */ return 0; /* Type mismatch. */
} else { } else {
return lj_ir_knum(J, numberVnum(tv)); return lj_ir_knum(J, numberVnum(tv));
@@ -1426,9 +1431,13 @@ static TRef rec_idx_key(jit_State *J, RecordIndex *ix, IRRef *rbref,
/* Integer keys are looked up in the array part first. */ /* Integer keys are looked up in the array part first. */
key = ix->key; key = ix->key;
if (tref_isnumber(key)) { if (tref_isnumber(key)) {
int32_t k = numberVint(&ix->keyv); int32_t k;
if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k) if (tvisint(&ix->keyv)) {
k = LJ_MAX_ASIZE; k = intV(&ix->keyv);
} else {
int64_t i64;
if (!lj_num2int_check(numV(&ix->keyv), i64, k)) k = LJ_MAX_ASIZE;
}
if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */ if ((MSize)k < LJ_MAX_ASIZE) { /* Potential array key? */
TRef ikey = lj_opt_narrow_index(J, key); TRef ikey = lj_opt_narrow_index(J, key);
TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE); TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);

View File

@@ -351,7 +351,7 @@ SBuf *lj_strfmt_putfxint(SBuf *sb, SFormat sf, uint64_t k)
/* Add number formatted as signed integer to buffer. */ /* Add number formatted as signed integer to buffer. */
SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n) SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
{ {
int64_t k = (int64_t)n; int64_t k = lj_num2i64(n);
if (checki32(k) && sf == STRFMT_INT) if (checki32(k) && sf == STRFMT_INT)
return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */ return lj_strfmt_putint(sb, (int32_t)k); /* Shortcut for plain %d. */
else else
@@ -361,12 +361,7 @@ SBuf *lj_strfmt_putfnum_int(SBuf *sb, SFormat sf, lua_Number n)
/* Add number formatted as unsigned integer to buffer. */ /* Add number formatted as unsigned integer to buffer. */
SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n) SBuf *lj_strfmt_putfnum_uint(SBuf *sb, SFormat sf, lua_Number n)
{ {
int64_t k; return lj_strfmt_putfxint(sb, sf, lj_num2u64(n));
if (n >= 9223372036854775808.0)
k = (int64_t)(n - 18446744073709551616.0);
else
k = (int64_t)n;
return lj_strfmt_putfxint(sb, sf, (uint64_t)k);
} }
/* Format stack arguments to buffer. */ /* Format stack arguments to buffer. */

View File

@@ -523,10 +523,10 @@ StrScanFmt lj_strscan_scan(const uint8_t *p, MSize len, TValue *o,
fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig); fmt = strscan_dec(sp, o, fmt, opt, ex, neg, dig);
/* Try to convert number to integer, if requested. */ /* Try to convert number to integer, if requested. */
if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT) && !tvismzero(o)) { if (fmt == STRSCAN_NUM && (opt & STRSCAN_OPT_TOINT)) {
double n = o->n; int64_t tmp;
int32_t i = lj_num2int(n); if (lj_num2int_check(o->n, tmp, o->i) && !tvismzero(o))
if (n == (lua_Number)i) { o->i = i; return STRSCAN_INT; } return STRSCAN_INT;
} }
return fmt; return fmt;
} }

View File

@@ -295,9 +295,9 @@ static uint32_t countint(cTValue *key, uint32_t *bins)
{ {
lj_assertX(!tvisint(key), "bad integer key"); lj_assertX(!tvisint(key), "bad integer key");
if (tvisnum(key)) { if (tvisnum(key)) {
lua_Number nk = numV(key); int64_t i64;
int32_t k = lj_num2int(nk); int32_t k;
if ((uint32_t)k < LJ_MAX_ASIZE && nk == (lua_Number)k) { if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < LJ_MAX_ASIZE)) {
bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++; bins[(k > 2 ? lj_fls((uint32_t)(k-1)) : 0)]++;
return 1; return 1;
} }
@@ -409,9 +409,9 @@ cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key)
if (tv) if (tv)
return tv; return tv;
} else if (tvisnum(key)) { } else if (tvisnum(key)) {
lua_Number nk = numV(key); int64_t i64;
int32_t k = lj_num2int(nk); int32_t k;
if (nk == (lua_Number)k) { if (lj_num2int_check(numV(key), i64, k)) {
cTValue *tv = lj_tab_getint(t, k); cTValue *tv = lj_tab_getint(t, k);
if (tv) if (tv)
return tv; return tv;
@@ -542,9 +542,9 @@ TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key)
} else if (tvisint(key)) { } else if (tvisint(key)) {
return lj_tab_setint(L, t, intV(key)); return lj_tab_setint(L, t, intV(key));
} else if (tvisnum(key)) { } else if (tvisnum(key)) {
lua_Number nk = numV(key); int64_t i64;
int32_t k = lj_num2int(nk); int32_t k;
if (nk == (lua_Number)k) if (lj_num2int_check(numV(key), i64, k))
return lj_tab_setint(L, t, k); return lj_tab_setint(L, t, k);
if (tvisnan(key)) if (tvisnan(key))
lj_err_msg(L, LJ_ERR_NANIDX); lj_err_msg(L, LJ_ERR_NANIDX);
@@ -580,9 +580,9 @@ uint32_t LJ_FASTCALL lj_tab_keyindex(GCtab *t, cTValue *key)
setnumV(&tmp, (lua_Number)k); setnumV(&tmp, (lua_Number)k);
key = &tmp; key = &tmp;
} else if (tvisnum(key)) { } else if (tvisnum(key)) {
lua_Number nk = numV(key); int64_t i64;
int32_t k = lj_num2int(nk); int32_t k;
if ((uint32_t)k < t->asize && nk == (lua_Number)k) if (lj_num2int_cond(numV(key), i64, k, (uint32_t)i64 < t->asize))
return (uint32_t)k + 1; return (uint32_t)k + 1;
} }
if (!tvisnil(key)) { if (!tvisnil(key)) {

View File

@@ -314,6 +314,7 @@ typedef enum {
XO_FSTPq = XO_(dd), XOg_FSTPq = 3, XO_FSTPq = XO_(dd), XOg_FSTPq = 3,
XO_FISTPq = XO_(df), XOg_FISTPq = 7, XO_FISTPq = XO_(df), XOg_FISTPq = 7,
XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1,
XO_FADDd = XO_(d8), XOg_FADDd = 0,
XO_FADDq = XO_(dc), XOg_FADDq = 0, XO_FADDq = XO_(dc), XOg_FADDq = 0,
XO_FLDCW = XO_(d9), XOg_FLDCW = 5, XO_FLDCW = XO_(d9), XOg_FLDCW = 5,
XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7

View File

@@ -317,32 +317,34 @@ void lj_trace_initstate(global_State *g)
tv[1].u64 = U64x(80000000,00000000); tv[1].u64 = U64x(80000000,00000000);
/* Initialize 32/64 bit constants. */ /* Initialize 32/64 bit constants. */
#if LJ_TARGET_X64 || LJ_TARGET_MIPS64
J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000);
#endif
#if LJ_TARGET_X86ORX64 #if LJ_TARGET_X86ORX64
J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000); J->k64[LJ_K64_TOBIT].u64 = U64x(43380000,00000000);
#if LJ_32
J->k64[LJ_K64_M2P64_31].u64 = U64x(c1e00000,00000000);
#endif
J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000); J->k64[LJ_K64_2P64].u64 = U64x(43f00000,00000000);
J->k32[LJ_K32_M2P64_31] = LJ_64 ? 0xdf800000 : 0xcf000000;
#endif #endif
#if LJ_TARGET_MIPS64
J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
#endif
#if LJ_TARGET_MIPS
J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
#endif
#if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64 #if LJ_TARGET_X86ORX64 || LJ_TARGET_MIPS64
J->k64[LJ_K64_M2P64].u64 = U64x(c3f00000,00000000); J->k32[LJ_K32_M2P64] = 0xdf800000;
#endif
#if LJ_TARGET_MIPS64
J->k32[LJ_K32_2P63] = 0x5f000000;
#endif #endif
#if LJ_TARGET_PPC #if LJ_TARGET_PPC
J->k32[LJ_K32_2P52_2P31] = 0x59800004; J->k32[LJ_K32_2P52_2P31] = 0x59800004;
J->k32[LJ_K32_2P52] = 0x59800000; J->k32[LJ_K32_2P52] = 0x59800000;
#endif #endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS #if LJ_TARGET_PPC
J->k32[LJ_K32_2P31] = 0x4f000000; J->k32[LJ_K32_2P31] = 0x4f000000;
#endif #endif
#if LJ_TARGET_MIPS
J->k64[LJ_K64_2P31].u64 = U64x(41e00000,00000000);
#if LJ_64
J->k64[LJ_K64_2P63].u64 = U64x(43e00000,00000000);
J->k32[LJ_K32_2P63] = 0x5f000000;
J->k32[LJ_K32_M2P64] = 0xdf800000;
#endif
#endif
#if LJ_TARGET_PPC || LJ_TARGET_MIPS32 #if LJ_TARGET_PPC || LJ_TARGET_MIPS32
J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler; J->k32[LJ_K32_VM_EXIT_HANDLER] = (uintptr_t)(void *)lj_vm_exit_handler;
J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp; J->k32[LJ_K32_VM_EXIT_INTERP] = (uintptr_t)(void *)lj_vm_exit_interp;

View File

@@ -37,13 +37,19 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint32_t res[4]);
#if LJ_TARGET_PPC #if LJ_TARGET_PPC
void lj_vm_cachesync(void *start, void *end); void lj_vm_cachesync(void *start, void *end);
#endif #endif
LJ_ASMF double lj_vm_foldarith(double x, double y, int op); LJ_ASMF LJ_CONSTF double lj_vm_foldarith(double x, double y, int op);
#if LJ_HASJIT #if LJ_HASJIT
LJ_ASMF double lj_vm_foldfpm(double x, int op); LJ_ASMF LJ_CONSTF double lj_vm_foldfpm(double x, int op);
#endif #endif
#if !LJ_ARCH_HASFPU #if LJ_SOFTFP && LJ_TARGET_MIPS64
/* Declared in lj_obj.h: LJ_ASMF int32_t lj_vm_tobit(double x); */ LJ_ASMF LJ_CONSTF int32_t lj_vm_tointg(double x);
#endif #endif
/* Declared in lj_obj.h:
** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2int_check(double x);
** LJ_ASMF LJ_CONSTF int64_t lj_vm_num2i64(double x);
** LJ_ASMF LJ_CONSTF uint64_t lj_vm_num2u64(double x);
** LJ_ASMF LJ_CONSTF int32_t lj_vm_tobit(double x);
*/
/* Dispatch targets for recording and hooks. */ /* Dispatch targets for recording and hooks. */
LJ_ASMF void lj_vm_record(void); LJ_ASMF void lj_vm_record(void);
@@ -62,15 +68,15 @@ LJ_ASMF char lj_vm_exit_interp[];
#define lj_vm_floor floor #define lj_vm_floor floor
#define lj_vm_ceil ceil #define lj_vm_ceil ceil
#else #else
LJ_ASMF double lj_vm_floor(double); LJ_ASMF LJ_CONSTF double lj_vm_floor(double);
LJ_ASMF double lj_vm_ceil(double); LJ_ASMF LJ_CONSTF double lj_vm_ceil(double);
#if LJ_TARGET_ARM #if LJ_TARGET_ARM
LJ_ASMF double lj_vm_floor_sf(double); LJ_ASMF LJ_CONSTF double lj_vm_floor_sf(double);
LJ_ASMF double lj_vm_ceil_sf(double); LJ_ASMF LJ_CONSTF double lj_vm_ceil_sf(double);
#endif #endif
#endif #endif
#ifdef LUAJIT_NO_LOG2 #ifdef LUAJIT_NO_LOG2
LJ_ASMF double lj_vm_log2(double); LJ_ASMF LJ_CONSTF double lj_vm_log2(double);
#else #else
#define lj_vm_log2 log2 #define lj_vm_log2 log2
#endif #endif
@@ -80,16 +86,16 @@ LJ_ASMF int32_t LJ_FASTCALL lj_vm_modi(int32_t, int32_t);
#if LJ_HASJIT #if LJ_HASJIT
#if LJ_TARGET_X86ORX64 #if LJ_TARGET_X86ORX64
LJ_ASMF void lj_vm_floor_sse(void); LJ_ASMF LJ_CONSTF void lj_vm_floor_sse(void);
LJ_ASMF void lj_vm_ceil_sse(void); LJ_ASMF LJ_CONSTF void lj_vm_ceil_sse(void);
LJ_ASMF void lj_vm_trunc_sse(void); LJ_ASMF LJ_CONSTF void lj_vm_trunc_sse(void);
#endif #endif
#if LJ_TARGET_PPC || LJ_TARGET_ARM64 #if LJ_TARGET_PPC || LJ_TARGET_ARM64
#define lj_vm_trunc trunc #define lj_vm_trunc trunc
#else #else
LJ_ASMF double lj_vm_trunc(double); LJ_ASMF LJ_CONSTF double lj_vm_trunc(double);
#if LJ_TARGET_ARM #if LJ_TARGET_ARM
LJ_ASMF double lj_vm_trunc_sf(double); LJ_ASMF LJ_CONSTF double lj_vm_trunc_sf(double);
#endif #endif
#endif #endif
#if LJ_HASFFI #if LJ_HASFFI

View File

@@ -59,7 +59,7 @@ double lj_vm_foldarith(double x, double y, int op)
case IR_NEG - IR_ADD: return -x; break; case IR_NEG - IR_ADD: return -x; break;
case IR_ABS - IR_ADD: return fabs(x); break; case IR_ABS - IR_ADD: return fabs(x); break;
#if LJ_HASJIT #if LJ_HASJIT
case IR_LDEXP - IR_ADD: return ldexp(x, (int)y); break; case IR_LDEXP - IR_ADD: return ldexp(x, lj_num2int(y)); break;
case IR_MIN - IR_ADD: return x < y ? x : y; break; case IR_MIN - IR_ADD: return x < y ? x : y; break;
case IR_MAX - IR_ADD: return x > y ? x : y; break; case IR_MAX - IR_ADD: return x > y ? x : y; break;
#endif #endif

View File

@@ -2452,6 +2452,118 @@ static void build_subroutines(BuildCtx *ctx)
| bx lr | bx lr
| |
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
|//-- Number conversion functions ----------------------------------------
|//-----------------------------------------------------------------------
|
|// int64_t lj_vm_num2int_check(double x)
|->vm_num2int_check:
|.if FPU
|.if not HFABI
| vmov d0, CARG1, CARG2
|.endif
| vcvt.s32.f64 s4, d0
| vcvt.f64.s32 d1, s4
| vcmp.f64 d0, d1
| vmrs
| bne >1
| vmov CRET1, s4
| mov CRET2, #0
| bx lr
|
|.else
|
| asr CARG4, CARG2, #31 // sign = 0 or -1.
| lsl CARG2, CARG2, #1
| orrs RB, CARG2, CARG1
| bxeq lr // Return 0 for +-0.
| mov RB, #1024
| add RB, RB, #30
| sub RB, RB, CARG2, lsr #21
| cmp RB, #32
| bhs >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
| lsr CARG3, CARG1, #21
| orr CARG2, CARG3, CARG2, lsl #10 // Left-aligned mantissa.
| rsb CARG3, RB, #32
| lsl CARG3, CARG2, CARG3
| orr CARG2, CARG2, #0x80000000 // Merge leading 1.
| orrs CARG3, CARG3, CARG1, lsl #11
| lsr CARG1, CARG2, RB // lo = right-aligned absolute value.
| bne >1 // Fail if fractional part != 0.
| adds CRET1, CARG1, CARG4
| bmi >1 // Fail if lo+sign >= 0x80000000.
| eor CRET1, CRET1, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign.
| mov CRET2, #0
| bx lr
|.endif
|1:
| mov CRET1, #0x80000000
| mov CRET2, #0x80000000
| bx lr
|
|// int64_t lj_vm_num2i64(double x)
|->vm_num2i64:
|// fallthrough, same as lj_vm_num2u64.
|
|// uint64_t lj_vm_num2u64(double x)
|->vm_num2u64:
|.if HFABI
| vmov CARG1, CARG2, d0
|.endif
| lsl RB, CARG2, #1
| lsr RB, RB, #21
| sub RB, RB, #1020
| sub RB, RB, #3
| cmp RB, #116
| bhs >3 // Exponent out of range.
| asr CARG4, CARG2, #31 // sign = 0 or -1.
| lsl CARG2, CARG2, #12
| lsr CARG2, CARG2, #12
| rsbs RB, RB, #52
| orr CARG2, CARG2, #0x00100000
| bmi >2 // Shift mantissa left or right?
| lsr CARG1, CARG1, RB // 64 bit right shift.
| lsr CARG3, CARG2, RB
| rsb RB, RB, #32
| orr CARG1, CARG1, CARG2, lsl RB
| rsb RB, RB, #0
| orr CARG1, CARG1, CARG2, lsr RB
| adds CRET1, CARG1, CARG4 // m = sign?-m:m = (m+sign)^sign.
| adc CRET2, CARG3, CARG4
|1:
| eor CRET1, CRET1, CARG4
| eor CRET2, CRET2, CARG4
| bx lr
|2:
| rsb RB, RB, #0
| lsl CARG2, CARG2, RB // 64 bit left shift.
| lsl CARG3, CARG1, RB
| sub RB, RB, #32
| orr CARG2, CARG2, CARG1, lsl RB
| rsb RB, RB, #0
| orr CARG2, CARG2, CARG1, lsr RB
| adds CRET1, CARG3, CARG4
| adc CRET2, CARG2, CARG4
| b <1
|3:
| mov CRET1, #0
| mov CRET2, #0
| bx lr
|
|// int32_t lj_vm_tobit(double x)
|.if FPU
|->vm_tobit:
| vldr d1, >9
|.if not HFABI
| vmov d0, CARG1, CARG2
|.endif
| vadd.f64 d0, d0, d1
| vmov CARG1, s0
| bx lr
|9:
| .long 0, 0x43380000 // (double)(2^52 + 2^51).
|.endif
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions -------------------------------------------- |//-- Miscellaneous functions --------------------------------------------
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
| |
@@ -4097,7 +4209,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
| ldr TRACE:CARG1, [CARG1, CARG2, lsl #2] | ldr TRACE:CARG1, [CARG1, CARG2, lsl #2]
| // Subsumes ins_next1 and ins_next2. | // Subsumes ins_next1 and ins_next2.
| ldr INS, TRACE:CARG1->startins | ldr INS, TRACE:CARG1->startins
| bfi INS, OP, #0, #8 | bic INS, INS, #0xff
| orr INS, INS, OP
| str INS, [PC], #4 | str INS, [PC], #4
| b <1 | b <1
|.endif |.endif

View File

@@ -2156,6 +2156,42 @@ static void build_subroutines(BuildCtx *ctx)
| ret | ret
| |
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
|//-- Number conversion functions ----------------------------------------
|//-----------------------------------------------------------------------
|
|// int64_t lj_vm_num2int_check(double x)
|->vm_num2int_check:
| fcvtzs CRET1w, FARG1
| scvtf FARG2, CRET1w
| fcmp FARG2, FARG1
| bne >1
| ret
|1:
| mov CRET1, #0x8000000080000000
| ret
|
|// int64_t lj_vm_num2i64(double x)
|->vm_num2i64:
| fcvtzs CRET1, FARG1
| ret
|
|// uint64_t lj_vm_num2u64(double x)
|->vm_num2u64:
| fcvtzs CRET1, FARG1
| fcvtzu CARG2, FARG1
| cmn CRET1, #1 // Set overflow if CRET1 == INT64_MAX.
| csel CRET1, CRET1, CARG2, vc // No overflow ? i64 : u64.
| ret
|
|// int32_t lj_vm_tobit(double x)
|->vm_tobit:
| movz CRET1, #0x4338, lsl #48 // 2^52 + 2^51.
| fmov FARG2, CRET1
| fadd FARG1, FARG1, FARG2
| fmov CRET1w, s0
| ret
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions -------------------------------------------- |//-- Miscellaneous functions --------------------------------------------
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
| |

View File

@@ -85,6 +85,7 @@
| |
|.if FPU |.if FPU
|.define FARG1, f12 |.define FARG1, f12
|.define FARG1HI, f13
|.define FARG2, f14 |.define FARG2, f14
| |
|.define FRET1, f0 |.define FRET1, f0
@@ -2560,7 +2561,7 @@ static void build_subroutines(BuildCtx *ctx)
| mtc1 r0, f4 | mtc1 r0, f4
| mtc1 TMP0, f5 | mtc1 TMP0, f5
| abs.d FRET2, FARG1 // |x| | abs.d FRET2, FARG1 // |x|
| mfc1 AT, f13 | mfc1 AT, FARG1HI
| c.olt.d 0, FRET2, f4 | c.olt.d 0, FRET2, f4
| add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52 | add.d FRET1, FRET2, f4 // (|x| + 2^52) - 2^52
| bc1f 0, >1 // Truncate only if |x| < 2^52. | bc1f 0, >1 // Truncate only if |x| < 2^52.
@@ -2822,6 +2823,122 @@ static void build_subroutines(BuildCtx *ctx)
| sfmin_max max, vm_sfcmpogt | sfmin_max max, vm_sfcmpogt
| |
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
|//-- Number conversion functions ----------------------------------------
|//-----------------------------------------------------------------------
|
|// int64_t lj_vm_num2int_check(double x)
|->vm_num2int_check:
|.if FPU
| trunc.w.d FARG2, FARG1
| mfc1 SFRETLO, FARG2
| cvt.d.w FARG2, FARG2
| c.eq.d FARG1, FARG2
| bc1f 0, >2
|. nop
| jr ra
|. move SFRETHI, r0
|
|.else
|
| sll SFRETLO, SFARG1HI, 1
| or SFRETHI, SFRETLO, SFARG1LO
| beqz SFRETHI, >1 // Return 0 for +-0.
|. li TMP0, 1054
| srl AT, SFRETLO, 21
| subu TMP0, TMP0, AT
| sltiu AT, TMP0, 32
| beqz AT, >2 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
|. sll SFRETLO, SFARG1HI, 11
| srl SFRETHI, SFARG1LO, 21
| negu TMP1, TMP0
| or SFRETLO, SFRETLO, SFRETHI // Left-aligned mantissa.
| sllv TMP2, SFRETLO, TMP1
| lui AT, 0x8000
| sll SFRETHI, SFARG1LO, 11
| or SFRETLO, SFRETLO, AT // Merge leading 1.
| or TMP2, TMP2, SFRETHI
| srlv SFRETLO, SFRETLO, TMP0 // lo = right-aligned absolute value.
| bnez TMP2, >2 // Fail if fractional part != 0.
|. sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1.
| addu SFRETLO, SFRETLO, SFARG1HI
| bltz SFRETLO, >2 // Fail if lo+sign >= 0x80000000.
|. xor SFRETLO, SFRETLO, SFARG1HI // lo = sign?-lo:lo = (lo+sign)^sign.
|1:
| jr ra
|. move SFRETHI, r0
|.endif
|2: // Not an integer, return 0x8000000080000000LL.
| lui SFRETHI, 0x8000
| jr ra
|. lui SFRETLO, 0x8000
|
|// int64_t lj_vm_num2i64(double x)
|->vm_num2i64:
|// fallthrough, same as lj_vm_num2u64.
|
|// uint64_t lj_vm_num2u64(double x)
|->vm_num2u64:
|.if FPU
| mfc1 SFARG1HI, FARG1HI
| mfc1 SFARG1LO, FARG1
|.endif
| srl TMP0, SFARG1HI, 20
| andi TMP0, TMP0, 0x7ff
| addiu SFRETLO, TMP0, -1023
| sltiu SFRETLO, SFRETLO, 116
| beqz SFRETLO, >3 // Exponent out of range.
|. sll SFRETHI, SFARG1HI, 12
| lui AT, 0x0010
| srl SFRETHI, SFRETHI, 12
| addiu TMP0, TMP0, -1075
| sra SFARG1HI, SFARG1HI, 31 // sign = 0 or -1.
| bgez TMP0, >2 // Shift mantissa left or right?
|. or SFRETHI, SFRETHI, AT // Merge leading 1 into masked mantissa.
| subu TMP1, r0, TMP0
| sll AT, SFRETHI, 1
| nor TMP0, r0, TMP1
| srlv SFRETHI, SFRETHI, TMP1 // Shift hi mantissa right for low exp.
| sllv AT, AT, TMP0 // Shifted-out hi mantissa.
| srlv SFRETLO, SFARG1LO, TMP1 // Shift lo mantissa right for low exp.
| andi TMP1, TMP1, 0x20 // Conditional right shift by 32.
| or AT, AT, SFRETLO // Merge into lo mantissa.
| movn AT, SFRETHI, TMP1
| movn SFRETHI, r0, TMP1
|1:
| addu SFRETLO, AT, SFARG1HI // m = sign?-m:m = (m+sign)^sign.
| addu SFRETHI, SFRETHI, SFARG1HI
| sltu TMP0, SFRETLO, AT // Carry
| addu SFRETHI, SFRETHI, TMP0
| xor SFRETLO, SFRETLO, SFARG1HI
| jr ra
|. xor SFRETHI, SFRETHI, SFARG1HI
|2:
| srl TMP2, SFARG1LO, 1
| nor AT, r0, TMP0
| sllv SFRETHI, SFRETHI, TMP0 // Shift hi mantissa left for high exp.
| srlv TMP2, TMP2, AT // Shifted-out lo mantissa.
| sllv AT, SFARG1LO, TMP0 // Shift lo mantissa left for high exp.
| andi TMP0, TMP0, 0x20 // Conditional left shift by 32.
| or SFRETHI, SFRETHI, TMP2 // Merge into hi mantissa.
| movn SFRETHI, AT, TMP0
| b <1
|. movn AT, r0, TMP0
|3:
| jr ra
|. li SFRETHI, 0
|
|// int32_t lj_vm_tobit(double x)
|.if FPU
|->vm_tobit:
| lui AT, 0x59c0 // 2^52 + 2^51 (float).
| mtc1 AT, FARG2
| cvt.d.s FARG2, FARG2
| add.d FARG1, FARG1, FARG2
| jr ra
|. mfc1 CRET1, FARG1
|.endif
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions -------------------------------------------- |//-- Miscellaneous functions --------------------------------------------
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
| |

View File

@@ -2113,7 +2113,7 @@ static void build_subroutines(BuildCtx *ctx)
| dinsu CRET2, AT, 21, 21 | dinsu CRET2, AT, 21, 21
| slt AT, CARG1, r0 | slt AT, CARG1, r0
| dsrlv CRET1, CRET2, TMP0 | dsrlv CRET1, CRET2, TMP0
| dsubu CARG1, r0, CRET1 | negu CARG1, CRET1
|.if MIPSR6 |.if MIPSR6
| seleqz CRET1, CRET1, AT | seleqz CRET1, CRET1, AT
| selnez CARG1, CARG1, AT | selnez CARG1, CARG1, AT
@@ -2121,20 +2121,12 @@ static void build_subroutines(BuildCtx *ctx)
|.else |.else
| movn CRET1, CARG1, AT | movn CRET1, CARG1, AT
|.endif |.endif
| li CARG1, 64 | negu TMP0, TMP0
| subu TMP0, CARG1, TMP0
| dsllv CRET2, CRET2, TMP0 // Integer check. | dsllv CRET2, CRET2, TMP0 // Integer check.
| sextw AT, CRET1 | sextw AT, CRET1
| xor AT, CRET1, AT // Range check. | xor AT, CRET1, AT // Range check.
|.if MIPSR6
| seleqz AT, AT, CRET2
| selnez CRET2, CRET2, CRET2
| jr ra | jr ra
|. or CRET2, AT, CRET2 |. or CRET2, AT, CRET2
|.else
| jr ra
|. movz CRET2, AT, CRET2
|.endif
|1: |1:
| jr ra | jr ra
|. li CRET2, 1 |. li CRET2, 1
@@ -2929,6 +2921,136 @@ static void build_subroutines(BuildCtx *ctx)
| sfmin_max max, vm_sfcmpogt | sfmin_max max, vm_sfcmpogt
| |
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
|//-- Number conversion functions ----------------------------------------
|//-----------------------------------------------------------------------
|
|// int64_t lj_vm_num2int_check(double x)
|->vm_num2int_check:
|.if FPU
| trunc.w.d FARG2, FARG1
| mfc1 CRET1, FARG2
| cvt.d.w FARG2, FARG2
|.if MIPSR6
| cmp.eq.d FARG2, FARG1, FARG2
| bc1eqz FARG2, >2
|.else
| c.eq.d FARG1, FARG2
| bc1f 0, >2
|.endif
|. nop
| jr ra
|. zextw CRET1, CRET1
|
|.else
|
| dsll CRET2, CARG1, 1
| beqz CRET2, >1
|. li TMP0, 1076
| dsrl AT, CRET2, 53
| dsubu TMP0, TMP0, AT
| sltiu AT, TMP0, 54
| beqz AT, >2
|. dextm CRET2, CRET2, 0, 20
| dinsu CRET2, AT, 21, 21
| slt AT, CARG1, r0
| dsrlv CRET1, CRET2, TMP0
| negu CARG1, CRET1
|.if MIPSR6
| seleqz CRET1, CRET1, AT
| selnez CARG1, CARG1, AT
| or CRET1, CRET1, CARG1
|.else
| movn CRET1, CARG1, AT
|.endif
| negu TMP0, TMP0
| dsllv CRET2, CRET2, TMP0 // Integer check.
| sextw AT, CRET1
| xor AT, CRET1, AT // Range check.
| or AT, AT, CRET2
| bnez AT, >2
|. nop
| jr ra
|. zextw CRET1, CRET1
|1:
| jr ra
|. move CRET1, r0
|.endif
|2:
| lui CRET1, 0x8000
| dsll CRET1, CRET1, 16
| ori CRET1, CRET1, 0x8000
| jr ra
|. dsll CRET1, CRET1, 16
|
|// int64_t lj_vm_num2i64(double x)
|->vm_num2i64:
|.if FPU
| trunc.l.d FARG1, FARG1
| jr ra
|. dmfc1 CRET1, FARG1
|.else
|// fallthrough, same as lj_vm_num2u64 for soft-float.
|.endif
|
|// uint64_t lj_vm_num2u64(double x)
|->vm_num2u64:
|.if FPU
| trunc.l.d FARG2, FARG1
| dmfc1 CRET1, FARG2
| li AT, -1
| dsrl AT, AT, 1
| beq CRET1, AT, >1
|. lui AT, 0xdf80 // -2^64 (float).
| jr ra
|. nop
|1:
| mtc1 AT, FARG2
| cvt.d.s FARG2, FARG2
| add.d FARG1, FARG1, FARG2
| trunc.l.d FARG2, FARG1
| jr ra
|. dmfc1 CRET1, FARG2
|
|.else
|
| dextu CARG2, CARG1, 20, 10
| addiu AT, CARG2, -1023
| sltiu AT, AT, 116
| beqz AT, >2 // Exponent out of range.
|. addiu CARG2, CARG2, -1075
| dextm CRET1, CARG1, 0, 19
| dsll AT, AT, 52
| dsra CARG1, CARG1, 63 // sign = 0 or -1.
| bgez CARG2, >1 // Shift mantissa left or right?
|. or CRET1, CRET1, AT // Merge leading 1 into masked mantissa.
| subu CARG2, r0, CARG2
| dsrlv CRET1, CRET1, CARG2 // Shift mantissa right for low exp.
| daddu CRET1, CRET1, CARG1
| jr ra
|. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign.
|1:
| dsllv CRET1, CRET1, CARG2 // Shift mantissa left for high exp.
| daddu CRET1, CRET1, CARG1
| jr ra
|. xor CRET1, CRET1, CARG1 // m = sign?-m:m = (m+sign)^sign.
|2:
| jr ra
|. move CRET1, r0
|.endif
|
|// int32_t lj_vm_tobit(double x)
|.if FPU
|->vm_tobit:
| lui AT, 0x59c0 // 2^52 + 2^51 (float).
| mtc1 AT, FARG2
| cvt.d.s FARG2, FARG2
| add.d FARG1, FARG1, FARG2
| mfc1 CRET1, FARG1
| jr ra
|. sextw CRET1, CRET1
|.endif
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions -------------------------------------------- |//-- Miscellaneous functions --------------------------------------------
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
| |

View File

@@ -3160,6 +3160,152 @@ static void build_subroutines(BuildCtx *ctx)
| blr | blr
| |
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
|//-- Number conversion functions ----------------------------------------
|//-----------------------------------------------------------------------
|
|// int64_t lj_vm_num2int_check(double x)
|->vm_num2int_check:
|.if FPU
| subi sp, sp, 16
| stfd FARG1, 0(sp)
| lwz CARG1, 0(sp)
| lwz CARG2, 4(sp)
|.endif
| slwi TMP1, CARG1, 1
|.if PPE
| or TMP1, TMP1, CARG2
| cmpwi TMP1, 0
|.else
| or. TMP1, TMP1, CARG2
|.endif
| beq >2 // Return 0 for +-0.
| rlwinm RB, CARG1, 12, 21, 31
| subfic RB, RB, 1054
| cmplwi RB, 32
| bge >1 // Fail if |x| < 0x1p0 || |x| >= 0x1p32.
| slwi CARG3, CARG1, 11
| rlwimi CARG3, CARG2, 11, 21, 31 // Left-aligned mantissa.
| subfic TMP1, RB, 32
| slw TMP1, CARG3, TMP1
| slwi TMP2, CARG2, 11
|.if PPE
| or. TMP1, TMP1, TMP2
|.else
| or TMP1, TMP1, TMP2
| cmpwi TMP1, 0
|.endif
| bne >1 // Fail if fractional part != 0.
| oris CARG3, CARG3, 0x8000 // Merge leading 1.
| srw CRET2, CARG3, RB // lo = right-aligned absolute value.
| srawi CARG4, CARG1, 31 // sign = 0 or -1.
|.if GPR64
| add CRET2, CRET2, CARG4
| cmpwi CRET2, 0
|.else
| add. CRET2, CRET2, CARG4
|.endif
| blt >1 // Fail if fractional part != 0.
| xor CRET2, CRET2, CARG4 // lo = sign?-lo:lo = (lo+sign)^sign.
|2:
|.if GPR64
| rldicl CRET1, CRET1, 0, 32
|.else
| li CRET1, 0
|.endif
|.if FPU
| addi sp, sp, 16
|.endif
| blr
|1:
|.if GPR64
| lus CRET1, 0x8000
| rldicr CRET1, CRET1, 32, 32
|.else
| lus CRET1, 0x8000
| lus CRET2, 0x8000
|.endif
|.if FPU
| addi sp, sp, 16
|.endif
| blr
|
|// int64_t lj_vm_num2i64(double x)
|->vm_num2i64:
|// fallthrough, same as lj_vm_num2u64.
|
|// uint64_t lj_vm_num2u64(double x)
|->vm_num2u64:
|.if FPU
| subi sp, sp, 16
| stfd FARG1, 0(sp)
| lwz CARG1, 0(sp)
| lwz CARG2, 4(sp)
|.endif
| rlwinm RB, CARG1, 12, 21, 31
| addi RB, RB, -1023
| cmplwi RB, 116
| bge >3 // Exponent out of range.
| srawi CARG4, CARG1, 31 // sign = 0 or -1.
| clrlwi CARG1, CARG1, 12
| subfic RB, RB, 52
| oris CARG1, CARG1, 0x0010
| cmpwi RB, 0
| blt >2 // Shift mantissa left or right?
| subfic TMP1, RB, 32 // 64 bit right shift.
| srw CARG2, CARG2, RB
| slw TMP2, CARG1, TMP1
| addi TMP1, RB, -32
| or CARG2, CARG2, TMP2
| srw TMP2, CARG1, TMP1
| or CARG2, CARG2, TMP2
| srw CARG1, CARG1, RB
|1:
| addc CARG2, CARG2, CARG4
| adde CARG1, CARG1, CARG4
| xor CRET2, CARG2, CARG4
| xor CRET1, CARG1, CARG4
|.if GPR64
| rldimi CRET2, CRET1, 0, 32
| mr CRET1, CRET2
|.endif
| addi sp, sp, 16
| blr
|2:
| subfic TMP1, RB, 0 // 64 bit left shift.
| addi RB, RB, -32
| slw CARG1, CARG1, TMP1
| srw TMP2, CARG2, RB
| addi RB, TMP1, -32
| or CARG1, CARG1, TMP2
| slw TMP2, CARG2, RB
| or CARG1, CARG1, TMP2
| slw CARG2, CARG2, TMP1
| b <1
|3:
| li CRET1, 0
|.if not GPR64
| li CRET2, 0
|.endif
|.if FPU
| addi sp, sp, 16
|.endif
| blr
|
|// int32_t lj_vm_tobit(double x)
|.if FPU
|->vm_tobit:
| lus TMP0, 0x59c0 // 2^52 + 2^51 (float).
| subi sp, sp, 16
| stw TMP0, 0(sp)
| lfs FARG2, 0(sp)
| fadd FARG1, FARG1, FARG2
| stfd FARG1, 0(sp)
| lwz CRET1, 4(sp)
| addi sp, sp, 16
| blr
|.endif
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions -------------------------------------------- |//-- Miscellaneous functions --------------------------------------------
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
| |

View File

@@ -2625,6 +2625,49 @@ static void build_subroutines(BuildCtx *ctx)
| ret | ret
| |
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
|//-- Number conversion functions ----------------------------------------
|//-----------------------------------------------------------------------
|
|// int64_t lj_vm_num2int_check(double x)
|->vm_num2int_check:
| cvttsd2si eax, xmm0
| xorps xmm1, xmm1
| cvtsi2sd xmm1, eax
| ucomisd xmm1, xmm0
| jp >1
| jne >1
| ret
|1:
| mov64 rax, U64x(80000000,80000000)
| ret
|
|// int64_t lj_vm_num2i64(double x)
|->vm_num2i64:
| cvttsd2si rax, xmm0
| ret
|
|// uint64_t lj_vm_num2u64(double x)
|->vm_num2u64:
| cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range.
| cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow.
| jo >1
| ret
|1:
| mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double).
| movd xmm1, rdx
| addsd xmm0, xmm1
| cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range.
| // Note that -0x1p63 converts to -0x8000000000000000LL either way.
| ret
|
|// int32_t lj_vm_tobit(double x)
|->vm_tobit:
| sseconst_tobit xmm1, RC
| addsd xmm0, xmm1
| movd eax, xmm0
| ret
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions -------------------------------------------- |//-- Miscellaneous functions --------------------------------------------
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
| |

View File

@@ -3059,6 +3059,98 @@ static void build_subroutines(BuildCtx *ctx)
| ret | ret
| |
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
|//-- Number conversion functions ----------------------------------------
|//-----------------------------------------------------------------------
|
|// int64_t lj_vm_num2int_check(double x)
|->vm_num2int_check:
|.if not X64
| movsd xmm0, qword [esp+4]
|.endif
| cvttsd2si eax, xmm0
| xorps xmm1, xmm1
| cvtsi2sd xmm1, eax
| ucomisd xmm1, xmm0
| jp >1
| jne >1
|.if not X64
| xor edx, edx
|.endif
| ret
|1:
|.if X64
| mov64 rax, U64x(80000000,80000000)
|.else
| mov eax, 0x80000000
| mov edx, eax
|.endif
| ret
|
|// int64_t lj_vm_num2i64(double x)
|->vm_num2i64:
|.if X64
| cvttsd2si rax, xmm0
| ret
|.else
| sub esp, 12
| fld qword [esp+16]
| fisttp qword [esp]
| mov eax, dword [esp]
| mov edx, dword [esp+4]
| add esp, 12
| ret
|.endif
|
|// uint64_t lj_vm_num2u64(double x)
|->vm_num2u64:
|.if X64
| cvttsd2si rax, xmm0 // Convert [-2^63..2^63) range.
| cmp rax, 1 // Indefinite result -0x8000000000000000LL - 1 sets overflow.
| jo >1
| ret
|1:
| mov64 rdx, U64x(c3f00000,00000000) // -0x1p64 (double).
| movd xmm1, rdx
| addsd xmm0, xmm1
| cvttsd2si rax, xmm0 // Convert [2^63..2^64+2^63) range.
| // Note that -0x1p63 converts to -0x8000000000000000LL either way.
| ret
|.else
| sub esp, 12
| fld qword [esp+16]
| fld st0
| fisttp qword [esp]
| mov edx, dword [esp+4]
| mov eax, dword [esp]
| cmp edx, 1
| jo >2
|1:
| fpop
| add esp, 12
| ret
|2:
| cmp eax, 0
| jne <1
| mov dword [esp+8], 0xdf800000 // -0x1p64 (float).
| fadd dword [esp+8]
| fisttp qword [esp]
| mov eax, dword [esp]
| mov edx, dword [esp+4]
| add esp, 12
| ret
|.endif
|
|// int32_t lj_vm_tobit(double x)
|->vm_tobit:
|.if not X64
| movsd xmm0, qword [esp+4]
|.endif
| sseconst_tobit xmm1, RCa
| addsd xmm0, xmm1
| movd eax, xmm0
| ret
|
|//-----------------------------------------------------------------------
|//-- Miscellaneous functions -------------------------------------------- |//-- Miscellaneous functions --------------------------------------------
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
| |