Miscellaneous cleanups for x64 interpreter.

This commit is contained in:
Mike Pall
2009-12-22 06:16:29 +01:00
parent 44a9d7b00c
commit c4e9dc0012
2 changed files with 54 additions and 27 deletions

View File

@@ -19,10 +19,14 @@
|// Fixed register assignments for the interpreter.
|// This is very fragile and has many dependencies. Caveat emptor.
|.define BASE, edx // Not C callee-save, refetched anyway.
|.if not X64 or X64WIN
|.if not X64
|.define KBASE, edi // Must be C callee-save.
|.define KBASEa, KBASE
|.define PC, esi // Must be C callee-save.
|.elif X64WIN
|.define KBASE, edi // Must be C callee-save.
|.define KBASEa, rdi
|.define PC, esi // Must be C callee-save.
|.else
|.define KBASE, r15d // Must be C callee-save.
|.define KBASEa, r15
@@ -136,7 +140,9 @@
|.define FPARG1, qword [esp]
|// TMPQ overlaps TMP1/TMP2. ARG5 and TMPa overlap TMP1, NRESULTS overlaps
|// TMP2 (so all three overlap TMPQ, too). TMP3 is an alias for ARG4.
|.define TMPQ, qword [esp+aword*4]
|.define TMP3, ARG4
|.define ARG5, TMP1
|.define TMPa, TMP1
|.define NRESULTS, TMP2
|
|// Arguments for vm_call and vm_pcall.
@@ -183,7 +189,9 @@
|// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ).
|.define TMPQ, qword [rsp+aword*10]
|.define NRESULTS, TMP2
|.define TMPa, ARG5
|.define ARG5d, dword [rsp+aword*4]
|.define TMP3, ARG5d
|
|//-----------------------------------------------------------------------
|.else // x64/POSIX stack layout
@@ -205,7 +213,7 @@
|.define SAVE_R2, aword [rsp+aword*6]
|.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
|.define SAVE_CFRAME, aword [rsp+aword*4]
|.define UNUSED1, aword [rsp+aword*3]
|.define TMPa, aword [rsp+aword*3]
|//----- ^^^ awords above, vvv dwords below
|.define SAVE_PC, dword [rsp+dword*5]
|.define SAVE_L, dword [rsp+dword*4]
@@ -217,6 +225,7 @@
|
|// TMPQ overlaps TMP1/TMP2. NRESULTS overlaps TMP2 (and TMPQ).
|// TMP3 shares the stack slot at rsp+aword*3 with TMPa.
|.define TMPQ, qword [rsp]
|.define TMP3, dword [rsp+aword*3]
|.define NRESULTS, TMP2
|
|.endif
@@ -900,9 +909,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| cvtsi2sd xmm0, RC
| movsd TMPQ, xmm0
} else {
|.if not X64
| mov ARG4, RC
| fild ARG4
| fstp TMPQ
|.endif
}
| lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
| jmp >1
@@ -971,9 +982,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| cvtsi2sd xmm0, RC
| movsd TMPQ, xmm0
} else {
|.if not X64
| mov ARG4, RC
| fild ARG4
| fstp TMPQ
|.endif
}
| lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
| jmp >1
@@ -2182,8 +2195,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| cvtsd2si RC, qword [RA+8]
| mov ARG3, RC
} else {
|.if not X64
| fld qword [RA+8]
| fistp ARG3
|.endif
}
| mov RC, TMP2
| cmp RB, RC // len < end? (unsigned compare)
@@ -2324,7 +2339,9 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
if (sse) {
| cvtsi2sd xmm0, RC; jmp ->fff_resxmm0
} else {
|.if not X64
| mov ARG1, RC; fild ARG1; jmp ->fff_resn
|.endif
}
|
|//-- Bit library --------------------------------------------------------
@@ -2339,12 +2356,14 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| cvtsi2sd xmm0, RB
| jmp ->fff_resxmm0
} else {
|.if not X64
|.ffunc_n bit_tobit
| mov TMP1, TOBIT_BIAS
| fadd TMP1
| fstp FPARG1 // 64 bit FP store.
| fild ARG1 // 32 bit integer load (s2lfwd ok).
| jmp ->fff_resn
|.endif
}
|
|.macro .ffunc_bit, name
@@ -2354,11 +2373,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| addsd xmm0, xmm1
| movd RB, xmm0
||} else {
|.if not X64
| .ffunc_n name
| mov TMP1, TOBIT_BIAS
| fadd TMP1
| fstp FPARG1
| mov RB, ARG1
|.endif
||}
|.endmacro
|
@@ -2409,11 +2430,13 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| mov BASE, TMP1
| jmp ->fff_resxmm0
} else {
|.if not X64
|->fff_resbit:
|->fff_resbit_op:
| mov ARG1, RB
| fild ARG1
| jmp ->fff_resn
|.endif
}
|
|->fff_fallback_bit_op:
@@ -2433,6 +2456,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| movd RB, xmm0
| movd RA, xmm1
||} else {
|.if not X64
| .ffunc_nn name
| mov TMP1, TOBIT_BIAS
| fadd TMP1
@@ -2442,6 +2466,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| mov RC, RA // Assumes RA is ecx.
| mov RA, ARG3
| mov RB, ARG1
|.endif
||}
| ins RB, cl
| mov RA, RC
@@ -2467,7 +2492,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| sub BASE, RA
| mov [RA-4], PC
| mov SAVE_PC, PC // Redundant (but a defined value).
| mov ARG3, BASE // Save old BASE (relative).
| mov TMP1, BASE // Save old BASE (relative).
| mov L:RB->base, RA
| lea RC, [RA+NARGS:RC*8-8]
| mov ARG1, L:RB
@@ -2486,7 +2511,7 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| shr RC, 3
| add NARGS:RC, 1
| mov LFUNC:RB, [RA-8]
| mov BASE, ARG3 // Restore old BASE.
| mov BASE, TMP1 // Restore old BASE.
| add BASE, RA
| cmp [RA-4], PC; jne >2 // Callable modified by handler?
| jmp aword LFUNC:RB->gate // Retry the call.
@@ -2509,11 +2534,11 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
|
|->fff_gcstep: // Call GC step function.
| // RA = new base, RC = nargs+1
| pop RB // Must keep stack at same level.
| mov ARG3, RB // Save return address
| pop RBa // Must keep stack at same level.
| mov TMPa, RBa // Save return address
| mov L:RB, SAVE_L
| sub BASE, RA
| mov ARG2, BASE // Save old BASE (relative).
| mov TMP2, BASE // Save old BASE (relative).
| mov [RA-4], PC
| mov SAVE_PC, PC // Redundant (but a defined value).
| mov L:RB->base, RA
@@ -2531,10 +2556,10 @@ static void build_subroutines(BuildCtx *ctx, int cmov, int sse)
| shr RC, 3
| add NARGS:RC, 1
| mov PC, [RA-4]
| mov BASE, ARG2 // Restore old BASE.
| mov BASE, TMP2 // Restore old BASE.
| add BASE, RA
| mov RB, ARG3
| push RB // Restore return address.
| mov RBa, TMPa
| push RBa // Restore return address.
| mov LFUNC:RB, [RA-8]
| ret
|
@@ -3369,9 +3394,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
| cvtsi2sd xmm0, RC
| mov BASE, RB // Restore BASE.
} else {
|.if not X64
| mov ARG1, RC
| mov BASE, RB // Restore BASE.
| fild ARG1
|.endif
}
| movzx RA, PC_RA
| jmp <1
@@ -3994,7 +4021,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
| mov TMP1, STR:RC
| mov TMP2, LJ_TSTR
| lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
| mov ARG4, TAB:RB // Save TAB:RB for us.
| mov TMP3, TAB:RB // Save TAB:RB for us.
| mov ARG2, TAB:RB
| mov L:RB, SAVE_L
| mov ARG3, RC
@@ -4004,7 +4031,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
| call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
| // Handles write barrier for the new key. TValue * returned in eax (RC).
| mov BASE, L:RB->base
| mov TAB:RB, ARG4 // Need TAB:RB for barrier.
| mov TAB:RB, TMP3 // Need TAB:RB for barrier.
| mov RA, eax
| jmp <2 // Must check write barrier for value.
|
@@ -4211,7 +4238,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
| lea RA, [BASE+RA*8]
| mov PROTO:RC, LFUNC:RC->pt
| movzx RC, byte PROTO:RC->numparams
| mov ARG3, KBASE // Need one more free register.
| mov TMP1, KBASE // Need one more free register.
| lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
| sub KBASE, [BASE-4]
| // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
@@ -4237,7 +4264,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
| cmp RA, RB
| jb <2
|3:
| mov KBASE, ARG3
| mov KBASE, TMP1
| ins_next
|
|5: // Copy all varargs.
@@ -4496,11 +4523,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop, int cmov, int sse)
| ins_AD // RA = base (ignored), RD = traceno
| mov RA, [DISPATCH+DISPATCH_J(trace)]
| mov TRACE:RD, [RA+RD*4]
| mov RD, TRACE:RD->mcode
| mov RDa, TRACE:RD->mcode
| mov L:RB, SAVE_L
| mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
| mov [DISPATCH+DISPATCH_GL(jit_L)], L:RB
| jmp RD
| jmp RDa
#endif
break;