ARM64: Add support for ARM BTI.

Note: this is not enabled by default, look for CET in lj_arch.h.
Thanks to Yuichiro Naito. #1398
This commit is contained in:
Mike Pall
2025-10-28 04:46:10 +01:00
parent e34a78acf6
commit 8651ef6df4
7 changed files with 124 additions and 3 deletions

View File

@@ -244,6 +244,10 @@ local map_cond = {
hs = 2, lo = 3, hs = 2, lo = 3,
} }
-- Operand names accepted by the "bti" instruction. Each value is added to the
-- base opcode of the bti_1 template via the "t" template parameter.
local map_bti = {
c = 0x40, j = 0x80, jc = 0xc0,
}
------------------------------------------------------------------------------ ------------------------------------------------------------------------------
local parse_reg_type local parse_reg_type
@@ -475,6 +479,12 @@ local function parse_cond(expr, inv)
return shl(bit.bxor(c, inv), 12) return shl(bit.bxor(c, inv), 12)
end end
-- Translate a symbolic operand into its encoding using a lookup table.
-- expr: operand text as written in the source.
-- map:  table from operand text to encoding bits.
-- Reports "bad operand" through werror() when the operand is not in the map.
local function parse_map(expr, map)
  local bits = map[expr]
  if bits == nil or bits == false then
    werror("bad operand")
  end
  return bits
end
local function parse_load(params, nparams, n, op) local function parse_load(params, nparams, n, op)
if params[n+2] then werror("too many operands") end if params[n+2] then werror("too many operands") end
local scale = shr(op, 30) local scale = shr(op, 30)
@@ -823,11 +833,21 @@ map_op = {
tbz_3 = "36000000DTBw|36000000DTBx", tbz_3 = "36000000DTBw|36000000DTBx",
tbnz_3 = "37000000DTBw|37000000DTBx", tbnz_3 = "37000000DTBw|37000000DTBx",
-- Branch Target Identification.
bti_1 = "d503241ft",
-- ARM64e: Pointer authentication codes (PAC). -- ARM64e: Pointer authentication codes (PAC).
blraaz_1 = "d63f081fNx", blraaz_1 = "d63f081fNx",
blrabz_1 = "d63f0c1fNx",
braa_2 = "d71f0800NDx", braa_2 = "d71f0800NDx",
brab_2 = "d71f0c00NDx",
braaz_1 = "d61f081fNx", braaz_1 = "d61f081fNx",
brabz_1 = "d61f0c1fNx",
paciasp_0 = "d503233f",
pacibsp_0 = "d503237f", pacibsp_0 = "d503237f",
autiasp_0 = "d50323bf",
autibsp_0 = "d50323ff",
retaa_0 = "d65f0bff",
retab_0 = "d65f0fff", retab_0 = "d65f0fff",
-- Miscellaneous instructions. -- Miscellaneous instructions.
@@ -996,6 +1016,8 @@ local function parse_template(params, template, nparams, pos)
op = op + parse_cond(q, 0); n = n + 1 op = op + parse_cond(q, 0); n = n + 1
elseif p == "c" then elseif p == "c" then
op = op + parse_cond(q, 1); n = n + 1 op = op + parse_cond(q, 1); n = n + 1
elseif p == "t" then
op = op + parse_map(q, map_bti); n = n + 1
else else
assert(false) assert(false)

View File

@@ -695,7 +695,10 @@ local map_br = { -- Branches, exception generating and system instructions.
}, },
{ -- System instructions. { -- System instructions.
shift = 0, mask = 0x3fffff, shift = 0, mask = 0x3fffff,
[0x03201f] = "nop" [0x03201f] = "nop",
[0x03245f] = "bti c",
[0x03249f] = "bti j",
[0x0324df] = "bti jc",
}, },
{ -- Unconditional branch, register. { -- Unconditional branch, register.
shift = 0, mask = 0xfffc1f, shift = 0, mask = 0xfffc1f,
@@ -1171,6 +1174,9 @@ local function disass_ins(ctx)
end end
end end
second0 = true second0 = true
elseif p == " " then
operands[#operands+1] = pat:match(" (.*)")
break
else else
assert(false) assert(false)
end end

View File

@@ -288,6 +288,11 @@
#if !defined(LJ_ABI_PAUTH) && defined(__arm64e__) #if !defined(LJ_ABI_PAUTH) && defined(__arm64e__)
#define LJ_ABI_PAUTH 1 #define LJ_ABI_PAUTH 1
#endif #endif
/* Branch tracking is opt-in: it is only turned on when it has not been set
** already, bit 0 of __ARM_FEATURE_BTI_DEFAULT is set and LUAJIT_ENABLE_CET_BR
** is defined. An undefined __ARM_FEATURE_BTI_DEFAULT evaluates to 0 inside
** #if, so the condition is simply false in that case.
*/
#if !defined(LJ_ABI_BRANCH_TRACK) && (__ARM_FEATURE_BTI_DEFAULT & 1) && \
defined(LUAJIT_ENABLE_CET_BR)
/* See comments about LUAJIT_ENABLE_CET_BR above. */
#define LJ_ABI_BRANCH_TRACK 1
#endif
#define LJ_TARGET_ARM64 1 #define LJ_TARGET_ARM64 1
#define LJ_TARGET_EHRETREG 0 #define LJ_TARGET_EHRETREG 0
#define LJ_TARGET_EHRAREG 30 #define LJ_TARGET_EHRAREG 30

View File

@@ -64,6 +64,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#elif LJ_TARGET_ARM64 #elif LJ_TARGET_ARM64
#if LJ_ABI_BRANCH_TRACK
/* With branch tracking every callback slot holds three 32-bit instruction
** words (BTI, MOVZ, B) instead of two, so it takes 12 bytes rather than the
** default of 8.
*/
#define CALLBACK_MCODE_SLOTSZ 12
#endif
#define CALLBACK_MCODE_HEAD 32 #define CALLBACK_MCODE_HEAD 32
#elif LJ_TARGET_PPC #elif LJ_TARGET_PPC
@@ -88,8 +92,11 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs)
#endif #endif
#ifndef CALLBACK_SLOT2OFS #ifndef CALLBACK_SLOT2OFS
#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + 8*(slot)) #ifndef CALLBACK_MCODE_SLOTSZ
#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/8) #define CALLBACK_MCODE_SLOTSZ 8
#endif
#define CALLBACK_SLOT2OFS(slot) (CALLBACK_MCODE_HEAD + CALLBACK_MCODE_SLOTSZ*(slot))
#define CALLBACK_OFS2SLOT(ofs) (((ofs)-CALLBACK_MCODE_HEAD)/CALLBACK_MCODE_SLOTSZ)
#define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE)) #define CALLBACK_MAX_SLOT (CALLBACK_OFS2SLOT(CALLBACK_MCODE_SIZE))
#endif #endif
@@ -193,6 +200,9 @@ static void *callback_mcode_init(global_State *g, uint32_t *page)
((void **)p)[1] = g; ((void **)p)[1] = g;
p += 4; p += 4;
for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) {
#if LJ_ABI_BRANCH_TRACK
/* BTI landing pad at the start of each slot. Stored through A64I_LE() like
** the MOVZ and B words that follow, so every instruction word in the slot
** gets the same byte-order treatment.
*/
*p++ = A64I_LE(A64I_BTI_C);
#endif
*p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot)); *p++ = A64I_LE(A64I_MOVZw | A64F_D(RID_X9) | A64F_U16(slot));
*p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu)); *p = A64I_LE(A64I_B | A64F_S26((page-p) & 0x03ffffffu));
p++; p++;

View File

@@ -409,6 +409,13 @@ static void emit_call(ASMState *as, ASMFunction target)
} }
} }
#if LJ_ABI_BRANCH_TRACK
/* Emit a branch-target marker: step the machine code pointer back by one
** instruction slot and store a `bti j` there.
*/
static void emit_branch_track(ASMState *as)
{
  as->mcp--;
  *as->mcp = A64I_BTI_J;
}
#endif
/* -- Emit generic operations --------------------------------------------- */ /* -- Emit generic operations --------------------------------------------- */
/* Generic move between two regs. */ /* Generic move between two regs. */

View File

@@ -265,6 +265,10 @@ typedef enum A64Ins {
A64I_BRAAZ = 0xd61f081f, A64I_BRAAZ = 0xd61f081f,
A64I_BLRAAZ = 0xd63f081f, A64I_BLRAAZ = 0xd63f081f,
A64I_BTI_C = 0xd503245f,
A64I_BTI_J = 0xd503249f,
A64I_BTI_JC = 0xd50324df,
A64I_NOP = 0xd503201f, A64I_NOP = 0xd503201f,
/* FP */ /* FP */

View File

@@ -92,6 +92,17 @@
|.macro ret_auth; ret; .endmacro |.macro ret_auth; ret; .endmacro
|.endif |.endif
| |
|// ARM64 branch target identification (BTI).
|// With BRANCH_TRACK set, each macro emits one BTI landing pad instruction;
|// otherwise all three expand to nothing, so they can be used unconditionally.
|// Per the Arm architecture, "bti j" marks a valid target for indirect jumps,
|// "bti c" for indirect calls and "bti jc" for both.
|.if BRANCH_TRACK
|.macro bti_jump; bti j; .endmacro
|.macro bti_call; bti c; .endmacro
|.macro bti_tailcall; bti jc; .endmacro
|.else
|.macro bti_jump; .endmacro
|.macro bti_call; .endmacro
|.macro bti_tailcall; .endmacro
|.endif
|
|//----------------------------------------------------------------------- |//-----------------------------------------------------------------------
| |
|// Stack layout while in interpreter. Must match with lj_frame.h. |// Stack layout while in interpreter. Must match with lj_frame.h.
@@ -439,24 +450,28 @@ static void build_subroutines(BuildCtx *ctx)
| |
|->vm_unwind_c: // Unwind C stack, return from vm_pcall. |->vm_unwind_c: // Unwind C stack, return from vm_pcall.
| // (void *cframe, int errcode) | // (void *cframe, int errcode)
| bti_tailcall
| add fp, CARG1, # SAVE_FP_LR_ | add fp, CARG1, # SAVE_FP_LR_
| mov sp, CARG1 | mov sp, CARG1
| mov CRET1, CARG2 | mov CRET1, CARG2
| ldr L, SAVE_L | ldr L, SAVE_L
| ldr GL, L->glref | ldr GL, L->glref
|->vm_unwind_c_eh: // Landing pad for external unwinder. |->vm_unwind_c_eh: // Landing pad for external unwinder.
| bti_tailcall
| mv_vmstate TMP0w, C | mv_vmstate TMP0w, C
| st_vmstate TMP0w | st_vmstate TMP0w
| b ->vm_leave_unw | b ->vm_leave_unw
| |
|->vm_unwind_ff: // Unwind C stack, return from ff pcall. |->vm_unwind_ff: // Unwind C stack, return from ff pcall.
| // (void *cframe) | // (void *cframe)
| bti_tailcall
| add fp, CARG1, # SAVE_FP_LR_ | add fp, CARG1, # SAVE_FP_LR_
| mov sp, CARG1 | mov sp, CARG1
| ldr L, SAVE_L | ldr L, SAVE_L
| init_constants | init_constants
| ldr GL, L->glref // Setup pointer to global state. | ldr GL, L->glref // Setup pointer to global state.
|->vm_unwind_ff_eh: // Landing pad for external unwinder. |->vm_unwind_ff_eh: // Landing pad for external unwinder.
| bti_tailcall
| mov RC, #16 // 2 results: false + error message. | mov RC, #16 // 2 results: false + error message.
| ldr BASE, L->base | ldr BASE, L->base
| mov_false TMP0 | mov_false TMP0
@@ -632,6 +647,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif |.endif
| |
|->cont_cat: // RA = resultptr, CARG4 = meta base |->cont_cat: // RA = resultptr, CARG4 = meta base
| bti_jump
| ldr INSw, [PC, #-4] | ldr INSw, [PC, #-4]
| sub CARG2, CARG4, #32 | sub CARG2, CARG4, #32
| ldr TMP0, [RA] | ldr TMP0, [RA]
@@ -789,9 +805,11 @@ static void build_subroutines(BuildCtx *ctx)
| sub RB, RB, #0x20000 | sub RB, RB, #0x20000
| csel PC, PC, RB, lo | csel PC, PC, RB, lo
|->cont_nop: |->cont_nop:
| bti_jump
| ins_next | ins_next
| |
|->cont_ra: // RA = resultptr |->cont_ra: // RA = resultptr
| bti_jump
| ldr INSw, [PC, #-4] | ldr INSw, [PC, #-4]
| ldr TMP0, [RA] | ldr TMP0, [RA]
| decode_RA TMP1, INS | decode_RA TMP1, INS
@@ -799,12 +817,14 @@ static void build_subroutines(BuildCtx *ctx)
| b ->cont_nop | b ->cont_nop
| |
|->cont_condt: // RA = resultptr |->cont_condt: // RA = resultptr
| bti_jump
| ldr TMP0, [RA] | ldr TMP0, [RA]
| mov_true TMP1 | mov_true TMP1
| cmp TMP1, TMP0 // Branch if result is true. | cmp TMP1, TMP0 // Branch if result is true.
| b <4 | b <4
| |
|->cont_condf: // RA = resultptr |->cont_condf: // RA = resultptr
| bti_jump
| ldr TMP0, [RA] | ldr TMP0, [RA]
| mov_false TMP1 | mov_false TMP1
| cmp TMP0, TMP1 // Branch if result is false. | cmp TMP0, TMP1 // Branch if result is false.
@@ -956,10 +976,12 @@ static void build_subroutines(BuildCtx *ctx)
| |
|.macro .ffunc, name |.macro .ffunc, name
|->ff_ .. name: |->ff_ .. name:
| bti_jump
|.endmacro |.endmacro
| |
|.macro .ffunc_1, name |.macro .ffunc_1, name
|->ff_ .. name: |->ff_ .. name:
| bti_jump
| ldr CARG1, [BASE] | ldr CARG1, [BASE]
| cmp NARGS8:RC, #8 | cmp NARGS8:RC, #8
| blo ->fff_fallback | blo ->fff_fallback
@@ -967,6 +989,7 @@ static void build_subroutines(BuildCtx *ctx)
| |
|.macro .ffunc_2, name |.macro .ffunc_2, name
|->ff_ .. name: |->ff_ .. name:
| bti_jump
| ldp CARG1, CARG2, [BASE] | ldp CARG1, CARG2, [BASE]
| cmp NARGS8:RC, #16 | cmp NARGS8:RC, #16
| blo ->fff_fallback | blo ->fff_fallback
@@ -1810,6 +1833,7 @@ static void build_subroutines(BuildCtx *ctx)
| |
|->vm_record: // Dispatch target for recording phase. |->vm_record: // Dispatch target for recording phase.
|.if JIT |.if JIT
| bti_jump
| ldrb CARG1w, GL->hookmask | ldrb CARG1w, GL->hookmask
| tst CARG1, #HOOK_VMEVENT // No recording while in vmevent. | tst CARG1, #HOOK_VMEVENT // No recording while in vmevent.
| bne >5 | bne >5
@@ -1825,6 +1849,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif |.endif
| |
|->vm_rethook: // Dispatch target for return hooks. |->vm_rethook: // Dispatch target for return hooks.
| bti_jump
| ldrb TMP2w, GL->hookmask | ldrb TMP2w, GL->hookmask
| tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active? | tbz TMP2w, #HOOK_ACTIVE_SHIFT, >1 // Hook already active?
|5: // Re-dispatch to static ins. |5: // Re-dispatch to static ins.
@@ -1832,6 +1857,7 @@ static void build_subroutines(BuildCtx *ctx)
| br_auth TMP0 | br_auth TMP0
| |
|->vm_inshook: // Dispatch target for instr/line hooks. |->vm_inshook: // Dispatch target for instr/line hooks.
| bti_jump
| ldrb TMP2w, GL->hookmask | ldrb TMP2w, GL->hookmask
| ldr TMP3w, GL->hookcount | ldr TMP3w, GL->hookcount
| tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active? | tbnz TMP2w, #HOOK_ACTIVE_SHIFT, <5 // Hook already active?
@@ -1858,6 +1884,7 @@ static void build_subroutines(BuildCtx *ctx)
| br_auth TMP0 | br_auth TMP0
| |
|->cont_hook: // Continue from hook yield. |->cont_hook: // Continue from hook yield.
| bti_jump
| ldr CARG1, [CARG4, #-40] | ldr CARG1, [CARG4, #-40]
| add PC, PC, #4 | add PC, PC, #4
| str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins. | str CARG1w, SAVE_MULTRES // Restore MULTRES for *M ins.
@@ -1881,6 +1908,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif |.endif
| |
|->vm_callhook: // Dispatch target for call hooks. |->vm_callhook: // Dispatch target for call hooks.
| bti_jump
| mov CARG2, PC | mov CARG2, PC
|.if JIT |.if JIT
| b >1 | b >1
@@ -1910,6 +1938,7 @@ static void build_subroutines(BuildCtx *ctx)
|->cont_stitch: // Trace stitching. |->cont_stitch: // Trace stitching.
|.if JIT |.if JIT
| // RA = resultptr, CARG4 = meta base | // RA = resultptr, CARG4 = meta base
| bti_jump
| ldr RBw, SAVE_MULTRES | ldr RBw, SAVE_MULTRES
| ldr INSw, [PC, #-4] | ldr INSw, [PC, #-4]
| ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace. | ldr TRACE:CARG3, [CARG4, #-40] // Save previous trace.
@@ -1958,6 +1987,7 @@ static void build_subroutines(BuildCtx *ctx)
| |
|->vm_profhook: // Dispatch target for profiler hook. |->vm_profhook: // Dispatch target for profiler hook.
#if LJ_HASPROFILE #if LJ_HASPROFILE
| bti_jump
| mov CARG1, L | mov CARG1, L
| str BASE, L->base | str BASE, L->base
| mov CARG2, PC | mov CARG2, PC
@@ -1979,6 +2009,7 @@ static void build_subroutines(BuildCtx *ctx)
| |
|->vm_exit_handler: |->vm_exit_handler:
|.if JIT |.if JIT
| bti_call
| sub sp, sp, #(64*8) | sub sp, sp, #(64*8)
| savex_, 0, 1 | savex_, 0, 1
| savex_, 2, 3 | savex_, 2, 3
@@ -2029,6 +2060,7 @@ static void build_subroutines(BuildCtx *ctx)
|.endif |.endif
| |
|->vm_exit_interp: |->vm_exit_interp:
| bti_jump
| // CARG1 = MULTRES or negated error code, BASE, PC and GL set. | // CARG1 = MULTRES or negated error code, BASE, PC and GL set.
|.if JIT |.if JIT
| ldr L, SAVE_L | ldr L, SAVE_L
@@ -2106,6 +2138,7 @@ static void build_subroutines(BuildCtx *ctx)
| |
| // int lj_vm_modi(int dividend, int divisor); | // int lj_vm_modi(int dividend, int divisor);
|->vm_modi: |->vm_modi:
| bti_call
| eor CARG4w, CARG1w, CARG2w | eor CARG4w, CARG1w, CARG2w
| cmp CARG4w, #0 | cmp CARG4w, #0
| eor CARG3w, CARG1w, CARG1w, asr #31 | eor CARG3w, CARG1w, CARG1w, asr #31
@@ -2142,6 +2175,7 @@ static void build_subroutines(BuildCtx *ctx)
|// Next idx returned in CRET2w. |// Next idx returned in CRET2w.
|->vm_next: |->vm_next:
|.if JIT |.if JIT
| bti_call
| ldr NEXT_LIM, NEXT_TAB->asize | ldr NEXT_LIM, NEXT_TAB->asize
| ldr NEXT_TMP1, NEXT_TAB->array | ldr NEXT_TMP1, NEXT_TAB->array
|1: // Traverse array part. |1: // Traverse array part.
@@ -2286,6 +2320,26 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
|=>defop: |=>defop:
switch (op) { switch (op) {
#if !LJ_HASJIT
case BC_FORL:
case BC_JFORI:
case BC_JFORL:
case BC_ITERL:
case BC_JITERL:
case BC_LOOP:
case BC_JLOOP:
case BC_FUNCF:
case BC_JFUNCF:
case BC_JFUNCV:
#endif
case BC_FUNCV: /* NYI: compiled vararg functions. */
break; /* Avoid redundant bti instructions. */
default:
| bti_jump
break;
}
switch (op) {
/* -- Comparison ops ---------------------------------------------------- */ /* -- Comparison ops ---------------------------------------------------- */
@@ -4122,6 +4176,19 @@ static void emit_asm_debug(BuildCtx *ctx)
"\t.align 3\n" "\t.align 3\n"
".LEFDE3:\n\n", (int)ctx->codesz - fcofs); ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
#endif #endif
#endif
#if LJ_TARGET_LINUX && LJ_ABI_BRANCH_TRACK
/* Emit a .note.gnu.property ELF note that marks the generated code as
** BTI-compatible. Layout per the AArch64 ELF ABI, all fields 32-bit:
**   namesz = 4, descsz = 16, type = 5 (NT_GNU_PROPERTY_TYPE_0),
**   name   = 0x00554e47 ("GNU" plus NUL terminator, little-endian),
**   property type = 0xc0000000 (GNU_PROPERTY_AARCH64_FEATURE_1_AND),
**   property datasz = 4, data = 1 (BTI feature bit), then 4 bytes of padding
**   to keep the descriptor 8-byte aligned.
*/
fprintf(ctx->fp,
"\t.section .note.gnu.property,\"a\"\n"
"\t.align 3\n"
"\t.long 4\n"
"\t.long 16\n"
"\t.long 5\n"
"\t.long 0x00554e47\n"
"\t.long 0xc0000000\n"
"\t.long 4\n"
"\t.long 1\n"
"\t.long 0\n");
#endif #endif
break; break;
#if !LJ_NO_UNWIND #if !LJ_NO_UNWIND