Compress snapshots using a simple, extensible 1D-compression.
Typically reduces storage overhead for snapshot maps by 60%. The extensible format is a prerequisite for the next redesign steps: Eliminate IR_FRAME and implement return-to-lower-frame.
This commit is contained in:
247
src/lj_snap.c
247
src/lj_snap.c
@@ -23,28 +23,50 @@
|
||||
/* Some local macros to save typing. Undef'd at the end. */
|
||||
#define IR(ref) (&J->cur.ir[(ref)])
|
||||
|
||||
/* -- Snapshot buffer allocation ------------------------------------------ */
|
||||
|
||||
/* Grow snapshot buffer. */
|
||||
void lj_snap_grow_buf_(jit_State *J, MSize need)
|
||||
{
|
||||
MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
|
||||
if (need > maxsnap)
|
||||
lj_trace_err(J, LJ_TRERR_SNAPOV);
|
||||
lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
|
||||
J->cur.snap = J->snapbuf;
|
||||
}
|
||||
|
||||
/* Grow snapshot map buffer. */
|
||||
void lj_snap_grow_map_(jit_State *J, MSize need)
|
||||
{
|
||||
if (need < 2*J->sizesnapmap)
|
||||
need = 2*J->sizesnapmap;
|
||||
else if (need < 64)
|
||||
need = 64;
|
||||
J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
|
||||
J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
|
||||
J->cur.snapmap = J->snapmapbuf;
|
||||
J->sizesnapmap = need;
|
||||
}
|
||||
|
||||
/* -- Snapshot generation ------------------------------------------------- */
|
||||
|
||||
/* NYI: Snapshots are in need of a redesign. The current storage model for
|
||||
** snapshot maps is too wasteful. They could be compressed (1D or 2D) and
|
||||
** made more flexible at the same time. Iterators should no longer need to
|
||||
** skip unmodified slots. IR_FRAME should be eliminated, too.
|
||||
*/
|
||||
/* NYI: IR_FRAME should be eliminated, too. */
|
||||
|
||||
/* Add all modified slots to the snapshot. */
|
||||
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
|
||||
{
|
||||
BCReg s;
|
||||
MSize n = 0;
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = tref_ref(J->slot[s]);
|
||||
if (ref) {
|
||||
IRIns *ir = IR(ref);
|
||||
if (ir->o == IR_SLOAD && ir->op1 == s && !(ir->op2 & IRSLOAD_INHERIT))
|
||||
ref = 0;
|
||||
if (!(ir->o == IR_SLOAD && ir->op1 == s &&
|
||||
!(ir->op2 & IRSLOAD_INHERIT)))
|
||||
map[n++] = SNAP(s, ir->o == IR_FRAME ? SNAP_FRAME : 0, ref);
|
||||
}
|
||||
map[s] = (SnapEntry)ref;
|
||||
}
|
||||
return nslots;
|
||||
return n;
|
||||
}
|
||||
|
||||
/* Add frame links at the end of the snapshot. */
|
||||
@@ -53,17 +75,17 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
|
||||
cTValue *frame = J->L->base - 1;
|
||||
cTValue *lim = J->L->base - J->baseslot;
|
||||
MSize f = 0;
|
||||
map[f++] = u32ptr(J->pc);
|
||||
while (frame > lim) {
|
||||
map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */
|
||||
while (frame > lim) { /* Backwards traversal of all frames above base. */
|
||||
if (frame_islua(frame)) {
|
||||
map[f++] = u32ptr(frame_pc(frame));
|
||||
map[f++] = SNAP_MKPC(frame_pc(frame));
|
||||
frame = frame_prevl(frame);
|
||||
} else if (frame_ispcall(frame)) {
|
||||
map[f++] = (uint32_t)frame_ftsz(frame);
|
||||
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
|
||||
frame = frame_prevd(frame);
|
||||
} else if (frame_iscont(frame)) {
|
||||
map[f++] = (uint32_t)frame_ftsz(frame);
|
||||
map[f++] = u32ptr(frame_contpc(frame));
|
||||
map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
|
||||
map[f++] = SNAP_MKPC(frame_contpc(frame));
|
||||
frame = frame_prevd(frame);
|
||||
} else {
|
||||
lua_assert(0);
|
||||
@@ -76,28 +98,19 @@ static MSize snapshot_framelinks(jit_State *J, SnapEntry *map)
|
||||
/* Record the current stack state in *snap.
** Slot entries are written to the snapshot map starting at offset nsnapmap,
** immediately followed by the frame links. The snapshot header then stores
** the map offset, the current instruction count, the number of entries,
** the number of frame links and the number of stack slots.
** Note: the header fields are narrowed by the casts below (uint16_t for
** the map offset, uint8_t for nent, nframelinks and nslots).
*/
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent, nframelinks;
  SnapEntry *p;
  /* Conservative estimate. Continuation frames need 2 slots. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)J->framedepth*2+1);
  p = &J->cur.snapmap[nsnapmap];  /* Fetch only after a possible regrow. */
  nent = snapshot_slots(J, p, nslots);
  nframelinks = snapshot_framelinks(J, p + nent);
  J->cur.nsnapmap = (uint16_t)(nsnapmap + nent + nframelinks);
  snap->mapofs = (uint16_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->nent = (uint8_t)nent;
  snap->nframelinks = (uint8_t)nframelinks;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
}
|
||||
|
||||
@@ -111,14 +124,7 @@ void lj_snap_add(jit_State *J)
|
||||
(nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins)) {
|
||||
nsnapmap = J->cur.snap[--nsnap].mapofs;
|
||||
} else {
|
||||
/* Need to grow snapshot buffer? */
|
||||
if (LJ_UNLIKELY(nsnap >= J->sizesnap)) {
|
||||
MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
|
||||
if (nsnap >= maxsnap)
|
||||
lj_trace_err(J, LJ_TRERR_SNAPOV);
|
||||
lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
|
||||
J->cur.snap = J->snapbuf;
|
||||
}
|
||||
lj_snap_grow_buf(J, nsnap+1);
|
||||
J->cur.nsnap = (uint16_t)(nsnap+1);
|
||||
}
|
||||
J->mergesnap = 0;
|
||||
@@ -131,14 +137,21 @@ void lj_snap_shrink(jit_State *J)
|
||||
{
|
||||
BCReg nslots = J->baseslot + J->maxslot;
|
||||
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
|
||||
SnapEntry *oflinks = &J->cur.snapmap[snap->mapofs + snap->nslots];
|
||||
SnapEntry *nflinks = &J->cur.snapmap[snap->mapofs + nslots];
|
||||
uint32_t s, nframelinks = snap->nframelinks;
|
||||
SnapEntry *map = &J->cur.snapmap[snap->mapofs];
|
||||
MSize nent = snap->nent;
|
||||
lua_assert(nslots < snap->nslots);
|
||||
snap->nslots = (uint8_t)nslots;
|
||||
J->cur.nsnapmap = (uint16_t)(snap->mapofs + nslots + nframelinks);
|
||||
for (s = 0; s < nframelinks; s++) /* Move frame links down. */
|
||||
nflinks[s] = oflinks[s];
|
||||
if (nent > 0 && snap_slot(map[nent-1]) >= nslots) {
|
||||
MSize s, delta, nframelinks = snap->nframelinks;
|
||||
for (nent--; nent > 0 && snap_slot(map[nent-1]) >= nslots; nent--)
|
||||
;
|
||||
delta = snap->nent - nent;
|
||||
snap->nent = (uint8_t)nent;
|
||||
J->cur.nsnapmap = (uint16_t)(snap->mapofs + nent + nframelinks);
|
||||
map += nent;
|
||||
for (s = 0; s < nframelinks; s++) /* Move frame links down. */
|
||||
map[s] = map[s+delta];
|
||||
}
|
||||
}
|
||||
|
||||
/* -- Snapshot access ----------------------------------------------------- */
|
||||
@@ -167,21 +180,24 @@ static RegSP snap_renameref(Trace *T, SnapNo lim, IRRef ref, RegSP rs)
|
||||
return rs;
|
||||
}
|
||||
|
||||
/* Convert a snapshot into a linear slot -> RegSP map. */
|
||||
/* Convert a snapshot into a linear slot -> RegSP map.
|
||||
** Note: unused slots are not initialized!
|
||||
*/
|
||||
void lj_snap_regspmap(uint16_t *rsmap, Trace *T, SnapNo snapno)
|
||||
{
|
||||
SnapShot *snap = &T->snap[snapno];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
MSize n, nent = snap->nent;
|
||||
SnapEntry *map = &T->snapmap[snap->mapofs];
|
||||
BloomFilter rfilt = snap_renamefilter(T, snapno);
|
||||
for (s = 0; s < nslots; s++) {
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
for (n = 0; n < nent; n++) {
|
||||
SnapEntry sn = map[n];
|
||||
IRRef ref = snap_ref(sn);
|
||||
if (!irref_isk(ref)) {
|
||||
IRIns *ir = &T->ir[ref];
|
||||
uint32_t rs = ir->prev;
|
||||
if (bloomtest(rfilt, ref))
|
||||
rs = snap_renameref(T, snapno, ref, rs);
|
||||
rsmap[s] = (uint16_t)rs;
|
||||
rsmap[snap_slot(sn)] = (uint16_t)rs;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -193,89 +209,88 @@ void lj_snap_restore(jit_State *J, void *exptr)
|
||||
SnapNo snapno = J->exitno; /* For now, snapno == exitno. */
|
||||
Trace *T = J->trace[J->parent];
|
||||
SnapShot *snap = &T->snap[snapno];
|
||||
BCReg s, nslots = snap->nslots;
|
||||
MSize n, nent = snap->nent;
|
||||
SnapEntry *map = &T->snapmap[snap->mapofs];
|
||||
SnapEntry *flinks = map + nslots + snap->nframelinks;
|
||||
TValue *o, *newbase, *ntop;
|
||||
SnapEntry *flinks = map + nent + snap->nframelinks;
|
||||
BCReg nslots = snap->nslots;
|
||||
TValue *frame;
|
||||
BloomFilter rfilt = snap_renamefilter(T, snapno);
|
||||
lua_State *L = J->L;
|
||||
|
||||
/* Make sure the stack is big enough for the slots from the snapshot. */
|
||||
if (L->base + nslots >= L->maxstack) {
|
||||
if (LJ_UNLIKELY(L->base + nslots > L->maxstack)) {
|
||||
L->top = curr_topL(L);
|
||||
lj_state_growstack(L, nslots - curr_proto(L)->framesize);
|
||||
}
|
||||
|
||||
/* Fill stack slots with data from the registers and spill slots. */
|
||||
newbase = NULL;
|
||||
ntop = L->base;
|
||||
for (s = 0, o = L->base-1; s < nslots; s++, o++) {
|
||||
IRRef ref = snap_ref(map[s]);
|
||||
if (ref) {
|
||||
IRIns *ir = &T->ir[ref];
|
||||
if (irref_isk(ref)) { /* Restore constant slot. */
|
||||
lj_ir_kvalue(L, o, ir);
|
||||
} else {
|
||||
IRType1 t = ir->t;
|
||||
RegSP rs = ir->prev;
|
||||
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
|
||||
rs = snap_renameref(T, snapno, ref, rs);
|
||||
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
|
||||
int32_t *sps = &ex->spill[regsp_spill(rs)];
|
||||
if (irt_isinteger(t)) {
|
||||
setintV(o, *sps);
|
||||
} else if (irt_isnum(t)) {
|
||||
o->u64 = *(uint64_t *)sps;
|
||||
} else {
|
||||
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
|
||||
setgcrefi(o->gcr, *sps);
|
||||
setitype(o, irt_toitype(t));
|
||||
}
|
||||
} else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */
|
||||
Reg r = regsp_reg(rs);
|
||||
if (irt_isinteger(t)) {
|
||||
setintV(o, ex->gpr[r-RID_MIN_GPR]);
|
||||
} else if (irt_isnum(t)) {
|
||||
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
|
||||
} else {
|
||||
if (!irt_ispri(t))
|
||||
setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
|
||||
setitype(o, irt_toitype(t));
|
||||
}
|
||||
} else { /* Restore frame slot. */
|
||||
lua_assert(ir->o == IR_FRAME);
|
||||
/* This works for both PTR and FUNC IR_FRAME. */
|
||||
setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
|
||||
if (s != 0) /* Do not overwrite link to previous frame. */
|
||||
o->fr.tp.ftsz = (int32_t)*--flinks;
|
||||
if (irt_isfunc(ir->t)) {
|
||||
GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
|
||||
if (isluafunc(fn)) {
|
||||
TValue *fs;
|
||||
fs = o+1 + funcproto(fn)->framesize;
|
||||
if (fs > ntop) ntop = fs; /* Update top for newly added frames. */
|
||||
if (s != 0) newbase = o+1;
|
||||
frame = L->base-1;
|
||||
for (n = 0; n < nent; n++) {
|
||||
IRRef ref = snap_ref(map[n]);
|
||||
BCReg s = snap_slot(map[n]);
|
||||
TValue *o = &frame[s]; /* Stack slots are relative to start frame. */
|
||||
IRIns *ir = &T->ir[ref];
|
||||
if (irref_isk(ref)) { /* Restore constant slot. */
|
||||
lj_ir_kvalue(L, o, ir);
|
||||
} else {
|
||||
IRType1 t = ir->t;
|
||||
RegSP rs = ir->prev;
|
||||
if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
|
||||
rs = snap_renameref(T, snapno, ref, rs);
|
||||
if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */
|
||||
int32_t *sps = &ex->spill[regsp_spill(rs)];
|
||||
if (irt_isinteger(t)) {
|
||||
setintV(o, *sps);
|
||||
} else if (irt_isnum(t)) {
|
||||
o->u64 = *(uint64_t *)sps;
|
||||
} else {
|
||||
lua_assert(!irt_ispri(t)); /* PRI refs never have a spill slot. */
|
||||
setgcrefi(o->gcr, *sps);
|
||||
setitype(o, irt_toitype(t));
|
||||
}
|
||||
} else if (ra_hasreg(regsp_reg(rs))) { /* Restore from register. */
|
||||
Reg r = regsp_reg(rs);
|
||||
if (irt_isinteger(t)) {
|
||||
setintV(o, ex->gpr[r-RID_MIN_GPR]);
|
||||
} else if (irt_isnum(t)) {
|
||||
setnumV(o, ex->fpr[r-RID_MIN_FPR]);
|
||||
} else {
|
||||
if (!irt_ispri(t))
|
||||
setgcrefi(o->gcr, ex->gpr[r-RID_MIN_GPR]);
|
||||
setitype(o, irt_toitype(t));
|
||||
}
|
||||
} else { /* Restore frame slot. */
|
||||
lua_assert(ir->o == IR_FRAME);
|
||||
/* This works for both PTR and FUNC IR_FRAME. */
|
||||
setgcrefp(o->fr.func, mref(T->ir[ir->op2].ptr, void));
|
||||
if (s != 0) /* Do not overwrite link to previous frame. */
|
||||
o->fr.tp.ftsz = (int32_t)*--flinks;
|
||||
if (irt_isfunc(ir->t)) {
|
||||
GCfunc *fn = gco2func(gcref(T->ir[ir->op2].gcr));
|
||||
if (isluafunc(fn)) {
|
||||
MSize framesize = funcproto(fn)->framesize;
|
||||
TValue *fs;
|
||||
L->base = ++o;
|
||||
if (LJ_UNLIKELY(o + framesize > L->maxstack)) { /* Grow again? */
|
||||
ptrdiff_t fsave = savestack(L, frame);
|
||||
L->top = o;
|
||||
lj_state_growstack(L, framesize);
|
||||
frame = restorestack(L, fsave);
|
||||
o = L->top;
|
||||
}
|
||||
fs = o + framesize;
|
||||
if (s == 0) /* Only partially clear tail call frame at #0. */
|
||||
o = &frame[nslots];
|
||||
while (o < fs) /* Clear slots of newly added frames. */
|
||||
setnilV(o++);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
lua_assert(!newbase);
|
||||
}
|
||||
}
|
||||
if (newbase) L->base = newbase;
|
||||
if (ntop >= L->maxstack) { /* Need to grow the stack again. */
|
||||
MSize need = (MSize)(ntop - o);
|
||||
L->top = o;
|
||||
lj_state_growstack(L, need);
|
||||
o = L->top;
|
||||
ntop = o + need;
|
||||
}
|
||||
L->top = curr_topL(L);
|
||||
for (; o < ntop; o++) /* Clear remainder of newly added frames. */
|
||||
setnilV(o);
|
||||
lua_assert(map + nslots == flinks-1);
|
||||
J->pc = (const BCIns *)(uintptr_t)(*--flinks);
|
||||
J->pc = snap_pc(*--flinks);
|
||||
lua_assert(map + nent == flinks);
|
||||
}
|
||||
|
||||
#undef IR
|
||||
|
||||
Reference in New Issue
Block a user