Compress snapshots using a simple, extensible 1D-compression.

Typically reduces storage overhead for snapshot maps by 60%.
The extensible format is a prerequisite for the next redesign steps:
Eliminate IR_FRAME and implement return-to-lower-frame.
This commit is contained in:
Mike Pall
2010-01-26 21:49:04 +01:00
parent e058714a2e
commit 67ca399a30
11 changed files with 366 additions and 320 deletions

View File

@@ -1696,7 +1696,7 @@ static void optstate_comp(jit_State *J, int cond)
const BCIns *npc = J->pc + 2 + (cond ? bc_j(jmpins) : 0);
SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
/* Avoid re-recording the comparison in side traces. */
J->cur.snapmap[snap->mapofs + snap->nslots] = u32ptr(npc);
J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
J->needsnap = 1;
/* Shrink last snapshot if possible. */
if (bc_a(jmpins) < J->maxslot) {
@@ -2159,61 +2159,62 @@ static void rec_setup_side(jit_State *J, Trace *T)
{
SnapShot *snap = &T->snap[J->exitno];
SnapEntry *map = &T->snapmap[snap->mapofs];
BCReg s, nslots = snap->nslots;
MSize n, nent = snap->nent;
BloomFilter seen = 0;
for (s = 0; s < nslots; s++) {
IRRef ref = snap_ref(map[s]);
if (ref) {
IRIns *ir = &T->ir[ref];
TRef tr = 0;
/* The bloom filter avoids O(nslots^2) overhead for de-duping slots. */
if (bloomtest(seen, ref)) {
BCReg j;
for (j = 0; j < s; j++)
if (snap_ref(map[j]) == ref) {
if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
lua_assert(s != 0);
J->baseslot = s+1;
J->framedepth++;
}
tr = J->slot[j];
goto dupslot;
}
}
bloomset(seen, ref);
switch ((IROp)ir->o) {
case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
if (irt_isfunc(ir->t)) {
if (s != 0) {
/* Emit IR for slots inherited from parent snapshot. */
for (n = 0; n < nent; n++) {
IRRef ref = snap_ref(map[n]);
BCReg s = snap_slot(map[n]);
IRIns *ir = &T->ir[ref];
TRef tr;
/* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
if (bloomtest(seen, ref)) {
MSize j;
for (j = 0; j < n; j++)
if (snap_ref(map[j]) == ref) {
tr = J->slot[snap_slot(map[j])];
if (ir->o == IR_FRAME && irt_isfunc(ir->t)) {
lua_assert(s != 0);
J->baseslot = s+1;
J->framedepth++;
}
tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
} else {
tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void));
tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr);
goto dupslot;
}
break;
case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */
tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
(ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
break;
default: /* Parent refs are already typed and don't need a guard. */
tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
IRSLOAD_INHERIT|IRSLOAD_PARENT);
break;
}
dupslot:
J->slot[s] = tr;
}
bloomset(seen, ref);
switch ((IROp)ir->o) {
/* Only have to deal with constants that can occur in stack slots. */
case IR_KPRI: tr = TREF_PRI(irt_type(ir->t)); break;
case IR_KINT: tr = lj_ir_kint(J, ir->i); break;
case IR_KGC: tr = lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); break;
case IR_KNUM: tr = lj_ir_knum_addr(J, ir_knum(ir)); break;
case IR_FRAME: /* Placeholder FRAMEs don't need a guard. */
if (irt_isfunc(ir->t)) {
if (s != 0) {
J->baseslot = s+1;
J->framedepth++;
}
tr = lj_ir_kfunc(J, ir_kfunc(&T->ir[ir->op2]));
tr = emitir_raw(IRT(IR_FRAME, IRT_FUNC), tr, tr);
} else {
tr = lj_ir_kptr(J, mref(T->ir[ir->op2].ptr, void));
tr = emitir_raw(IRT(IR_FRAME, IRT_PTR), tr, tr);
}
break;
case IR_SLOAD: /* Inherited SLOADs don't need a guard or type check. */
tr = emitir_raw(ir->ot & ~IRT_GUARD, s,
(ir->op2&IRSLOAD_READONLY) | IRSLOAD_INHERIT|IRSLOAD_PARENT);
break;
default: /* Parent refs are already typed and don't need a guard. */
tr = emitir_raw(IRT(IR_SLOAD, irt_type(ir->t)), s,
IRSLOAD_INHERIT|IRSLOAD_PARENT);
break;
}
dupslot:
J->slot[s] = tr;
}
J->base = J->slot + J->baseslot;
J->maxslot = nslots - J->baseslot;
J->maxslot = snap->nslots - J->baseslot;
lj_snap_add(J);
}
@@ -2259,7 +2260,7 @@ void lj_record_setup(jit_State *J)
J->cur.root = (uint16_t)root;
J->cur.startins = BCINS_AD(BC_JMP, 0, 0);
/* Check whether we could at least potentially form an extra loop. */
if (J->exitno == 0 && T->snap[0].nslots == 1 && T->snapmap[0] == 0) {
if (J->exitno == 0 && T->snap[0].nent == 0) {
/* We can narrow a FORL for some side traces, too. */
if (J->pc > J->pt->bc && bc_op(J->pc[-1]) == BC_JFORI &&
bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) {