String buffers, part 2e: add serialization string dictionary.

Sponsored by fmad.io.
This commit is contained in:
Mike Pall
2021-06-07 12:03:22 +02:00
parent 4216bdfb2a
commit ac02a120ef
10 changed files with 214 additions and 65 deletions

View File

@@ -29,9 +29,7 @@
#include "lj_serialize.h"
#include "lj_lib.h"
/* ------------------------------------------------------------------------ */
#define LJLIB_MODULE_buffer_method
/* -- Helper functions ---------------------------------------------------- */
/* Check that the first argument is a string buffer. */
static SBufExt *buffer_tobuf(lua_State *L)
@@ -49,11 +47,16 @@ static LJ_AINLINE SBufExt *buffer_tobufw(lua_State *L)
return sbx;
}
/*
** Get the GCudata header for a buffer: casts sbx to GCudata * and steps back
** by one GCudata.
** NOTE(review): this assumes sbx is the payload of a userdata object (as
** obtained via uddata(ud) in buffer_new) -- it must not be applied to an
** SBufExt that lives on the C stack. Confirm the payload layout.
*/
#define buffer_toudata(sbx) ((GCudata *)(sbx)-1)
/* -- Buffer methods ------------------------------------------------------ */
#define LJLIB_MODULE_buffer_method
/*
** Buffer method 'free': release the storage of the buffer passed as the
** first argument and return that same buffer object as the single result.
** NOTE(review): the three lj_bufx_* calls below look like the pre-change pair
** (free with G(L), then re-init) followed by the post-change single call
** (free with L) of a diff whose +/- markers were lost -- confirm against the
** commit before reading this as one function body.
*/
LJLIB_CF(buffer_method_free)
{
SBufExt *sbx = buffer_tobuf(L);
lj_bufx_free(G(L), sbx);
lj_bufx_init(L, sbx);
lj_bufx_free(L, sbx);
L->top = L->base+1; /* Chain buffer object. */
return 1;
}
@@ -83,6 +86,7 @@ LJLIB_CF(buffer_method_skip)
LJLIB_CF(buffer_method_set)
{
SBufExt *sbx = buffer_tobuf(L);
GCobj *ref;
const char *p;
MSize len;
#if LJ_HASFFI
@@ -98,9 +102,11 @@ LJLIB_CF(buffer_method_set)
p = strdata(str);
len = str->len;
}
lj_bufx_free(G(L), sbx);
lj_bufx_init_cow(L, sbx, p, len);
setgcref(sbx->cowref, gcV(L->base+1));
lj_bufx_free(L, sbx);
lj_bufx_set_cow(L, sbx, p, len);
ref = gcV(L->base+1);
setgcref(sbx->cowref, ref);
lj_gc_objbarrier(L, buffer_toudata(sbx), ref);
L->top = L->base+1; /* Chain buffer object. */
return 1;
}
@@ -249,8 +255,7 @@ LJLIB_CF(buffer_method_decode)
/*
** Buffer metamethod '__gc': release the storage of the buffer passed as the
** first argument. Returns no values.
** NOTE(review): the three lj_bufx_* calls below look like the pre-change pair
** (free with G(L), then re-init) followed by the post-change single call
** (free with L) of a diff whose +/- markers were lost -- confirm against the
** commit before reading this as one function body.
*/
LJLIB_CF(buffer_method___gc)
{
SBufExt *sbx = buffer_tobuf(L);
lj_bufx_free(G(L), sbx);
lj_bufx_init(L, sbx);
lj_bufx_free(L, sbx);
return 0;
}
@@ -272,7 +277,7 @@ LJLIB_CF(buffer_method___len)
LJLIB_PUSH("buffer") LJLIB_SET(__metatable)
LJLIB_PUSH(top-1) LJLIB_SET(__index)
/* ------------------------------------------------------------------------ */
/* -- Buffer library functions -------------------------------------------- */
#define LJLIB_MODULE_buffer
@@ -280,16 +285,33 @@ LJLIB_PUSH(top-2) LJLIB_SET(!) /* Set environment. */
/*
** Library function 'new': create a string buffer userdata and push it as the
** single result.
** Arguments, as handled by the code below:
**   - If the first argument is present and is not a table, it is the initial
**     size (nil is accepted and means no size); it is checked to be an
**     integer in the range 0..LJ_MAX_BUF. The options table is then looked
**     for at argument 2.
**   - Otherwise the options table is looked for at argument 1.
**   - If an options argument is present it must be a table. If its "dict"
**     field is a table, that table is prepared with lj_serialize_dict_prep()
**     and stored in the buffer's dict reference.
** NOTE(review): the first five statements (sz/env/ud/sbx initialized at their
** declaration) and the following declarations of the same names look like
** the pre- and post-change versions of a diff whose +/- markers were lost --
** confirm against the commit before reading this as one function body.
*/
LJLIB_CF(buffer_new)
{
MSize sz = L->base == L->top ? 0u :
(MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF);
GCtab *env = tabref(curr_func(L)->c.env);
GCudata *ud = lj_udata_new(L, sizeof(SBufExt), env);
SBufExt *sbx = (SBufExt *)uddata(ud);
MSize sz = 0;
int targ = 1; /* Argument index of the options table. */
GCtab *env, *dict = NULL;
GCudata *ud;
SBufExt *sbx;
/* A non-table first argument is the size; options move to argument 2. */
if (L->base < L->top && !tvistab(L->base)) {
targ = 2;
if (!tvisnil(L->base))
sz = (MSize)lj_lib_checkintrange(L, 1, 0, LJ_MAX_BUF);
}
/* Only inspect the options if that argument was actually passed. */
if (L->base+targ-1 < L->top) {
GCtab *options = lj_lib_checktab(L, targ);
cTValue *opt_dict = lj_tab_getstr(options, lj_str_newlit(L, "dict"));
/* A missing or non-table "dict" field is silently ignored. */
if (opt_dict && tvistab(opt_dict)) {
dict = tabV(opt_dict);
lj_serialize_dict_prep(L, dict);
}
}
env = tabref(curr_func(L)->c.env);
ud = lj_udata_new(L, sizeof(SBufExt), env);
ud->udtype = UDTYPE_BUFFER;
/* NOBARRIER: The GCudata is new (marked white). */
setgcref(ud->metatable, obj2gco(env));
setudataV(L, L->top++, ud);
sbx = (SBufExt *)uddata(ud);
lj_bufx_init(L, sbx);
/* dict is still NULL here if no dictionary table was given. */
setgcref(sbx->dict, obj2gco(dict));
/* Request the initial capacity only for a non-zero size. */
if (sz > 0) lj_buf_need2((SBuf *)sbx, sz);
return 1;
}
@@ -298,7 +320,8 @@ LJLIB_CF(buffer_encode)
{
cTValue *o = lj_lib_checkany(L, 1);
SBufExt sbx;
lj_bufx_init_borrow(L, &sbx, &G(L)->tmpbuf);
memset(&sbx, 0, sizeof(SBufExt));
lj_bufx_set_borrow(L, &sbx, &G(L)->tmpbuf);
lj_serialize_put(&sbx, o);
setstrV(L, L->top++, lj_buf_str(L, (SBuf *)&sbx));
lj_gc_check(L);
@@ -309,7 +332,8 @@ LJLIB_CF(buffer_decode)
{
GCstr *str = lj_lib_checkstrx(L, 1);
SBufExt sbx;
lj_bufx_init_cow(L, &sbx, strdata(str), str->len);
memset(&sbx, 0, sizeof(SBufExt));
lj_bufx_set_cow(L, &sbx, strdata(str), str->len);
/* No need to set sbx.cowref here. */
setnilV(L->top++);
lj_serialize_get(&sbx, L->top-1);

View File

@@ -27,6 +27,7 @@ typedef struct SBufExt {
MRef bsb; /* Borrowed string buffer. */
};
char *r; /* Read pointer. */
GCRef dict; /* Serialization string dictionary table. */
int depth; /* Remaining recursion depth. */
} SBufExt;
@@ -114,19 +115,17 @@ static LJ_AINLINE void lj_bufx_init(lua_State *L, SBufExt *sbx)
setsbufXL(sbx, L, SBUF_FLAG_EXT);
}
/*
** Set up sbx as an extended buffer that borrows the storage of sb: the flags
** are set to SBUF_FLAG_EXT | SBUF_FLAG_BORROW, sbx->bsb refers to sb, the
** read, write and base pointers are set to sb->b and the end pointer to
** sb->e.
** NOTE(review): two signature lines (lj_bufx_init_borrow and
** lj_bufx_set_borrow) and a memset appear below; presumably the old name and
** the memset are the removed side of a diff whose +/- markers were lost (the
** visible callers of lj_bufx_set_borrow do their own memset first) --
** confirm against the commit.
*/
static LJ_AINLINE void lj_bufx_init_borrow(lua_State *L, SBufExt *sbx, SBuf *sb)
static LJ_AINLINE void lj_bufx_set_borrow(lua_State *L, SBufExt *sbx, SBuf *sb)
{
memset(sbx, 0, sizeof(SBufExt));
setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_BORROW);
setmref(sbx->bsb, sb);
sbx->r = sbx->w = sbx->b = sb->b;
sbx->e = sb->e;
}
static LJ_AINLINE void lj_bufx_init_cow(lua_State *L, SBufExt *sbx,
const char *p, MSize len)
static LJ_AINLINE void lj_bufx_set_cow(lua_State *L, SBufExt *sbx,
const char *p, MSize len)
{
memset(sbx, 0, sizeof(SBufExt));
setsbufXL(sbx, L, SBUF_FLAG_EXT | SBUF_FLAG_COW);
sbx->r = sbx->b = (char *)p;
sbx->w = sbx->e = (char *)p + len;
@@ -142,9 +141,12 @@ static LJ_AINLINE void lj_bufx_reset(SBufExt *sbx)
sbx->r = sbx->w = sbx->b;
}
/*
** Release the storage of an extended buffer and leave it empty: the memory
** at sbx->b is freed unless sbufiscow(sbx) is true, then the flags are reset
** to SBUF_FLAG_EXT, the cowref is cleared and the r/w/b/e pointers are set
** to NULL. The dict field is not touched here.
** NOTE(review): two signature lines (taking global_State *g and
** lua_State *L) and two lj_mem_free lines appear below; presumably the
** g-based lines are the removed side of a diff whose +/- markers were lost
** -- confirm against the commit.
*/
static LJ_AINLINE void lj_bufx_free(global_State *g, SBufExt *sbx)
static LJ_AINLINE void lj_bufx_free(lua_State *L, SBufExt *sbx)
{
if (!sbufiscow(sbx)) lj_mem_free(g, sbx->b, sbufsz(sbx));
if (!sbufiscow(sbx)) lj_mem_free(G(L), sbx->b, sbufsz(sbx));
setsbufXL(sbx, L, SBUF_FLAG_EXT);
setgcrefnull(sbx->cowref);
sbx->r = sbx->w = sbx->b = sbx->e = NULL;
}
/* Low-level buffer put operations */

View File

@@ -182,8 +182,10 @@ ERRDEF(FFI_NYICALL, "NYI: cannot call this C function (yet)")
#if LJ_HASBUFFER
/* String buffer errors. */
ERRDEF(BUFFER_BADOPT, "bad options table")
ERRDEF(BUFFER_BADENC, "cannot serialize " LUA_QS)
ERRDEF(BUFFER_BADDEC, "cannot deserialize tag 0x%02x")
ERRDEF(BUFFER_BADDICTX, "cannot deserialize dictionary index %d")
ERRDEF(BUFFER_DEPTH, "too deep to serialize")
ERRDEF(BUFFER_DUPKEY, "duplicate table key")
ERRDEF(BUFFER_EOB, "unexpected end of buffer")

View File

@@ -67,9 +67,10 @@ static void gc_mark(global_State *g, GCobj *o)
gc_markobj(g, tabref(gco2ud(o)->env));
if (LJ_HASBUFFER && gco2ud(o)->udtype == UDTYPE_BUFFER) {
SBufExt *sbx = (SBufExt *)uddata(gco2ud(o));
if (sbufiscow(sbx) && gcref(sbx->cowref) != NULL) {
if (sbufiscow(sbx) && gcref(sbx->cowref))
gc_markobj(g, gcref(sbx->cowref));
}
if (gcref(sbx->dict))
gc_markobj(g, gcref(sbx->dict));
}
} else if (LJ_UNLIKELY(gct == ~LJ_TUPVAL)) {
GCupval *uv = gco2uv(o);

View File

@@ -923,7 +923,7 @@ static LJ_AINLINE void setgcV(lua_State *L, TValue *o, GCobj *v, uint32_t it)
}
/*
** Generate an inline setter called 'name' that stores the GC object v of
** the given type into the TValue o with the given tag, by forwarding to
** setgcV().
** NOTE(review): two signature lines appear below (type *v and const type *v);
** presumably the non-const one is the removed side of a diff whose +/-
** markers were lost -- confirm against the commit.
*/
#define define_setV(name, type, tag) \
static LJ_AINLINE void name(lua_State *L, TValue *o, type *v) \
static LJ_AINLINE void name(lua_State *L, TValue *o, const type *v) \
{ \
setgcV(L, o, obj2gco(v), tag); \
}

View File

@@ -32,7 +32,7 @@ enum {
SER_TAG_NUM,
SER_TAG_TAB, /* 0x08 */
SER_TAG_0x0e = SER_TAG_TAB+6,
SER_TAG_0x0f,
SER_TAG_DICT,
SER_TAG_INT64, /* 0x10 */
SER_TAG_UINT64,
SER_TAG_COMPLEX,
@@ -120,6 +120,26 @@ static LJ_AINLINE char *serialize_ru124(char *r, char *w, uint32_t *pv)
return NULL;
}
/*
** Prepare a string dictionary table for use by the serializer (done once).
** Each string found in the array part at index i gets a hash entry whose
** value is i-1; false entries are skipped and any other non-string entry
** raises LJ_ERR_BUFFER_BADOPT.
*/
void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict)
{
  MSize idx, n;
  /* A table that already has a hash part counts as prepared. */
  if (dict->hmask) return;
  n = lj_tab_len(dict);
  if (!n) return;
  /* Keep the current array size, request a hash part for n entries. */
  lj_tab_resize(L, dict, dict->asize, hsize2hbits(n));
  for (idx = 1; idx <= n && idx < dict->asize; idx++) {
    cTValue *slot = arrayslot(dict, idx);
    if (!tvisstr(slot)) {
      if (!tvisfalse(slot))
        lj_err_caller(L, LJ_ERR_BUFFER_BADOPT);
      continue;
    }
    if (lj_tab_getstr(dict, strV(slot)))
      continue;  /* Ignore dups. */
    lj_tab_newkey(L, dict, slot)->u64 = (uint64_t)(idx-1);
  }
}
/* -- Internal serializer ------------------------------------------------- */
/* Put serialized object into buffer. */
@@ -174,12 +194,45 @@ static char *serialize_put(char *w, SBufExt *sbx, cTValue *o)
}
if (nhash) { /* Write hash entries. */
const Node *node = noderef(t->node) + t->hmask;
for (;; node--)
if (!tvisnil(&node->val)) {
w = serialize_put(w, sbx, &node->key);
w = serialize_put(w, sbx, &node->val);
if (--nhash == 0) break;
}
GCtab *dict = tabref(sbx->dict);
if (LJ_UNLIKELY(dict)) {
for (;; node--)
if (!tvisnil(&node->val)) {
if (LJ_LIKELY(tvisstr(&node->key))) {
/* Inlined lj_tab_getstr is 30% faster. */
const GCstr *str = strV(&node->key);
Node *n = hashstr(dict, str);
do {
if (tvisstr(&n->key) && strV(&n->key) == str) {
uint32_t idx = n->val.u32.lo;
w = serialize_more(w, sbx, 1+5);
*w++ = SER_TAG_DICT;
w = serialize_wu124(w, idx);
break;
}
n = nextnode(n);
if (!n) {
MSize len = str->len;
w = serialize_more(w, sbx, 5+len);
w = serialize_wu124(w, SER_TAG_STR + len);
w = lj_buf_wmem(w, strdata(str), len);
break;
}
} while (1);
} else {
w = serialize_put(w, sbx, &node->key);
}
w = serialize_put(w, sbx, &node->val);
if (--nhash == 0) break;
}
} else {
for (;; node--)
if (!tvisnil(&node->val)) {
w = serialize_put(w, sbx, &node->key);
w = serialize_put(w, sbx, &node->val);
if (--nhash == 0) break;
}
}
}
sbx->depth++;
#if LJ_HASFFI
@@ -266,6 +319,16 @@ static char *serialize_get(char *r, SBufExt *sbx, TValue *o)
if (!tvisnum(o)) setnanV(o);
} else if (tp <= SER_TAG_TRUE) {
setpriV(o, ~tp);
} else if (tp == SER_TAG_DICT) {
GCtab *dict;
uint32_t idx;
r = serialize_ru124(r, w, &idx);
idx++;
dict = tabref(sbx->dict);
if (dict && idx < dict->asize && tvisstr(arrayslot(dict, idx)))
copyTV(sbufL(sbx), o, arrayslot(dict, idx));
else
lj_err_callerv(sbufL(sbx), LJ_ERR_BUFFER_BADDICTX, idx);
} else if (tp >= SER_TAG_TAB && tp < SER_TAG_TAB+6) {
uint32_t narray = 0, nhash = 0;
GCtab *t;

View File

@@ -13,6 +13,7 @@
#define LJ_SERIALIZE_DEPTH 100 /* Default depth. */
LJ_FUNC void LJ_FASTCALL lj_serialize_dict_prep(lua_State *L, GCtab *dict);
LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_put(SBufExt *sbx, cTValue *o);
LJ_FUNC SBufExt * LJ_FASTCALL lj_serialize_get(SBufExt *sbx, TValue *o);

View File

@@ -16,25 +16,6 @@
/* -- Object hashing ------------------------------------------------------ */
/* Map a hash value to a node: mask it with the table hash mask and index. */
static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
{
  Node *nodes = noderef(t->node);
  uint32_t slot = hash & t->hmask;
  return nodes + slot;
}
/* String IDs are generated when a string is interned. */
/* Node for a string key: the string's sid field is used as the hash. */
#define hashstr(t, s) hashmask(t, (s)->sid)
/* Node for a pair of 32 bit words, combined with hashrot(). */
#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
/*
** Node for a number key: hashes its lo word and its hi word shifted left by
** one. NOTE(review): the shift presumably drops the sign bit -- confirm.
*/
#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
/* Node for a GC reference key; the inputs depend on LJ_GC64. */
#if LJ_GC64
/* Hashes the low and the high 32 bits of the reference. */
#define hashgcref(t, r) \
hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
#else
/* Hashes the reference and the reference offset by HASH_BIAS. */
#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
#endif
/* Hash an arbitrary key and return its anchor position in the hash table. */
static Node *hashkey(const GCtab *t, cTValue *key)
{
@@ -413,7 +394,7 @@ cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key)
return NULL;
}
cTValue *lj_tab_getstr(GCtab *t, GCstr *key)
cTValue *lj_tab_getstr(GCtab *t, const GCstr *key)
{
Node *n = hashstr(t, key);
do {
@@ -546,7 +527,7 @@ TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key)
return lj_tab_newkey(L, t, &k);
}
TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key)
TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key)
{
TValue k;
Node *n = hashstr(t, key);

View File

@@ -31,6 +31,25 @@ static LJ_AINLINE uint32_t hashrot(uint32_t lo, uint32_t hi)
return hi;
}
/* Map a hash value to a node: mask it with the table hash mask and index. */
static LJ_AINLINE Node *hashmask(const GCtab *t, uint32_t hash)
{
  Node *nodes = noderef(t->node);
  uint32_t slot = hash & t->hmask;
  return nodes + slot;
}
/* String IDs are generated when a string is interned. */
/* Node for a string key: the string's sid field is used as the hash. */
#define hashstr(t, s) hashmask(t, (s)->sid)
/* Node for a pair of 32 bit words, combined with hashrot(). */
#define hashlohi(t, lo, hi) hashmask((t), hashrot((lo), (hi)))
/*
** Node for a number key: hashes its lo word and its hi word shifted left by
** one. NOTE(review): the shift presumably drops the sign bit -- confirm.
*/
#define hashnum(t, o) hashlohi((t), (o)->u32.lo, ((o)->u32.hi << 1))
/* Node for a GC reference key; the inputs depend on LJ_GC64. */
#if LJ_GC64
/* Hashes the low and the high 32 bits of the reference. */
#define hashgcref(t, r) \
hashlohi((t), (uint32_t)gcrefu(r), (uint32_t)(gcrefu(r) >> 32))
#else
/* Hashes the reference and the reference offset by HASH_BIAS. */
#define hashgcref(t, r) hashlohi((t), gcrefu(r), gcrefu(r) + HASH_BIAS)
#endif
#define hsize2hbits(s) ((s) ? ((s)==1 ? 1 : 1+lj_fls((uint32_t)((s)-1))) : 0)
LJ_FUNCA GCtab *lj_tab_new(lua_State *L, uint32_t asize, uint32_t hbits);
@@ -50,14 +69,14 @@ LJ_FUNCA void lj_tab_reasize(lua_State *L, GCtab *t, uint32_t nasize);
/* Caveat: all getters except lj_tab_get() can return NULL! */
LJ_FUNCA cTValue * LJ_FASTCALL lj_tab_getinth(GCtab *t, int32_t key);
LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, GCstr *key);
LJ_FUNC cTValue *lj_tab_getstr(GCtab *t, const GCstr *key);
LJ_FUNCA cTValue *lj_tab_get(lua_State *L, GCtab *t, cTValue *key);
/* Caveat: all setters require a write barrier for the stored value. */
LJ_FUNCA TValue *lj_tab_newkey(lua_State *L, GCtab *t, cTValue *key);
LJ_FUNCA TValue *lj_tab_setinth(lua_State *L, GCtab *t, int32_t key);
LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, GCstr *key);
LJ_FUNC TValue *lj_tab_setstr(lua_State *L, GCtab *t, const GCstr *key);
LJ_FUNC TValue *lj_tab_set(lua_State *L, GCtab *t, cTValue *key);
#define inarray(t, key) ((MSize)(key) < (MSize)(t)->asize)