diff --git a/src/lj_ir.c b/src/lj_ir.c
index 0e56de9c1a..ad44ea14a8 100644
--- a/src/lj_ir.c
+++ b/src/lj_ir.c
@@ -65,49 +65,6 @@ IRCALLDEF(IRCALLCI)
 
 /* -- IR emitter ---------------------------------------------------------- */
 
-/* Grow IR buffer at the top. */
-void lj_ir_growtop(jit_State *J)
-{
-  IRIns *baseir = J->irbuf + J->irbotlim;
-  MSize szins = J->irtoplim - J->irbotlim;
-  if (szins) {
-    baseir = (IRIns *)lj_mem_realloc(J->L, baseir, szins*sizeof(IRIns),
-                                     2*szins*sizeof(IRIns));
-    J->irtoplim = J->irbotlim + 2*szins;
-  } else {
-    baseir = (IRIns *)lj_mem_realloc(J->L, NULL, 0, LJ_MIN_IRSZ*sizeof(IRIns));
-    J->irbotlim = REF_BASE - LJ_MIN_IRSZ/4;
-    J->irtoplim = J->irbotlim + LJ_MIN_IRSZ;
-  }
-  J->cur.ir = J->irbuf = baseir - J->irbotlim;
-}
-
-/* Grow IR buffer at the bottom or shift it up. */
-static void lj_ir_growbot(jit_State *J)
-{
-  IRIns *baseir = J->irbuf + J->irbotlim;
-  MSize szins = J->irtoplim - J->irbotlim;
-  lua_assert(szins != 0);
-  lua_assert(J->cur.nk == J->irbotlim || J->cur.nk-1 == J->irbotlim);
-  if (J->cur.nins + (szins >> 1) < J->irtoplim) {
-    /* More than half of the buffer is free on top: shift up by a quarter. */
-    MSize ofs = szins >> 2;
-    memmove(baseir + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns));
-    J->irbotlim -= ofs;
-    J->irtoplim -= ofs;
-    J->cur.ir = J->irbuf = baseir - J->irbotlim;
-  } else {
-    /* Double the buffer size, but split the growth amongst top/bottom. */
-    IRIns *newbase = lj_mem_newt(J->L, 2*szins*sizeof(IRIns), IRIns);
-    MSize ofs = szins >= 256 ? 128 : (szins >> 1);  /* Limit bottom growth. */
-    memcpy(newbase + ofs, baseir, (J->cur.nins - J->irbotlim)*sizeof(IRIns));
-    lj_mem_free(G(J->L), baseir, szins*sizeof(IRIns));
-    J->irbotlim -= ofs;
-    J->irtoplim = J->irbotlim + 2*szins;
-    J->cur.ir = J->irbuf = newbase - J->irbotlim;
-  }
-}
-
 /* Emit IR without any optimizations. */
 TRef lj_ir_emit(jit_State *J)
 {
@@ -161,25 +118,19 @@ LJ_FUNC TRef lj_ir_ggfload(jit_State *J, IRType t, uintptr_t ofs)
 ** comparisons. The same constant must get the same reference.
 */
 
-/* Get ref of next IR constant and optionally grow IR.
-** Note: this may invalidate all IRIns *!
-*/
+/* Get ref of next IR constant. */
 static LJ_AINLINE IRRef ir_nextk(jit_State *J)
 {
   IRRef ref = J->cur.nk;
-  if (LJ_UNLIKELY(ref <= J->irbotlim)) lj_ir_growbot(J);
   J->cur.nk = --ref;
   return ref;
 }
 
-/* Get ref of next 64 bit IR constant and optionally grow IR.
-** Note: this may invalidate all IRIns *!
-*/
+/* Get ref of next 64 bit IR constant. */
 static LJ_AINLINE IRRef ir_nextk64(jit_State *J)
 {
   IRRef ref = J->cur.nk - 2;
   lua_assert(J->state != LJ_TRACE_ASM);
-  if (LJ_UNLIKELY(ref < J->irbotlim)) lj_ir_growbot(J);
   J->cur.nk = ref;
   return ref;
 }
diff --git a/src/lj_iropt.h b/src/lj_iropt.h
index 214fb1a2c4..1305541607 100644
--- a/src/lj_iropt.h
+++ b/src/lj_iropt.h
@@ -24,13 +24,10 @@ static LJ_AINLINE void lj_ir_set_(jit_State *J, uint16_t ot, IRRef1 a, IRRef1 b)
 #define lj_ir_set(J, ot, a, b) \
   lj_ir_set_(J, (uint16_t)(ot), (IRRef1)(a), (IRRef1)(b))
 
-/* Get ref of next IR instruction and optionally grow IR.
-** Note: this may invalidate all IRIns*!
-*/
+/* Get ref of next IR instruction. */
 static LJ_AINLINE IRRef lj_ir_nextins(jit_State *J)
 {
   IRRef ref = J->cur.nins;
-  if (LJ_UNLIKELY(ref >= J->irtoplim)) lj_ir_growtop(J);
   J->cur.nins = ref + 1;
   return ref;
 }
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 10f7db69a9..f52c8d33d5 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -298,6 +298,13 @@ typedef struct FoldState {
   IRIns right[2];  /* Instruction referenced by right operand. */
 } FoldState;
 
+/* Log entry for a bytecode that was recorded. */
+typedef struct BCRecLog {
+  GCproto *pt;         /* Prototype of bytecode function (or NULL). */
+  BCPos pos;           /* Position of bytecode in prototype. */
+  int32_t framedepth;  /* Frame depth when recorded. */
+} BCRecLog;
+
 /* JIT compiler state. */
 typedef struct jit_State {
   GCtrace cur;  /* Current trace. */
@@ -336,8 +343,6 @@ typedef struct jit_State {
   uint32_t k32[LJ_K32__MAX];  /* Ditto for 4 byte constants. */
 
   IRIns *irbuf;     /* Temp. IR instruction buffer. Biased with REF_BIAS. */
-  IRRef irtoplim;   /* Upper limit of instuction buffer (biased). */
-  IRRef irbotlim;   /* Lower limit of instuction buffer (biased). */
   IRRef loopref;    /* Last loop reference or ref of final LOOP (or 0). */
 
   MSize sizesnap;   /* Size of temp. snapshot buffer. */
@@ -345,6 +350,10 @@ typedef struct jit_State {
   SnapEntry *snapmapbuf;  /* Temp. snapshot map buffer. */
   MSize sizesnapmap;      /* Size of temp. snapshot map buffer. */
 
+  BCRecLog *bclog;     /* Start of recorded bytecode log. */
+  uint32_t nbclog;     /* Number of logged bytecodes. */
+  uint32_t maxbclog;   /* Max entries in the bytecode log. */
+
   PostProc postproc;  /* Required post-processing after execution. */
   uint8_t retryrec;   /* Retry recording. */
diff --git a/src/lj_opt_loop.c b/src/lj_opt_loop.c
index 697089ab2e..6bfd541eff 100644
--- a/src/lj_opt_loop.c
+++ b/src/lj_opt_loop.c
@@ -283,15 +283,7 @@ static void loop_unroll(LoopState *lps)
   /* LOOP separates the pre-roll from the loop body. */
   emitir_raw(IRTG(IR_LOOP, IRT_NIL), 0, 0);
 
-  /* Grow snapshot buffer and map for copy-substituted snapshots.
-  ** Need up to twice the number of snapshots minus #0 and loop snapshot.
-  ** Need up to twice the number of entries plus fallback substitutions
-  ** from the loop snapshot entries for each new snapshot.
-  ** Caveat: both calls may reallocate J->cur.snap and J->cur.snapmap!
-  */
   onsnap = J->cur.nsnap;
-  lj_snap_grow_buf(J, 2*onsnap-2);
-  lj_snap_grow_map(J, J->cur.nsnapmap*2+(onsnap-2)*J->cur.snap[onsnap-1].nent);
 
   /* The loop snapshot is used for fallback substitutions. */
   loopsnap = &J->cur.snap[onsnap-1];
diff --git a/src/lj_record.c b/src/lj_record.c
index cce8cf34e0..f4b3c0e0d5 100644
--- a/src/lj_record.c
+++ b/src/lj_record.c
@@ -1864,6 +1864,13 @@ void lj_record_ins(jit_State *J)
   BCOp op;
   TRef ra, rb, rc;
 
+  if (J->nbclog < J->maxbclog) {
+    BCRecLog *log = &J->bclog[J->nbclog++];
+    log->pt = J->pt;
+    log->pos = J->pt ? proto_bcpos(J->pt, J->pc) : -1;
+    log->framedepth = J->framedepth;
+  }
+
   /* Perform post-processing action before recording the next instruction. */
   if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) {
     switch (J->postproc) {
@@ -2436,6 +2443,8 @@ void lj_record_setup(jit_State *J)
   J->bc_min = NULL;  /* Means no limit. */
   J->bc_extent = ~(MSize)0;
 
+  J->nbclog = 0;
+
   /* Emit instructions for fixed references. Also triggers initial IR alloc. */
   emitir_raw(IRT(IR_BASE, IRT_PGC), J->parent, J->exitno);
   for (i = 0; i <= 2; i++) {
diff --git a/src/lj_snap.c b/src/lj_snap.c
index 4b28bb9004..0390853684 100644
--- a/src/lj_snap.c
+++ b/src/lj_snap.c
@@ -29,31 +29,6 @@
 /* Emit raw IR without passing through optimizations. */
 #define emitir_raw(ot, a, b)  (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
 
-/* -- Snapshot buffer allocation ------------------------------------------ */
-
-/* Grow snapshot buffer. */
-void lj_snap_grow_buf_(jit_State *J, MSize need)
-{
-  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
-  if (need > maxsnap)
-    lj_trace_err(J, LJ_TRERR_SNAPOV);
-  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
-  J->cur.snap = J->snapbuf;
-}
-
-/* Grow snapshot map buffer. */
-void lj_snap_grow_map_(jit_State *J, MSize need)
-{
-  if (need < 2*J->sizesnapmap)
-    need = 2*J->sizesnapmap;
-  else if (need < 64)
-    need = 64;
-  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
-                    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
-  J->cur.snapmap = J->snapmapbuf;
-  J->sizesnapmap = need;
-}
-
 /* -- Snapshot generation ------------------------------------------------- */
 
 /* Add all modified slots to the snapshot. */
@@ -130,7 +105,6 @@ static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
   MSize nent;
   SnapEntry *p;
   /* Conservative estimate. */
-  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
   p = &J->cur.snapmap[nsnapmap];
   nent = snapshot_slots(J, p, nslots);
   snap->nent = (uint8_t)nent;
@@ -157,7 +131,6 @@ void lj_snap_add(jit_State *J)
     nsnapmap = J->cur.snap[--nsnap].mapofs;
   } else {
   nomerge:
-    lj_snap_grow_buf(J, nsnap+1);
     J->cur.nsnap = (uint16_t)(nsnap+1);
   }
   J->mergesnap = 0;
diff --git a/src/lj_snap.h b/src/lj_snap.h
index 509742ea74..11c8b669e2 100644
--- a/src/lj_snap.h
+++ b/src/lj_snap.h
@@ -15,18 +15,5 @@ LJ_FUNC void lj_snap_shrink(jit_State *J);
 LJ_FUNC IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir);
 LJ_FUNC void lj_snap_replay(jit_State *J, GCtrace *T);
 LJ_FUNC const BCIns *lj_snap_restore(jit_State *J, void *exptr);
-LJ_FUNC void lj_snap_grow_buf_(jit_State *J, MSize need);
-LJ_FUNC void lj_snap_grow_map_(jit_State *J, MSize need);
-
-static LJ_AINLINE void lj_snap_grow_buf(jit_State *J, MSize need)
-{
-  if (LJ_UNLIKELY(need > J->sizesnap)) lj_snap_grow_buf_(J, need);
-}
-
-static LJ_AINLINE void lj_snap_grow_map(jit_State *J, MSize need)
-{
-  if (LJ_UNLIKELY(need > J->sizesnapmap)) lj_snap_grow_map_(J, need);
-}
-
 #endif
diff --git a/src/lj_state.c b/src/lj_state.c
index 600d56f493..d5fccf6d4b 100644
--- a/src/lj_state.c
+++ b/src/lj_state.c
@@ -158,6 +158,7 @@ static TValue *cpluaopen(lua_State *L, lua_CFunction dummy, void *ud)
 static void close_state(lua_State *L)
 {
   global_State *g = G(L);
+  jit_State *J = L2J(L);
   lj_func_closeuv(L, tvref(L->stack));
   lj_gc_freeall(g);
   lua_assert(gcref(g->gc.root) == obj2gco(L));
@@ -167,6 +168,10 @@ static void close_state(lua_State *L)
   lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
   lj_buf_free(g, &g->tmpbuf);
   lj_mem_freevec(g, tvref(L->stack), L->stacksize, TValue);
+  lj_mem_free(g, J->bclog, sizeof(BCRecLog)*65536);
+  lj_mem_free(g, J->snapmapbuf, J->sizesnapmap);
+  lj_mem_free(g, J->snapbuf, J->sizesnap);
+  lj_mem_free(g, J->irbuf-REF_BIAS, 65536*sizeof(IRIns));
   lua_assert(g->gc.total == sizeof(GG_State));
 #ifndef LUAJIT_USE_SYSMALLOC
   if (g->allocf == lj_alloc_f)
@@ -181,6 +186,7 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
   GG_State *GG = (GG_State *)f(ud, NULL, 0, sizeof(GG_State));
   lua_State *L = &GG->L;
   global_State *g = &GG->g;
+  jit_State *J = &GG->J;
   if (GG == NULL || !checkptrGC(GG)) return NULL;
   memset(GG, 0, sizeof(GG_State));
   L->gct = ~LJ_TTHREAD;
@@ -206,6 +212,18 @@ LUA_API lua_State *lua_newstate(lua_Alloc f, void *ud)
   g->gc.total = sizeof(GG_State);
   g->gc.pause = LUAI_GCPAUSE;
   g->gc.stepmul = LUAI_GCMUL;
+  /* Statically allocate generous JIT scratch buffers. */
+  J->sizesnap = sizeof(SnapShot)*65536;
+  J->sizesnapmap = sizeof(SnapEntry)*65536;
+  J->snapbuf = (SnapShot *)lj_mem_new(L, J->sizesnap);
+  J->snapmapbuf = (SnapEntry *)lj_mem_new(L, J->sizesnapmap);
+  J->maxbclog = 65536;
+  J->bclog = (BCRecLog *)lj_mem_new(L, sizeof(BCRecLog)*J->maxbclog);
+  J->nbclog = 0;
+  IRIns *irbufmem = (IRIns *)lj_mem_new(L, sizeof(IRIns)*65536);
+  if (irbufmem == NULL || J->snapbuf == NULL || J->snapmapbuf == NULL)
+    return NULL;
+  J->irbuf = irbufmem + REF_BIAS;
   lj_dispatch_init((GG_State *)L);
   L->status = LUA_ERRERR+1;  /* Avoid touching the stack upon memory error. */
   if (lj_vm_cpcall(L, NULL, NULL, cpluaopen) != 0) {
diff --git a/src/lj_trace.c b/src/lj_trace.c
index 6a4d16b8ec..97ca4fbf9a 100644
--- a/src/lj_trace.c
+++ b/src/lj_trace.c
@@ -296,9 +296,6 @@ void lj_trace_freestate(global_State *g)
   }
 #endif
   lj_mcode_free(J);
-  lj_mem_freevec(g, J->snapmapbuf, J->sizesnapmap, SnapEntry);
-  lj_mem_freevec(g, J->snapbuf, J->sizesnap, SnapShot);
-  lj_mem_freevec(g, J->irbuf + J->irbotlim, J->irtoplim - J->irbotlim, IRIns);
   lj_mem_freevec(g, J->trace, J->sizetrace, GCRef);
 }
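
Note on the fixed IR buffer (illustrative, not part of the patch): with lj_ir_growtop()/lj_ir_growbot() gone, the 65536-entry IRIns allocation is made once in lua_newstate() and J->irbuf stays a biased pointer into it, so constant refs below REF_BIAS and instruction refs at or above it index the same array and no emitted reference is ever invalidated by reallocation (hence the dropped "may invalidate all IRIns *" caveats). The standalone sketch below shows the same biased-pointer, no-growth idea at a smaller scale; Slot, IRBuf, nextins() and nextk() are invented names, not LuaJIT API, and the exact bias arithmetic differs from the patch.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define NSLOTS  1024u              /* Scaled-down fixed capacity (the patch uses 65536). */
#define BIAS    0x8000u            /* Refs below BIAS are constants, at/above are instructions. */
#define MINREF  (BIAS - NSLOTS/2)  /* Lowest reference the fixed buffer can hold. */
#define MAXREF  (MINREF + NSLOTS)  /* One past the highest valid reference. */

typedef struct Slot { int32_t op; } Slot;

typedef struct IRBuf {
  Slot *mem;      /* Start of the single fixed allocation. */
  Slot *biased;   /* mem - MINREF, so biased[ref] is valid for MINREF <= ref < MAXREF. */
  uint32_t nins;  /* Next instruction ref, grows upward from BIAS. */
  uint32_t nk;    /* Next constant ref, grows downward from BIAS. */
} IRBuf;

static int irbuf_init(IRBuf *b)
{
  b->mem = calloc(NSLOTS, sizeof(Slot));
  if (b->mem == NULL) return 0;
  b->biased = b->mem - MINREF;  /* Same biasing idiom as the removed baseir - irbotlim. */
  b->nins = BIAS;
  b->nk = BIAS;
  return 1;
}

/* Claim the next instruction slot: no growth, only a hard limit. */
static uint32_t nextins(IRBuf *b)
{
  if (b->nins >= MAXREF) { fputs("IR buffer full\n", stderr); exit(1); }
  return b->nins++;
}

/* Claim the next constant slot, moving downward from the bias. */
static uint32_t nextk(IRBuf *b)
{
  if (b->nk <= MINREF) { fputs("IR buffer full\n", stderr); exit(1); }
  return --b->nk;
}

int main(void)
{
  IRBuf b;
  if (!irbuf_init(&b)) return 1;
  uint32_t ins = nextins(&b);  /* 0x8000: first instruction ref. */
  uint32_t k = nextk(&b);      /* 0x7fff: first constant ref. */
  b.biased[ins].op = 1;        /* Both refs index the same biased pointer... */
  b.biased[k].op = 2;          /* ...and neither is ever invalidated by growth. */
  printf("ins=%#x k=%#x\n", (unsigned)ins, (unsigned)k);
  free(b.mem);                 /* Free via the unbiased pointer, cf. J->irbuf-REF_BIAS. */
  return 0;
}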
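
Likewise for the new bytecode log: lj_record_ins() appends a BCRecLog entry only while nbclog < maxbclog and silently drops the rest, so the fixed 65536-entry buffer never grows and never aborts recording. A minimal sketch of that bounded-append pattern, using invented names (RecLog, reclog_append) rather than LuaJIT API:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct RecEntry {
  uint32_t pos;        /* Bytecode position, analogous to BCRecLog.pos. */
  int32_t framedepth;  /* Frame depth when the entry was logged. */
} RecEntry;

typedef struct RecLog {
  RecEntry *log;  /* Single fixed allocation, sized once at startup. */
  uint32_t n;     /* Number of entries used so far. */
  uint32_t max;   /* Hard capacity; entries beyond it are dropped. */
} RecLog;

static int reclog_init(RecLog *rl, uint32_t max)
{
  rl->log = malloc(sizeof(RecEntry) * max);
  rl->n = 0;
  rl->max = max;
  return rl->log != NULL;
}

/* Append one entry, mirroring the 'if (J->nbclog < J->maxbclog)' guard:
** when the fixed log is full the entry is silently dropped instead of
** growing the buffer or raising an error.
*/
static void reclog_append(RecLog *rl, uint32_t pos, int32_t framedepth)
{
  if (rl->n < rl->max) {
    RecEntry *e = &rl->log[rl->n++];
    e->pos = pos;
    e->framedepth = framedepth;
  }
}

int main(void)
{
  RecLog rl;
  if (!reclog_init(&rl, 65536))  /* Same generous fixed size as the patch. */
    return 1;
  reclog_append(&rl, 0, 0);
  reclog_append(&rl, 1, 1);
  printf("logged %u entries\n", (unsigned)rl.n);
  free(rl.log);
  return 0;
}

In both sketches the trade-off mirrors the patch: a fixed up-front reservation buys pointer stability and simpler recording paths at the cost of hard limits.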