From 9551dd3be671828b0539e485f4809841bc2e4a8b Mon Sep 17 00:00:00 2001 From: fsfod Date: Fri, 15 Dec 2017 19:31:51 +0000 Subject: [PATCH 1/8] Add per function hot counting Function hot counters have been moved from the shared hashtable to inside the function prototype object, which is shared only between the closures of a function. The function hot counter field was added to the end of the GCproto struct so that it is right next to the function header bytecode in memory, which decrements the counter. Since the function hot counters are now spread out across the function proto objects, this will increase the number of dirty cache lines that eventually need to be flushed back to memory vs. only two cache lines for the shared hotcount hashtable, but this stops happening once the function is JIT'ed. Updated the interpreter assembly for the Lua function header bytecodes to use the new hot counter location. Added a separate JIT param for the function hot trigger count, currently set to the same effective value as it had with the shared hotcount table system: 112 calls. 
--- src/lib_jit.c | 45 ++++ src/lj_bcread.c | 5 + src/lj_jit.h | 3 +- src/lj_obj.h | 4 + src/lj_parse.c | 4 + src/lj_record.c | 4 +- src/lj_trace.c | 32 ++- src/vm_x64.dasc | 5 +- src/vm_x86.dasc | 5 +- .../test/trace/hotcounters.lua | 239 ++++++++++++++++++ tests/LuaJIT-test-cleanup/test/trace/index | 1 + 11 files changed, 331 insertions(+), 16 deletions(-) create mode 100644 tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua diff --git a/src/lib_jit.c b/src/lib_jit.c index 22ca0a1a24..8e285b5b1b 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -98,6 +98,51 @@ static void flagbits_to_strings(lua_State *L, uint32_t flags, uint32_t base, } #endif +static GCproto *check_Lproto(lua_State *L, int nolua); + +LJLIB_CF(jit_sethot) +{ + GCproto *pt = check_Lproto(L, 0); + int32_t count = lj_lib_checkint(L, 2); + int32_t loopid = lj_lib_optint(L, 3, -1); + /* + ** Loops decrement the count by two instead of one like functions when using + ** shared hot counters. + */ + if (loopid != -1) { + count = count * 2; + } + + if (count < 0 || count > 0xffff) { + luaL_error(L, "bad hot count value"); + } + + if (loopid == -1) { + int old = pt->hotcount; + pt->hotcount = count; + setintV(L->top-1, old); + return 1; + } else if (loopid > 0) { + BCIns *bc = proto_bc(pt); + MSize hci = 0, i = 0; + for (i = 0; i != pt->sizebc; i++) { + int iscountbc = bc_op(bc[i]) == BC_FORL || bc_op(bc[i]) == BC_ITERL || + bc_op(bc[i]) == BC_LOOP; + if (iscountbc) { + if (++hci == loopid) { + BCIns *hcbc = bc + i; + int old = hotcount_get(L2GG(L), hcbc); + hotcount_set(L2GG(L), hcbc, count); + setintV(L->top-1, old); + return 1; + } + } + } + } + lj_err_callerv(L, LJ_ERR_IDXRNG); + return 0; +} + LJLIB_CF(jit_status) { #if LJ_HASJIT diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 48c5e7c7f5..6a8c9cd560 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -13,6 +13,8 @@ #include "lj_str.h" #include "lj_tab.h" #include "lj_bc.h" +#include "lj_jit.h" +#include "lj_dispatch.h" #if LJ_HASFFI #include 
"lj_ctype.h" #include "lj_cdata.h" @@ -381,6 +383,9 @@ GCproto *lj_bcread_proto(LexState *ls) setmref(pt->uvinfo, NULL); setmref(pt->varinfo, NULL); } +#if LJ_HASJIT + pt->hotcount = L2J(ls->L)->param[JIT_P_hotfunc] - 1; +#endif return pt; } diff --git a/src/lj_jit.h b/src/lj_jit.h index f37e7927a4..a99a402790 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -104,7 +104,8 @@ _(\007, maxsnap, 500) /* Max. # of snapshots for a trace. */ \ _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \ \ - _(\007, hotloop, 56) /* # of iter. to detect a hot loop/call. */ \ + _(\007, hotloop, 56) /* # of iter. to detect a hot loop. */ \ + _(\007, hotfunc, 56*2) /* # of iter. to detect a hot function. */ \ _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ _(\007, tryside, 4) /* # of attempts to compile a side trace. */ \ \ diff --git a/src/lj_obj.h b/src/lj_obj.h index c7e47422bf..67ec63e093 100644 --- a/src/lj_obj.h +++ b/src/lj_obj.h @@ -377,6 +377,10 @@ typedef struct GCproto { MRef lineinfo; /* Compressed map from bytecode ins. to source line. */ MRef uvinfo; /* Upvalue names. */ MRef varinfo; /* Names and compressed extents of local variables. */ +#if LJ_HASJIT + uint16_t unused; + uint16_t hotcount; /* Hot counter. */ +#endif } GCproto; /* Flags for prototype. */ diff --git a/src/lj_parse.c b/src/lj_parse.c index c8efafadb3..c165234a14 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -27,6 +27,7 @@ #include "lj_parse.h" #include "lj_vm.h" #include "lj_vmevent.h" +#include "lj_dispatch.h" /* -- Parser structures and definitions ----------------------------------- */ @@ -1575,6 +1576,9 @@ static GCproto *fs_finish(LexState *ls, BCLine line) pt->numparams = fs->numparams; pt->framesize = fs->framesize; setgcref(pt->chunkname, obj2gco(ls->chunkname)); +#if LJ_HASJIT + pt->hotcount = L2J(ls->L)->param[JIT_P_hotfunc] - 1; +#endif /* Close potentially uninitialized gap between bc and kgc. 
*/ *(uint32_t *)((char *)pt + ofsk - sizeof(GCRef)*(fs->nkgc+1)) = 0; diff --git a/src/lj_record.c b/src/lj_record.c index 1a2b1c5d5b..5e5d524f49 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1667,9 +1667,11 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) } else { if (count > J->param[JIT_P_callunroll]) { if (lnk) { /* Possible tail- or up-recursion. */ + GCproto *pt = (GCproto *)(((char *)J->pc)- sizeof(GCproto)); + lua_assert(bc_op(*J->pc) == BC_JFUNCF); lj_trace_flush(J, lnk); /* Flush trace that only returns. */ /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ - hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4)); + pt->hotcount = LJ_PRNG_BITS(J, 4); } lj_trace_err(J, LJ_TRERR_CUNROLL); } diff --git a/src/lj_trace.c b/src/lj_trace.c index d85b47f801..2a19e3a777 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -394,7 +394,12 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) setpenalty: J->penalty[i].val = (uint16_t)val; J->penalty[i].reason = e; - hotcount_set(J2GG(J), pc+1, val); + /* If the pc is the function header set the hot count in the proto */ + if (proto_bcpos(pt, pc) == 0) { + pt->hotcount = val; + } else { + hotcount_set(J2GG(J), pc+1, val); + } } /* -- Trace compiler state machine ---------------------------------------- */ @@ -571,10 +576,18 @@ static int trace_abort(jit_State *J) if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) { if (J->exitno == 0) { BCIns *startpc = mref(J->cur.startpc, BCIns); - if (e == LJ_TRERR_RETRY) - hotcount_set(J2GG(J), startpc+1, 1); /* Immediate retry. */ - else - penalty_pc(J, &gcref(J->cur.startpt)->pt, startpc, e); + GCproto *startpt = &gcref(J->cur.startpt)->pt; + if (e == LJ_TRERR_RETRY) { + /* Immediate retry. 
*/ + if (proto_bcpos(startpt, startpc) == 0) { + startpt->hotcount = 1; + } else { + lua_assert(bc_op(startpc[0]) > BC_FORI || bc_op(startpc[0]) <= BC_JLOOP); + hotcount_set(J2GG(J), startpc+1, 1); + } + } else { + penalty_pc(J, startpt, startpc, e); + } } else { traceref(J, J->exitno)->link = J->exitno; /* Self-link is blacklisted. */ } @@ -730,7 +743,14 @@ void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc) /* Note: pc is the interpreter bytecode PC here. It's offset by 1. */ ERRNO_SAVE /* Reset hotcount. */ - hotcount_set(J2GG(J), pc, J->param[JIT_P_hotloop]*HOTCOUNT_LOOP); + if (bc_op(pc[-1]) >= BC_FUNCF && bc_op(pc[-1]) <= BC_JFUNCV) { + GCproto *pt = (GCproto *)(((char *)(pc-1)) - sizeof(GCproto)); + lua_assert(pt->hotcount == 0xffff); + pt->hotcount = J->param[JIT_P_hotfunc] - 1; + } else { + hotcount_set(J2GG(J), pc, J->param[JIT_P_hotloop]*HOTCOUNT_LOOP); + } + /* Only start a new trace if not recording or inside __gc call or vmevent. */ if (J->state == LJ_TRACE_IDLE && !(J2G(J)->hookmask & (HOOK_GC|HOOK_VMEVENT))) { diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index a003fb4f6b..caa49d45b6 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -330,10 +330,7 @@ |.endmacro | |.macro hotcall, reg -| mov reg, PCd -| shr reg, 1 -| and reg, HOTCOUNT_PCMASK -| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL +| sub word [PCd-4+PC2PROTO(hotcount)], 1 | jb ->vm_hotcall |.endmacro | diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 211ae7b922..41260d6407 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -421,10 +421,7 @@ |.endmacro | |.macro hotcall, reg -| mov reg, PC -| shr reg, 1 -| and reg, HOTCOUNT_PCMASK -| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL +| sub word [PC-4+PC2PROTO(hotcount)], 1 | jb ->vm_hotcall |.endmacro | diff --git a/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua b/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua new file mode 100644 index 0000000000..b61813a9f4 --- /dev/null +++ 
b/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua @@ -0,0 +1,239 @@ +local jit = require"jit" +local jit_util = require"jit.util" +local format = string.format + +if not pcall(require, "jit.opt") then + return +end + +local fhot = 56 * 2 +local lhot = 56 +local func_penalty = 36 * 2 +local loop_penalty = 36 +local maxattemps_func = 11 +local maxattemps_loop = 11 +local random_backoff = 15 + +local function calln(f, n, ...) + for i = 1, n do + f(...) + end +end +jit.off(calln) + +local function forcejoff(f, ...) + jit.off(f) + calln(f, 130, ...) + return +end +jit.off(forcejoff) + +local function nop() end +jit.off(nop) + +local function force_nohotcount() + for i = 0, lhot + 1 do + calln(nop, 1) + end + for i = 0, lhot + 1 do + forcejoff(nop, 1) + end +end +jit.off(force_nohotcount) +force_nohotcount() + +local tstarts, tstops, taborts = 0, 0, 0 + +local function reset_tracestats() + tstarts, tstops, taborts = 0, 0, 0 +end +force_nohotcount(reset_tracestats) + +local function trace_event(event) + if event == "start" then + tstarts = tstarts + 1 + elseif event == "stop" then + tstops = tstops + 1 + elseif event == "abort" then + taborts = taborts + 1 + end +end + +jit.attach(trace_event, "trace") + +local function calltill_trace(f, n, ...) + local currtrace = tstarts + for i = 1, n do + f(...) 
+ if tstarts > currtrace then + return i + end + end +end +jit.off(calltill_trace) + +local function teststart() + reset_tracestats() + jit.off() + jit.on() +end +force_nohotcount(teststart) + +do --- hotcounter function + teststart() + local function f1() return 1 end + jit.sethot(f1, fhot-1) + + f1() + assert(tstarts == 0, tstarts) + + calln(f1, fhot - 2) + assert(tstarts == 0, tstarts) + + -- Counter should be zero so this call triggers a trace + f1() + assert(tstarts == 1, tstarts) + assert(tstops == 1, tstops) + + -- Call it after its JIT'ed to make sure its not creating another trace + calln(f1, fhot * 3) + assert(tstarts == 1, tstarts) + assert(tstops == 1, tstops) +end + +do --- hotcounter loop + teststart() + local function f1(n) + local a = 0 + for i = 1, n do a = a + 1 end + return a + end + jit.sethot(f1, lhot-1, 1) + + f1(1) + assert(tstarts == 0, tstarts) + + -- Run the loop with an out of range count. no loop counts should change + f1(-1) + assert(tstarts == 0, tstarts) + + -- The loop hot counter should be zero after this call + f1(lhot - 2) + assert(tstarts == 0, tstarts) + + f1(3) + assert(tstarts == 1 and tstops == 1, tstarts) +end + +do --- backoff fuctions + teststart() + local function f1(loopn) + if loopn then + local a = 0 + -- Should abort root trace + nop() + return a + else + return 1 + end + end + jit.sethot(f1, fhot-1) + + calln(f1, fhot - 1) + assert(tstarts == 0, tstarts) + + -- Trigger first trace attempt that aborts hitting a inner loop + f1(2) + assert(tstarts == 1 and taborts == 1, tstarts) + + -- Decrement the hot counter to 0 or near it depending on random back off + calln(f1, func_penalty) + assert(tstarts == 1 and taborts == 1, tstarts) + + -- Should trigger a second trace attempt that succeeds + calln(f1, random_backoff + 1) + assert(tstarts == 2, tstarts) + assert(tstops == 1, tstops) + assert(taborts == 1, taborts) +end + +do --- backoff loop + teststart() + local function f1(n, abort) + local a = 0 + for i = 1, n do + a = 
a + 1 + if abort then + nop() + end + end + return a + end + + f1(lhot - 1) + assert(tstarts == 0, tstarts) + -- Trigger first trace attempt that aborts + f1(3, true) + assert(tstarts == 1 and taborts == 1, tstarts) + + -- Decrement the hot counter to 0 or near it depending on random back off + f1(loop_penalty - 3) + assert(tstarts == 1 and taborts == 1, tstarts) + + -- Trigger the next trace attempt that should succeed + f1(16) + assert(tstarts == 2 and tstops == 1 and taborts == 1, tstarts) +end + +do --- blacklist function + teststart() + local function f1(abort) + if abort then + local a = 0 + -- Should abort root trace + nop() + return a + else + return 1 + end + end + + calln(f1, fhot - 1) + assert(tstarts == 0) + + -- Trigger first abort + f1(true) + assert(tstarts == 1 and taborts == 1) + + calln(f1, func_penalty) + assert(tstarts == 1 and taborts == 1) + + -- Trigger second abort + f1(true) + assert(tstarts == 2 and taborts == 2) + + local count = func_penalty*2+1 + for i=2, maxattemps_func - 2 do + calln(f1, count) + assert(taborts == i, taborts .. i) + assert(tstarts == i, tstarts .. i) + + -- trigger the trace + local loopi = calltill_trace(f1, random_backoff*2, true) + assert(loopi <= random_backoff, loopi) + assert(tstarts == i + 1, tstarts.." != "..(i + 1)) + assert(taborts == i + 1, (taborts .. 
(i + 1))) + -- The random offset is added after the current count is doubled from an abort + count = (count + loopi-1) * 2 + end + assert(tstarts == maxattemps_func - 1 and taborts == maxattemps_func - 1) + + -- Trigger the last abort which should get the function blacklisted + calln(f1, count + random_backoff, true) + assert(tstarts == maxattemps_func and taborts == maxattemps_func, taborts) + + -- Should not get any more traces created now its blacklisted + calln(f1, 0xffff, true) + assert(tstarts == maxattemps_func, tstarts) + assert(taborts == maxattemps_func, taborts) +end diff --git a/tests/LuaJIT-test-cleanup/test/trace/index b/tests/LuaJIT-test-cleanup/test/trace/index index ea7a22e066..d6afb32bcd 100644 --- a/tests/LuaJIT-test-cleanup/test/trace/index +++ b/tests/LuaJIT-test-cleanup/test/trace/index @@ -5,3 +5,4 @@ gc64_slot_revival.lua phi snap.lua stitch.lua +hotcounters.lua \ No newline at end of file From 069a9db0aafe5bcd9a136be8d4380a44ce3d0978 Mon Sep 17 00:00:00 2001 From: fsfod Date: Fri, 26 Jan 2018 21:25:22 +0000 Subject: [PATCH 2/8] Add a new bytecode BC_LOOPHC that will store separate per loop hot counters The loop hot count will be stored in the 16 bit D part of the opcode and the opcode is emitted directly after either BC_LOOP, BC_FORL or BC_ITERL depending on what kind of loop its being generated. These three opcodes do the hot counting for loops so the hot count should be right next to them in memory. 
Update embedded bytecode in buildvm_libbc.h for bytecode changing from BC_LOOPHC --- src/host/buildvm_libbc.h | 70 ++++++++++++++++++++-------------------- src/lj_bc.h | 1 + src/lj_parse.c | 20 ++++++++++-- src/lj_record.c | 3 ++ src/vm_x64.dasc | 1 + src/vm_x86.dasc | 1 + 6 files changed, 59 insertions(+), 37 deletions(-) diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h index b2600bd590..179512d265 100644 --- a/src/host/buildvm_libbc.h +++ b/src/host/buildvm_libbc.h @@ -6,39 +6,39 @@ static const uint8_t libbc_code[] = { #if LJ_FR2 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, -16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, -0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1, -128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2, -0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7, -0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12, -0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128, -8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, -0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, -0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, -2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, -3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, -0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, -41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, -18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, -6,252,127,76,4,2,0,0 +16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,41,2,1,0,21,3, +0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,89,7,1, 
+128,76,6,2,0,79,2,248,127,88,0,60,0,75,0,1,0,0,2,11,0,0,0,17,16,0,12,0,16,1, +9,0,43,2,0,0,18,3,0,0,41,4,0,0,89,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7, +3,2,10,7,0,0,89,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,88,0,60,0,75,0,1,0,0, +1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,31,16,0,12,0,21,2,0,0, +11,1,0,0,89,3,7,128,8,2,0,0,89,3,24,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0, +89,3,19,128,16,1,14,0,41,3,1,0,3,3,1,0,89,3,15,128,3,1,2,0,89,3,13,128,59,3, +1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252, +127,88,0,60,0,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,37,16,0, +12,0,16,1,14,0,16,2,14,0,16,3,14,0,11,4,0,0,89,5,1,128,18,4,0,0,16,4,12,0,3, +1,2,0,89,5,26,128,33,5,1,3,0,2,3,0,89,6,4,128,2,3,1,0,89,6,2,128,4,4,0,0,89, +6,10,128,18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10, +4,79,6,252,127,88,0,60,0,89,6,9,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128, +32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,0,60,0,76,4,2,0,0 #else 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, -16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, -0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1, -128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2, -0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0, -0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12, -0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128, -8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, -0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, -0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, -2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, 
-3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, -0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, -41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, -18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, -6,252,127,76,4,2,0,0 +16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,16,16,0,12,0,16,1,9,0,41,2,1,0,21,3, +0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,89,7,1, +128,76,6,2,0,79,2,248,127,88,0,60,0,75,0,1,0,0,2,10,0,0,0,17,16,0,12,0,16,1, +9,0,43,2,0,0,18,3,0,0,41,4,0,0,89,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3, +2,10,7,0,0,89,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,88,0,60,0,75,0,1,0,0,1, +2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,31,16,0,12,0,21,2,0,0,11, +1,0,0,89,3,7,128,8,2,0,0,89,3,24,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,89, +3,19,128,16,1,14,0,41,3,1,0,3,3,1,0,89,3,15,128,3,1,2,0,89,3,13,128,59,3,1, +0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252, +127,88,0,60,0,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,37,16,0, +12,0,16,1,14,0,16,2,14,0,16,3,14,0,11,4,0,0,89,5,1,128,18,4,0,0,16,4,12,0,3, +1,2,0,89,5,26,128,33,5,1,3,0,2,3,0,89,6,4,128,2,3,1,0,89,6,2,128,4,4,0,0,89, +6,10,128,18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10, +4,79,6,252,127,88,0,60,0,89,6,9,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128, +32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,0,60,0,76,4,2,0,0 #endif }; @@ -47,10 +47,10 @@ static const struct { const char *name; int ofs; } libbc_map[] = { {"math_rad",25}, {"string_len",50}, {"table_foreachi",69}, -{"table_foreach",136}, -{"table_getn",207}, -{"table_remove",226}, -{"table_move",355}, -{NULL,502} +{"table_foreach",140}, +{"table_getn",215}, +{"table_remove",234}, +{"table_move",367}, +{NULL,522} }; diff --git a/src/lj_bc.h b/src/lj_bc.h index 69a45f281e..ff01e304fa 100644 --- a/src/lj_bc.h +++ b/src/lj_bc.h @@ -183,6 +183,7 @@ _(LOOP, rbase, ___, 
jump, ___) \ _(ILOOP, rbase, ___, jump, ___) \ _(JLOOP, rbase, ___, lit, ___) \ + _(LOOPHC, ___, ___, lit, ___) \ \ _(JMP, rbase, ___, jump, ___) \ \ diff --git a/src/lj_parse.c b/src/lj_parse.c index c165234a14..0cf85eef91 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -2351,14 +2351,25 @@ static void parse_break(LexState *ls) gola_new(ls, NAME_BREAK, VSTACK_GOTO, bcemit_jmp(ls->fs)); } +static void emit_loophotcount(FuncState *fs) +{ +#if LJ_HASJIT + bcemit_AD(fs, BC_LOOPHC, 0, L2J(fs->L)->param[JIT_P_hotloop] - 1); +#else + bcemit_AD(fs, BC_LOOPHC, 0, 0); +#endif +} + /* Parse 'goto' statement. */ static void parse_goto(LexState *ls) { FuncState *fs = ls->fs; GCstr *name = lex_str(ls); VarInfo *vl = gola_findlabel(ls, name); - if (vl) /* Treat backwards goto within same scope like a loop. */ + if (vl) { /* Treat backwards goto within same scope like a loop. */ bcemit_AJ(fs, BC_LOOP, vl->slot, -1); /* No BC range check. */ + emit_loophotcount(fs); + } fs->bl->flags |= FSCOPE_GOLA; gola_new(ls, name, VSTACK_GOTO, bcemit_jmp(fs)); } @@ -2419,6 +2430,7 @@ static void parse_while(LexState *ls, BCLine line) fscope_begin(fs, &bl, FSCOPE_LOOP); lex_check(ls, TK_do); loop = bcemit_AD(fs, BC_LOOP, fs->nactvar, 0); + emit_loophotcount(fs); parse_block(ls); jmp_patch(fs, bcemit_jmp(fs), start); lex_match(ls, TK_end, TK_while, line); @@ -2438,6 +2450,7 @@ static void parse_repeat(LexState *ls, BCLine line) fscope_begin(fs, &bl2, 0); /* Inner scope. */ lj_lex_next(ls); /* Skip 'repeat'. */ bcemit_AD(fs, BC_LOOP, fs->nactvar, 0); + emit_loophotcount(fs); parse_chunk(ls); lex_match(ls, TK_until, TK_repeat, line); condexit = expr_cond(ls); /* Parse condition (still inside inner scope). */ @@ -2487,9 +2500,10 @@ static void parse_for_num(LexState *ls, GCstr *varname, BCLine line) fscope_end(fs); /* Perform loop inversion. Loop control instructions are at the end. 
*/ loopend = bcemit_AJ(fs, BC_FORL, base, NO_JMP); + emit_loophotcount(fs); fs->bcbase[loopend].line = line; /* Fix line for control ins. */ jmp_patchins(fs, loopend, loop+1); - jmp_patchins(fs, loop, fs->pc); + jmp_patchins(fs, loop, fs->pc-1); } /* Try to predict whether the iterator is next() and specialize the bytecode. @@ -2561,6 +2575,8 @@ static void parse_for_iter(LexState *ls, GCstr *indexname) jmp_patchins(fs, loop, fs->pc); bcemit_ABC(fs, isnext ? BC_ITERN : BC_ITERC, base, nvars-3+1, 2+1); loopend = bcemit_AJ(fs, BC_ITERL, base, NO_JMP); + emit_loophotcount(fs); + fs->bcbase[loopend-1].line = line; /* Fix line for control ins. */ fs->bcbase[loopend].line = line; jmp_patchins(fs, loopend, loop+1); diff --git a/src/lj_record.c b/src/lj_record.c index 5e5d524f49..9b76b635ed 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -2415,6 +2415,9 @@ void lj_record_ins(jit_State *J) lj_trace_err(J, LJ_TRERR_BLACKL); break; + case BC_LOOPHC: + break; + case BC_JMP: if (ra < J->maxslot) J->maxslot = ra; /* Shrink used slots. */ diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index caa49d45b6..13259560de 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -4474,6 +4474,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. break; + case BC_LOOPHC: case BC_ILOOP: | ins_A // RA = base, RD = target (loop extent) | ins_next diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 41260d6407..f21ab1f21c 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -5249,6 +5249,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 
break; + case BC_LOOPHC: case BC_ILOOP: | ins_A // RA = base, RD = target (loop extent) | ins_next From 5f7a02e64aa04206c0b222654b134aaadd29f723 Mon Sep 17 00:00:00 2001 From: fsfod Date: Wed, 28 Mar 2018 15:20:27 +0100 Subject: [PATCH 3/8] Add skipping over of the LOOPHC bytecode to BC_ILOOP\BC_LOOP and BC_ITERL\BC_JITERL Skipping for BC_FORI NYI --- src/vm_x64.dasc | 27 +++++++++++++++++++++++---- src/vm_x86.dasc | 32 +++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 13259560de..1bc69ba021 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -210,10 +210,19 @@ | jmp aword [DISPATCH+OP*8] |.endmacro | +|.macro ins_NEXT2 +| mov RCd, [PC+4] +| movzx RAd, RCH +| movzx OP, RCL +| add PC, 8 +| shr RCd, 16 +| jmp aword [DISPATCH+OP*8] +|.endmacro |// Instruction footer. |.if 1 | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. | .define ins_next, ins_NEXT +| .define ins_next2, ins_NEXT2 | .define ins_next_, ins_NEXT |.else | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. @@ -222,6 +231,9 @@ | .macro ins_next | jmp ->ins_next | .endmacro +| .macro ins_next2 +| jmp ->ins_next2 +| .endmacro | .macro ins_next_ | ->ins_next: | ins_NEXT @@ -4452,16 +4464,21 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_AJ // RA = base, RD = target | lea RA, [BASE+RA*8] | mov RB, [RA] - | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. + | cmp RB, LJ_TNIL; je >2 // Stop if iterator returned nil. if (op == BC_JITERL) { | mov [RA-8], RB | jmp =>BC_JLOOP + |2: + | ins_next2 // Skips over BC_LOOPHC } else { | branchPC RD // Otherwise save control var + branch. | mov [RA-8], RB + |1: + | ins_next + |2: + | add PC, 4 // Skip over BC_LOOPHC + | jmp <1 } - |1: - | ins_next break; case BC_LOOP: @@ -4474,9 +4491,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | // Fall through. 
Assumes BC_ILOOP follows and ins_A is a no-op. break; - case BC_LOOPHC: case BC_ILOOP: | ins_A // RA = base, RD = target (loop extent) + | ins_next2 + break; + case BC_LOOPHC: | ins_next break; diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index f21ab1f21c..73c581a81e 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -345,11 +345,24 @@ | jmp aword [DISPATCH+OP*4] |.endif |.endmacro +|.macro ins_NEXT2 +| mov RC, [PC+4] +| movzx RA, RCH +| movzx OP, RCL +| add PC, 8 +| shr RC, 16 +|.if X64 +| jmp aword [DISPATCH+OP*8] +|.else +| jmp aword [DISPATCH+OP*4] +|.endif +|.endmacro | |// Instruction footer. |.if 1 | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. | .define ins_next, ins_NEXT +| .define ins_next2, ins_NEXT2 | .define ins_next_, ins_NEXT |.else | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. @@ -358,6 +371,9 @@ | .macro ins_next | jmp ->ins_next | .endmacro +| .macro ins_next2 +| jmp ->ins_next2 +| .endmacro | .macro ins_next_ | ->ins_next: | ins_NEXT @@ -5223,20 +5239,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | ins_AJ // RA = base, RD = target | lea RA, [BASE+RA*8] | mov RB, [RA+4] - | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil. + | cmp RB, LJ_TNIL; je >2 // Stop if iterator returned nil. if (op == BC_JITERL) { | mov [RA-4], RB | mov RB, [RA] | mov [RA-8], RB | jmp =>BC_JLOOP + |2: + | ins_next2 // Skips over BC_LOOPHC } else { | branchPC RD // Otherwise save control var + branch. | mov RD, [RA] | mov [RA-4], RB | mov [RA-8], RD + |1: + | ins_next + |2: + | add PC, 4 // Skip over BC_LOOPHC + | jmp <1 } - |1: - | ins_next break; case BC_LOOP: @@ -5248,10 +5269,11 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) |.endif | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. 
break; - - case BC_LOOPHC: case BC_ILOOP: | ins_A // RA = base, RD = target (loop extent) + | ins_next2 + break; + case BC_LOOPHC: | ins_next break; From 7a03f51f269d59ff9e41e0efaa8968f906023265 Mon Sep 17 00:00:00 2001 From: fsfod Date: Fri, 20 Jul 2018 14:04:07 +0100 Subject: [PATCH 4/8] Implement separate per loop hot counters in the interpreter using BC_LOOPHC to store the count --- src/lib_jit.c | 14 +-- src/lj_dispatch.c | 4 +- src/lj_dispatch.h | 8 +- src/lj_trace.c | 11 ++- src/lj_trace.h | 2 +- src/vm_x64.dasc | 5 +- src/vm_x86.dasc | 5 +- .../test/trace/hotcounters.lua | 93 ++++++++++++++++++- 8 files changed, 115 insertions(+), 27 deletions(-) diff --git a/src/lib_jit.c b/src/lib_jit.c index 8e285b5b1b..376ad2e6bd 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -105,13 +105,6 @@ LJLIB_CF(jit_sethot) GCproto *pt = check_Lproto(L, 0); int32_t count = lj_lib_checkint(L, 2); int32_t loopid = lj_lib_optint(L, 3, -1); - /* - ** Loops decrement the count by two instead of one like functions when using - ** shared hot counters. - */ - if (loopid != -1) { - count = count * 2; - } if (count < 0 || count > 0xffff) { luaL_error(L, "bad hot count value"); @@ -126,13 +119,12 @@ LJLIB_CF(jit_sethot) BCIns *bc = proto_bc(pt); MSize hci = 0, i = 0; for (i = 0; i != pt->sizebc; i++) { - int iscountbc = bc_op(bc[i]) == BC_FORL || bc_op(bc[i]) == BC_ITERL || - bc_op(bc[i]) == BC_LOOP; + int iscountbc = bc_op(bc[i]) == BC_LOOPHC; if (iscountbc) { if (++hci == loopid) { BCIns *hcbc = bc + i; - int old = hotcount_get(L2GG(L), hcbc); - hotcount_set(L2GG(L), hcbc, count); + int old = hotcount_loop_get(hcbc-1); + hotcount_loop_set(hcbc-1, count); setintV(L->top-1, old); return 1; } diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c index 5d6795f88e..eb75abb812 100644 --- a/src/lj_dispatch.c +++ b/src/lj_dispatch.c @@ -464,7 +464,7 @@ static int call_init(lua_State *L, GCfunc *fn) } /* Call dispatch. Used by call hooks, hot calls or when recording. 
*/ -ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) +ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, BCIns *pc) { ERRNO_SAVE GCfunc *fn = curr_func(L); @@ -480,7 +480,7 @@ ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns *pc) #ifdef LUA_USE_ASSERT ptrdiff_t delta = L->top - L->base; #endif - pc = (const BCIns *)((uintptr_t)pc & ~(uintptr_t)1); + pc = (BCIns *)((uintptr_t)pc & ~(uintptr_t)1); lj_trace_hot(J, pc); lua_assert(L->top - L->base == delta); goto out; diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index 5bda51a213..de5b4ff5ed 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -119,6 +119,12 @@ typedef struct GG_State { #define hotcount_set(gg, pc, val) \ (hotcount_get((gg), (pc)) = (HotCount)(val)) +#define hotcount_loop_get(pc) \ + ((HotCount)(((pc)[1]) >> 16)) + +#define hotcount_loop_set(pc, val) \ + (pc)[1] = (((pc)[1] & 0xffff) | ((val) << 16)) + /* Dispatch table management. */ LJ_FUNC void lj_dispatch_init(GG_State *GG); #if LJ_HASJIT @@ -128,7 +134,7 @@ LJ_FUNC void lj_dispatch_update(global_State *g); /* Instruction dispatch callback for hooks or when recording. 
*/ LJ_FUNCA void LJ_FASTCALL lj_dispatch_ins(lua_State *L, const BCIns *pc); -LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, const BCIns*pc); +LJ_FUNCA ASMFunction LJ_FASTCALL lj_dispatch_call(lua_State *L, BCIns*pc); #if LJ_HASJIT LJ_FUNCA void LJ_FASTCALL lj_dispatch_stitch(jit_State *J, const BCIns *pc); #endif diff --git a/src/lj_trace.c b/src/lj_trace.c index 2a19e3a777..d556cabea8 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -396,9 +396,12 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) J->penalty[i].reason = e; /* If the pc is the function header set the hot count in the proto */ if (proto_bcpos(pt, pc) == 0) { + lua_assert(val == PENALTY_MIN || val > pt->hotcount); pt->hotcount = val; } else { - hotcount_set(J2GG(J), pc+1, val); + lua_assert(bc_op(pc[1]) == BC_LOOPHC); + lua_assert(val == PENALTY_MIN || val > hotcount_loop_get(pc)); + hotcount_loop_set(pc, val); } } @@ -738,7 +741,7 @@ void lj_trace_ins(jit_State *J, const BCIns *pc) } /* A hotcount triggered. Start recording a root trace. */ -void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc) +void LJ_FASTCALL lj_trace_hot(jit_State *J, BCIns *pc) { /* Note: pc is the interpreter bytecode PC here. It's offset by 1. */ ERRNO_SAVE @@ -748,7 +751,9 @@ void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc) lua_assert(pt->hotcount == 0xffff); pt->hotcount = J->param[JIT_P_hotfunc] - 1; } else { - hotcount_set(J2GG(J), pc, J->param[JIT_P_hotloop]*HOTCOUNT_LOOP); + BCIns *loop = pc-1; + lua_assert(hotcount_loop_get(loop) == 0xffff); + hotcount_loop_set(loop, J->param[JIT_P_hotloop] - 1); } /* Only start a new trace if not recording or inside __gc call or vmevent. */ diff --git a/src/lj_trace.h b/src/lj_trace.h index 22cae741f3..07dcd9c373 100644 --- a/src/lj_trace.h +++ b/src/lj_trace.h @@ -34,7 +34,7 @@ LJ_FUNC void lj_trace_freestate(global_State *g); /* Event handling. 
*/ LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); -LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); +LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, BCIns *pc); LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc); LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 1bc69ba021..1d8a8b0543 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -334,10 +334,7 @@ | |// Decrement hashed hotcount and trigger trace recorder if zero. |.macro hotloop, reg -| mov reg, PCd -| shr reg, 1 -| and reg, HOTCOUNT_PCMASK -| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP +| sub word [PCd+2], 2 | jb ->vm_hotloop |.endmacro | diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 73c581a81e..019f8182bc 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -429,10 +429,7 @@ | |// Decrement hashed hotcount and trigger trace recorder if zero. |.macro hotloop, reg -| mov reg, PC -| shr reg, 1 -| and reg, HOTCOUNT_PCMASK -| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP +| sub word [PC+2], 2 | jb ->vm_hotloop |.endmacro | diff --git a/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua b/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua index b61813a9f4..546fd27aad 100644 --- a/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua +++ b/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua @@ -120,9 +120,46 @@ do --- hotcounter loop -- The loop hot counter should be zero after this call f1(lhot - 2) assert(tstarts == 0, tstarts) - + f1(3) + assert(tstarts == 1, tstarts) + assert(tstops == 1, tstops) +end + +do --- hotcounters multiloop + teststart() + local function f1(n, m) + local a = 0 + for i = 1, n do a = a + 1 end + local b = 0 + for i = 1, m do b = b + 1 end + return a, b + end + + f1(1, -1) + assert(tstarts == 0, tstarts) + + f1(-1, 2) + assert(tstarts == 0, tstarts) + + -- Fist loop hot counter should be zero after this call + f1(lhot-2, -1) + 
assert(tstarts == 0, tstarts) + + f1(3, 2) + assert(tstarts == 1 and tstops == 1, tstarts) + + -- Second loop hot counter should be zero after this call + f1(-1, lhot - 5) assert(tstarts == 1 and tstops == 1, tstarts) + + -- Jit the second loop + f1(-1, 4) + assert(tstarts == 2 and tstops == 2, tstarts) + + -- Both loops should be jit'ed not hot counting anymore + f1(lhot * 3, lhot * 3) + assert(tstarts == 2 and tstops == 2, tstarts) end do --- backoff fuctions @@ -185,6 +222,60 @@ do --- backoff loop assert(tstarts == 2 and tstops == 1 and taborts == 1, tstarts) end +do --- blacklist loop + teststart() + local function f1(n, abort) + local a = 0 + local prev_abort = taborts + for i = 1, n do + a = a + 1 + if abort then + -- Force an abort from calling a blacklisted function + nop() + if taborts ~= prev_abort then + return i + end + end + end + return a + end + + f1(lhot - 1) + assert(tstarts == 0) + + f1(1, true) + assert(tstarts == 1 and taborts == 1) + + local count = loop_penalty + local rand_total = 0 + for i=1, maxattemps_loop - 2 do + f1(count - 1) + assert(tstarts == i and taborts == i, tstarts .. i) + + -- Trigger another trace abort + f1(rand_total + 2, true) + assert(tstarts == i + 1 and taborts == i + 1, taborts .. 
(i + 1)) + + count = count * 2 + -- The random offset is added after the current count is doubled from an abort + if rand_total == 0 then + rand_total = random_backoff + else + rand_total = rand_total * 2 + random_backoff + end + end + assert(tstarts == maxattemps_loop - 1 and taborts == maxattemps_loop - 1) + + -- Last abort should blacklist the function + f1(count + rand_total, true) + assert(tstarts == maxattemps_loop) + assert(taborts == maxattemps_loop) + + f1(0xffff * 2) + assert(tstarts == maxattemps_loop, tstarts) + assert(taborts == maxattemps_loop, taborts) +end + do --- blacklist function teststart() local function f1(abort) From 8c7db3e6b530da944ad9b87f4a1ddabfd267d62d Mon Sep 17 00:00:00 2001 From: fsfod Date: Fri, 20 Jul 2018 15:47:21 +0100 Subject: [PATCH 5/8] Separate the hot counter penalty values for functions and loops and make them JIT params Make the loop hot counters decrement by one instead of two for consistency now that the counter backoff scaling values are not shared with functions. Use some non-default max trace attempt counts for loops and functions in the unit tests to test out penaltymaxfunc and penaltymaxloop JIT parameters --- src/lj_jit.h | 6 ++++-- src/lj_record.c | 2 +- src/lj_trace.c | 13 +++++++----- src/vm_x64.dasc | 2 +- src/vm_x86.dasc | 2 +- .../test/trace/hotcounters.lua | 21 +++++++++++++++++-- 6 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/lj_jit.h b/src/lj_jit.h index a99a402790..611376a3fa 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -106,6 +106,10 @@ \ _(\007, hotloop, 56) /* # of iter. to detect a hot loop. */ \ _(\007, hotfunc, 56*2) /* # of iter. to detect a hot function. */ \ + _(\013, penaltyloop, 36) /*. 
initial penalty hot counter back off value */ \ + _(\013, penaltyfunc, (36*2)) /* initial penalty hot counter back off value */ \ + _(\016, penaltymaxloop, 25000) /* max the loop hot count backoff can be increased to */ \ + _(\016, penaltymaxfunc, 50000) /* max the function hot count backoff can be increased to */ \ _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ _(\007, tryside, 4) /* # of attempts to compile a side trace. */ \ \ @@ -289,8 +293,6 @@ typedef struct HotPenalty { } HotPenalty; #define PENALTY_SLOTS 64 /* Penalty cache slot. Must be a power of 2. */ -#define PENALTY_MIN (36*2) /* Minimum penalty value. */ -#define PENALTY_MAX 60000 /* Maximum penalty value. */ #define PENALTY_RNDBITS 4 /* # of random bits to add to penalty value. */ /* Round-robin backpropagation cache for narrowing conversions. */ diff --git a/src/lj_record.c b/src/lj_record.c index 9b76b635ed..0c2eba1bcb 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -555,7 +555,7 @@ static int innerloopleft(jit_State *J, const BCIns *pc) if (mref(J->penalty[i].pc, const BCIns) == pc) { if ((J->penalty[i].reason == LJ_TRERR_LLEAVE || J->penalty[i].reason == LJ_TRERR_LINNER) && - J->penalty[i].val >= 2*PENALTY_MIN) + J->penalty[i].val >= (J->param[JIT_P_penaltyloop]*2)) return 1; break; } diff --git a/src/lj_trace.c b/src/lj_trace.c index d556cabea8..7ee2d34bbf 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -375,13 +375,16 @@ static void blacklist_pc(GCproto *pt, BCIns *pc) /* Penalize a bytecode instruction. */ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) { - uint32_t i, val = PENALTY_MIN; + int isloop = proto_bcpos(pt, pc) != 0; + uint32_t i, val = J->param[isloop ? JIT_P_penaltyloop : JIT_P_penaltyfunc]; + uint32_t maxval = (uint32_t)J->param[isloop ? JIT_P_penaltymaxloop : + JIT_P_penaltymaxfunc]; for (i = 0; i < PENALTY_SLOTS; i++) if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? 
*/ /* First try to bump its hotcount several times. */ val = ((uint32_t)J->penalty[i].val << 1) + LJ_PRNG_BITS(J, PENALTY_RNDBITS); - if (val > PENALTY_MAX) { + if (val > maxval) { blacklist_pc(pt, pc); /* Blacklist it, if that didn't help. */ return; } @@ -395,12 +398,12 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) J->penalty[i].val = (uint16_t)val; J->penalty[i].reason = e; /* If the pc is the function header set the hot count in the proto */ - if (proto_bcpos(pt, pc) == 0) { - lua_assert(val == PENALTY_MIN || val > pt->hotcount); + if (!isloop) { + lua_assert(val == J->param[JIT_P_penaltyfunc] || val > pt->hotcount); pt->hotcount = val; } else { lua_assert(bc_op(pc[1]) == BC_LOOPHC); - lua_assert(val == PENALTY_MIN || val > hotcount_loop_get(pc)); + lua_assert(val == J->param[JIT_P_penaltyloop] || val > hotcount_loop_get(pc)); hotcount_loop_set(pc, val); } } diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index 1d8a8b0543..f28b78db21 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -334,7 +334,7 @@ | |// Decrement hashed hotcount and trigger trace recorder if zero. |.macro hotloop, reg -| sub word [PCd+2], 2 +| sub word [PCd+2], 1 | jb ->vm_hotloop |.endmacro | diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 019f8182bc..0f63193998 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -429,7 +429,7 @@ | |// Decrement hashed hotcount and trigger trace recorder if zero. 
|.macro hotloop, reg -| sub word [PC+2], 2 +| sub word [PC+2], 1 | jb ->vm_hotloop |.endmacro | diff --git a/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua b/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua index 546fd27aad..1378a12420 100644 --- a/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua +++ b/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua @@ -10,10 +10,27 @@ local fhot = 56 * 2 local lhot = 56 local func_penalty = 36 * 2 local loop_penalty = 36 -local maxattemps_func = 11 -local maxattemps_loop = 11 +local maxattemps_func = 9 +local maxattemps_loop = 6 local random_backoff = 15 +local function getmaxcount(attempts, penalty) + local count = penalty + 16 + for i = 1, attempts - 2 do + count = (count * 2) + 16 + if count > 0xffff then + error("Attempt count of "..count.." for attempt "..i.." is larger than the max value of uint16_t") + end + end + return count +end + +local countmax_func = getmaxcount(maxattemps_func, func_penalty) +local countmax_loop = getmaxcount(maxattemps_loop, loop_penalty) +print("penaltymaxfunc="..countmax_func, "penaltymaxloop="..countmax_loop) + +jit.opt.start("penaltymaxfunc="..countmax_func, "penaltymaxloop="..countmax_loop) + local function calln(f, n, ...) for i = 1, n do f(...) 
From dfdfcfec8fa97515c2e92cbc4bc7b15050481e9c Mon Sep 17 00:00:00 2001 From: fsfod Date: Sat, 3 Nov 2018 06:47:17 +0000 Subject: [PATCH 6/8] Make separate hot counters enabled under a define so as not to break other arch builds Add BC_LOOPHC under a define in lj_bc.h and lj_record.c Don't embed hot counter bytecode if not built with separate hot counters Scale penalty values by 2 when separate counters are disabled --- src/Makefile | 3 + src/Makefile.dep | 13 ++-- src/host/buildvm_lib.c | 5 ++ src/host/buildvm_libbc.h | 70 +++++++++---------- src/host/buildvm_libbc_counters.h | 56 +++++++++++++++ src/lib_jit.c | 22 ++++-- src/lj_arch.h | 6 ++ src/lj_bc.h | 8 ++- src/lj_bcread.c | 2 +- src/lj_dispatch.h | 19 +++-- src/lj_jit.h | 15 +++- src/lj_parse.c | 6 +- src/lj_record.c | 6 +- src/lj_trace.c | 42 +++++++---- src/msvcbuild.bat | 4 +- src/vm_x64.dasc | 29 +++++++- src/vm_x86.dasc | 30 +++++++- .../test/trace/hotcounters.lua | 13 +++- 18 files changed, 270 insertions(+), 79 deletions(-) create mode 100644 src/host/buildvm_libbc_counters.h diff --git a/src/Makefile b/src/Makefile index 71ca028cde..a02775ac46 100644 --- a/src/Makefile +++ b/src/Makefile @@ -426,6 +426,9 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) DASM_AFLAGS+= -D NO_UNWIND TARGET_ARCH+= -DLUAJIT_NO_UNWIND endif +ifneq (,$(findstring LJ_SEPARATE_COUNTERS 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D SEPARATE_COUNTERS +endif DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) ifeq (Windows,$(TARGET_SYS)) DASM_AFLAGS+= -D WIN diff --git a/src/Makefile.dep b/src/Makefile.dep index 2b1cb5ef29..3e5fab7b9e 100644 --- a/src/Makefile.dep +++ b/src/Makefile.dep @@ -54,9 +54,9 @@ lj_asm.o: lj_asm.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_gc.h \ lj_bc.o: lj_bc.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h lj_bc.h \ lj_bcdef.h lj_bcread.o: lj_bcread.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ - lj_gc.h 
lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h \ - lj_ctype.h lj_cdata.h lualib.h lj_lex.h lj_bcdump.h lj_state.h \ - lj_strfmt.h + lj_gc.h lj_err.h lj_errmsg.h lj_buf.h lj_str.h lj_tab.h lj_bc.h lj_jit.h \ + lj_ir.h lj_dispatch.h lj_ctype.h lj_cdata.h lualib.h lj_lex.h \ + lj_bcdump.h lj_state.h lj_strfmt.h lj_bcwrite.o: lj_bcwrite.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_buf.h lj_str.h lj_bc.h lj_ctype.h lj_dispatch.h lj_jit.h \ lj_ir.h lj_strfmt.h lj_bcdump.h lj_lex.h lj_err.h lj_errmsg.h lj_vm.h @@ -162,12 +162,11 @@ lj_opt_narrow.o: lj_opt_narrow.c lj_obj.h lua.h luaconf.h lj_def.h \ lj_opt_sink.o: lj_opt_sink.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_ir.h lj_jit.h lj_iropt.h lj_target.h lj_target_*.h lj_opt_split.o: lj_opt_split.c lj_obj.h lua.h luaconf.h lj_def.h \ - lj_arch.h lj_err.h lj_errmsg.h lj_buf.h lj_gc.h lj_str.h lj_ir.h \ - lj_jit.h lj_ircall.h lj_iropt.h lj_dispatch.h lj_bc.h lj_vm.h + lj_arch.h lj_parse.o: lj_parse.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_gc.h lj_err.h lj_errmsg.h lj_debug.h lj_buf.h lj_str.h lj_tab.h \ lj_func.h lj_state.h lj_bc.h lj_ctype.h lj_strfmt.h lj_lex.h lj_parse.h \ - lj_vm.h lj_vmevent.h + lj_vm.h lj_vmevent.h lj_dispatch.h lj_jit.h lj_ir.h lj_profile.o: lj_profile.c lj_obj.h lua.h luaconf.h lj_def.h lj_arch.h \ lj_buf.h lj_gc.h lj_str.h lj_frame.h lj_bc.h lj_debug.h lj_dispatch.h \ lj_jit.h lj_ir.h lj_trace.h lj_traceerr.h lj_profile.h luajit.h @@ -240,7 +239,7 @@ host/buildvm_fold.o: host/buildvm_fold.c host/buildvm.h lj_def.h lua.h \ luaconf.h lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_ir.h lj_obj.h host/buildvm_lib.o: host/buildvm_lib.c host/buildvm.h lj_def.h lua.h luaconf.h \ lj_arch.h lj_obj.h lj_def.h lj_arch.h lj_bc.h lj_lib.h lj_obj.h \ - host/buildvm_libbc.h + host/buildvm_libbc.h host/buildvm_libbc_counters.h host/buildvm_peobj.o: host/buildvm_peobj.c host/buildvm.h lj_def.h lua.h \ luaconf.h lj_arch.h lj_bc.h lj_def.h lj_arch.h host/minilua.o: host/minilua.c 
diff --git a/src/host/buildvm_lib.c b/src/host/buildvm_lib.c index 2956fdb6cd..0b91170ebc 100644 --- a/src/host/buildvm_lib.c +++ b/src/host/buildvm_lib.c @@ -7,7 +7,12 @@ #include "lj_obj.h" #include "lj_bc.h" #include "lj_lib.h" + +#if LJ_SEPARATE_COUNTERS +#include "buildvm_libbc_counters.h" +#else #include "buildvm_libbc.h" +#endif /* Context for library definitions. */ static uint8_t obuf[8192]; diff --git a/src/host/buildvm_libbc.h b/src/host/buildvm_libbc.h index 179512d265..b2600bd590 100644 --- a/src/host/buildvm_libbc.h +++ b/src/host/buildvm_libbc.h @@ -6,39 +6,39 @@ static const uint8_t libbc_code[] = { #if LJ_FR2 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, -16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,41,2,1,0,21,3, -0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,89,7,1, -128,76,6,2,0,79,2,248,127,88,0,60,0,75,0,1,0,0,2,11,0,0,0,17,16,0,12,0,16,1, -9,0,43,2,0,0,18,3,0,0,41,4,0,0,89,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7, -3,2,10,7,0,0,89,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,88,0,60,0,75,0,1,0,0, -1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,31,16,0,12,0,21,2,0,0, -11,1,0,0,89,3,7,128,8,2,0,0,89,3,24,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0, -89,3,19,128,16,1,14,0,41,3,1,0,3,3,1,0,89,3,15,128,3,1,2,0,89,3,13,128,59,3, -1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252, -127,88,0,60,0,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,37,16,0, -12,0,16,1,14,0,16,2,14,0,16,3,14,0,11,4,0,0,89,5,1,128,18,4,0,0,16,4,12,0,3, -1,2,0,89,5,26,128,33,5,1,3,0,2,3,0,89,6,4,128,2,3,1,0,89,6,2,128,4,4,0,0,89, -6,10,128,18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10, -4,79,6,252,127,88,0,60,0,89,6,9,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128, -32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,0,60,0,76,4,2,0,0 
+16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, +0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,88,7,1, +128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,11,0,0,0,16,16,0,12,0,16,1,9,0,43,2, +0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7,3,2,10,7, +0,0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12, +0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128, +8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, +0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, +0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, +2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, +3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, +0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, +41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, +18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, +6,252,127,76,4,2,0,0 #else 0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, 0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, -16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,16,16,0,12,0,16,1,9,0,41,2,1,0,21,3, -0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,89,7,1, -128,76,6,2,0,79,2,248,127,88,0,60,0,75,0,1,0,0,2,10,0,0,0,17,16,0,12,0,16,1, -9,0,43,2,0,0,18,3,0,0,41,4,0,0,89,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3, -2,10,7,0,0,89,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,88,0,60,0,75,0,1,0,0,1, -2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,31,16,0,12,0,21,2,0,0,11, -1,0,0,89,3,7,128,8,2,0,0,89,3,24,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,89, -3,19,128,16,1,14,0,41,3,1,0,3,3,1,0,89,3,15,128,3,1,2,0,89,3,13,128,59,3,1, -0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252, 
-127,88,0,60,0,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,37,16,0, -12,0,16,1,14,0,16,2,14,0,16,3,14,0,11,4,0,0,89,5,1,128,18,4,0,0,16,4,12,0,3, -1,2,0,89,5,26,128,33,5,1,3,0,2,3,0,89,6,4,128,2,3,1,0,89,6,2,128,4,4,0,0,89, -6,10,128,18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10, -4,79,6,252,127,88,0,60,0,89,6,9,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128, -32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,0,60,0,76,4,2,0,0 +16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,15,16,0,12,0,16,1,9,0,41,2,1,0,21,3, +0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,88,7,1, +128,76,6,2,0,79,2,248,127,75,0,1,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,43,2, +0,0,18,3,0,0,41,4,0,0,88,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3,2,10,7,0, +0,88,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,75,0,1,0,0,1,2,0,0,0,3,16,0,12, +0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,30,16,0,12,0,21,2,0,0,11,1,0,0,88,3,7,128, +8,2,0,0,88,3,23,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,88,3,18,128,16,1,14, +0,41,3,1,0,3,3,1,0,88,3,14,128,3,1,2,0,88,3,12,128,59,3,1,0,22,4,1,1,18,5,2, +0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252,127,43,4,0,0,64,4, +2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,35,16,0,12,0,16,1,14,0,16,2,14,0,16, +3,14,0,11,4,0,0,88,5,1,128,18,4,0,0,16,4,12,0,3,1,2,0,88,5,24,128,33,5,1,3, +0,2,3,0,88,6,4,128,2,3,1,0,88,6,2,128,4,4,0,0,88,6,9,128,18,6,1,0,18,7,2,0, +41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,6,8,128, +18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10,4,79, +6,252,127,76,4,2,0,0 #endif }; @@ -47,10 +47,10 @@ static const struct { const char *name; int ofs; } libbc_map[] = { {"math_rad",25}, {"string_len",50}, {"table_foreachi",69}, -{"table_foreach",140}, -{"table_getn",215}, -{"table_remove",234}, -{"table_move",367}, -{NULL,522} +{"table_foreach",136}, +{"table_getn",207}, +{"table_remove",226}, +{"table_move",355}, +{NULL,502} }; diff --git a/src/host/buildvm_libbc_counters.h 
b/src/host/buildvm_libbc_counters.h new file mode 100644 index 0000000000..179512d265 --- /dev/null +++ b/src/host/buildvm_libbc_counters.h @@ -0,0 +1,56 @@ +/* This is a generated file. DO NOT EDIT! */ + +static const int libbc_endian = 0; + +static const uint8_t libbc_code[] = { +#if LJ_FR2 +0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, +0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, +16,0,5,0,21,1,0,0,76,1,2,0,0,2,10,0,0,0,16,16,0,12,0,16,1,9,0,41,2,1,0,21,3, +0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,8,5,0,59,9,5,0,66,6,3,2,10,6,0,0,89,7,1, +128,76,6,2,0,79,2,248,127,88,0,60,0,75,0,1,0,0,2,11,0,0,0,17,16,0,12,0,16,1, +9,0,43,2,0,0,18,3,0,0,41,4,0,0,89,5,7,128,18,7,1,0,18,9,5,0,18,10,6,0,66,7, +3,2,10,7,0,0,89,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,88,0,60,0,75,0,1,0,0, +1,2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,31,16,0,12,0,21,2,0,0, +11,1,0,0,89,3,7,128,8,2,0,0,89,3,24,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0, +89,3,19,128,16,1,14,0,41,3,1,0,3,3,1,0,89,3,15,128,3,1,2,0,89,3,13,128,59,3, +1,0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252, +127,88,0,60,0,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,37,16,0, +12,0,16,1,14,0,16,2,14,0,16,3,14,0,11,4,0,0,89,5,1,128,18,4,0,0,16,4,12,0,3, +1,2,0,89,5,26,128,33,5,1,3,0,2,3,0,89,6,4,128,2,3,1,0,89,6,2,128,4,4,0,0,89, +6,10,128,18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10, +4,79,6,252,127,88,0,60,0,89,6,9,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128, +32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,0,60,0,76,4,2,0,0 +#else +0,1,2,0,0,1,2,24,1,0,0,76,1,2,0,241,135,158,166,3,220,203,178,130,4,0,1,2,0, +0,1,2,24,1,0,0,76,1,2,0,243,244,148,165,20,198,190,199,252,3,0,1,2,0,0,0,3, +16,0,5,0,21,1,0,0,76,1,2,0,0,2,9,0,0,0,16,16,0,12,0,16,1,9,0,41,2,1,0,21,3, +0,0,41,4,1,0,77,2,8,128,18,6,1,0,18,7,5,0,59,8,5,0,66,6,3,2,10,6,0,0,89,7,1, 
+128,76,6,2,0,79,2,248,127,88,0,60,0,75,0,1,0,0,2,10,0,0,0,17,16,0,12,0,16,1, +9,0,43,2,0,0,18,3,0,0,41,4,0,0,89,5,7,128,18,7,1,0,18,8,5,0,18,9,6,0,66,7,3, +2,10,7,0,0,89,8,1,128,76,7,2,0,70,5,3,3,82,5,247,127,88,0,60,0,75,0,1,0,0,1, +2,0,0,0,3,16,0,12,0,21,1,0,0,76,1,2,0,0,2,10,0,0,2,31,16,0,12,0,21,2,0,0,11, +1,0,0,89,3,7,128,8,2,0,0,89,3,24,128,59,3,2,0,43,4,0,0,64,4,2,0,76,3,2,0,89, +3,19,128,16,1,14,0,41,3,1,0,3,3,1,0,89,3,15,128,3,1,2,0,89,3,13,128,59,3,1, +0,22,4,1,1,18,5,2,0,41,6,1,0,77,4,4,128,23,8,1,7,59,9,7,0,64,9,8,0,79,4,252, +127,88,0,60,0,43,4,0,0,64,4,2,0,76,3,2,0,75,0,1,0,0,2,0,5,12,0,0,0,37,16,0, +12,0,16,1,14,0,16,2,14,0,16,3,14,0,11,4,0,0,89,5,1,128,18,4,0,0,16,4,12,0,3, +1,2,0,89,5,26,128,33,5,1,3,0,2,3,0,89,6,4,128,2,3,1,0,89,6,2,128,4,4,0,0,89, +6,10,128,18,6,1,0,18,7,2,0,41,8,1,0,77,6,4,128,32,10,5,9,59,11,9,0,64,11,10, +4,79,6,252,127,88,0,60,0,89,6,9,128,18,6,2,0,18,7,1,0,41,8,255,255,77,6,4,128, +32,10,5,9,59,11,9,0,64,11,10,4,79,6,252,127,88,0,60,0,76,4,2,0,0 +#endif +}; + +static const struct { const char *name; int ofs; } libbc_map[] = { +{"math_deg",0}, +{"math_rad",25}, +{"string_len",50}, +{"table_foreachi",69}, +{"table_foreach",140}, +{"table_getn",215}, +{"table_remove",234}, +{"table_move",367}, +{NULL,522} +}; + diff --git a/src/lib_jit.c b/src/lib_jit.c index 376ad2e6bd..b100439e74 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -105,26 +105,38 @@ LJLIB_CF(jit_sethot) GCproto *pt = check_Lproto(L, 0); int32_t count = lj_lib_checkint(L, 2); int32_t loopid = lj_lib_optint(L, 3, -1); + /* + ** Loops decrement the count by two instead of one like functions when using + ** shared hot counters. 
+ */ + if (!LJ_SEPARATE_COUNTERS && loopid != -1) { + count = count * 2; + } if (count < 0 || count > 0xffff) { luaL_error(L, "bad hot count value"); } if (loopid == -1) { - int old = pt->hotcount; - pt->hotcount = count; + int old = hotcount_get_pt(L2GG(L), pt, proto_bc(pt)); + hotcount_set_pt(L2GG(L), pt, proto_bc(pt), count); setintV(L->top-1, old); return 1; } else if (loopid > 0) { BCIns *bc = proto_bc(pt); MSize hci = 0, i = 0; for (i = 0; i != pt->sizebc; i++) { +#if LJ_SEPARATE_COUNTERS int iscountbc = bc_op(bc[i]) == BC_LOOPHC; +#else + int iscountbc = bc_op(bc[i]) == BC_FORL || bc_op(bc[i]) == BC_ITERL || + bc_op(bc[i]) == BC_LOOP; +#endif if (iscountbc) { if (++hci == loopid) { - BCIns *hcbc = bc + i; - int old = hotcount_loop_get(hcbc-1); - hotcount_loop_set(hcbc-1, count); + BCIns *hcbc = bc + i - LJ_SEPARATE_COUNTERS; + int old = hotcount_get_loop(L2GG(L), hcbc); + hotcount_set_loop(L2GG(L), hcbc, count); setintV(L->top-1, old); return 1; } diff --git a/src/lj_arch.h b/src/lj_arch.h index fcebd84b28..5455ac4bc2 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -498,6 +498,12 @@ #define LJ_HASJIT 1 #endif +#if !LUAJIT_NO_SEPARATE_COUNTERS && LJ_HASJIT && LJ_TARGET_X86ORX64 +#define LJ_SEPARATE_COUNTERS 1 +#else +#define LJ_SEPARATE_COUNTERS 0 +#endif + /* Disable or enable the FFI extension. */ #if defined(LUAJIT_DISABLE_FFI) || defined(LJ_ARCH_NOFFI) #define LJ_HASFFI 0 diff --git a/src/lj_bc.h b/src/lj_bc.h index ff01e304fa..4df0acfb17 100644 --- a/src/lj_bc.h +++ b/src/lj_bc.h @@ -56,6 +56,12 @@ (((BCIns)(o))|((BCIns)(a)<<8)|((BCIns)(d)<<16)) #define BCINS_AJ(o, a, j) BCINS_AD(o, a, (BCPos)((int32_t)(j)+BCBIAS_J)) +#if LJ_SEPARATE_COUNTERS + #define BCDEF_LOOPHC(_) _(LOOPHC, ___, ___, lit, ___) +#else + #define BCDEF_LOOPHC(_) +#endif + /* Bytecode instruction definition. Order matters, see below. 
** ** (name, filler, Amode, Bmode, Cmode or Dmode, metamethod) @@ -183,7 +189,7 @@ _(LOOP, rbase, ___, jump, ___) \ _(ILOOP, rbase, ___, jump, ___) \ _(JLOOP, rbase, ___, lit, ___) \ - _(LOOPHC, ___, ___, lit, ___) \ + BCDEF_LOOPHC(_) \ \ _(JMP, rbase, ___, jump, ___) \ \ diff --git a/src/lj_bcread.c b/src/lj_bcread.c index 6a8c9cd560..2fd17fa1d0 100644 --- a/src/lj_bcread.c +++ b/src/lj_bcread.c @@ -383,7 +383,7 @@ GCproto *lj_bcread_proto(LexState *ls) setmref(pt->uvinfo, NULL); setmref(pt->varinfo, NULL); } -#if LJ_HASJIT +#if LJ_SEPARATE_COUNTERS pt->hotcount = L2J(ls->L)->param[JIT_P_hotfunc] - 1; #endif return pt; diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h index de5b4ff5ed..4726cbd678 100644 --- a/src/lj_dispatch.h +++ b/src/lj_dispatch.h @@ -114,16 +114,27 @@ typedef struct GG_State { #define GG_DISP2HOT (GG_OFS(hotcount) - GG_OFS(dispatch)) #define GG_DISP2STATIC (GG_LEN_DDISP*(int)sizeof(ASMFunction)) +#if LJ_SEPARATE_COUNTERS +#define hotcount_get_pt(gg, pt, pc) ((pt)->hotcount) +#define hotcount_set_pt(gg, pt, pc, value) ((pt)->hotcount = (value)) + +#define hotcount_get_loop(gg, pc) \ + ((HotCount)(((pc)[1]) >> 16)) + +#define hotcount_set_loop(gg, pc, val) \ + (pc)[1] = (((pc)[1] & 0xffff) | ((val) << 16)) +#else #define hotcount_get(gg, pc) \ (gg)->hotcount[(u32ptr(pc)>>2) & (HOTCOUNT_SIZE-1)] #define hotcount_set(gg, pc, val) \ (hotcount_get((gg), (pc)) = (HotCount)(val)) -#define hotcount_loop_get(pc) \ - ((HotCount)(((pc)[1]) >> 16)) +#define hotcount_get_pt(gg, pt, pc) ((void)pt, hotcount_get(gg, (pc)+1)) +#define hotcount_set_pt(gg, pt, pc, val) (hotcount_set(gg, (pc)+1, val), (void)pt) -#define hotcount_loop_set(pc, val) \ - (pc)[1] = (((pc)[1] & 0xffff) | ((val) << 16)) +#define hotcount_get_loop(gg, pc) hotcount_get(gg, (pc)+1) +#define hotcount_set_loop(gg, pc, val) hotcount_set(gg, (pc)+1, val) +#endif /* Dispatch table management. 
*/ LJ_FUNC void lj_dispatch_init(GG_State *GG); diff --git a/src/lj_jit.h b/src/lj_jit.h index 611376a3fa..6a854dae63 100644 --- a/src/lj_jit.h +++ b/src/lj_jit.h @@ -95,6 +95,17 @@ #define JIT_P_sizemcode_DEFAULT 32 #endif +#define funchot_scale 2 + +#if LJ_SEPARATE_COUNTERS +#define JIT_PARAMDEF_COUNTERS(_) \ + _(\007, hotfunc, (56*2)) /* # of iter. to detect a hot function. */ \ + _(\013, penaltyfunc, (36*2)) /* initial penalty hot counter back off value */ \ + _(\016, penaltymaxfunc, (25000*2)) /* max the function hot count backoff can be increased to */ +#else +#define JIT_PARAMDEF_COUNTERS(_) +#endif + /* Optimization parameters and their defaults. Length is a char in octal! */ #define JIT_PARAMDEF(_) \ _(\010, maxtrace, 1000) /* Max. # of traces in cache. */ \ @@ -105,11 +116,8 @@ _(\011, minstitch, 0) /* Min. # of IR ins for a stitched trace. */ \ \ _(\007, hotloop, 56) /* # of iter. to detect a hot loop. */ \ - _(\007, hotfunc, 56*2) /* # of iter. to detect a hot function. */ \ _(\013, penaltyloop, 36) /*. initial penalty hot counter back off value */ \ - _(\013, penaltyfunc, (36*2)) /* initial penalty hot counter back off value */ \ _(\016, penaltymaxloop, 25000) /* max the loop hot count backoff can be increased to */ \ - _(\016, penaltymaxfunc, 50000) /* max the function hot count backoff can be increased to */ \ _(\007, hotexit, 10) /* # of taken exits to start a side trace. */ \ _(\007, tryside, 4) /* # of attempts to compile a side trace. */ \ \ @@ -122,6 +130,7 @@ _(\011, sizemcode, JIT_P_sizemcode_DEFAULT) \ /* Max. total size of all machine code areas (in KBytes). */ \ _(\010, maxmcode, 512) \ + JIT_PARAMDEF_COUNTERS(_) \ /* End of list. 
*/ enum { diff --git a/src/lj_parse.c b/src/lj_parse.c index 0cf85eef91..ddfd1f5b44 100644 --- a/src/lj_parse.c +++ b/src/lj_parse.c @@ -1576,7 +1576,7 @@ static GCproto *fs_finish(LexState *ls, BCLine line) pt->numparams = fs->numparams; pt->framesize = fs->framesize; setgcref(pt->chunkname, obj2gco(ls->chunkname)); -#if LJ_HASJIT +#if LJ_SEPARATE_COUNTERS pt->hotcount = L2J(ls->L)->param[JIT_P_hotfunc] - 1; #endif @@ -2353,11 +2353,13 @@ static void parse_break(LexState *ls) static void emit_loophotcount(FuncState *fs) { +#if LJ_SEPARATE_COUNTERS #if LJ_HASJIT bcemit_AD(fs, BC_LOOPHC, 0, L2J(fs->L)->param[JIT_P_hotloop] - 1); #else bcemit_AD(fs, BC_LOOPHC, 0, 0); #endif +#endif } /* Parse 'goto' statement. */ @@ -2503,7 +2505,7 @@ static void parse_for_num(LexState *ls, GCstr *varname, BCLine line) emit_loophotcount(fs); fs->bcbase[loopend].line = line; /* Fix line for control ins. */ jmp_patchins(fs, loopend, loop+1); - jmp_patchins(fs, loop, fs->pc-1); + jmp_patchins(fs, loop, fs->pc-LJ_SEPARATE_COUNTERS); } /* Try to predict whether the iterator is next() and specialize the bytecode. diff --git a/src/lj_record.c b/src/lj_record.c index 0c2eba1bcb..96ec11c1cf 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1671,7 +1671,7 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) lua_assert(bc_op(*J->pc) == BC_JFUNCF); lj_trace_flush(J, lnk); /* Flush trace that only returns. */ /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ - pt->hotcount = LJ_PRNG_BITS(J, 4); + hotcount_set_pt(J2GG(J), pt, J->pc, LJ_PRNG_BITS(J, 4)); } lj_trace_err(J, LJ_TRERR_CUNROLL); } @@ -2414,10 +2414,10 @@ void lj_record_ins(jit_State *J) case BC_IFUNCV: lj_trace_err(J, LJ_TRERR_BLACKL); break; - +#if LJ_SEPARATE_COUNTERS case BC_LOOPHC: break; - +#endif case BC_JMP: if (ra < J->maxslot) J->maxslot = ra; /* Shrink used slots. 
*/ diff --git a/src/lj_trace.c b/src/lj_trace.c index 7ee2d34bbf..cc799e7e43 100644 --- a/src/lj_trace.c +++ b/src/lj_trace.c @@ -372,13 +372,21 @@ static void blacklist_pc(GCproto *pt, BCIns *pc) pt->flags |= PROTO_ILOOP; } + + /* Penalize a bytecode instruction. */ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) { + uint32_t i, val, maxval; +#if LJ_SEPARATE_COUNTERS int isloop = proto_bcpos(pt, pc) != 0; - uint32_t i, val = J->param[isloop ? JIT_P_penaltyloop : JIT_P_penaltyfunc]; - uint32_t maxval = (uint32_t)J->param[isloop ? JIT_P_penaltymaxloop : - JIT_P_penaltymaxfunc]; + val = J->param[isloop ? JIT_P_penaltyloop : JIT_P_penaltyfunc]; + maxval = (uint32_t)J->param[isloop ? JIT_P_penaltymaxloop : + JIT_P_penaltymaxfunc]; +#else + val = J->param[JIT_P_penaltyloop] * 2; + maxval = (uint32_t)J->param[JIT_P_penaltymaxloop] * 2; +#endif for (i = 0; i < PENALTY_SLOTS; i++) if (mref(J->penalty[i].pc, const BCIns) == pc) { /* Cache slot found? */ /* First try to bump its hotcount several times. */ @@ -397,15 +405,20 @@ static void penalty_pc(jit_State *J, GCproto *pt, BCIns *pc, TraceError e) setpenalty: J->penalty[i].val = (uint16_t)val; J->penalty[i].reason = e; + +#if LJ_SEPARATE_COUNTERS /* If the pc is the function header set the hot count in the proto */ if (!isloop) { lua_assert(val == J->param[JIT_P_penaltyfunc] || val > pt->hotcount); - pt->hotcount = val; + hotcount_set_pt(J2GG(J), pt, pc, val); } else { lua_assert(bc_op(pc[1]) == BC_LOOPHC); - lua_assert(val == J->param[JIT_P_penaltyloop] || val > hotcount_loop_get(pc)); - hotcount_loop_set(pc, val); + lua_assert(val == J->param[JIT_P_penaltyloop] || val > hotcount_get_loop(J2GG(J), pc)); + hotcount_set_loop(J2GG(J), pc, val); } +#else + hotcount_set(J2GG(J), pc+1, val); +#endif } /* -- Trace compiler state machine ---------------------------------------- */ @@ -586,10 +599,10 @@ static int trace_abort(jit_State *J) if (e == LJ_TRERR_RETRY) { /* Immediate retry. 
*/ if (proto_bcpos(startpt, startpc) == 0) { - startpt->hotcount = 1; + hotcount_set_pt(J2GG(J), startpt, startpc, 1); } else { lua_assert(bc_op(startpc[0]) > BC_FORI || bc_op(startpc[0]) <= BC_JLOOP); - hotcount_set(J2GG(J), startpc+1, 1); + hotcount_set_loop(J2GG(J), startpc, 1); } } else { penalty_pc(J, startpt, startpc, e); @@ -749,14 +762,17 @@ void LJ_FASTCALL lj_trace_hot(jit_State *J, BCIns *pc) /* Note: pc is the interpreter bytecode PC here. It's offset by 1. */ ERRNO_SAVE /* Reset hotcount. */ +#if LJ_SEPARATE_COUNTERS if (bc_op(pc[-1]) >= BC_FUNCF && bc_op(pc[-1]) <= BC_JFUNCV) { GCproto *pt = (GCproto *)(((char *)(pc-1)) - sizeof(GCproto)); lua_assert(pt->hotcount == 0xffff); - pt->hotcount = J->param[JIT_P_hotfunc] - 1; - } else { - BCIns *loop = pc-1; - lua_assert(hotcount_loop_get(loop) == 0xffff); - hotcount_loop_set(loop, J->param[JIT_P_hotloop] - 1); + hotcount_set_pt(J2GG(J), pt, pc, J->param[JIT_P_hotfunc] - 1); + } + else +#endif + { + lua_assert(hotcount_get_loop(J2GG(J), pc-1) == 0xffff || hotcount_get_loop(J2GG(J), pc-1) == 0xfffe); + hotcount_set_loop(J2GG(J), pc-1, J->param[JIT_P_hotloop] - 1); } /* Only start a new trace if not recording or inside __gc call or vmevent. 
*/ diff --git a/src/msvcbuild.bat b/src/msvcbuild.bat index 71bde7598d..56eea129f7 100644 --- a/src/msvcbuild.bat +++ b/src/msvcbuild.bat @@ -32,11 +32,11 @@ if exist minilua.exe.manifest^ %LJMT% -manifest minilua.exe.manifest -outputresource:minilua.exe -@set DASMFLAGS=-D WIN -D JIT -D FFI -D P64 +@set DASMFLAGS=-D WIN -D JIT -D FFI -D SEPARATE_COUNTERS -D P64 @set LJARCH=x64 @minilua @if errorlevel 8 goto :X64 -@set DASMFLAGS=-D WIN -D JIT -D FFI +@set DASMFLAGS=-D WIN -D JIT -D FFI -D SEPARATE_COUNTERS @set LJARCH=x86 @set LJCOMPILE=%LJCOMPILE% /arch:SSE2 :X64 diff --git a/src/vm_x64.dasc b/src/vm_x64.dasc index f28b78db21..d157fdb721 100644 --- a/src/vm_x64.dasc +++ b/src/vm_x64.dasc @@ -334,12 +334,26 @@ | |// Decrement hashed hotcount and trigger trace recorder if zero. |.macro hotloop, reg +|.if SEPARATE_COUNTERS | sub word [PCd+2], 1 +|.else +| mov reg, PCd +| shr reg, 1 +| and reg, HOTCOUNT_PCMASK +| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP +|.endif | jb ->vm_hotloop |.endmacro | |.macro hotcall, reg +|.if SEPARATE_COUNTERS | sub word [PCd-4+PC2PROTO(hotcount)], 1 +|.else +| mov reg, PCd +| shr reg, 1 +| and reg, HOTCOUNT_PCMASK +| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL +|.endif | jb ->vm_hotcall |.endmacro | @@ -4465,17 +4479,25 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) if (op == BC_JITERL) { | mov [RA-8], RB | jmp =>BC_JLOOP + |.if SEPARATE_COUNTERS |2: | ins_next2 // Skips over BC_LOOPHC + |.endif } else { | branchPC RD // Otherwise save control var + branch. 
| mov [RA-8], RB + |.if SEPARATE_COUNTERS |1: | ins_next |2: | add PC, 4 // Skip over BC_LOOPHC | jmp <1 + |.endif } + |.if not SEPARATE_COUNTERS + |2: + | ins_next + |.endif break; case BC_LOOP: @@ -4490,12 +4512,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) case BC_ILOOP: | ins_A // RA = base, RD = target (loop extent) + |.if SEPARATE_COUNTERS | ins_next2 + |.else + | ins_next + |.endif break; +#if LJ_SEPARATE_COUNTERS case BC_LOOPHC: | ins_next break; - +#endif case BC_JLOOP: |.if JIT | ins_AD // RA = base (ignored), RD = traceno diff --git a/src/vm_x86.dasc b/src/vm_x86.dasc index 0f63193998..9569060d00 100644 --- a/src/vm_x86.dasc +++ b/src/vm_x86.dasc @@ -427,14 +427,29 @@ | #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | +| |// Decrement hashed hotcount and trigger trace recorder if zero. |.macro hotloop, reg +|.if SEPARATE_COUNTERS | sub word [PC+2], 1 +|.else +| mov reg, PC +| shr reg, 1 +| and reg, HOTCOUNT_PCMASK +| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP +|.endif | jb ->vm_hotloop |.endmacro | |.macro hotcall, reg +|.if SEPARATE_COUNTERS | sub word [PC-4+PC2PROTO(hotcount)], 1 +|.else +| mov reg, PC +| shr reg, 1 +| and reg, HOTCOUNT_PCMASK +| sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL +|.endif | jb ->vm_hotcall |.endmacro | @@ -5242,19 +5257,27 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) | mov RB, [RA] | mov [RA-8], RB | jmp =>BC_JLOOP + |.if SEPARATE_COUNTERS |2: | ins_next2 // Skips over BC_LOOPHC + |.endif } else { | branchPC RD // Otherwise save control var + branch. 
| mov RD, [RA] | mov [RA-4], RB | mov [RA-8], RD + |.if SEPARATE_COUNTERS |1: | ins_next |2: | add PC, 4 // Skip over BC_LOOPHC | jmp <1 + |.endif } + |.if not SEPARATE_COUNTERS + |2: + | ins_next + |.endif break; case BC_LOOP: @@ -5268,12 +5291,17 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop) break; case BC_ILOOP: | ins_A // RA = base, RD = target (loop extent) + |.if SEPARATE_COUNTERS | ins_next2 + |.else + | ins_next + |.endif break; +#if LJ_SEPARATE_COUNTERS case BC_LOOPHC: | ins_next break; - +#endif case BC_JLOOP: |.if JIT | ins_AD // RA = base (ignored), RD = traceno diff --git a/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua b/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua index 1378a12420..bb951c13d3 100644 --- a/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua +++ b/tests/LuaJIT-test-cleanup/test/trace/hotcounters.lua @@ -25,11 +25,22 @@ local function getmaxcount(attempts, penalty) return count end +local seperate_penalty = true + +if not pcall(jit.opt.start, "penaltymaxfunc="..func_penalty) then + maxattemps_func = 11 + maxattemps_loop = 11 + seperate_penalty = false +end + local countmax_func = getmaxcount(maxattemps_func, func_penalty) local countmax_loop = getmaxcount(maxattemps_loop, loop_penalty) print("penaltymaxfunc="..countmax_func, "penaltymaxloop="..countmax_loop) -jit.opt.start("penaltymaxfunc="..countmax_func, "penaltymaxloop="..countmax_loop) +if seperate_penalty then + jit.opt.start("penaltymaxfunc="..countmax_func, "penaltymaxloop="..countmax_loop) +end + local function calln(f, n, ...) 
for i = 1, n do From cc22eef9a8a4ecd432de1808753d6cba61d65b86 Mon Sep 17 00:00:00 2001 From: fsfod Date: Fri, 14 Dec 2018 06:21:58 +0000 Subject: [PATCH 7/8] Fix missing newline at the end of file for test/trace/index --- tests/LuaJIT-test-cleanup/test/trace/index | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/LuaJIT-test-cleanup/test/trace/index b/tests/LuaJIT-test-cleanup/test/trace/index index d6afb32bcd..d04be19ef4 100644 --- a/tests/LuaJIT-test-cleanup/test/trace/index +++ b/tests/LuaJIT-test-cleanup/test/trace/index @@ -5,4 +5,4 @@ gc64_slot_revival.lua phi snap.lua stitch.lua -hotcounters.lua \ No newline at end of file +hotcounters.lua From ae185f18e901ba5405c3f6e1aa27850efdeb5938 Mon Sep 17 00:00:00 2001 From: fsfod Date: Fri, 14 Dec 2018 06:29:58 +0000 Subject: [PATCH 8/8] Keep the indentation style consistent for a changed line in lj_record.c --- src/lj_record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lj_record.c b/src/lj_record.c index 96ec11c1cf..0c38b3a9a5 100644 --- a/src/lj_record.c +++ b/src/lj_record.c @@ -1671,7 +1671,7 @@ static void check_call_unroll(jit_State *J, TraceNo lnk) lua_assert(bc_op(*J->pc) == BC_JFUNCF); lj_trace_flush(J, lnk); /* Flush trace that only returns. */ /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */ - hotcount_set_pt(J2GG(J), pt, J->pc, LJ_PRNG_BITS(J, 4)); + hotcount_set_pt(J2GG(J), pt, J->pc, LJ_PRNG_BITS(J, 4)); } lj_trace_err(J, LJ_TRERR_CUNROLL); }