Skip to content

Commit

Permalink
Merge PRs snabbco#619 snabbco#620 snabbco#621 (LuaJIT extensions) int…
Browse files Browse the repository at this point in the history
…o next
  • Loading branch information
lukego committed Sep 14, 2015
3 parents 6d7602c + 53aedce + a212176 commit 863854c
Show file tree
Hide file tree
Showing 4 changed files with 239 additions and 21 deletions.
7 changes: 6 additions & 1 deletion lib/luajit/src/jit/dump.lua
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ local bcline, disass
-- Active flag, output file handle and dump mode.
local active, out, dumpmode

-- Information about traces that is remembered for future reference.
local info = {}

------------------------------------------------------------------------------

local symtabmt = { __index = false }
Expand Down Expand Up @@ -550,6 +553,7 @@ local function dump_trace(what, tr, func, pc, otr, oex)
if dumpmode.m then dump_mcode(tr) end
end
if what == "start" then
info[tr] = { func = func, pc = pc, otr = otr, oex = oex }
if dumpmode.H then out:write('<pre class="ljdump">\n') end
out:write("---- TRACE ", tr, " ", what)
if otr then out:write(" ", otr, "/", oex) end
Expand Down Expand Up @@ -701,6 +705,7 @@ end
return {
on = dumpon,
off = dumpoff,
start = dumpon -- For -j command line option.
start = dumpon, -- For -j command line option.
info = info
}

47 changes: 42 additions & 5 deletions lib/luajit/src/jit/p.lua
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
-- G Produce raw output suitable for graphical tools (e.g. flame graphs).
-- m<number> Minimum sample percentage to be shown. Default: 3.
-- i<number> Sampling interval in milliseconds. Default: 10.
-- S[<string>] Event source if performance events are enabled.
--
----------------------------------------------------------------------------

Expand All @@ -44,6 +45,8 @@ local jit = require("jit")
assert(jit.version_num == 20100, "LuaJIT core/library version mismatch")
local profile = require("jit.profile")
local vmdef = require("jit.vmdef")
local jutil = require("jit.util")
local dump = require("jit.dump")
local math = math
local pairs, ipairs, tonumber, floor = pairs, ipairs, tonumber, math.floor
local sort, format = table.sort, string.format
Expand Down Expand Up @@ -74,7 +77,38 @@ local function prof_cb(th, samples, vmmode)
-- Collect keys for sample.
if prof_states then
if prof_states == "v" then
key_state = map_vmmode[vmmode] or vmmode
if map_vmmode[vmmode] then
key_state = map_vmmode[vmmode]
else
-- Sampling a trace: make an understandable one-line description.
local tr = tonumber(vmmode)
local info = jutil.traceinfo(tr)
local extra = dump.info[tr]
-- Show the parent of this trace (if this is a side trace)
local parent = ""
if extra and extra.otr and extra.oex then
parent = "("..extra.otr.."/"..extra.oex..")"
end
-- Show what the end of the trace links to (e.g. loop or other trace)
local lnk = ""
local link, ltype = info.link, info.linktype
if link == tr or link == 0 then lnk = "->"..ltype
elseif ltype == "root" then lnk = "->"..link
else lnk = "->"..link.." "..ltype end
-- Show the current zone (if zone profiling is enabled)
local z = ""
if zone and zone:get() then
z = (" %-16s"):format(zone:get())
end
-- Show the source location where the trace starts
local loc = ""
if extra and extra.func then
local fi = jutil.funcinfo(extra.func, extra.pc)
if fi.loc then loc = fi.loc end
end
local s = ("TRACE %3d %-8s %-10s%s %s"):format(vmmode, parent, lnk, z, loc)
key_state = map_vmmode[vmmode] or s
end
else
key_state = zone:get() or "(none)"
end
Expand Down Expand Up @@ -243,15 +277,18 @@ end
-- Start profiling.
local function prof_start(mode)
local interval = ""
mode = mode:gsub("i%d*", function(s) interval = s; return "" end)
mode = mode:gsub("i%d+", function(s) interval = s; return "" end)
prof_min = 3
mode = mode:gsub("m(%d+)", function(s) prof_min = tonumber(s); return "" end)
prof_depth = 1
mode = mode:gsub("%-?%d+", function(s) prof_depth = tonumber(s); return "" end)
local flavour = "S[vanilla]"
mode = mode:gsub("S%[.+%]", function(s) flavour = s; return "" end)

local m = {}
for c in mode:gmatch(".") do m[c] = c end
prof_states = m.z or m.v
if prof_states == "z" then zone = require("jit.zone") end
prof_states = m.v or m.z
if m.z == "z" then zone = require("jit.zone") end
local scope = m.l or m.f or m.F or (prof_states and "" or "f")
local flags = (m.p or "")
prof_raw = m.r
Expand Down Expand Up @@ -285,7 +322,7 @@ local function prof_start(mode)
prof_count1 = {}
prof_count2 = {}
prof_samples = 0
profile.start(scope:lower()..interval, prof_cb)
profile.start(scope:lower()..interval..flavour, prof_cb)
prof_ud = newproxy(true)
getmetatable(prof_ud).__gc = prof_finish
end
Expand Down
5 changes: 4 additions & 1 deletion lib/luajit/src/lib_jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -555,7 +555,10 @@ static void jit_profile_callback(lua_State *L2, lua_State *L, int samples,
setfuncV(L2, L2->top++, funcV(tv));
setthreadV(L2, L2->top++, L);
setintV(L2->top++, samples);
setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
if (vmstate >= 256)
setintV(L2->top++, vmstate-256);
else
setstrV(L2, L2->top++, lj_str_new(L2, &vmst, 1));
status = lua_pcall(L2, 3, 0, 0); /* callback(thread, samples, vmstate) */
if (status) {
if (G(L2)->panic) G(L2)->panic(L2);
Expand Down
201 changes: 187 additions & 14 deletions lib/luajit/src/lj_profile.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#define lj_profile_c
#define LUA_CORE
#define _GNU_SOURCE 1

#include "lj_obj.h"

Expand All @@ -29,6 +30,17 @@
#define profile_lock(ps) UNUSED(ps)
#define profile_unlock(ps) UNUSED(ps)

#if 1
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <sys/prctl.h>
#endif


#elif LJ_PROFILE_PTHREAD

#include <pthread.h>
Expand Down Expand Up @@ -62,6 +74,8 @@ typedef struct ProfileState {
SBuf sb; /* String buffer for stack dumps. */
int interval; /* Sample interval in milliseconds. */
int samples; /* Number of samples for next callback. */
char *flavour; /* What generates profiling events. */
int perf_event_fd; /* Performace event file descriptor */
int vmstate; /* VM state when profile timer triggered. */
#if LJ_PROFILE_SIGPROF
struct sigaction oldsa; /* Previous SIGPROF state. */
Expand Down Expand Up @@ -155,7 +169,7 @@ static void profile_trigger(ProfileState *ps)
mask = g->hookmask;
if (!(mask & (HOOK_PROFILE|HOOK_VMEVENT))) { /* Set profile hook. */
int st = g->vmstate;
ps->vmstate = st >= 0 ? 'N' :
ps->vmstate = st >= 0 ? 256+st :
st == ~LJ_VMST_INTERP ? 'I' :
st == ~LJ_VMST_C ? 'C' :
st == ~LJ_VMST_GC ? 'G' : 'J';
Expand All @@ -176,29 +190,178 @@ static void profile_signal(int sig)
profile_trigger(&profile_state);
}


/* Invoke the perf_event_open system call through the generic syscall()
** entry point and hand back its result (a file descriptor, or -1 on error).
*/
static int perf_event_open(struct perf_event_attr *attr,
			   pid_t pid, int cpu, int group_fd,
			   unsigned long flags)
{
  long result = syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
  return (int)result;
}


/*
** Set up a perf_events counter as the source of profiling signals.
**
** ps->flavour is looked up in the table of known event names. For a known
** name a counter is opened with ps->interval as its sample period, the
** descriptor is switched to asynchronous notification with SIGPROF as the
** signal, and the counter is reset and enabled. The special flavour "?"
** only lists the known names on stderr.
**
** On success the descriptor is stored in ps->perf_event_fd. In every other
** case (listing, unknown flavour, any system call failure) a diagnostic is
** written to stderr, no counter is left open and ps->perf_event_fd is 0,
** which is the value profile_timer_stop() tests to decide that no perf
** event is active.
*/
static void register_prof_events(ProfileState *ps)
{
  struct flavour_t {
    const char *name; uint32_t type; uint64_t config;
  };

  static const struct flavour_t flavours[] = {
    { "sw-cpu-clock",
      PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK },
    { "sw-context-switches",
      PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES },
    { "sw-page-faults",
      PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS },
    { "sw-minor-page-faults",
      PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN },
    { "sw-major-page-faults",
      PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ },
    { "branch-instructions",
      PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
    { "cpu-cycles",
      PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES },
    { "instructions",
      PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS },
    { "cache-references",
      PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES },
    { "cache-misses",
      PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES },
    { "branch-misses",
      PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES },
    { "bus-cycles",
      PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES },
    { "stalled-cycles-frontend",
      PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
    { "stalled-cycles-backend",
      PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
    /* Was a second "cpu-cycles" entry, which the first match made
    ** unreachable; give the reference-cycle counter its own name. */
    { "ref-cycles",
      PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES },
    { NULL, 0, 0 }
  };

  struct perf_event_attr attr;
  const struct flavour_t *f;
  int fd;

  /* Until a counter is successfully enabled, no perf event is active. */
  ps->perf_event_fd = 0;

  /* "S[?]" is a request for help, not an event to sample. */
  if (strcmp(ps->flavour, "?") == 0) {
    fprintf(stderr, "I know: ");
    for (f = flavours; f->name != NULL; f++)
      fprintf(stderr, "%s ", f->name);
    fprintf(stderr, "\n");
    return;
  }

  for (f = flavours; f->name != NULL; f++) {
    if (strcmp(ps->flavour, f->name) == 0)
      break;
  }
  if (f->name == NULL) {
    /* Do not fall through with a zeroed type/config: that would silently
    ** sample hardware cpu-cycles under the wrong label. */
    fprintf(stderr, "unknown profiling flavour `%s', S[?] to list\n",
	    ps->flavour);
    return;
  }

  memset(&attr, 0, sizeof(attr));
  attr.size = sizeof(attr);
  attr.type = f->type;
  attr.config = f->config;
  attr.sample_type = PERF_SAMPLE_IP;
  attr.sample_period = ps->interval;
  attr.disabled = 1;  /* Enabled explicitly below, once signals are set up. */
  attr.pinned = 1;
  attr.exclude_kernel = 1;
  attr.exclude_hv = 1;

  /* pid 0, cpu -1: see perf_event_open(2) for the target selection. */
  fd = perf_event_open(&attr, 0, -1, -1, 0);
  if (fd == -1) {
    fprintf(stderr, "! perf_event_open %m\n");
    return;
  }

  /* Request asynchronous notification on the descriptor, delivered as
  ** SIGPROF to this process. Without it no samples would ever arrive. */
  if (fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC) == -1 ||
      fcntl(fd, F_SETSIG, SIGPROF) == -1 ||
      fcntl(fd, F_SETOWN, getpid()) == -1) {
    fprintf(stderr, "! perf_events signal setup %m\n");
    close(fd);
    return;
  }

  ioctl(fd, PERF_EVENT_IOC_RESET, 0);

  if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) != 0) {
    fprintf(stderr, "! perf_events enable\n");
    close(fd);
    return;
  }

  ps->perf_event_fd = fd;
}



/* Start profiling timer. */
static void profile_timer_start(ProfileState *ps)
{
int interval = ps->interval;
struct itimerval tm;
struct sigaction sa;
tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
setitimer(ITIMER_PROF, &tm, NULL);
sa.sa_flags = SA_RESTART;
sa.sa_handler = profile_signal;
struct sigaction sa = {
.sa_flags = SA_RESTART,
.sa_handler = profile_signal
};

sigemptyset(&sa.sa_mask);
sigaction(SIGPROF, &sa, &ps->oldsa);

if (strcmp(ps->flavour, "vanilla") == 0)
{
int interval = ps->interval;
struct itimerval tm;
tm.it_value.tv_sec = tm.it_interval.tv_sec = interval / 1000;
tm.it_value.tv_usec = tm.it_interval.tv_usec = (interval % 1000) * 1000;
setitimer(ITIMER_PROF, &tm, NULL);
}
else
{
register_prof_events(ps);
}
}



/* Stop profiling timer. */
static void profile_timer_stop(ProfileState *ps)
{
struct itimerval tm;
tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
setitimer(ITIMER_PROF, &tm, NULL);
sigaction(SIGPROF, &ps->oldsa, NULL);
if (ps->perf_event_fd)
{
ioctl(ps->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0);
}
else
{
struct itimerval tm;
tm.it_value.tv_sec = tm.it_interval.tv_sec = 0;
tm.it_value.tv_usec = tm.it_interval.tv_usec = 0;
setitimer(ITIMER_PROF, &tm, NULL);
sigaction(SIGPROF, &ps->oldsa, NULL);
}
}

#elif LJ_PROFILE_PTHREAD
Expand Down Expand Up @@ -300,6 +463,8 @@ LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
{
ProfileState *ps = &profile_state;
int interval = LJ_PROFILE_INTERVAL_DEFAULT;
char *flavour;

while (*mode) {
int m = *mode++;
switch (m) {
Expand All @@ -315,6 +480,13 @@ LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
lj_trace_flushall(L);
break;
#endif
case 'S':
{
int k;
if (sscanf (mode, "[%m[^]]]%n", &flavour, &k) > 0)
mode += k;
}

default: /* Ignore unknown mode chars. */
break;
}
Expand All @@ -328,6 +500,7 @@ LUA_API void luaJIT_profile_start(lua_State *L, const char *mode,
ps->cb = cb;
ps->data = data;
ps->samples = 0;
ps->flavour = flavour;
lj_buf_init(L, &ps->sb);
profile_timer_start(ps);
}
Expand Down

0 comments on commit 863854c

Please sign in to comment.