diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c0c5854 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +*pyc +tracer/pin/bbtrace.pb.* +tracer/pin/obj-intel64 +tracer/pin/pin.log +tracer/common/bbtrace.pb.* +tracer/common/*.o +tracer/common/libtracer.a +tracer/bts/*.o +tracer/bts/bts_trace +tracer/bts/bts_trace +tests/bisect +tests/quicksort +tests/test? +viewer/bbtrace_pb2.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..5f89cd9 --- /dev/null +++ b/README.md @@ -0,0 +1,72 @@ +FuzzTrace is a "general-purpose" tracing tool for closed-source applications, +aimed at generating a concise execution trace that can be used to support the +fuzz-testing activity or other analyses. + +At the time of writing, we provide two tracing back-ends, based on Intel BTS +and PIN respectively. In any case, the execution trace is serialized to a +[protobuf](https://code.google.com/p/protobuf/) object, that can then be +processed off-line. Available back-ends are briefly described in the next +paragraphs, together with some usage examples. + +On a Debian/Ubuntu system, use the following commands to install the required +dependencies (these are common to all the back-ends available): + + roby@gimli:~$ sudo apt-get install protobuf-compiler python-protobuf libprotobuf-dev + +## Back-ends ## + +### BTS-based execution tracers ### + +The BTS back-end is an efficient tracer that leverages Intel "Branch Trace +Store" (BTS) technology. The source code for this back-end is located under +`tracer/bts`. To compile this back-end module enter directory `tracer/bts` and +run `make`. + +Hopefully, everything will go fine.
You should now be able to trace your target +application using the `bts_trace` binary: + + roby@gimli:~/projects/fuzztrace/tracer/bts$ ./bts_trace -f /dev/shm/trace.bin -- /bin/ls -la >/dev/null + [*] Got 108684 events (4967 CFG edges) + [*] Serializing to /dev/shm/trace.bin + +### PIN-based execution tracers ### + +The PIN back-end is a +[PIN](https://software.intel.com/en-us/articles/pin-a-dynamic-binary-instrumentation-tool) +extension to monitor the execution of a binary application and record its +"execution trace". FuzzTrace/PIN lives in directory `fuzztrace/tracer/pin`. To +compile the PIN module, set the `PIN_ROOT` environment variable and launch +`make` from the `tracer/pin` directory, e.g.: + + roby@gimli:~/apps/pin$ export PIN_ROOT=$(pwd) + roby@gimli:~/apps/pin$ cd ~/projects/fuzztrace/tracer/pin + roby@gimli:~/projects/fuzztrace/tracer/pin$ make + + +You should be able to trace your target program using the `pintrace` PIN tool: + + roby@gimli:~/projects/fuzztrace/tracer/pin$ ${PIN_ROOT}/pin.sh -t obj-intel64/pintrace.so -o /dev/shm/trace.bin -- /bin/ls + +## Trace viewer ## + +The `viewer` directory provides a basic trace viewer, which parses a saved +execution trace and displays recorded branches. + +Before using the viewer, compile the `bbtrace.proto` file: + + roby@gimli:~/projects/fuzztrace/viewer$ protoc --python_out=. bbtrace.proto + +After that, usage is quite straightforward: + + roby@gimli:~/projects/fuzztrace/viewer$ python trace.py /dev/shm/trace.bin + #### Trace '/dev/shm/trace.bin' #### + [/dev/shm/trace.bin] cmd: test, data: 0 bytes, time: 2015-01-29 22:47:52, hash: 3c31f, edges(s): 708, exception(s): 0 + + - CFG edges + [00402168 -> 00402178] 1 hit + [00402178 -> 0040217d] 1 hit + [0040217d -> 00412513] 1 hit + [00402190 -> 004124e0] 1 hit + ...
+ + diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 0000000..ac13c32 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,20 @@ +TESTS_ASM := $(basename $(wildcard *.S)) +TESTS_C := $(basename $(wildcard *.c)) +TESTS = $(TESTS_ASM) $(TESTS_C) +TRACES = $(TESTS:=.trace) + +tests: $(TESTS) +all: tests +clean: + -rm $(TESTS) + +traces: $(TRACES) + +$(TRACES): %.trace: % + ../tracer/bts/bts_trace -f /dev/shm/$@ ./$^ + +$(TESTS_ASM): %: %.S + $(CC) -nostdlib -o $@ $^ + +$(TESTS_C): %: %.c + $(CC) -o $@ $^ diff --git a/tests/bisect.c b/tests/bisect.c new file mode 100644 index 0000000..e18252a --- /dev/null +++ b/tests/bisect.c @@ -0,0 +1,92 @@ +#include +#include +#include + +#define ARRAY_SIZE 1024 + +static int bisect(int v[], int size, int key) { + int start, end, middle, pos; + + start = 0; + end = size-1; + pos = -1; + while (start <= end) { + middle = (end+start)/2; + +#if 0 + printf("Searching [%d - %d] for key %d, middle %d\n", + start, end, key, middle); +#endif + + if (v[middle] > key) { + end = middle-1; + } else if (v[middle] < key) { + start = middle+1; + } else { + pos = middle; + break; + } + } + + return pos; +} + +static void dump_array(int v[], int size) { + const int blocksize = 32; + int i, blockno; + + assert((size % blocksize) == 0); + + for (blockno=0; blockno 1) { + elem = atoi(argv[1]); + } else { + elem = 42; + printf("No element specified, searching for %d\n", elem); + } + + srandom(time(NULL)); + for (i=0; i +#include + +#define ARRAY_SIZE 1024 + +static void swap(void *x, void *y, size_t l) { + char *a = x, *b = y, c; + while(l--) { + c = *a; + *a++ = *b; + *b++ = c; + } +} + +int count = 0; +static void sort(char *array, size_t size, int (*cmp)(void*,void*), int begin, int end) { + count++; + if (end > begin) { + void *pivot = array + begin; + int l = begin + size; + int r = end; + while(l < r) { + if (cmp(array+l,pivot) <= 0) { + l += size; + } else if ( cmp(array+r, pivot) > 0 ) { + r -= size; + } else if ( l < r ) { + 
swap(array+l, array+r, size); + } + } + l -= size; + swap(array+begin, array+l, size); + sort(array, size, cmp, begin, l); + sort(array, size, cmp, r, end); + } +} + +void quicksort(void *array, size_t nitems, size_t size, int (*cmp)(void*,void*)) { + sort(array, size, cmp, 0, nitems*size); +} + +typedef int type; + +int type_cmp(void *a, void *b){ return (*(type*)a)-(*(type*)b); } + +int main(void) { + int array[ARRAY_SIZE]; + int len=sizeof(array)/sizeof(type); + char *sep=""; + int i; + + srandom(time(NULL)); + for (i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/logging.h" +#include "./monitor.h" +#include "./perf.h" + +struct perf_global_status gbl_status = { + NULL, // mmap + NULL, // data + 0, // data_size + 0, // fd_evt + 0, // prev_head + 0, // n_events + 0, // pid_child + 0 // data_ready +}; + +static pid_t child_start(char **argv) { + pid_t pid; + int ret; + + pid = fork(); + assert(pid >= 0); + if (pid == 0) { + // Child + ret = ptrace(PTRACE_TRACEME, 0, 0, 0); + assert(ret != -1); + + raise(SIGTRAP); + execvp(argv[0], argv); + + // Unreachable + assert(0); + } + + // Parent + return pid; +} + +static void show_help(char **argv) { + fprintf(stderr, "Syntax: %s [-f ] cmdline\n", argv[0]); +} + +// Signal handler to process perf events. 
+static void sig_handler(int signum, siginfo_t *siginfo, void *dummy) { + if (signum == SIGIO) { + kill(gbl_status.pid_child, SIGTRAP); + gbl_status.data_ready++; + } +} + +int main(int argc, char **argv) { + struct perf_event_attr pe; + struct sigaction sa; + pid_t pid_child; + int opt; + std::string s_outfile; + + while ((opt = getopt(argc, argv, "f:h")) != -1) { + switch (opt) { + case 'f': + s_outfile = optarg; + break; + default: + case 'h': + show_help(argv); + exit(1); + } + } + + // Allocate work area for processing events + gbl_status.data_size = MMAP_PAGES*getpagesize(); + gbl_status.data = (unsigned char*) malloc(gbl_status.data_size); + assert(gbl_status.data > 0); + + memset(gbl_status.data, 0, gbl_status.data_size); + + // Prepare the child process + pid_child = child_start(argv+optind); + LOG_DEBUG("Started child with pid %d", pid_child); + gbl_status.pid_child = pid_child; + + // Setup signals + memset(&sa, 0, sizeof(struct sigaction)); + sa.sa_sigaction = sig_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGIO, &sa, NULL) < 0) { + LOG_FATAL("Error setting up signal handler"); + } + + // Initialize perf structure + perf_init(&pe, MMAP_PAGES); + + gbl_status.fd_evt = perf_event_open(&pe, pid_child, -1, -1, 0); + if (gbl_status.fd_evt == -1) { + perror("perf_event_open"); + exit(EXIT_FAILURE); + } + + // Allocate mmap'ed area + gbl_status.mmap = mmap(NULL, (MMAP_PAGES+1)*getpagesize(), + PROT_READ | PROT_WRITE, MAP_SHARED, gbl_status.fd_evt, 0); + assert(gbl_status.mmap != reinterpret_cast(-1)); + + fcntl(gbl_status.fd_evt, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); + fcntl(gbl_status.fd_evt, F_SETSIG, SIGIO); + fcntl(gbl_status.fd_evt, F_SETOWN, getpid()); + + // Monitor child until it terminates + monitor_loop(pid_child, s_outfile); + + ioctl(gbl_status.fd_evt, PERF_EVENT_IOC_DISABLE, 0); + close(gbl_status.fd_evt); + munmap(gbl_status.mmap, (MMAP_PAGES+1)*getpagesize()); + + return 0; +} diff --git a/tracer/bts/bts_trace.h b/tracer/bts/bts_trace.h 
new file mode 100644 index 0000000..4949023 --- /dev/null +++ b/tracer/bts/bts_trace.h @@ -0,0 +1,30 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#ifndef _BTS_TRACE_H_ +#define _BTS_TRACE_H_ + +#define __STDC_FORMAT_MACROS + +#include +#include +#include + +#define MMAP_PAGES 512 + +// Global status of perf_event monitor +struct perf_global_status { + void *mmap; // Pointer to mmap'ed area + unsigned char *data; // Backup copy of BTS data for processing + int data_size; // Size of mmap'ed data area (without hdr) + int fd_evt; + uint64_t prev_head; + int n_events; + int pid_child; + volatile sig_atomic_t data_ready; +}; + +extern struct perf_global_status gbl_status; + +#endif // _BTS_TRACE_H_ diff --git a/tracer/bts/monitor.cc b/tracer/bts/monitor.cc new file mode 100644 index 0000000..d3d3542 --- /dev/null +++ b/tracer/bts/monitor.cc @@ -0,0 +1,255 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#include "./monitor.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/common.h" +#include "common/serialize.h" +#include "./bts_trace.h" +#include "./perf.h" + +static const int MAX_STACKTRACE_SIZE = 16; + +static ExecutionTrace gbl_execution_trace; + +static inline bool is_kernel_addr(target_addr addr) { + return (addr >> 47) != 0; +} + +// Memory mapping of a new executable section. 
Extract details of the +// mmap()'ped region and add to the global structure +static void monitor_add_mmap(struct perf_event_mmap *mmap_event) { + MemoryRegion region; + region.base = mmap_event->addr; + region.size = mmap_event->len; + region.filename = mmap_event->filename; + gbl_execution_trace.memory_regions.push_back(region); + + LOG_DEBUG("mmap()'ing image '%s' at range [0x%lx-0x%lx]", + region.filename.c_str(), region.base, region.base+region.size-1); +} + +// Add a new branch event +static void monitor_add_sample(struct perf_event_bts_sample *bts_sample) { + const target_addr bb_previous = bts_sample->bts.from; + const target_addr bb_current = bts_sample->bts.to; + +#ifdef DEBUG_MODE + fprintf(stderr, "[tid %d] from: 0x%016" PRIx64 ", to: 0x%016" PRIx64 "\n", + bts_sample->bts.tid, bb_previous, bb_current); +#endif + + // Skip kernel addresses + if (is_kernel_addr(bb_previous) || is_kernel_addr(bb_current)) { + return; + } + + gbl_execution_trace.basic_blocks.AddEdge(bb_previous, bb_current); + gbl_status.n_events++; +} + +static void monitor_process_events(void) { + struct perf_event_mmap_page *control_page; + struct perf_event_header *event; + uint64_t head, prev_head_wrap; + void *data_mmap; + int size, offset; + + control_page = (struct perf_event_mmap_page*) gbl_status.mmap; + data_mmap = (unsigned char*) gbl_status.mmap + getpagesize(); + + if (control_page == NULL) { + LOG_WARN("Skipping invalid control page"); + return; + } + + head = control_page->data_head; + rmb(); + + size = head - gbl_status.prev_head; + + prev_head_wrap = gbl_status.prev_head % gbl_status.data_size; + + LOG_DEBUG("Current head 0x%016" PRIx64 ", previous head 0x%016" PRIx64 + ", size %d data_size %d prev_head_wrap 0x%016" PRIx64, head, + gbl_status.prev_head, size, gbl_status.data_size, prev_head_wrap); + + + // Copy (possibly wrapped) data to the work area + memcpy(gbl_status.data, (unsigned char*) data_mmap + prev_head_wrap, + gbl_status.data_size - prev_head_wrap); + 
memcpy(gbl_status.data + gbl_status.data_size - prev_head_wrap, + (unsigned char*) data_mmap, prev_head_wrap); + + offset = 0; + while (offset < size) { + event = (struct perf_event_header *) &gbl_status.data[offset]; + + switch (event->type) { + case PERF_RECORD_MMAP: + monitor_add_mmap(reinterpret_cast(event)); + break; + + case PERF_RECORD_LOST: + LOG_DEBUG("Lost %lu events", ((struct perf_record_lost*) event)->lost); + break; + + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: + LOG_DEBUG("Received a (un)throttle event, ignoring"); + break; + + case PERF_RECORD_SAMPLE: + assert(event->size == sizeof(struct perf_event_bts_sample)); + monitor_add_sample(reinterpret_cast< + struct perf_event_bts_sample *>(event)); + break; + + case PERF_RECORD_FORK: { + LOG_DEBUG("Process %d (thread %d) created", + reinterpret_cast(event)->pid, + reinterpret_cast(event)->tid); + break; + } + + case PERF_RECORD_EXIT: { + LOG_DEBUG("Process %d (thread %d) has exited", + reinterpret_cast(event)->pid, + reinterpret_cast(event)->tid); + break; + } + + default: + LOG_FATAL("Unsupported perf record %d", event->type); + } + + // Skip event + offset += event->size; + } + + assert(offset == size); + + mb(); + control_page->data_tail = head; + gbl_status.prev_head = head; +} + +static void monitor_handle_signal(pid_t pid, int status) { + LOG_DEBUG("Read signal info (pid %d)", pid); + siginfo_t si; + int ret = ptrace(PTRACE_GETSIGINFO, pid, NULL, &si); + assert(ret != -1); + + // Exception type + ExceptionType exc_type; + target_addr exc_faulty; + + switch (si.si_signo) { + case SIGSEGV: + exc_type = ExceptionAccessViolation; + exc_faulty = reinterpret_cast(si.si_addr); + break; + default: + exc_type = ExceptionUnknown; + exc_faulty = 0; + break; + } + + LOG_DEBUG("Reading child registers (pid %d)", pid); + struct user_regs_struct regs; + ret = ptrace(PTRACE_GETREGS, pid, NULL, ®s); + assert(ret != -1); + + std::shared_ptr + exc(new Exception(pid, exc_type, regs.rip, exc_faulty, 
0)); + + // Generate a stack trace for this process + target_addr fp = regs.rsp; + LOG_DEBUG("Starting stack walking at 0x%016lx", fp); + + for (int i = 0; i < MAX_STACKTRACE_SIZE; i++) { + // Read the return address of current stack frame + target_addr retaddr = ptrace(PTRACE_PEEKTEXT, pid, + fp+sizeof(target_addr), 0); + if (retaddr == static_cast(-1)) { + LOG_DEBUG("Failed reading %d-th return address @0x%lx, giving up", + i, fp+sizeof(target_addr)); + break; + } + + // Save this return address + exc->stacktrace_push(retaddr); + LOG_DEBUG("ret%d @0x%lx -> 0x%lx", i, fp+sizeof(target_addr), retaddr); + + // Read the address of the next stack frame + fp = ptrace(PTRACE_PEEKTEXT, pid, fp, 0); + if (fp == static_cast(-1)) { + LOG_DEBUG("Failed reading %d-th frame pointer @0x%lx, giving up", i, fp); + break; + } + } + + gbl_execution_trace.exceptions.push_back(exc); +} + +void monitor_loop(pid_t pid_child, const std::string s_outfile) { + int ret, status; + pid_t pid; + + // Wait until child terminates + while (1) { + pid = waitpid(pid_child, &status, 0); + if (pid == -1 && errno == EINTR) { + continue; + } + + assert(pid == pid_child); + + // Process any pending event + if (gbl_status.data_ready > 0) { + gbl_status.data_ready--; + monitor_process_events(); + } + + if (WIFEXITED(status)) { + LOG_DEBUG("Child terminated with status %d", WEXITSTATUS(status)); + break; + } else if (WIFSIGNALED(status)) { + LOG_DEBUG("Child terminated by signal #%d", WTERMSIG(status)); + break; + } else if (WIFSTOPPED(status) && WSTOPSIG(status) != SIGTRAP) { + LOG_DEBUG("Child stopped by signal #%d", WSTOPSIG(status)); + monitor_handle_signal(pid_child, status); + break; + } + + // Resume child process + ret = ptrace(PTRACE_CONT, pid_child, 0, 0); + assert(ret != -1); + } + + // Process final events before terminating + monitor_process_events(); + + // Serialize to file + LOG_INFO("Got %d events (%d CFG edges)", gbl_status.n_events, + gbl_execution_trace.basic_blocks.size()); + + if 
(s_outfile.length() > 0) { + LOG_INFO("Serializing to %s", s_outfile.c_str()); + serialize_trace(s_outfile, gbl_execution_trace); + } +} diff --git a/tracer/bts/monitor.h b/tracer/bts/monitor.h new file mode 100644 index 0000000..504f057 --- /dev/null +++ b/tracer/bts/monitor.h @@ -0,0 +1,15 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#ifndef _MONITOR_H_ +#define _MONITOR_H_ + +#include + +#include +#include + +void monitor_loop(pid_t pid_child, const std::string s_outfile); + +#endif // _MONITOR_H_ diff --git a/tracer/bts/perf.cc b/tracer/bts/perf.cc new file mode 100644 index 0000000..a614b73 --- /dev/null +++ b/tracer/bts/perf.cc @@ -0,0 +1,40 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#include "./perf.h" + +#include +#include + +#include "common/logging.h" +#include "./bts_trace.h" + +/* There is no glibc wrapper for syscall perf_event_open(), so we provide a + simple wrapper here */ +int64_t perf_event_open(struct perf_event_attr *attr, pid_t pid, + int cpu, int group_fd, uint64_t flags) { + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); +} + +void perf_init(struct perf_event_attr *attr, int mmap_pages) { + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = PERF_TYPE_HARDWARE; + attr->size = sizeof(struct perf_event_attr); + attr->config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + + attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY; + + attr->disabled = 1; + attr->enable_on_exec = 1; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; + attr->exclude_idle = 1; + attr->precise_ip = 2; + attr->wakeup_watermark = mmap_pages*getpagesize()/32/4; + attr->watermark = 1; + attr->mmap = 1; + + attr->sample_period = 1; + attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR; +} diff --git a/tracer/bts/perf.h b/tracer/bts/perf.h new file mode 100644 index 0000000..f130711 --- /dev/null +++ 
b/tracer/bts/perf.h @@ -0,0 +1,77 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#ifndef _PERF_H_ +#define _PERF_H_ + +#include +#include +#include + +struct sample_id { + uint64_t id; +}; + +struct perf_record_lost { + struct perf_event_header header; + uint64_t id; + uint64_t lost; + struct sample_id sample_id; +}; + +// A branch trace record in perf_event +struct perf_event_bts { + uint64_t from; + uint32_t pid, tid; + uint64_t to; +}; + +// A perf_event branch trace sample +struct perf_event_bts_sample { + struct perf_event_header header; + struct perf_event_bts bts; +}; + +// mmap()'ing of executable areas +struct perf_event_mmap { + struct perf_event_header header; + uint32_t pid, tid; + uint64_t addr; + uint64_t len; + uint64_t pgoff; + char filename[]; + struct sample_id sample_id; +}; + +// Process exit event +struct perf_event_exit { + struct perf_event_header header; + uint32_t pid, tid; + uint64_t time; + struct sample_id sample_id; +}; + +// Process fork event +struct perf_event_fork { + struct perf_event_header header; + uint32_t pid, ppid; + uint32_t tid, ptid; + uint64_t time; + struct sample_id sample_id; +}; + +int64_t perf_event_open(struct perf_event_attr *attr, pid_t pid, + int cpu, int group_fd, uint64_t flags); +void perf_init(struct perf_event_attr *attr, int mmap_pages); + +#if defined(__i386__) +#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") + +#elif defined(__x86_64) +#define rmb() asm volatile("lfence":::"memory") +#define mb() asm volatile("mfence":::"memory") +#endif + +#endif // _PERF_H_ diff --git a/tracer/common/Makefile b/tracer/common/Makefile new file mode 100644 index 0000000..b8f4cce --- /dev/null +++ b/tracer/common/Makefile @@ -0,0 +1,20 @@ +.PHONY: all clean + +CFLAGS=-Wall -std=c++11 -fPIC +LDFLAGS= + +all: libtracer.a +clean: + -rm $(objs) $(protobuf-files) + +objs = bbtrace.pb.o bbmap.o 
exception.o serialize.o +protobuf-files = bbtrace.pb.cc bbtrace.pb.h + +libtracer.a: $(objs) + ar -rcs -o $@ $^ $(LDFLAGS) + +%.o: %.cc logging.h + $(CXX) $(CFLAGS) -c -o $@ $< + +bbtrace.pb.h bbtrace.pb.cc: bbtrace.proto + protoc --cpp_out=. $< diff --git a/tracer/common/bbmap.cc b/tracer/common/bbmap.cc new file mode 100644 index 0000000..fefad75 --- /dev/null +++ b/tracer/common/bbmap.cc @@ -0,0 +1,26 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#include "./bbmap.h" + +#include + +void BBMap::AddEdge(target_addr prev, target_addr next) { + bbmap_edge edge(prev, next); + + if (bb_map_.find(edge) == bb_map_.end()) { + bb_map_[edge] = 1; + } else { + bb_map_[edge] += 1; + } +} + +uint32_t BBMap::ComputeHash() const { + uint32_t hash = 0; + for (bbmap_iterator it = map_begin(); it != map_end(); it++) { + bbmap_edge edge = it->first; + hash ^= edge.first ^ edge.second; + } + return hash; +} diff --git a/tracer/common/bbmap.h b/tracer/common/bbmap.h new file mode 100644 index 0000000..5f2cccf --- /dev/null +++ b/tracer/common/bbmap.h @@ -0,0 +1,38 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// +// Basic block map: records executed CFG edges and their hit counts.
+// + +#ifndef _COMMON_BBMAP_H +#define _COMMON_BBMAP_H + +#include +#include + +#include "./common.h" + +typedef std::pair bbmap_edge; +typedef std::map::const_iterator bbmap_iterator; + +class BBMap { + public: + explicit BBMap() {} + + // Record the execution of a CFG edge, identified by a pair of basic block + // addresses + void AddEdge(target_addr prev, target_addr next); + + // Return a 32-bit hash of this basic block map + uint32_t ComputeHash() const; + + // Iterate over the (edge, #hit) pairs + bbmap_iterator map_begin() const { return bb_map_.begin(); } + bbmap_iterator map_end() const { return bb_map_.end(); } + int size() const { return bb_map_.size(); } + + private: + std::map bb_map_; +}; + +#endif // _COMMON_BBMAP_H diff --git a/tracer/common/bbtrace.proto b/tracer/common/bbtrace.proto new file mode 100644 index 0000000..bb9b7f5 --- /dev/null +++ b/tracer/common/bbtrace.proto @@ -0,0 +1,66 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +package bbtrace; + +message TraceHeader { + enum TraceMagic { + TRACE_MAGIC = 0x0b0b0b0b; + } + + required fixed32 magic = 1; + required uint64 timestamp = 2; + required uint32 hash = 3; +} + +message Edge { + required uint64 prev = 1; + required uint64 next = 2; + required uint64 hit = 3; +} + +// Information about "interesting" exceptions observed during program execution +// (these may either be handled by the application or cause program termination) +message Exception { + enum ExceptionType { + TYPE_UNKNOWN = 0; + TYPE_VIOLATION = 1; + } + + enum ExceptionAccess { + ACCESS_UNKNOWN = 0; + ACCESS_READ = 1; + ACCESS_WRITE = 2; + ACCESS_EXECUTE = 3; + } + + required uint32 tid = 1; + required ExceptionType type = 2 [default = TYPE_UNKNOWN]; + + // Program counter + required uint64 pc = 3; + + // Faulty address + required uint64 faultyaddr = 4; + + // Access type + required ExceptionAccess access = 5 [default = ACCESS_UNKNOWN]; + + // Stack trace + repeated uint64
stacktrace = 6; +} + +// Memory-mapped regions +message MemoryRegion { + required uint64 base = 1; + required uint32 size = 2; + required string name = 3; +} + +message Trace { + required TraceHeader header = 1; + repeated Edge edge = 2; + repeated Exception exception = 3; + repeated MemoryRegion region = 4; +} diff --git a/tracer/common/common.h b/tracer/common/common.h new file mode 100644 index 0000000..9c1e80d --- /dev/null +++ b/tracer/common/common.h @@ -0,0 +1,18 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#ifndef _COMMON_COMMON_H +#define _COMMON_COMMON_H + +#include "cstdint" + +#include "./logging.h" + +#if __x86_64__ +typedef uint64_t target_addr; +#else +typedef uint32_t target_addr; +#endif + +#endif // _COMMON_COMMON_H diff --git a/tracer/common/exception.cc b/tracer/common/exception.cc new file mode 100644 index 0000000..d4a7b26 --- /dev/null +++ b/tracer/common/exception.cc @@ -0,0 +1,11 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#include "./exception.h" + +Exception::Exception(int tid, ExceptionType type, target_addr pc, + target_addr faulty_addr, int faulty_type) + : tid_(tid), type_(type), pc_(pc), faulty_addr_(faulty_addr), + faulty_type_(faulty_type) { +} diff --git a/tracer/common/exception.h b/tracer/common/exception.h new file mode 100644 index 0000000..7635226 --- /dev/null +++ b/tracer/common/exception.h @@ -0,0 +1,61 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// +// OS-independent exception handling. 
+// + +#ifndef _COMMON_EXCEPTION_H +#define _COMMON_EXCEPTION_H + +#include +#include + +#include "./common.h" + +typedef std::vector::const_iterator stacktrace_iterator; + +enum ExceptionType { + ExceptionUnknown = 0, + ExceptionAccessViolation = 1, +}; + +enum ExceptionFaultyType { + ExceptionFaultyUnknown = 0, + ExceptionFaultyRead = 1, + ExceptionFaultyWrite = 2, + ExceptionFaultyExecute = 3, +}; + +class Exception { + public: + explicit Exception(int tid, ExceptionType type, target_addr pc, + target_addr faulty_addr, int faulty_type); + + // Getters + int tid() const { return tid_; } + ExceptionType type() const { return type_; } + target_addr pc() const { return pc_; } + target_addr faulty_addr() const { return faulty_addr_; } + int faulty_type() const { return faulty_type_; } + + // Push a new entry to the stack trace + void stacktrace_push(target_addr addr) { + stacktrace_.push_back(addr); + } + + stacktrace_iterator stacktrace_begin() const { return stacktrace_.begin(); } + stacktrace_iterator stacktrace_end() const { return stacktrace_.end(); } + + private: + int tid_; // Thread ID + ExceptionType type_; // Exception type + target_addr pc_; // Program counter + target_addr faulty_addr_; // Faulty address + int faulty_type_; // Type of faulty access + std::vector stacktrace_; // Stack trace (list of retaddr) +}; + +typedef std::vector > exceptions_list; +typedef exceptions_list::const_iterator exceptions_iterator; + +#endif // _COMMON_EXCEPTION_H diff --git a/tracer/common/logging.h b/tracer/common/logging.h new file mode 100644 index 0000000..e08f8a2 --- /dev/null +++ b/tracer/common/logging.h @@ -0,0 +1,29 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#ifndef _COMMON_LOGGING_H_ +#define _COMMON_LOGGING_H_ + +#include +#include + +// #define DEBUG_MODE + +#define _msg(tag, fmt, ...) \ + fprintf(stderr, "[" tag "] " fmt "\n", ## __VA_ARGS__) + +#ifdef DEBUG_MODE +#define LOG_DEBUG(fmt, ...) 
_msg("D", fmt, ## __VA_ARGS__) +#else +#define LOG_DEBUG(fmt, ...) +#endif + +#define LOG_INFO(fmt, ...) _msg("*", fmt, ## __VA_ARGS__) +#define LOG_WARN(fmt, ...) _msg("!", fmt, ## __VA_ARGS__) +#define LOG_FATAL(fmt, ...) { \ + _msg("#", fmt, ## __VA_ARGS__); \ + exit(-1); \ + } + +#endif // _COMMON_LOGGING_H_ diff --git a/tracer/common/serialize.cc b/tracer/common/serialize.cc new file mode 100644 index 0000000..1944db9 --- /dev/null +++ b/tracer/common/serialize.cc @@ -0,0 +1,104 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#include +#include + +#include "./bbtrace.pb.h" +#include "./serialize.h" + +// Populate a protobuf Edge object +static inline void serialize_populate_edge(bbtrace::Edge *output, + const bbmap_edge &edge, + unsigned int hit) { + output->set_prev(edge.first); + output->set_next(edge.second); + output->set_hit(hit); +} + +// Populate a protobuf Exception object +static inline void +serialize_populate_exception(bbtrace::Exception *output, + const std::shared_ptr &exc) { + output->set_tid(exc->tid()); + output->set_pc(exc->pc()); + output->set_faultyaddr(exc->faulty_addr()); + + // Set exception type + switch (exc->type()) { + case ExceptionAccessViolation: + output->set_type(bbtrace::Exception::TYPE_VIOLATION); + break; + default: + output->set_type(bbtrace::Exception::TYPE_UNKNOWN); + break; + } + + // Set faulty access type + switch (exc->faulty_type()) { + case ExceptionFaultyRead: + output->set_access(bbtrace::Exception::ACCESS_READ); + break; + case ExceptionFaultyWrite: + output->set_access(bbtrace::Exception::ACCESS_WRITE); + break; + case ExceptionFaultyExecute: + output->set_access(bbtrace::Exception::ACCESS_EXECUTE); + break; + default: + output->set_access(bbtrace::Exception::ACCESS_UNKNOWN); + break; + } + + // Stack trace + for (stacktrace_iterator it_stack = exc->stacktrace_begin(); + it_stack != exc->stacktrace_end(); it_stack++) { + output->add_stacktrace(*it_stack); + } +} + +// 
Populate a protobuf MemoryRegion object +static inline void +serialize_populate_region(bbtrace::MemoryRegion *output, + const MemoryRegion ®ion) { + output->set_base(region.base); + output->set_size(region.size); + output->set_name(region.filename); +} + +void serialize_trace(const std::string &filename, + const ExecutionTrace &execution_trace) { + bbtrace::Trace trace; + + // Create the trace header + bbtrace::TraceHeader *header = trace.mutable_header(); + header->set_magic(bbtrace::TraceHeader::TRACE_MAGIC); + header->set_timestamp(time(NULL)); + header->set_hash(execution_trace.basic_blocks.ComputeHash()); + + // Output basic block information + for (bbmap_iterator it = execution_trace.basic_blocks.map_begin(); + it != execution_trace.basic_blocks.map_end(); it++) { + bbtrace::Edge *edge = trace.add_edge(); + serialize_populate_edge(edge, it->first, it->second); + } + + // Output recorded exceptions + for (exceptions_iterator it = execution_trace.exceptions.begin(); + it != execution_trace.exceptions.end(); it++) { + bbtrace::Exception *exception = trace.add_exception(); + serialize_populate_exception(exception, *it); + } + + // Output memory-mapped regions + for (auto it = execution_trace.memory_regions.begin(); + it != execution_trace.memory_regions.end(); it++) { + bbtrace::MemoryRegion *region = trace.add_region(); + serialize_populate_region(region, *it); + } + + std::fstream outstream(filename.c_str(), + std::ios::out | std::ios::trunc | std::ios::binary); + trace.SerializeToOstream(&outstream); +} diff --git a/tracer/common/serialize.h b/tracer/common/serialize.h new file mode 100644 index 0000000..96c0b0a --- /dev/null +++ b/tracer/common/serialize.h @@ -0,0 +1,37 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// +// Serialize trace data to file.
+// + +#ifndef _COMMON_SERIALIZE_H +#define _COMMON_SERIALIZE_H + +#include +#include + +#include "./common.h" +#include "./bbmap.h" +#include "./exception.h" + +typedef struct { + target_addr base; + unsigned int size; + std::string filename; +} MemoryRegion; + +typedef struct { + // Map of basic block edges + BBMap basic_blocks; + + // Tracked exceptions + exceptions_list exceptions; + + // Mapped memory regions + std::vector memory_regions; +} ExecutionTrace; + +void serialize_trace(const std::string &filename, + const ExecutionTrace &execution_trace); + +#endif // _COMMON_SERIALIZE_H diff --git a/tracer/pin/images.H b/tracer/pin/images.H new file mode 100644 index 0000000..db3a7d8 --- /dev/null +++ b/tracer/pin/images.H @@ -0,0 +1,15 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#ifndef _IMAGES_H +#define _IMAGES_H + +#include "pin.H" + +BOOL Images_IsInterestingAddress(ADDRINT addr); + +// Image loading callback +VOID Images_CallbackNewImage(IMG img, VOID *v); + +#endif // _IMAGES_H diff --git a/tracer/pin/images.cpp b/tracer/pin/images.cpp new file mode 100644 index 0000000..bff3021 --- /dev/null +++ b/tracer/pin/images.cpp @@ -0,0 +1,69 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#include +#include +#include +#include + +#include "images.H" +#include "common/serialize.h" + +extern ExecutionTrace gbl_execution_trace; + +// Blacklisted image names +static const std::string gbl_bad_images[] = { +#if 0 + "/lib/x86_64-linux-gnu/libacl.so.1", + "/lib/x86_64-linux-gnu/libattr.so.1", + "/lib/x86_64-linux-gnu/libc.so.6", + "/lib/x86_64-linux-gnu/libdl.so.2", + "/lib/x86_64-linux-gnu/libpcre.so.3", + "/lib/x86_64-linux-gnu/libselinux.so.1", + "/lib64/ld-linux-x86-64.so.2", +#endif +}; + +// Vector of std::string for blacklisted memory images +static const std::vector gbl_img_blacklist( + gbl_bad_images, + gbl_bad_images + sizeof(gbl_bad_images) / 
sizeof(gbl_bad_images[0]) +); + +// Lower/upper bounds of active images (i.e., those we are interested to trace) +static std::vector > gbl_img_active; + +BOOL Images_IsInterestingAddress(ADDRINT addr) { + BOOL b = FALSE; + for (std::vector >::iterator it = + gbl_img_active.begin(); + it != gbl_img_active.end(); + it++) { + if (addr >= it->first && addr <= it->second) { + b = TRUE; + break; + } + } + return b; +} + +VOID Images_CallbackNewImage(IMG img, VOID *v) { + // Check if the image is blacklisted + const std::string name = IMG_Name(img); + if (std::find(gbl_img_blacklist.begin(), gbl_img_blacklist.end(), name) + != gbl_img_blacklist.end()) { + return; + } + + // Otherwise, add the image address range to the global "gbl_img_active" + // vector, to keep track of which instructions must be processed + std::pair range(IMG_LowAddress(img), IMG_HighAddress(img)); + gbl_img_active.push_back(range); + + MemoryRegion region; + region.base = IMG_LowAddress(img); + region.size = IMG_HighAddress(img) - IMG_LowAddress(img); + region.filename = IMG_Name(img); + gbl_execution_trace.memory_regions.push_back(region); +} diff --git a/tracer/pin/linux_exception.H b/tracer/pin/linux_exception.H new file mode 100644 index 0000000..a0e7063 --- /dev/null +++ b/tracer/pin/linux_exception.H @@ -0,0 +1,16 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// +// Exception handling, Linux version. 
+// + +#ifndef _LINUX_EXCEPTION_H +#define _LINUX_EXCEPTION_H + +#include "pin.H" +#include "common/exception.h" + +Exception* Linux_BuildException(THREADID tid, INT32 sig, + const EXCEPTION_INFO *pExceptInfo); + +#endif // _LINUX_EXCEPTION_H diff --git a/tracer/pin/linux_exception.cpp b/tracer/pin/linux_exception.cpp new file mode 100644 index 0000000..ca7ef2c --- /dev/null +++ b/tracer/pin/linux_exception.cpp @@ -0,0 +1,31 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// + +#include +#include + +#include "linux_exception.H" + +#include "exception.H" + +Exception* Linux_BuildException(THREADID tid, INT32 sig, + const EXCEPTION_INFO *pExceptInfo) { + assert(sig == SIGSEGV); + assert(pExceptInfo != NULL); + assert(PIN_GetExceptionClass(PIN_GetExceptionCode(pExceptInfo)) == + EXCEPTCLASS_ACCESS_FAULT); + + ADDRINT pc = PIN_GetExceptionAddress(pExceptInfo); + + // TODO(roby): check PIN_GetFaultyAccessAddress() return value + ADDRINT faulty_addr = 0; + PIN_GetFaultyAccessAddress(pExceptInfo, &faulty_addr); + + FAULTY_ACCESS_TYPE faulty_type = PIN_GetFaultyAccessType(pExceptInfo); + + Exception *exception = + new Exception(tid, ExceptionAccessViolation, pc, faulty_addr, faulty_type); + + return exception; +} diff --git a/tracer/pin/makefile b/tracer/pin/makefile new file mode 100644 index 0000000..da26698 --- /dev/null +++ b/tracer/pin/makefile @@ -0,0 +1,21 @@ +############################################################## +# +# DO NOT EDIT THIS FILE! +# +############################################################## + +# If the tool is built out of the kit, PIN_ROOT must be specified in the make invocation and point to the kit root. 
+ifdef PIN_ROOT +CONFIG_ROOT := $(PIN_ROOT)/source/tools/Config +else +CONFIG_ROOT := ../Config +endif +include $(CONFIG_ROOT)/makefile.config +include makefile.rules +include $(TOOLS_ROOT)/Config/makefile.default.rules + +############################################################## +# +# DO NOT EDIT THIS FILE! +# +############################################################## diff --git a/tracer/pin/makefile.rules b/tracer/pin/makefile.rules new file mode 100644 index 0000000..9fe26c3 --- /dev/null +++ b/tracer/pin/makefile.rules @@ -0,0 +1,35 @@ +TEST_TOOL_ROOTS := +TEST_ROOTS := +SANITY_SUBSET := +TOOL_ROOTS := pintrace +SA_TOOL_ROOTS := +APP_ROOTS := +OBJECT_ROOTS := +DLL_ROOTS := +LIB_ROOTS := + +TOOL_CXXFLAGS += -I../ -std=c++11 +TOOL_LPATHS += -L../common/ +TOOL_LIBS += -lprotobuf -ltracer +TOOL_OBJS = \ + $(OBJDIR)pintrace$(OBJ_SUFFIX) \ + $(OBJDIR)images$(OBJ_SUFFIX) + +# Windows-specific objects +ifeq ($(TARGET_OS),windows) + TOOL_OBJS += $(OBJDIR)win_exception$(OBJ_SUFFIX) +endif + +# Linux-specific objects +ifeq ($(TARGET_OS),linux) + TOOL_OBJS += $(OBJDIR)linux_exception$(OBJ_SUFFIX) +endif + +libtracer=../common/libtracer.a + +.PHONY: $(libtracer) +$(libtracer): + @$(MAKE) -C $(dir $(libtracer)) + +$(OBJDIR)pintrace$(PINTOOL_SUFFIX): $(libtracer) $(TOOL_OBJS) + $(LINKER) $(TOOL_LDFLAGS) $(LINK_EXE)$@ $(TOOL_OBJS) $(TOOL_LPATHS) $(TOOL_LIBS) diff --git a/tracer/pin/pintrace.cpp b/tracer/pin/pintrace.cpp new file mode 100644 index 0000000..ac08e09 --- /dev/null +++ b/tracer/pin/pintrace.cpp @@ -0,0 +1,157 @@ +// +// Copyright 2015, Roberto Paleari (@rpaleari) and Aristide Fattori (@joystick) +// +// Basic BB tracer. 
+// + +#include +#include +#include +#include +#include + +#include "pin.H" + +#include "common/bbmap.h" +#include "common/exception.h" +#include "common/serialize.h" +#include "images.H" + +#ifdef _WIN32 +// Windows +#include "win_exception.H" +#else +// Linux +#include +#include "linux_exception.H" +#endif + +static const std::string DEFAULT_OUTFILE = "/dev/shm/trace.bin"; +static const int MAX_STACKTRACE_SIZE = 16; + +// Globals +ExecutionTrace gbl_execution_trace; + +// Map to keep track of last observed basic block, for each application thread +static std::map gbl_last_bb; + +// Command line switches +KNOB gbl_outfile(KNOB_MODE_WRITEONCE, "pintool", "f", DEFAULT_OUTFILE, + "specify file name for FuzzTrace output"); + +// Print help message +INT32 Usage() { + cerr << KNOB_BASE::StringKnobSummary() << endl; + return -1; +} + +static void build_stacktrace(const CONTEXT *context, Exception *exc) { + // Generate a stack trace for this process + ADDRINT retaddr; + ADDRINT fp = PIN_GetContextReg(context, REG_GBP); + + for (int i = 0; i < MAX_STACKTRACE_SIZE; i++) { + // Read the return address of current stack frame + size_t nbytes = + PIN_SafeCopy(&retaddr, + reinterpret_cast(fp + sizeof(ADDRINT)), + sizeof(ADDRINT)); + + if (nbytes != sizeof(ADDRINT)) { + break; + } + + // Save this return address + exc->stacktrace_push(retaddr); + // printf("ret%d @0x%lx -> 0x%lx\n", i, fp+sizeof(ADDRINT), retaddr); + + // Read the address of the next stack frame + nbytes = + PIN_SafeCopy(&fp, reinterpret_cast(fp), sizeof(ADDRINT)); + + if (nbytes != sizeof(ADDRINT)) { + break; + } + } +} + +// Instrumentation callback +VOID CallbackTrace(TRACE trace, VOID *v) { + // Visit every basic block in the trace + for (BBL bbl = TRACE_BblHead(trace); BBL_Valid(bbl); bbl = BBL_Next(bbl)) { + ADDRINT bb_current = BBL_Address(bbl); + + if (!Images_IsInterestingAddress(bb_current)) { + continue; + } + + // Compute the (bb_previous, bb_current) pair, that determines the brach + // that has been 
taken by the application + THREADID tid = PIN_ThreadId(); + + // If we have no previous BB, skip + if (gbl_last_bb.find(tid) != gbl_last_bb.end()) { + ADDRINT bb_previous = gbl_last_bb[tid]; + gbl_execution_trace.basic_blocks.AddEdge(bb_previous, bb_current); + } + + gbl_last_bb[tid] = bb_current; + } +} + +#ifdef _WIN32 +// Windows-only +VOID CallbackContextChangeWindows(THREADID tid, CONTEXT_CHANGE_REASON reason, + const CONTEXT *from, CONTEXT *to, INT32 info, + VOID *v) { + if (reason != CONTEXT_CHANGE_REASON_EXCEPTION) { + return; + } +} +#else +// Linux-only +BOOL CallbackSignalLinux(THREADID tid, INT32 sig, CONTEXT *ctxt, + BOOL hasHandler, const EXCEPTION_INFO *pExceptInfo, + VOID *v) { + Exception *exception = Linux_BuildException(tid, sig, pExceptInfo); + build_stacktrace(ctxt, exception); + gbl_execution_trace.exceptions.push_back(std::shared_ptr + (exception)); + + // Always pass the exception to the application + return TRUE; +} +#endif + +// Termination callback +VOID CallbackFini(INT32 code, VOID *v) { + string filename = gbl_outfile.Value(); + + serialize_trace(filename, gbl_execution_trace); +} + +int main(int argc, char **argv) { + // Initialize PIN library and check command-line + if (PIN_Init(argc, argv)) { + return Usage(); + } + + IMG_AddInstrumentFunction(Images_CallbackNewImage, 0); + TRACE_AddInstrumentFunction(CallbackTrace, 0); + +#ifdef _WIN32 + // Callback for context changes (e.g., exceptions). Windows only. + PIN_AddContextChangeFunction(CallbackContextChangeWindows, 0); +#else + // Callback for SEGVs. Linux only. 
+  PIN_InterceptSignal(SIGSEGV, CallbackSignalLinux, 0);
+#endif
+
+  // Callback for application termination
+  PIN_AddFiniFunction(CallbackFini, 0);
+
+  // Start the program, never returns
+  PIN_StartProgram();
+
+  return 0;
+}
diff --git a/viewer/bbtrace.proto b/viewer/bbtrace.proto
new file mode 120000
index 0000000..228b679
--- /dev/null
+++ b/viewer/bbtrace.proto
@@ -0,0 +1 @@
+../tracer/common/bbtrace.proto
\ No newline at end of file
diff --git a/viewer/trace.py b/viewer/trace.py
new file mode 100644
index 0000000..c00b129
--- /dev/null
+++ b/viewer/trace.py
@@ -0,0 +1,264 @@
+"""
+Execution trace object.
+
+Copyright 2014, Roberto Paleari (@rpaleari)
+"""
+
+import datetime
+import hashlib
+import logging
+import os
+import struct
+
+import bbtrace_pb2
+
+class MemoryRegion(object):
+    """Mapped memory region, possibly associated to a filename."""
+    def __init__(self, obj):
+        """Copy the base, size and name fields out of obj."""
+        self.base = obj.base
+        self.size = obj.size
+        self.name = obj.name
+
+    def get_lower(self):
+        """Return the lowest address of this region."""
+        return self.base
+
+    def get_upper(self):
+        """Return the highest address of this region (inclusive)."""
+        return self.base + self.size - 1
+
+    def get_name(self):
+        """Return the name associated to this region."""
+        return self.name
+
+    def has(self, addr):
+        """Check if the specified address belongs to this region."""
+        return self.get_lower() <= addr <= self.get_upper()
+
+class CrashException(object):
+    """Exception observed during program execution."""
+    def __init__(self, obj, timestamp):
+        """Constructor for the CrashException class."""
+        self.timestamp = timestamp
+        self.exc_type = obj.type
+        self.exc_pc = obj.pc
+        self.faultyaddr = obj.faultyaddr
+        self.access = obj.access
+        self.hashz = None
+
+        # Update hash value for this exception
+        self.__update_hash()
+        assert self.hashz is not None
+
+    def is_valid(self):
+        """Return True if this exception object is valid."""
+        if self.hashz is None:
+            return False
+
+        return True
+
+    def __update_hash(self):
+        """Update the hash value that uniquely characterizes this object."""
+        md5 = hashlib.md5()
+        md5.update("%d" % self.exc_type)
+        md5.update("%x" %
self.exc_pc) + md5.update("%x" % self.faultyaddr) + md5.update("%d" % self.access) + self.hashz = md5.hexdigest() + + def __str__(self): + s = "type: %d, pc: 0x%x, addr: 0x%x, access: %d" % ( + self.exc_type, self.exc_pc, self.faultyaddr, self.access) + return s + +class ExecutionTrace(object): + """Single execution of a target process.""" + + MAP_EXCEPTION_TYPE = { + bbtrace_pb2.Exception.TYPE_UNKNOWN: "unknown", + bbtrace_pb2.Exception.TYPE_VIOLATION: "violation", + } + + MAP_EXCEPTION_ACCESS = { + bbtrace_pb2.Exception.ACCESS_UNKNOWN: "unknown", + bbtrace_pb2.Exception.ACCESS_READ: "read", + bbtrace_pb2.Exception.ACCESS_WRITE: "write", + bbtrace_pb2.Exception.ACCESS_EXECUTE: "execute", + } + + def __init__(self, cmdline, tracefile, inputdata = None, deps = None): + """Constructor for the ExecutionTrace class. + + Keyword arguments: + cmdline -- process command line (list of arguments). + tracefile -- name of the trace file (must exist). + inputdata -- data feed to the program via stdin (can be None). + deps -- other file names this execution depends on (can be None). 
+ """ + self.cmdline = cmdline + self.inputdata = inputdata + self.deps = set() + + # Ensure all dependecies actually exist + if deps is not None: + assert all([os.path.exists(x) for x in deps]) + self.deps |= deps + + # Read data + self.tracefile = tracefile + f = open(tracefile, "rb") + data = f.read() + f.close() + + # Parse object + obj = bbtrace_pb2.Trace() + obj.ParseFromString(data) + assert obj.header.magic == bbtrace_pb2.TraceHeader.TRACE_MAGIC + + self.timestamp = datetime.datetime.fromtimestamp(obj.header.timestamp) + self.hashz = "%x" % obj.header.hash + + # Prepare the list of CFG edges observed in this execution + # NOTE: We ignore the execution order of CFG edges + self.edges = list(set([(e.prev, e.next, e.hit) for e in obj.edge])) + self.edges.sort() + + # Create the list of CrashException object, representing exceptions + # risen during this execution + self.exceptions = [] + for e in obj.exception: + exception = CrashException(e, self.timestamp) + self.exceptions.append(exception) + + # Parse memory-mapped regions + self.regions = [] + for m in obj.region: + region = MemoryRegion(m) + self.regions.append(region) + self.regions.sort(cmp=lambda a,b: cmp(a.base, b.base)) + + @staticmethod + def exception_type_str(n): + return ExecutionTrace.MAP_EXCEPTION_TYPE.get(n) + + @staticmethod + def exception_access_str(n): + return ExecutionTrace.MAP_EXCEPTION_ACCESS.get(n) + + def purge(self): + """Deletes all the dependency files of this execution trace.""" + if os.path.isfile(self.tracefile): + logging.debug("Removing trace file %s", self.tracefile) + os.unlink(self.tracefile) + + for dep in self.deps: + logging.debug("Removing dependency file %s", dep) + os.unlink(dep) + + def is_valid(self): + """Return True if this execution trace is valid.""" + if not os.path.isfile(self.tracefile): + return False + + for dep in self.deps: + if not os.path.isfile(dep): + return False + + return True + + def update_hash(self): + """Re-compute the hash value for this 
execution trace.""" + # Compute the hash as MD5 of CFG edges + md5 = hashlib.md5() + for bb_prev, bb_next, hit in self.edges: + md5.update(struct.pack("L", bb_prev)) + md5.update(struct.pack("L", bb_next)) + self.hashz = md5.hexdigest() + + def __str__(self): + """Return a concise string representation of this execution trace.""" + s = ("[%s] cmd: %s, data: %d bytes, time: %s, " + "hash: %s, edges(s): %d, exception(s): %d" % + (self.tracefile, " ".join(self.cmdline), + len(self.inputdata) if self.inputdata is not None else 0, + self.timestamp, self.hashz, len(self.edges), + len(self.exceptions))) + return s + + def pretty_print(self): + """ + Print this Trace object to standard output. + + Compared to __str__(), this method provides a more verbose + representation of the object. + """ + + print "#### Trace '%s' ####" % self.tracefile + print trace + print + + print " - CFG edges" + for e_prev, e_next, e_hit in trace.edges: + print " [%08x -> %08x] %d hit" % (e_prev, e_next, e_hit) + + if len(trace.exceptions) > 0: + print + print " - Exceptions" + for exc in trace.exceptions: + print (" type: %s, pc: 0x%08x, faultyaddr: 0x%08x, " + "access: %s" % ( + ExecutionTrace.exception_type_str(exc.exc_type), + exc.exc_pc, exc.faultyaddr, + ExecutionTrace.exception_access_str(exc.access))) + + if len(trace.regions) > 0: + print + print " - Memory regions" + for region in trace.regions: + print " [0x%08x, 0x%08x] %s" % (region.get_lower(), + region.get_upper(), + region.get_name()) + + +if __name__ == "__main__": + # Parse an execution trace and print it out, optionally performing a "diff" + # between two traces + import argparse + import sys + + # Parse command-line arguments + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument("-d", "--diff", default=False, action="store_true", + help="perform a diff between two trace files") + parser.add_argument("tracefiles", metavar="TRACE", nargs="+", + help="trace files") + 
args = parser.parse_args() + + # Parse trace files + traces = [] + for filename in args.tracefiles: + trace = ExecutionTrace(["test",], filename) + traces.append(trace) + + if args.diff: + assert len(traces) == 2 + print ("#### Comparing %s vs %s ####" % + (traces[0].tracefile, traces[1].tracefile)) + print + + edge0 = set([(e_prev, e_next) for e_prev, e_next, _ in traces[0].edges]) + edge1 = set([(e_prev, e_next) for e_prev, e_next, _ in traces[1].edges]) + + for filename, setz in ( + (traces[0].tracefile, edge0 - edge1), + (traces[1].tracefile, edge1 - edge0), + ): + print "- Only in %s, %d edge(s)" % (filename, len(setz)) + edges = list(setz) + edges.sort() + for edge_prev, edge_next in edges: + print "- 0x%08x -> 0x%08x" % (edge_prev, edge_next) + print + + else: + # Just print traces to stdout + for trace in traces: + trace.pretty_print()