diff --git a/README.md b/README.md index 5e7468f..f5185f8 100755 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ python3 run_afl.py ``` In another terminal (QSYM stuffs) ```bash -python3 main.py +python3 digfuzz.py ``` diff --git a/config.py b/config.py index 26eead1..8ba1a99 100644 --- a/config.py +++ b/config.py @@ -1,27 +1,13 @@ +import os + import pwn # Target to fuzz OBJ_PATH = "test.c.o" -# QSYM Remote setup -QSYM_HOST = '54.245.74.219' -QSYM_UN = 'ubuntu' -QSYM_KEYFILE = "./seem-priv-key.PEM" -QSYM_SSH_CONN = pwn.ssh(host=QSYM_HOST, user=QSYM_UN, keyfile=QSYM_KEYFILE) -PIN_SH = "/home/ubuntu/qsym/third_party/pin-2.14-71313-gcc.4.4.7-linux/pin.sh" # the location of qsym script remote -QSYM_OBJECT_PATH = "/home/ubuntu/qsym/qsym/pintool/obj-intel64/libqsym.so" # the location of qsym pin obj remote - # AFL slave count AFL_NUM_SLAVE = 1 -# NO NEED TO CHANGE IF USING run_afl.py -AFL_FUZZ_PATH = "./AFLplusplus/afl-fuzz" -AFL_IN_PATH = "./in" -AFL_OUT_PATH = "./out" -AFL_SLAVE_NAME = "s" -AFL_MASTER_NAME = "m" -AFL_CORPUS_PATH = f"{AFL_OUT_PATH}/{AFL_MASTER_NAME}/queue" # the directory of the afl master corpus - # NO NEED TO CHANGE LOCAL_UNINSTRUMENTED_EXEC_PATH = "./harness" REMOTE_UNINSTRUMENTED_EXEC_PATH = "/tmp/harness" @@ -29,7 +15,34 @@ QEMU_BIN = "./qemu_stdout" DUMPER_PATH = "./dumper" -SHM_KEY = f"/{OBJ_PATH}.shm" +SHM_KEY = f"tuba.shm" QEMU_TIMEOUT = 30 QSYM_TIMEOUT = 30 + +# NO NEED TO CHANGE IF USING run_afl.py +AFL_FUZZ_PATH = "./AFLplusplus/afl-fuzz" +AFL_IN_PATH = "./in" +AFL_OUT_PATH = "./out" +AFL_SLAVE_NAME = "s" +AFL_MASTER_NAME = "m" +AFL_CORPUS_PATH = f"{AFL_OUT_PATH}/{AFL_MASTER_NAME}/queue" # the directory of the afl master corpus + +# QSYM Remote setup +USE_SSH = False +if not USE_SSH: + os.system(f"mkdir /tmp/digfuzz && cp {LOCAL_UNINSTRUMENTED_EXEC_PATH} /tmp/digfuzz/harness") + PIN_SH = "/workdir/qsym/third_party/pin-2.14-71313-gcc.4.4.7-linux/pin.sh" # the location of qsym script remote + QSYM_OBJECT_PATH = "/workdir/qsym/qsym/pintool/obj-intel64/libqsym.so" # the location of qsym pin obj remote + QSYM_IMAGE_NAME = "qsym" + QSYM_CMD = ["docker", "-v", "/tmp/digfuzz:/tmp/digfuzz", QSYM_IMAGE_NAME] + +else: + QSYM_HOST = '54.245.74.219' + QSYM_UN = 'ubuntu' + QSYM_KEYFILE = "./seem-priv-key.PEM" + QSYM_SSH_CONN = pwn.ssh(host=QSYM_HOST, user=QSYM_UN, keyfile=QSYM_KEYFILE) + PIN_SH = "/home/ubuntu/qsym/third_party/pin-2.14-71313-gcc.4.4.7-linux/pin.sh" # the location of qsym script remote + QSYM_OBJECT_PATH = "/home/ubuntu/qsym/qsym/pintool/obj-intel64/libqsym.so" # the location of qsym pin obj remote + + diff --git a/main.py b/digfuzz.py similarity index 87% rename from main.py rename to digfuzz.py index 070ddc4..9859d29 100755 --- a/main.py +++ b/digfuzz.py @@ -14,12 +14,12 @@ def add_input_to_afl_queue(content): if not content: return global added_counter - with open("%s/id:%6d,src:digfuzz" % (config.AFL_CORPUS_PATH, added_counter), "wb+") as fp: + with open("%s/id:%6d,sync:digfuzz,src:000000" % (config.AFL_CORPUS_PATH, added_counter), "wb+") as fp: fp.write(content) added_counter += 1 -added_counter = int(1e6) +added_counter = int(1e5) utils.copy_file_to_qsym_host(config.LOCAL_UNINSTRUMENTED_EXEC_PATH, config.REMOTE_UNINSTRUMENTED_EXEC_PATH) utils.qsym_host_provide_permission(config.REMOTE_UNINSTRUMENTED_EXEC_PATH) @@ -42,14 +42,14 @@ def get_new_testcase_filenames(): while 1: qemu.build_execution_tree(get_new_testcase_filenames()) - qemu.dump_execution_tree() + # qemu.dump_execution_tree() paths = qemu.get_sorted_missed_path() solving_path = random.choice(paths) print(f"Solving for path {solving_path['flip']} with prob {solving_path['prob']}") testcase_content = open(solving_path["fn"], "rb").read() for solution in qsym.flip_it(testcase_content, solving_path["flip"], + nth=solving_path["nth"], qemu_instr_obj=qemu, testcase_fn=solving_path["fn"]): add_input_to_afl_queue(solution) - time.sleep(5) # allow fuzzer to sync corpus print("Round done") diff --git a/instr_interface.py b/instr_interface.py index 3290074..30e2f80 100644 --- a/instr_interface.py +++ b/instr_interface.py @@ -1,18 +1,19 @@ import abc import json +import angr # Exec Tree class Node: - left = None - right = None - addr = 0 - left_prob = -1 - right_prob = -1 - is_comp = False - visit_count = 1 - addr_range = None - led_by = "" + def __init__(self): + self.children = set() + self.children_prob = [] + self.max_encounter_child = {} + self.addr = 0 + self.is_comp = False + self.visit_count = 1 + self.addr_range = None + self.led_by = "" @staticmethod def to_addr(node): @@ -20,23 +21,39 @@ def to_addr(node): return node.addr return 0 + def __hash__(self): + return self.addr + def __str__(self): - return f"left: {hex(self.to_addr(self.left))}; " \ - f"right: {hex(self.to_addr(self.right))}; " \ - f"comp: {self.is_comp}; " \ + return f"comp: {self.is_comp}; " \ f"vc: {self.visit_count}; " \ - f"left_prob: {self.left_prob}; " \ - f"right_prob: {self.right_prob};" \ + f"children: {self.children}; " \ + f"prob: {self.children_prob};" \ f"led_by: {self.led_by}" \ f"addr_range: {hex(self.addr_range[0])} - {hex(self.addr_range[1])}" +class UnknownNode(Node): + pass + + class Instrumentation(abc.ABC): def __init__(self, executor): self.executor = executor self.execution_tree = {} # addr -> Node self.corpus_traces = {} + self.dfs_visited_nodes = set() self.unsolvable = set() + self.solved = set() + self.basic_block = {} # BB start => size + self.__get_basic_block_size() + + def __get_basic_block_size(self): + p = angr.Project(self.executor.uninstrumented_path, load_options={'auto_load_libs': False}) + cfg = p.analyses.CFGFast() + for key in cfg.kb.functions: + for bb in cfg.kb.functions[key].blocks: + self.basic_block[bb.addr] = bb.size def build_execution_tree(self, new_testcase_filenames: [str]): pass @@ -44,80 +61,76 @@ def build_execution_tree(self, new_testcase_filenames: [str]): def dump_execution_tree(self): print(json.dumps({hex(x): str(self.execution_tree[x]) for x in self.execution_tree}, sort_keys=True, indent=4)) - def __dfs_helper(self, current_node_addr, visited_nodes): - if current_node_addr in visited_nodes: - return - visited_nodes.add(current_node_addr) - current_node = self.execution_tree[current_node_addr] - - left_node = self.execution_tree[current_node_addr].left - right_node = self.execution_tree[current_node_addr].right - should_assign_prob = current_node.is_comp - sum_of_children = 1 # prevent div by 0, todo: this causes left + right != 1 - - if left_node is not None: - self.__dfs_helper(left_node.addr, visited_nodes) - sum_of_children += left_node.visit_count - - if right_node is not None: - self.__dfs_helper(right_node.addr, visited_nodes) - sum_of_children += right_node.visit_count + def assign_prob(self): + for addr, current_node in self.execution_tree.items(): + should_assign_prob = current_node.is_comp + sum_of_children = 1 # prevent div by 0, todo: this causes left + right != 1 - if left_node is not None: - current_node.left_prob = left_node.visit_count / sum_of_children - else: - current_node.left_prob = 3 / sum_of_children + for child_node_addr in current_node.children: + child_node = self.execution_tree[child_node_addr] + sum_of_children += child_node.visit_count - if right_node is not None: - current_node.right_prob = right_node.visit_count / sum_of_children - else: - current_node.right_prob = 3 / sum_of_children + for child_node_addr in current_node.children: + child_node = self.execution_tree[child_node_addr] + current_node.children_prob.append(child_node.visit_count / sum_of_children) - if not should_assign_prob or sum_of_children < 30: - current_node.left_prob = 1 - current_node.right_prob = 1 + while len(current_node.children_prob) < 2: + current_node.children_prob.append(3 / sum_of_children) - def assign_prob(self): - self.__dfs_helper(next(iter(self.execution_tree)), set()) + if not should_assign_prob or sum_of_children < 30: + current_node.children_prob = [1.0 for _ in range(len(current_node.children_prob))] def __get_prob(self, parent, child): parent_node = self.execution_tree[parent] - child_node = self.execution_tree[child] - if parent_node.left and parent_node.left == child_node: - return parent_node.left_prob - if parent_node.right and parent_node.right == child_node: - return parent_node.right_prob + child_node_addr = self.execution_tree[child].addr + for k, _child_addr in enumerate(parent_node.children): + if _child_addr == child_node_addr: + return parent_node.children_prob[k] print(f"[Exec] {parent} {child} not in execution tree") assert False - def __is_branch_missed(self, parent): - parent_node = self.execution_tree[parent] - return parent_node.right is None and parent_node.is_comp + def __is_branch_missed(self, parent_addr, child_addr, nth=0): + hit_count = nth + 1 + parent_node = self.execution_tree[parent_addr] + return ( + len(parent_node.children) < 2 + or hit_count not in parent_node.max_encounter_child[child_addr] + ) and parent_node.is_comp + + def __should_i_solve(self, testcase_fn, flip_pcs, nth=0): + return ((testcase_fn, flip_pcs[0], flip_pcs[1], nth) not in self.unsolvable) and \ + ((testcase_fn, flip_pcs[0], flip_pcs[1], nth) not in self.solved) - def __is_unsolvable(self, testcase_fn, flip_pcs): - return (testcase_fn, flip_pcs[0], flip_pcs[1]) in self.unsolvable + def add_unsolvable_path(self, testcase_fn, flip_pcs, nth=0): + self.unsolvable.add((testcase_fn, flip_pcs[0], flip_pcs[1], nth)) - def add_unsolvable_path(self, testcase_fn, flip_pcs): - self.unsolvable.add((testcase_fn, flip_pcs[0], flip_pcs[1])) + def add_solved_path(self, testcase_fn, flip_pcs, nth=0): + self.solved.add((testcase_fn, flip_pcs[0], flip_pcs[1], nth)) def get_sorted_missed_path(self, num=10): missed_paths = [] for filename in self.corpus_traces: + hit_counts = {} trace = self.corpus_traces[filename] prob = 1 trace_len = len(trace) - for k, node in enumerate(trace): - if k + 1 == trace_len: - break + for k in range(1, trace_len - 1): + node = trace[k] next_node = trace[k + 1] - if self.__is_branch_missed(node.addr): - path_prob = prob * node.right_prob - if self.__is_unsolvable(filename, node.addr_range): + prev_node = trace[k - 1] + + hit_counts[node] = hit_counts[node] + 1 if node in hit_counts else 1 + nth = hit_counts[node] - 1 + if self.__is_branch_missed(node.addr, next_node.addr, nth=nth): + path_prob = prob * node.children_prob[-1] + flip_it = prev_node.addr_range + if not self.__should_i_solve(filename, flip_it, nth=nth): continue missed_paths.append({ - "flip": node.addr_range, + "flip": flip_it, "prob": path_prob, - "fn": filename + "fn": filename, + "nth": nth }) prob *= self.__get_prob(node.addr, next_node.addr) return sorted(missed_paths, key=lambda x: x["prob"])[:min(num, len(missed_paths))] diff --git a/llvm_instr.py b/llvm_instr.py index 221778b..a8a4584 100644 --- a/llvm_instr.py +++ b/llvm_instr.py @@ -26,11 +26,48 @@ # clang -c -g angr_harness.c -o angr_harness.o # clang -g angr_harness.o test.cc -o test.angr - -class STDINExecutorLLVM: +class GDBExecutor: EXTRACT_START = re.compile(b"starts at address 0x(.+?) ") EXTRACT_END = re.compile(b"and ends at 0x(.+?) ") + def __init__(self, uninstrumented_path): + self.gdb_instance = None + self.uninstrumented_path = uninstrumented_path + self.cmp_table = {} + + def restart_gdb(self): + self.gdb_instance = pwn.process(["gdb", self.uninstrumented_path]) + self.gdb_instance.recvuntil("(gdb) ") + + def run_gdb(self): + if self.gdb_instance is None: + self.restart_gdb() + + def execute_gdb_cmd(self, cmd): + assert self.gdb_instance + self.gdb_instance.sendline(cmd) + return self.gdb_instance.recvuntil("(gdb) ").replace(b"\n", b"") + + def get_addr(self, file_loc): + if file_loc in self.cmp_table: + return self.cmp_table[file_loc] + self.run_gdb() + real_file_loc = b':'.join(file_loc.split(b':')[:-1]) # todo: fix + result = self.execute_gdb_cmd(b"info line " + real_file_loc) # todo: fix + if b"starts at address" in result and b"and ends at" in result: + start = self.EXTRACT_START.split(result) + end = self.EXTRACT_END.split(result) + assert len(start) == 3 and len(end) == 3, "GDB gives something weird" + result = [int(b'0x' + start[1], 16), int(b'0x' + end[1], 16)] + else: + print(result) + print(f"[GDB] gdb thinks we give a bad file_loc {real_file_loc}") + return None + self.cmp_table[file_loc] = result + return result + + +class STDINExecutorLLVM: def __init__(self, build_dir, uninstrumented_path, instrumented_path): self.instance = None self.build_dir = build_dir @@ -59,19 +96,6 @@ def read_and_determine_done_reading(self): return 0, False return result[8:-1], True - def restart_gdb(self): - self.gdb_instance = pwn.process(["gdb", self.uninstrumented_path]) - self.gdb_instance.recvuntil("(gdb) ") - - def run_gdb(self): - if self.gdb_instance is None: - self.restart_gdb() - - def execute_gdb_cmd(self, cmd): - assert self.gdb_instance - self.gdb_instance.sendline(cmd) - return self.gdb_instance.recvuntil("(gdb) ").replace(b"\n", b"") - class LLVMInstr(instr_interface.Instrumentation): def __init__(self, executor, trace_directory="/tmp/digfuzz"): @@ -80,25 +104,7 @@ def __init__(self, executor, trace_directory="/tmp/digfuzz"): self.corpus_traces = {} self.cmp_table = {} self.visited_trace = set() - - def __get_angr_addr(self, file_loc): - if file_loc in self.cmp_table: - return self.cmp_table[file_loc] - self.executor.run_gdb() - real_file_loc = b':'.join(file_loc.split(b':')[:-1]) # todo: fix - result = self.executor.execute_gdb_cmd(b"info line " + - real_file_loc.replace(self.executor.build_dir.encode('ascii'), b"")) - if b"starts at address" in result and b"and ends at" in result: - start = self.executor.EXTRACT_START.split(result) - end = self.executor.EXTRACT_END.split(result) - assert len(start) == 3 and len(end) == 3, "GDB gives something weird" - result = [int(b'0x' + start[1], 16), int(b'0x' + end[1], 16)] - else: - print(result) - print(f"[GDB] gdb thinks we give a bad file_loc {real_file_loc}") - return None - self.cmp_table[file_loc] = result - return result + self.gdb = GDBExecutor(executor.uninstrumented_path) def __add_to_execution_tree(self, trace, file_name): last_node = None @@ -113,7 +119,7 @@ def __add_to_execution_tree(self, trace, file_name): self.execution_tree[addr].addr = addr self.execution_tree[addr].is_comp = is_cmp if file_loc: - self.execution_tree[addr].angr_addr_range = self.__get_angr_addr(file_loc) + self.execution_tree[addr].angr_addr_range = self.gdb.get_addr(file_loc) current_node = self.execution_tree[addr] if last_node is not None and (last_node.left != current_node and last_node.right != current_node): if last_node.left is None: diff --git a/qemu_instr.py b/qemu_instr.py index 06d1d82..1203696 100644 --- a/qemu_instr.py +++ b/qemu_instr.py @@ -4,7 +4,7 @@ import pwn import config import os - +import angr import utils @@ -63,38 +63,53 @@ def __grab_non_comp_bb(self): if len(line_arr) == 1: record_next = True driver_part = False - if len(line_arr) == 1 and ("
" in line_arr[-1] or "<__libc_csu" in line_arr[-1]): + if len(line_arr) == 1 and ("
" in line_arr[-1] + or "<__libc_csu" in line_arr[-1] + or "@plt>" in line_arr[-1]): + pc = int("0x" + line_arr[0].split(" ")[0].replace(" ", ""), 16) + self.__non_comp_bb.add(pc) driver_part = True def __add_to_execution_tree(self, trace, file_name): - last_node = None last_addr = 0 if file_name not in self.corpus_traces: self.corpus_traces[file_name] = [] - + hit_counts = {} + trace = list(trace) for addr in trace: + edge = (last_addr, addr) + hit_counts[edge] = hit_counts[edge] + 1 if edge in hit_counts else 1 + + # init node if addr not in self.execution_tree: self.execution_tree[addr] = instr_interface.Node() self.execution_tree[addr].addr = addr if addr not in self.__non_comp_bb: self.execution_tree[addr].is_comp = True - self.execution_tree[addr].addr_range = (0, 1e10) - # refine addr range - addr_range = self.execution_tree[addr].addr_range - if addr_range[1] - addr_range[0] > addr - last_addr: - self.execution_tree[addr].addr_range = (last_addr, addr) - last_addr = addr + self.execution_tree[addr].addr_range = (addr, addr + self.basic_block[addr]) + + # update children current_node = self.execution_tree[addr] - if last_node is not None and (last_node.left != current_node and last_node.right != current_node): - if last_node.left is None: - last_node.left = current_node - elif last_node.right is None: - last_node.right = current_node - else: - print("[Exec Tree] More than 2 children for a node :(") + if last_addr != 0 and addr not in self.execution_tree[last_addr].children: + self.execution_tree[last_addr].children.add(addr) current_node.led_by = file_name self.corpus_traces[file_name].append(current_node) - last_node = current_node + last_addr = addr + + # setup edge hitcount + last_addr = None + for addr in trace: + if not last_addr: + last_addr = addr + continue + edge = (last_addr, addr) + hit_count = hit_counts[edge] + last_node = self.execution_tree[last_addr] + if addr in last_node.max_encounter_child: + last_node.max_encounter_child[addr].add(hit_count) + else: + last_node.max_encounter_child[addr] = {hit_count} + last_addr = addr def __build_execution_tree(self, new_testcase_filenames): for filename in new_testcase_filenames: @@ -135,12 +150,12 @@ def build_execution_tree(self, new_testcase_filenames): if __name__ == "__main__": - - code_loc = "test.c" - os.system(f"gcc -c {code_loc} -no-pie -o {code_loc}.o") - - utils.setup() - utils.compile_harness(f"{code_loc}.o") + # + # code_loc = "test.c" + # os.system(f"gcc -c {code_loc} -no-pie -o {code_loc}.o") + # + # utils.setup() + # utils.compile_harness(f"{code_loc}.o") uninstrumented_executable = "harness" _executor = STDINExecutorQEMU(config.QEMU_BIN, uninstrumented_executable) diff --git a/qsym_ce.py b/qsym_ce.py index 50805af..2b85785 100644 --- a/qsym_ce.py +++ b/qsym_ce.py @@ -1,3 +1,4 @@ +import re import time import config @@ -13,7 +14,7 @@ def remove_assert(string): if met_assert and string[cter] == 40: string = string[cter:] break - if string[cter] == 40 and string[cter+1:cter+7] == b'assert': # b'(' + if string[cter] == 40 and string[cter + 1:cter + 7] == b'assert': # b'(' met_assert = True continue cter = len(string) @@ -26,24 +27,41 @@ def remove_assert(string): def negate_smt2(string): - print("="*30) - print(string) - print("="*30) string = remove_assert(string) return f'(assert (not {string.decode("utf-8")}))'.encode('utf-8') -def solve_smt(smt): +def get_bv_value(smt): + match_res = re.compile(r"(k![0-9]+) \(\)").findall(smt) + if len(match_res) < 1: + assert False, "Can't find declare-fun" + return sorted(list(set([int(x.replace("k!", "")) for x in match_res]))) + + +def solve_smt(smt, orig): + if type(orig) == bytes: + orig = [x for x in orig] + else: + orig = [ord(x) for x in orig] s = z3.Solver() + s.set("timeout", config.QSYM_TIMEOUT) s.from_string(smt) + bvs = get_bv_value(smt) try: s.check() m = s.model() result = [] + known = set() for d in m.decls(): - result.append([d.name(), m[d]]) - result = sorted(result, key=lambda x: x[0]) - return bytes([int(x[1].__str__()) for x in result]) + known.add(d.name()) + result.append((int(d.name().replace('k!', "")), m[d])) + for idx, sol in result: + idx = bvs.index(idx) + if idx >= len(orig): + print(idx, len(orig)) + continue + orig[idx] = int(sol.__str__()) + return bytes(orig) except Exception as e: print(f"[Solver] UNSAT {e}") @@ -62,26 +80,38 @@ def __init__(self, uninstrumented_executable): self.cmp_constraint = {} self.execution_tree = None self.qsym_instance = None - self.__run_qsym_remote() + self.__run_qsym() def update_exec_tree(self, tree): self.execution_tree = tree - def __run_qsym_remote(self): - config.QSYM_SSH_CONN.process(["mkdir", "in"]) - config.QSYM_SSH_CONN.process(["mkdir", "out"]) - self.qsym_instance = config.QSYM_SSH_CONN.process([config.PIN_SH, '-ifeellucky', '-t', - config.QSYM_OBJECT_PATH, '-i', 'in', '-o', 'out', '--', - self.uninstrumented_executable]) + def __run_qsym(self): + if config.USE_SSH: + config.QSYM_SSH_CONN.process(["mkdir", "in"]) + config.QSYM_SSH_CONN.process(["mkdir", "out"]) + self.qsym_instance = config.QSYM_SSH_CONN.process([config.PIN_SH, '-ifeellucky', '-t', + config.QSYM_OBJECT_PATH, '-i', 'in', '-o', 'out', '--', + self.uninstrumented_executable]) + else: + self.qsym_instance = config.QSYM_SSH_CONN.process(config.QSYM_CMD + [config.PIN_SH, '-ifeellucky', '-t', + config.QSYM_OBJECT_PATH, '-i', + '/tmp/in', '-o', + '/tmp/out', '--', + uninstrumented_executable]) self.qsym_instance.recvuntil(b"[INFO] IMG: /lib/x86_64-linux-gnu/libc.so.6") print("[QSYM] Ready") def __get_result(self, corpus_content): - self.qsym_instance.sendline(corpus_content) - start_time = time.time() - result = self.qsym_instance.recvuntil("EXECDONE", timeout=config.QSYM_TIMEOUT) - end_time = time.time() - print(f"[QSYM] Spent {end_time - start_time}s dumping constraints") + try: + self.qsym_instance.sendline(corpus_content) + start_time = time.time() + result = self.qsym_instance.recvuntil("EXECDONE", timeout=config.QSYM_TIMEOUT) + end_time = time.time() + print(f"[QSYM] Spent {end_time - start_time}s dumping constraints") + except EOFError as e: + print(f"[QSYM] Crashed, ignoring content {corpus_content}") + self.__run_qsym() + return b'' return result @staticmethod @@ -117,40 +147,47 @@ def __parse_output(self, lines: bytes): # get a list of [cmp constraints] that has pc in pc_wanted_range @staticmethod - def __find_last_cmp_pc(cmp_constraints: dict, pc_wanted_range): + def __find_last_cmp_pc(cmp_constraints: dict, pc_wanted_range, nth=0): result = [] + current_in_range_max = -1 for pc in cmp_constraints: if pc_wanted_range[0] < pc < pc_wanted_range[1]: - result.append(pc) + if pc < current_in_range_max: + nth -= 1 + current_in_range_max = max(pc, current_in_range_max) + if nth == 0: + result.append(pc) return result # find a path node to stop => find a cmp cons => flip cmp cons & concat - def __get_constraint(self, flip_pc_range, bvs, cmp_constraints): - cmp_cons_pcs = self.__find_last_cmp_pc(cmp_constraints, flip_pc_range) + def __get_constraint(self, flip_pc_range, bvs, cmp_constraints, nth=0): + cmp_cons_pcs = self.__find_last_cmp_pc(cmp_constraints, flip_pc_range, nth=nth) + if len(cmp_cons_pcs) == 0: + print("[QSYM] Trying to flip constant branch") for pc in cmp_cons_pcs: path = b"\n".join([cmp_constraints[_pc] for _pc in cmp_constraints if _pc < pc]) yield to_smt2(bvs, path + b'\n' + negate_smt2(cmp_constraints[pc])) # conduct concolic execution and flip constraints in flip_pc_range while preserving others - def flip_it(self, testcase_content, flip_pc_range, qemu_instr_obj=None, testcase_fn=None): + def flip_it(self, testcase_content, flip_pc_range, nth=0, qemu_instr_obj=None, testcase_fn=None): + if qemu_instr_obj and testcase_fn: + qemu_instr_obj.add_solved_path(testcase_fn, flip_pc_range, nth=nth) result = self.__get_result(testcase_content) - print(result) bvs, cmp_constraint = self.__parse_output(result) has_solution = False - for to_be_solved in self.__get_constraint(flip_pc_range, bvs, cmp_constraint): + for to_be_solved in self.__get_constraint(flip_pc_range, bvs, cmp_constraint, nth=nth): if len(to_be_solved) == 0: print("[Solver] Conc exec gives nothing") continue - print(to_be_solved) - solution = solve_smt(to_be_solved) + solution = solve_smt(to_be_solved, testcase_content) if not solution: continue - print(f"[QSYM] SAT: {to_be_solved}") + print(f"[QSYM] SAT") has_solution = True yield solution if not has_solution and qemu_instr_obj and testcase_fn: - qemu_instr_obj.add_unsolvable_path(testcase_fn, flip_pc_range) + qemu_instr_obj.add_unsolvable_path(testcase_fn, flip_pc_range, nth=nth) if __name__ == "__main__": diff --git a/run_afl.py b/run_afl.py index 2e29986..0f0b240 100644 --- a/run_afl.py +++ b/run_afl.py @@ -5,11 +5,14 @@ import config import utils -if len(sys.argv) > 1: +if len(sys.argv) > 1 and sys.argv[1].endswith(".o"): utils.compile_harness(sys.argv[1]) +elif len(sys.argv) > 1: + os.system(f"cp {sys.argv[1]} ./harness") else: code_loc = "test.c" - os.system(f"gcc -c {code_loc} -no-pie -o {code_loc}.o") + utils.setup() + os.system(f"gcc -g -c {code_loc} -no-pie -o {code_loc}.o") utils.compile_harness(f"{code_loc}.o") processes = [] diff --git a/targets/json-c/build.sh b/targets/json-c/build.sh new file mode 100755 index 0000000..e5fa71e --- /dev/null +++ b/targets/json-c/build.sh @@ -0,0 +1,10 @@ +git clone --depth 1 https://github.com/json-c/json-c.git target +cd target +mkdir json-c-build +cd json-c-build +cmake -DBUILD_SHARED_LIBS=OFF .. +CFLAGS=-no-pie CXXFLAGS=-no-pie make -j$(nproc) +cd .. +g++ -std=c++11 -no-pie -I. -I./json-c-build fuzz/tokener_parse_ex_fuzzer.cc ./json-c-build/libjson-c.a ../../../driver.o -o harness -lbsd +cp harness .. +cd .. \ No newline at end of file diff --git a/utils.py b/utils.py index e14a0dc..61462ee 100644 --- a/utils.py +++ b/utils.py @@ -22,8 +22,8 @@ def qsym_host_provide_permission(remote_path): def setup(): - os.system("gcc -c qemu_qsym_harness.c -no-pie -o driver.o") + os.system("gcc -c -g qemu_qsym_harness.c -no-pie -o driver.o") def compile_harness(obj_loc): - os.system(f"gcc {obj_loc} driver.o -no-pie -o {config.LOCAL_UNINSTRUMENTED_EXEC_PATH}") + os.system(f"gcc {obj_loc} driver.o -no-pie -g -o {config.LOCAL_UNINSTRUMENTED_EXEC_PATH}")