fix a lot of bugs

shouc · Dec 27, 2021 · 5228a51
1 parent c038ade
commit 5228a51
Show file tree

Hide file tree

Showing 10 changed files with 278 additions and 181 deletions.
diff --git a/README.md b/README.md
@@ -27,7 +27,7 @@ python3 run_afl.py
 ```
 In another terminal (QSYM stuffs)
 ```bash
-python3 main.py
+python3 digfuzz.py
 ```
 
 

diff --git a/config.py b/config.py
@@ -1,35 +1,48 @@
+import os
+
 import pwn
 
 # Target to fuzz
 OBJ_PATH = "test.c.o"
 
-# QSYM Remote setup
-QSYM_HOST = '54.245.74.219'
-QSYM_UN = 'ubuntu'
-QSYM_KEYFILE = "./seem-priv-key.PEM"
-QSYM_SSH_CONN = pwn.ssh(host=QSYM_HOST, user=QSYM_UN, keyfile=QSYM_KEYFILE)
-PIN_SH = "/home/ubuntu/qsym/third_party/pin-2.14-71313-gcc.4.4.7-linux/pin.sh"  # the location of qsym script remote
-QSYM_OBJECT_PATH = "/home/ubuntu/qsym/qsym/pintool/obj-intel64/libqsym.so"  # the location of qsym pin obj remote
-
 # AFL slave count
 AFL_NUM_SLAVE = 1
 
-# NO NEED TO CHANGE IF USING run_afl.py
-AFL_FUZZ_PATH = "./AFLplusplus/afl-fuzz"
-AFL_IN_PATH = "./in"
-AFL_OUT_PATH = "./out"
-AFL_SLAVE_NAME = "s"
-AFL_MASTER_NAME = "m"
-AFL_CORPUS_PATH = f"{AFL_OUT_PATH}/{AFL_MASTER_NAME}/queue"  # the directory of the afl master corpus
-
 # NO NEED TO CHANGE
 LOCAL_UNINSTRUMENTED_EXEC_PATH = "./harness"
 REMOTE_UNINSTRUMENTED_EXEC_PATH = "/tmp/harness"
 
 QEMU_BIN = "./qemu_stdout"
 
 DUMPER_PATH = "./dumper"
-SHM_KEY = f"/{OBJ_PATH}.shm"
+SHM_KEY = f"tuba.shm"
 
 QEMU_TIMEOUT = 30
 QSYM_TIMEOUT = 30
+
+# NO NEED TO CHANGE IF USING run_afl.py
+AFL_FUZZ_PATH = "./AFLplusplus/afl-fuzz"
+AFL_IN_PATH = "./in"
+AFL_OUT_PATH = "./out"
+AFL_SLAVE_NAME = "s"
+AFL_MASTER_NAME = "m"
+AFL_CORPUS_PATH = f"{AFL_OUT_PATH}/{AFL_MASTER_NAME}/queue"  # the directory of the afl master corpus
+
+# QSYM Remote setup
+USE_SSH = False
+if not USE_SSH:
+    os.system(f"mkdir /tmp/digfuzz && cp {LOCAL_UNINSTRUMENTED_EXEC_PATH} /tmp/digfuzz/harness")
+    PIN_SH = "/workdir/qsym/third_party/pin-2.14-71313-gcc.4.4.7-linux/pin.sh"  # the location of qsym script remote
+    QSYM_OBJECT_PATH = "/workdir/qsym/qsym/pintool/obj-intel64/libqsym.so"  # the location of qsym pin obj remote
+    QSYM_IMAGE_NAME = "qsym"
+    QSYM_CMD = ["docker", "-v", "/tmp/digfuzz:/tmp/digfuzz", QSYM_IMAGE_NAME]
+
+else:
+    QSYM_HOST = '54.245.74.219'
+    QSYM_UN = 'ubuntu'
+    QSYM_KEYFILE = "./seem-priv-key.PEM"
+    QSYM_SSH_CONN = pwn.ssh(host=QSYM_HOST, user=QSYM_UN, keyfile=QSYM_KEYFILE)
+    PIN_SH = "/home/ubuntu/qsym/third_party/pin-2.14-71313-gcc.4.4.7-linux/pin.sh"  # the location of qsym script remote
+    QSYM_OBJECT_PATH = "/home/ubuntu/qsym/qsym/pintool/obj-intel64/libqsym.so"  # the location of qsym pin obj remote
+
+
diff --git a/main.py → digfuzz.py b/main.py → digfuzz.py
@@ -14,12 +14,12 @@ def add_input_to_afl_queue(content):
     if not content:
         return
     global added_counter
-    with open("%s/id:%6d,src:digfuzz" % (config.AFL_CORPUS_PATH, added_counter), "wb+") as fp:
+    with open("%s/id:%6d,sync:digfuzz,src:000000" % (config.AFL_CORPUS_PATH, added_counter), "wb+") as fp:
         fp.write(content)
     added_counter += 1
 
 
-added_counter = int(1e6)
+added_counter = int(1e5)
 utils.copy_file_to_qsym_host(config.LOCAL_UNINSTRUMENTED_EXEC_PATH, config.REMOTE_UNINSTRUMENTED_EXEC_PATH)
 utils.qsym_host_provide_permission(config.REMOTE_UNINSTRUMENTED_EXEC_PATH)
 
@@ -42,14 +42,14 @@ def get_new_testcase_filenames():
 
 while 1:
     qemu.build_execution_tree(get_new_testcase_filenames())
-    qemu.dump_execution_tree()
+    # qemu.dump_execution_tree()
     paths = qemu.get_sorted_missed_path()
     solving_path = random.choice(paths)
     print(f"Solving for path {solving_path['flip']} with prob {solving_path['prob']}")
     testcase_content = open(solving_path["fn"], "rb").read()
     for solution in qsym.flip_it(testcase_content, solving_path["flip"],
+                                 nth=solving_path["nth"],
                                  qemu_instr_obj=qemu,
                                  testcase_fn=solving_path["fn"]):
         add_input_to_afl_queue(solution)
-    time.sleep(5)  # allow fuzzer to sync corpus
     print("Round done")
diff --git a/instr_interface.py b/instr_interface.py
@@ -1,123 +1,136 @@
 import abc
 import json
+import angr
 
 
 # Exec Tree
 class Node:
-    left = None
-    right = None
-    addr = 0
-    left_prob = -1
-    right_prob = -1
-    is_comp = False
-    visit_count = 1
-    addr_range = None
-    led_by = ""
+    def __init__(self):
+        self.children = set()
+        self.children_prob = []
+        self.max_encounter_child = {}
+        self.addr = 0
+        self.is_comp = False
+        self.visit_count = 1
+        self.addr_range = None
+        self.led_by = ""
 
     @staticmethod
     def to_addr(node):
         if node:
             return node.addr
         return 0
 
+    def __hash__(self):
+        return self.addr
+
     def __str__(self):
-        return f"left: {hex(self.to_addr(self.left))}; " \
-               f"right: {hex(self.to_addr(self.right))}; " \
-               f"comp: {self.is_comp}; " \
+        return f"comp: {self.is_comp}; " \
                f"vc: {self.visit_count}; " \
-               f"left_prob: {self.left_prob}; " \
-               f"right_prob: {self.right_prob};" \
+               f"children: {self.children}; " \
+               f"prob: {self.children_prob};" \
                f"led_by: {self.led_by}" \
                f"addr_range: {hex(self.addr_range[0])} - {hex(self.addr_range[1])}"
 
 
+class UnknownNode(Node):
+    pass
+
+
 class Instrumentation(abc.ABC):
     def __init__(self, executor):
         self.executor = executor
         self.execution_tree = {}  # addr -> Node
         self.corpus_traces = {}
+        self.dfs_visited_nodes = set()
         self.unsolvable = set()
+        self.solved = set()
+        self.basic_block = {}  # BB start => size
+        self.__get_basic_block_size()
+
+    def __get_basic_block_size(self):
+        p = angr.Project(self.executor.uninstrumented_path, load_options={'auto_load_libs': False})
+        cfg = p.analyses.CFGFast()
+        for key in cfg.kb.functions:
+            for bb in cfg.kb.functions[key].blocks:
+                self.basic_block[bb.addr] = bb.size
 
     def build_execution_tree(self, new_testcase_filenames: [str]):
         pass
 
     def dump_execution_tree(self):
         print(json.dumps({hex(x): str(self.execution_tree[x]) for x in self.execution_tree}, sort_keys=True, indent=4))
 
-    def __dfs_helper(self, current_node_addr, visited_nodes):
-        if current_node_addr in visited_nodes:
-            return
-        visited_nodes.add(current_node_addr)
-        current_node = self.execution_tree[current_node_addr]
-
-        left_node = self.execution_tree[current_node_addr].left
-        right_node = self.execution_tree[current_node_addr].right
-        should_assign_prob = current_node.is_comp
-        sum_of_children = 1  # prevent div by 0, todo: this causes left + right != 1
-
-        if left_node is not None:
-            self.__dfs_helper(left_node.addr, visited_nodes)
-            sum_of_children += left_node.visit_count
-
-        if right_node is not None:
-            self.__dfs_helper(right_node.addr, visited_nodes)
-            sum_of_children += right_node.visit_count
+    def assign_prob(self):
+        for addr, current_node in self.execution_tree.items():
+            should_assign_prob = current_node.is_comp
+            sum_of_children = 1  # prevent div by 0, todo: this causes left + right != 1
 
-        if left_node is not None:
-            current_node.left_prob = left_node.visit_count / sum_of_children
-        else:
-            current_node.left_prob = 3 / sum_of_children
+            for child_node_addr in current_node.children:
+                child_node = self.execution_tree[child_node_addr]
+                sum_of_children += child_node.visit_count
 
-        if right_node is not None:
-            current_node.right_prob = right_node.visit_count / sum_of_children
-        else:
-            current_node.right_prob = 3 / sum_of_children
+            for child_node_addr in current_node.children:
+                child_node = self.execution_tree[child_node_addr]
+                current_node.children_prob.append(child_node.visit_count / sum_of_children)
 
-        if not should_assign_prob or sum_of_children < 30:
-            current_node.left_prob = 1
-            current_node.right_prob = 1
+            while len(current_node.children_prob) < 2:
+                current_node.children_prob.append(3 / sum_of_children)
 
-    def assign_prob(self):
-        self.__dfs_helper(next(iter(self.execution_tree)), set())
+            if not should_assign_prob or sum_of_children < 30:
+                current_node.children_prob = [1.0 for _ in range(len(current_node.children_prob))]
 
     def __get_prob(self, parent, child):
         parent_node = self.execution_tree[parent]
-        child_node = self.execution_tree[child]
-        if parent_node.left and parent_node.left == child_node:
-            return parent_node.left_prob
-        if parent_node.right and parent_node.right == child_node:
-            return parent_node.right_prob
+        child_node_addr = self.execution_tree[child].addr
+        for k, _child_addr in enumerate(parent_node.children):
+            if _child_addr == child_node_addr:
+                return parent_node.children_prob[k]
         print(f"[Exec] {parent} {child} not in execution tree")
         assert False
 
-    def __is_branch_missed(self, parent):
-        parent_node = self.execution_tree[parent]
-        return parent_node.right is None and parent_node.is_comp
+    def __is_branch_missed(self, parent_addr, child_addr, nth=0):
+        hit_count = nth + 1
+        parent_node = self.execution_tree[parent_addr]
+        return (
+                   len(parent_node.children) < 2
+                   or hit_count not in parent_node.max_encounter_child[child_addr]
+               ) and parent_node.is_comp
+
+    def __should_i_solve(self, testcase_fn, flip_pcs, nth=0):
+        return ((testcase_fn, flip_pcs[0], flip_pcs[1], nth) not in self.unsolvable) and \
+               ((testcase_fn, flip_pcs[0], flip_pcs[1], nth) not in self.solved)
 
-    def __is_unsolvable(self, testcase_fn, flip_pcs):
-        return (testcase_fn, flip_pcs[0], flip_pcs[1]) in self.unsolvable
+    def add_unsolvable_path(self, testcase_fn, flip_pcs, nth=0):
+        self.unsolvable.add((testcase_fn, flip_pcs[0], flip_pcs[1], nth))
 
-    def add_unsolvable_path(self, testcase_fn, flip_pcs):
-        self.unsolvable.add((testcase_fn, flip_pcs[0], flip_pcs[1]))
+    def add_solved_path(self, testcase_fn, flip_pcs, nth=0):
+        self.solved.add((testcase_fn, flip_pcs[0], flip_pcs[1], nth))
 
     def get_sorted_missed_path(self, num=10):
         missed_paths = []
         for filename in self.corpus_traces:
+            hit_counts = {}
             trace = self.corpus_traces[filename]
             prob = 1
             trace_len = len(trace)
-            for k, node in enumerate(trace):
-                if k + 1 == trace_len:
-                    break
+            for k in range(1, trace_len - 1):
+                node = trace[k]
                 next_node = trace[k + 1]
-                if self.__is_branch_missed(node.addr):
-                    path_prob = prob * node.right_prob
-                    if self.__is_unsolvable(filename, node.addr_range):
+                prev_node = trace[k - 1]
+
+                hit_counts[node] = hit_counts[node] + 1 if node in hit_counts else 1
+                nth = hit_counts[node] - 1
+                if self.__is_branch_missed(node.addr, next_node.addr, nth=nth):
+                    path_prob = prob * node.children_prob[-1]
+                    flip_it = prev_node.addr_range
+                    if not self.__should_i_solve(filename, flip_it, nth=nth):
                         continue
                     missed_paths.append({
-                        "flip": node.addr_range,
+                        "flip": flip_it,
                         "prob": path_prob,
-                        "fn": filename
+                        "fn": filename,
+                        "nth": nth
                     })
                 prob *= self.__get_prob(node.addr, next_node.addr)
         return sorted(missed_paths, key=lambda x: x["prob"])[:min(num, len(missed_paths))]
diff --git a/README.md b/README.md
@@ -27,7 +27,7 @@ python3 run_afl.py
 ```
 In another terminal (QSYM stuffs)
 ```bash
-python3 main.py
+python3 digfuzz.py
 ```