From 831f00beee433296389a2c5795717280242a5169 Mon Sep 17 00:00:00 2001 From: Jacarte Date: Wed, 9 Sep 2020 14:25:40 +0200 Subject: [PATCH 1/6] Inferring the number of levels and estimating the exploration stage time --- crow/crow/crow.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/crow/crow/crow.py b/crow/crow/crow.py index 1ed527e2..bcdcb636 100644 --- a/crow/crow/crow.py +++ b/crow/crow/crow.py @@ -418,21 +418,22 @@ def removeDuplicate(program_name, folder, filt="*.wasm", remove=False): def process(f, OUT_FOLDER, onlybc, program_name, redisports, isBc=False): - LEVELS = 20 # upper bound level count + LEVELS = len(config["DEFAULT"]["order"].split(",")) exploration_workers = config["DEFAULT"].getint("workers") exploration_timeout = config["DEFAULT"].getint("exploration-timeout") total_timeout = config["DEFAULT"].getint("timeout") - print(LEVELS, exploration_workers, exploration_timeout, total_timeout) if total_timeout > 0 : # There is a timeout - # Validate the composition of times - if exploration_workers > LEVELS: LOGGER.warning(program_name, f"The number of generation workers is the maximum number of levels {LEVELS}. You set the maximum to {exploration_workers}, skipped.") exploration_workers = LEVELS - if 1.0*exploration_timeout * LEVELS / exploration_workers >= total_timeout: + if 1.0*exploration_timeout * LEVELS / exploration_workers >= total_timeout: + if total_timeout > 0: LOGGER.error(program_name, f"The total timeout set {total_timeout}s must be larger than the whole (even parallel) generation stage, which is {1.0*exploration_timeout * LEVELS / exploration_workers}s, according to the number of threads and exploration timeout ") exit(1) + else: + LOGGER.info(program_name, f"The total exploration stage would take {1.0*exploration_timeout * LEVELS / exploration_workers}s, according to the number of threads and exploration timeout ") + global launch From 18f73d27b98f0af0257f2bad72e5321edd6954df Mon Sep 17 00:00:00 2001 From: Jacarte Date: Wed, 9 Sep 2020 15:14:29 +0200 Subject: [PATCH 2/6] Improving subsets iterator. Adding better logging --- crow/crow/crow.py | 44 +++++++++++++++++++++++++++++++----------- crow/crow/iterators.py | 10 +++++----- crow/crow/logger.py | 24 ++++++++++++++++++----- crow/crow/stages.py | 2 +- 4 files changed, 58 insertions(+), 22 deletions(-) diff --git a/crow/crow/crow.py b/crow/crow/crow.py index bcdcb636..11ddf5ea 100644 --- a/crow/crow/crow.py +++ b/crow/crow/crow.py @@ -21,6 +21,7 @@ import re import uuid from socket_server import listen +import numpy as np levelPool = None @@ -141,15 +142,26 @@ def processBitcode(self, bc, outResult, program_name, redisports, OUT_FOLDER, on futures = [] variants = [] - for subset in getIteratorByName("keysSubset")(merging): + showGenerationProgress = config["DEFAULT"].getboolean("show-generation-progress") + + temptativeNumber = np.prod([len(v) + 1 for v in merging.values()]) + + LOGGER.info(program_name,f"Temptative number of variants {temptativeNumber} (plus original). Expected ratio {len(redisports)} of programs in each iteration.") + + if showGenerationProgress: + LOGGER.disable() + printProgressBar(generationcount, temptativeNumber,suffix=f' {generationcount}/{temptativeNumber}') + for subset in getIteratorByName("keysSubset")(merging): + job = generationPool.submit( self.generateVariant, [subset], program_name, merging, redisports[generationcount - %len(redisports)], bc, OUT_FOLDER, onlybc, meta, outResult) + %len(redisports)], bc, OUT_FOLDER, onlybc, meta, outResult, generationcount, temptativeNumber) # job.result() futures.append(job) generationcount += 1 + if generationcount % len(redisports) == 0: ## WAIT for it @@ -163,16 +175,26 @@ def processBitcode(self, bc, outResult, program_name, redisports, OUT_FOLDER, on for f in done: variants += f.result() + + + if showGenerationProgress: + printProgressBar(len(variants), temptativeNumber,suffix=f' {generationcount}/{temptativeNumber}') + + + LOGGER.info(program_name,f"Executing final parallel generation job...") done, fail = wait(futures, return_when=ALL_COMPLETED) futures = [] LOGGER.info(program_name,f"Disposing job...{len(done)} {len(fail)}") + generationcount += len(done) + len(fail) for f in done: variants += f.result() # Save metadata - + if showGenerationProgress: + printProgressBar(len(variants), temptativeNumber,suffix=f' {generationcount}/{temptativeNumber}') + LOGGER.enable() LOGGER.info(program_name, f"Saving metadata...") variantsFile = open(f"{OUT_FOLDER}/{program_name}.variants.json", 'w') @@ -189,7 +211,7 @@ def processBitcode(self, bc, outResult, program_name, redisports, OUT_FOLDER, on return dict(programs=meta, count=len(meta.keys())) - def generateVariant(self,job,program_name, merging, port,bc, OUT_FOLDER, onlybc, meta, outResult): + def generateVariant(self,job,program_name, merging, port,bc, OUT_FOLDER, onlybc, meta, outResult, current, total): if len(job) == 0 : LOGGER.info(program_name,f"Empty job...") @@ -198,7 +220,7 @@ def generateVariant(self,job,program_name, merging, port,bc, OUT_FOLDER, onlybc, variants = [] LOGGER.info(program_name,f"Generating {len(job)} variants...") r = redis.Redis(host="localhost", port=port) - for j in job: + for jindex,j in enumerate(job): LOGGER.info(program_name, f"Cleaning previous cache for variant generation...{port}") try: LOGGER.success(program_name, f"Flushing redis DB...") @@ -335,7 +357,7 @@ def processLevel(self, levels, program_name, port, bc, OUT_FOLDER, onlybc, meta, raise e waitFor = 5 - LOGGER.warning(program_name, f"Sleeping for {waitFor} seconds waiting for freeing ports...") + LOGGER.warning(program_name, f"Sleeping for {waitFor} seconds waiting for free ports...") time.sleep(waitFor) while not q.empty(): i = q.get() @@ -432,7 +454,11 @@ def process(f, OUT_FOLDER, onlybc, program_name, redisports, isBc=False): LOGGER.error(program_name, f"The total timeout set {total_timeout}s must be larger than the whole (even parallel) generation stage, which is {1.0*exploration_timeout * LEVELS / exploration_workers}s, according to the number of threads and exploration timeout ") exit(1) else: - LOGGER.info(program_name, f"The total exploration stage would take {1.0*exploration_timeout * LEVELS / exploration_workers}s, according to the number of threads and exploration timeout ") + if exploration_timeout > -1: + LOGGER.info(program_name, f"The total exploration stage would take {1.0*exploration_timeout * LEVELS / exploration_workers}s, according to the number of threads and exploration timeout ") + else: + LOGGER.info(program_name, f"Sit and wait, there is no timeout") + global launch @@ -471,8 +497,6 @@ def launch(file, result): timeout = config["DEFAULT"].getint("timeout") - print("Timeout ... %s s" % timeout) - if total_timeout > 0: th.join(timeout=timeout) else: @@ -518,7 +542,6 @@ def main(f, redisports): result = dict(namespace=program_name, programs=[]) attach = [] - print(os.listdir(f)) for final in ["%s/%s" % (f, i) for i in os.listdir(f)]: program_name, OUT_FOLDER, onlybc = getFileMeta(final) @@ -540,7 +563,6 @@ def main(f, redisports): if not os.path.exists(OUT_FOLDER): os.mkdir(OUT_FOLDER) - print(json.dumps(result, indent=4)) if __name__ == "__main__": diff --git a/crow/crow/iterators.py b/crow/crow/iterators.py index 49298a18..5c098136 100644 --- a/crow/crow/iterators.py +++ b/crow/crow/iterators.py @@ -8,11 +8,11 @@ def keysSubset(S): if len(S) == 0: return {} - for k in S.keys(): - for v in S[k]: - d = dict(zip(S.keys(), [None]*len(S.keys()))) - d[k] = v - yield d + #for k in S.keys(): + # for v in S[k]: + # d = dict(zip(S.keys(), [None]*len(S.keys()))) + # d[k] = v + # yield d #print("Combinations") diff --git a/crow/crow/logger.py b/crow/crow/logger.py index 00906255..63923aa3 100644 --- a/crow/crow/logger.py +++ b/crow/crow/logger.py @@ -30,6 +30,14 @@ def __init__(self, debugToFile=False): self.debugToFile = debugToFile self.indent = 0 + self.disabled = False + + def disable(self): + self.disabled = True + + def enable(self): + self.disabled = False + def enter(self): self.indent += 1 @@ -58,31 +66,37 @@ def debug(self, file, message, std=None): if std: f.write(std.__str__()) else: - print("%s%s%s%s" % (self.getIndent(), bcolors.UNDERLINE, message, bcolors.ENDC)) + if not self.disabled: + print("%s%s%s%s" % (self.getIndent(), bcolors.UNDERLINE, message, bcolors.ENDC)) sys.stdout.flush() def error(self,file, message): f = open(getlogfilename(file), 'a+') f.write(message.__str__() + "\n") - print("%s%s%s%s" % (self.getIndent(), bcolors.FAIL, message, bcolors.ENDC)) + if not self.disabled: + print("%s%s%s%s" % (self.getIndent(), bcolors.FAIL, message, bcolors.ENDC)) sys.stdout.flush() def warning(self,file, message): f = open(getlogfilename(file), 'a+') f.write(message.__str__() + "\n") - print("%s%s%s%s" % (self.getIndent(), bcolors.WARNING, message, bcolors.ENDC)) + + if not self.disabled: + print("%s%s%s%s" % (self.getIndent(), bcolors.WARNING, message, bcolors.ENDC)) sys.stdout.flush() def info(self,file, message): f = open(getlogfilename(file), 'a+') f.write(message.__str__() + "\n") - print("%s%s%s%s" % (self.getIndent(), bcolors.OKBLUE, message, bcolors.ENDC)) + if not self.disabled: + print("%s%s%s%s" % (self.getIndent(), bcolors.OKBLUE, message, bcolors.ENDC)) sys.stdout.flush() def success(self,file, message): f = open(getlogfilename(file), 'a+') f.write(message.__str__() + "\n") - print("%s%s%s%s" % (self.getIndent(), bcolors.OKGREEN, message, bcolors.ENDC)) + if not self.disabled: + print("%s%s%s%s" % (self.getIndent(), bcolors.OKGREEN, message, bcolors.ENDC)) sys.stdout.flush() diff --git a/crow/crow/stages.py b/crow/crow/stages.py index ba8f6bba..37b51cd1 100644 --- a/crow/crow/stages.py +++ b/crow/crow/stages.py @@ -230,7 +230,7 @@ def __call__(self, args=[], std=None): # f -> inputs def processInner(self, std, err): # return the std output optimized LLVM IR - print(std, err) + #print(std, err) return std From 882d9cd3397c72f9007f3bed7f2325691b73a025 Mon Sep 17 00:00:00 2001 From: Jacarte Date: Wed, 9 Sep 2020 15:20:27 +0200 Subject: [PATCH 3/6] Adding eta --- crow/crow/crow.py | 11 ++++++++--- crow/crow/socket_server.py | 5 +++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/crow/crow/crow.py b/crow/crow/crow.py index 11ddf5ea..2a76f7ba 100644 --- a/crow/crow/crow.py +++ b/crow/crow/crow.py @@ -162,12 +162,14 @@ def processBitcode(self, bc, outResult, program_name, redisports, OUT_FOLDER, on futures.append(job) generationcount += 1 - if generationcount % len(redisports) == 0: ## WAIT for it + generationStartTime = time.time_ns() LOGGER.info(program_name,f"Executing parallel generation job...") done, fail = wait(futures, return_when=ALL_COMPLETED) + generationEndTime = time.time_ns() - generationStartTime + futures = [] LOGGER.info(program_name,f"Disposing job...{len(done)} {len(fail)}") @@ -178,7 +180,10 @@ def processBitcode(self, bc, outResult, program_name, redisports, OUT_FOLDER, on if showGenerationProgress: - printProgressBar(len(variants), temptativeNumber,suffix=f' {generationcount}/{temptativeNumber}') + speed = len(redisports)/generationEndTime + eta = temptativeNumber/speed/1e9 + + printProgressBar(len(variants), temptativeNumber,suffix=f' {generationcount}/{temptativeNumber} eta:{eta}s') @@ -193,7 +198,7 @@ def processBitcode(self, bc, outResult, program_name, redisports, OUT_FOLDER, on # Save metadata if showGenerationProgress: - printProgressBar(len(variants), temptativeNumber,suffix=f' {generationcount}/{temptativeNumber}') + printProgressBar(len(variants), temptativeNumber,suffix=f' {generationcount}/{temptativeNumber} ') LOGGER.enable() LOGGER.info(program_name, f"Saving metadata...") diff --git a/crow/crow/socket_server.py b/crow/crow/socket_server.py index 7767bf49..e5ea686d 100644 --- a/crow/crow/socket_server.py +++ b/crow/crow/socket_server.py @@ -12,12 +12,13 @@ def listen(port, q, program): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind((HOST, PORT)) - print(f"Listening...{port}") + + LOGGER.info(program,f"Listening...{port}") s.listen() conn, addr = s.accept() with conn: - print('Connected by', addr) + LOGGER.success(program,f'Connected by {addr}') while True: data = conn.recv(1024<<6) data = data.replace('\\'.encode(), '\\\\'.encode()).replace('\n'.encode(), '\\n'.encode()) From f52b58d3bcb71c15fc85d639f291a0efbe47f5ac Mon Sep 17 00:00:00 2001 From: Jacarte Date: Wed, 9 Sep 2020 15:24:14 +0200 Subject: [PATCH 4/6] No timeout message --- crow/crow/crow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crow/crow/crow.py b/crow/crow/crow.py index 2a76f7ba..8bed582b 100644 --- a/crow/crow/crow.py +++ b/crow/crow/crow.py @@ -454,7 +454,7 @@ def process(f, OUT_FOLDER, onlybc, program_name, redisports, isBc=False): if exploration_workers > LEVELS: LOGGER.warning(program_name, f"The number of generation workers is the maximum number of levels {LEVELS}. You set the maximum to {exploration_workers}, skipped.") exploration_workers = LEVELS - if 1.0*exploration_timeout * LEVELS / exploration_workers >= total_timeout: + if 1.0*exploration_timeout * LEVELS / exploration_workers >= total_timeout or exploration_timeout <= -1: if total_timeout > 0: LOGGER.error(program_name, f"The total timeout set {total_timeout}s must be larger than the whole (even parallel) generation stage, which is {1.0*exploration_timeout * LEVELS / exploration_workers}s, according to the number of threads and exploration timeout ") exit(1) @@ -462,7 +462,7 @@ def process(f, OUT_FOLDER, onlybc, program_name, redisports, isBc=False): if exploration_timeout > -1: LOGGER.info(program_name, f"The total exploration stage would take {1.0*exploration_timeout * LEVELS / exploration_workers}s, according to the number of threads and exploration timeout ") else: - LOGGER.info(program_name, f"Sit and wait, there is no timeout") + LOGGER.warning(program_name, f"Sit and wait, there is no timeout") From acd325d686caa6bce14de4bdd8975d9bf25f1a82 Mon Sep 17 00:00:00 2001 From: Jacarte Date: Wed, 9 Sep 2020 15:26:18 +0200 Subject: [PATCH 5/6] Typos --- crow/crow/socket_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crow/crow/socket_server.py b/crow/crow/socket_server.py index e5ea686d..cf4a85fe 100644 --- a/crow/crow/socket_server.py +++ b/crow/crow/socket_server.py @@ -37,7 +37,7 @@ def listen(port, q, program): q.put([k, v]) LOGGER.success(program,f"Populating results...{len(result.keys())} keys") s = np.prod([len(t) for t in result.values()]) - LOGGER.success(program,f"Populating results...{s} tentative replacements") + LOGGER.success(program,f"Populating results...{s} temptative replacements") except Exception as e: print(st) if not data: From b70084f5ee5e6dc26cae984fd860de106b5aa156 Mon Sep 17 00:00:00 2001 From: Jacarte Date: Wed, 9 Sep 2020 15:34:52 +0200 Subject: [PATCH 6/6] Avoid souper infer in the generation stage --- crow/crow/settings/config.ini | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/crow/crow/settings/config.ini b/crow/crow/settings/config.ini index 0ae15c9f..0c7407f0 100644 --- a/crow/crow/settings/config.ini +++ b/crow/crow/settings/config.ini @@ -1,7 +1,7 @@ [DEFAULT] -slumpspath = /slumps +slumpspath = /Users/javierca/Documents/Develop/slumps debugfile = ${slumpspath}/crow/slumps.debug.txt -outfolder = /slumps/crow/out +outfolder = /Users/javierca/Documents/Develop/slumps/crow_out print-sha = True prune-equal = False exit-on-find = False @@ -11,12 +11,13 @@ candidates-threshold = 1 thread-pool-size = 1 fail-silently = True upper-bound = 8000 -timeout = 3600 -exploration-timeout = 2000 +timeout = 0 +exploration-timeout = 10 generate-bc-only = False -workers = 8 +workers = 4 include = -I/inputs -order = 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 +order = 14,15 +show-generation-progress = True [clang] path = ${DEFAULT:slumpspath}/souper/third_party/llvm-Release-install/bin/clang @@ -49,9 +50,9 @@ path = ${DEFAULT:slumpspath}/souper/build souper = ${path}/souper check = ${path}/souper-check solver = -z3-path=${slumpspath}/souper/third_party/z3/build/z3 -passname = libsouperPass.so +passname = libsouperPass.dylib souper-debug-level = 1 -workers = 30 +workers = 4 socket_port = 100 souper-common = --solver-timeout=0 souper-level-1 = --souper-infer-inst @@ -75,5 +76,5 @@ souper-level-18 = --souper-synthesis-const-with-cegis --souper-synthesis-ignore- souper-level-19 = --souper-infer-inst=false --souper-max-lhs-cands=50 --souper-enumerative-synthesis-ignore-cost --souper-enumerative-synthesis-max-instructions=50 load-opt = -load ${DEFAULT:slumpspath}/souper/build/${passName} -souper list-candidates = ${load-opt} ${souper-common} --souper-valid-count --souper-debug-level=${souper-debug-level} %s %s -super-opt-pass = ${load-opt} --solver-timeout=1 %s --souper-debug-level=${souper-debug-level} %s +super-opt-pass = ${load-opt} --souper-no-infer %s --souper-debug-level=${souper-debug-level} %s