From 08205da80fe7439d71cff5cb9629bf24ee79f289 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Thu, 23 Jul 2015 17:06:21 +0200 Subject: [PATCH 01/10] Remove unused strategies. --- lithium/lithium.py | 66 +--------------------------------------------- 1 file changed, 1 insertion(+), 65 deletions(-) diff --git a/lithium/lithium.py b/lithium/lithium.py index 248a68e..cc31242 100755 --- a/lithium/lithium.py +++ b/lithium/lithium.py @@ -136,10 +136,7 @@ def main(): return strategyFunction = { - 'minimize': minimize, - 'remove-pair': tryRemovingPair, - 'remove-adjacent-pairs': tryRemovingAdjacentPairs, - 'remove-substring': tryRemovingSubstring + 'minimize': minimize }.get(strategy, None) if not strategyFunction: @@ -404,67 +401,6 @@ def minimize(): print " Test total: " + quantity(testTotal, atom) -def tryRemovingChunks(chunkSize): - - print "Done with a round of chunk size " + str(chunkSize) + "!" - return anyChunksRemoved - - - -# Other reduction algorithms -# (Use these if you're really frustrated with something you know is 1-minimal.) - -def tryRemovingAdjacentPairs(): - # XXX capture the idea that after removing (4,5) it might be sensible to remove (3,6) - # but also that after removing (2,3) and (4,5) it might be sensible to remove (1,6) - # XXX also want to remove three at a time, and two at a time that are one line apart - for i in range(0, numParts - 2): - if enabled[i]: - enabled[i] = False - enabled[i + 1] = False - if interesting(): - print "Removed an adjacent pair based at " + str(i) - else: - enabled[i] = True - enabled[i + 1] = True - # Restore the original testcase - writeTestcase(testcaseFilename) - print "Done with one pass of removing adjacent pairs" - - - -def tryRemovingPair(): - for i in range(0, numParts): - enabled[i] = False - for j in range(i + 1, numParts): - enabled[j] = False - print "Trying removing the pair " + str(i) + ", " + str(j) - if interesting(): - print "Success! Removed a pair! Exiting." 
- sys.exit(0) # XXX not nice - enabled[j] = True - enabled[i] = True - - # Restore the original testcase - writeTestcase(testcaseFilename) - print "Failure! No pair can be removed." - - -def tryRemovingSubstring(): - for i in range(0, numParts): - for j in range(i, numParts): - enabled[j] = False - print "Trying removing the substring " + str(i) + ".." + str(j) - if interesting(): - print "Success! Removed a substring! Exiting." - sys.exit(0) # XXX not nice - for j in range(i, numParts): - enabled[j] = True - - # Restore the original testcase - writeTestcase(testcaseFilename) - print "Failure! No substring can be removed." - # Helpers From bb49a14adf010f638f80f20c943d6d92a0e7cbe4 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Thu, 23 Jul 2015 17:14:27 +0200 Subject: [PATCH 02/10] Document the minimize strategy. --- lithium/lithium.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lithium/lithium.py b/lithium/lithium.py index cc31242..f863c41 100755 --- a/lithium/lithium.py +++ b/lithium/lithium.py @@ -338,6 +338,17 @@ def interesting(partsSuggestion, writeIt=True): # Main reduction algorithm +# +# This strategy attempts to remove chunks which might not be interesting +# code, but which can be removed independently of any other. This happens +# frequently with values which are computed either after the interesting +# part has executed, or which are never used to influence it. +# +# a = compute(); +# b = compute(); <-- !!! +# interesting(a); +# c = compute(); <-- !!! +# def minimize(): global parts, testCount, testTotal global minimizeMax, minimizeMin, minimizeChunkStart, minimizeRepeatFirstRound From 6d77bb8c7bf75893b0cec5c3c2465bc44fdbaf44 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Thu, 23 Jul 2015 17:26:35 +0200 Subject: [PATCH 03/10] Add minimize-around strategy. 
--- lithium/lithium.py | 147 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 146 insertions(+), 1 deletion(-) diff --git a/lithium/lithium.py b/lithium/lithium.py index f863c41..92aa97c 100755 --- a/lithium/lithium.py +++ b/lithium/lithium.py @@ -136,7 +136,8 @@ def main(): return strategyFunction = { - 'minimize': minimize + 'minimize': minimize, + 'minimize-around': minimizeSurroundingPairs }.get(strategy, None) if not strategyFunction: @@ -413,6 +414,150 @@ def minimize(): +# +# This strategy attempts to remove pairs of chunks which might be surrounding +# interesting code, but which cannot be removed independently of the other. +# This happens frequently with patterns such as: +# +# a = 42; +# while (true) { +# b = foo(a); <-- !!! +# interesting(); +# a = bar(b); <-- !!! +# } +# +def minimizeSurroundingPairs(): + origNumParts = len(parts) + chunkSize = min(minimizeMax, largestPowerOfTwoSmallerThan(origNumParts)) + finalChunkSize = max(minimizeMin, 1) + + while 1: + anyChunksRemoved = tryRemovingSurroundingChunks(chunkSize); + + last = (chunkSize == finalChunkSize) + + if anyChunksRemoved and (minimizeRepeat == "always" or (minimizeRepeat == "last" and last)): + # Repeat with the same chunk size + pass + elif last: + # Done + break + else: + # Continue with the next smaller chunk size + chunkSize /= 2 + + writeTestcase(testcaseFilename) + + print "=== LITHIUM SUMMARY ===" + + if finalChunkSize == 1 and minimizeRepeat != "never": + print " Removing any single " + atom + " from the final file makes it uninteresting!" 
+ + print " Initial size: " + quantity(origNumParts, atom) + print " Final size: " + quantity(len(parts), atom) + print " Tests performed: " + str(testCount) + print " Test total: " + quantity(testTotal, atom) + +def list_rindex(l, p, e): + if p < 0 or p > len(l): + raise ValueError("%s is not in list" % str(e)) + for index, item in enumerate(reversed(l[:p])): + if item == e: + return p - index - 1 + raise ValueError("%s is not in list" % str(e)) + +def list_nindex(l, p, e): + if p + 1 >= len(l): + raise ValueError("%s is not in list" % str(e)) + return l[(p + 1):].index(e) + (p + 1) + +def tryRemovingSurroundingChunks(chunkSize): + """Make a single run through the testcase, trying to remove chunks of size chunkSize. + + Returns True iff any chunks were removed.""" + + global parts + + chunksSoFar = 0 + summary = "" + + chunksRemoved = 0 + chunksSurviving = 0 + atomsRemoved = 0 + + atomsInitial = len(parts) + numChunks = divideRoundingUp(len(parts), chunkSize) + + # Not enough chunks to remove surrounding blocks. + if numChunks < 3: + return False + + print "Starting a round with chunks of " + quantity(chunkSize, atom) + "." 
+ + summary = ['S' for i in range(numChunks)] + chunkStart = chunkSize + beforeChunkIdx = 0 + keepChunkIdx = 1 + afterChunkIdx = 2 + + try: + while chunkStart + chunkSize < len(parts): + chunkBefStart = max(0, chunkStart - chunkSize) + chunkBefEnd = chunkStart + chunkAftStart = min(len(parts), chunkStart + chunkSize) + chunkAftEnd = min(len(parts), chunkAftStart + chunkSize) + description = "chunk #" + str(beforeChunkIdx) + " & #" + str(afterChunkIdx) + " of " + str(numChunks) + " chunks of size " + str(chunkSize) + + if interesting(parts[:chunkBefStart] + parts[chunkBefEnd:chunkAftStart] + parts[chunkAftEnd:]): + print "Yay, reduced it by removing " + description + " :)" + chunksRemoved += 2 + atomsRemoved += (chunkBefEnd - chunkBefStart) + atomsRemoved += (chunkAftEnd - chunkAftStart) + summary[beforeChunkIdx] = '-' + summary[afterChunkIdx] = '-' + # The start is now sooner since we remove the chunk which was before this one. + chunkStart -= chunkSize + try: + # Try to keep removing surrounding chunks of the same part. + beforeChunkIdx = list_rindex(summary, keepChunkIdx, 'S') + except ValueError: + # There is no more survinving block on the left-hand-side of + # the current chunk, shift everything by one surviving + # block. Any ValueError from here means that there is no + # longer enough chunk. + beforeChunkIdx = keepChunkIdx + keepChunkIdx = list_nindex(summary, keepChunkIdx, 'S') + chunkStart += chunkSize + else: + print "Removing " + description + " made the file 'uninteresting'." + # Shift chunk indexes, and seek the next surviving chunk. ValueError + # from here means that there is no longer enough chunks. + beforeChunkIdx = keepChunkIdx + keepChunkIdx = afterChunkIdx + chunkStart += chunkSize + + afterChunkIdx = list_nindex(summary, keepChunkIdx, 'S') + + except ValueError: + # This is a valid loop exit point. 
+ chunkStart = len(parts) + + atomsSurviving = atomsInitial - atomsRemoved + printableSummary = " ".join(["".join(summary[(2 * i):min(2 * (i + 1), numChunks + 1)]) for i in range(numChunks / 2 + numChunks % 2)]) + print "" + print "Done with a round of chunk size " + str(chunkSize) + "!" + print quantity(summary.count('S'), "chunk") + " survived; " + \ + quantity(summary.count('-'), "chunk") + " removed." + print quantity(atomsSurviving, atom) + " survived; " + \ + quantity(atomsRemoved, atom) + " removed." + print "Which chunks survived: " + printableSummary + print "" + + writeTestcaseTemp("did-round-" + str(chunkSize), True); + + return (chunksRemoved > 0) + + # Helpers def divideRoundingUp(n, d): From f3e65863ba7fbda584e78e6f8bbfc01ffe5195a3 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Thu, 23 Jul 2015 17:31:40 +0200 Subject: [PATCH 04/10] Add minimize-balanced strategy, to remove conditional blocks around interesting code. --- lithium/lithium.py | 253 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 251 insertions(+), 2 deletions(-) diff --git a/lithium/lithium.py b/lithium/lithium.py index 92aa97c..018f4ba 100755 --- a/lithium/lithium.py +++ b/lithium/lithium.py @@ -31,7 +31,7 @@ def usage(): * --char (-c). Don't treat lines as atomic units; treat the file as a sequence of characters rather than a sequence of lines. -* --strategy=[minimize, remove-pair, remove-substring, check-only]. +* --strategy=[minimize, minimize-around, minimize-balanced]. default: minimize. * --testcase=filename. default: last thing on the command line, which can double as passing in. 
@@ -137,7 +137,8 @@ def main(): strategyFunction = { 'minimize': minimize, - 'minimize-around': minimizeSurroundingPairs + 'minimize-around': minimizeSurroundingPairs, + 'minimize-balanced': minimizeBalancedPairs, }.get(strategy, None) if not strategyFunction: @@ -558,6 +559,254 @@ def tryRemovingSurroundingChunks(chunkSize): return (chunksRemoved > 0) +# +# This Strategy attempt at removing balanced chuncks which might be surrounding +# interesting code, but which cannot be removed independently of the other. +# This happens frequently with patterns such as: +# +# ...; +# if (cond) { <-- !!! +# ...; +# interesting(); +# ...; +# } <-- !!! +# ...; +# +# The value of the condition might not be interesting, but in order to reach the +# interesting code we still have to compute it, and keep extra code alive. +# +def minimizeBalancedPairs(): + origNumParts = len(parts) + chunkSize = min(minimizeMax, largestPowerOfTwoSmallerThan(origNumParts)) + finalChunkSize = max(minimizeMin, 1) + + while 1: + anyChunksRemoved = tryRemovingBalancedPairs(chunkSize); + + last = (chunkSize == finalChunkSize) + + if anyChunksRemoved and (minimizeRepeat == "always" or (minimizeRepeat == "last" and last)): + # Repeat with the same chunk size + pass + elif last: + # Done + break + else: + # Continue with the next smaller chunk size + chunkSize /= 2 + + writeTestcase(testcaseFilename) + + print "=== LITHIUM SUMMARY ===" + if finalChunkSize == 1 and minimizeRepeat != "never": + print " Removing any single " + atom + " from the final file makes it uninteresting!" 
+ + print " Initial size: " + quantity(origNumParts, atom) + print " Final size: " + quantity(len(parts), atom) + print " Tests performed: " + str(testCount) + print " Test total: " + quantity(testTotal, atom) + +def list_fiveParts(list, step, f, s, t): + return (list[:f], list[f:s], list[s:(s+step)], list[(s+step):(t+step)], list[(t+step):]) + +def tryRemovingBalancedPairs(chunkSize): + """Make a single run through the testcase, trying to remove chunks of size chunkSize. + + Returns True iff any chunks were removed.""" + + global parts + + chunksSoFar = 0 + summary = "" + + chunksRemoved = 0 + chunksSurviving = 0 + atomsRemoved = 0 + + atomsInitial = len(parts) + numChunks = divideRoundingUp(len(parts), chunkSize) + + # Not enough chunks to remove surrounding blocks. + if numChunks < 2: + return False + + print "Starting a round with chunks of " + quantity(chunkSize, atom) + "." + + summary = ['S' for i in range(numChunks)] + curly = [(parts[i].count('{') - parts[i].count('}')) for i in range(numChunks)] + square = [(parts[i].count('[') - parts[i].count(']')) for i in range(numChunks)] + normal = [(parts[i].count('(') - parts[i].count(')')) for i in range(numChunks)] + chunkStart = 0 + lhsChunkIdx = 0 + + try: + while chunkStart < len(parts): + + description = "chunk #" + str(lhsChunkIdx) + "".join([" " for i in range(len(str(lhsChunkIdx)) + 4)]) + description += " of " + str(numChunks) + " chunks of size " + str(chunkSize) + + assert summary[:lhsChunkIdx].count('S') * chunkSize == chunkStart, "the chunkStart should correspond to the lhsChunkIdx modulo the removed chunks." + + chunkLhsStart = chunkStart + chunkLhsEnd = min(len(parts), chunkLhsStart + chunkSize) + + nCurly = curly[lhsChunkIdx] + nSquare = square[lhsChunkIdx] + nNormal = normal[lhsChunkIdx] + + # If the chunk is already balanced, try to remove it. 
+ if nCurly == 0 and nSquare == 0 and nNormal == 0: + if interesting(parts[:chunkLhsStart] + parts[chunkLhsEnd:]): + print "Yay, reduced it by removing " + description + " :)" + chunksRemoved += 1 + atomsRemoved += (chunkLhsEnd - chunkLhsStart) + summary[lhsChunkIdx] = '-' + else: + print "Removing " + description + " made the file 'uninteresting'." + chunkStart += chunkSize + lhsChunkIdx = list_nindex(summary, lhsChunkIdx, 'S') + continue + + # Otherwise look for the corresponding chunk. + rhsChunkIdx = lhsChunkIdx + for item in summary[(lhsChunkIdx + 1):]: + rhsChunkIdx += 1 + if item != 'S': + continue + nCurly += curly[rhsChunkIdx] + nSquare += square[rhsChunkIdx] + nNormal += normal[rhsChunkIdx] + if nCurly < 0 or nSquare < 0 or nNormal < 0: + break + if nCurly == 0 and nSquare == 0 and nNormal == 0: + break + + # If we have no match, then just skip this pair of chunks. + if nCurly != 0 or nSquare != 0 or nNormal != 0: + print "Skipping " + description + " because it is 'uninteresting'." + chunkStart += chunkSize + lhsChunkIdx = list_nindex(summary, lhsChunkIdx, 'S') + continue + + # Otherwise we do have a match and we check if this is interesting to remove both. + chunkRhsStart = chunkLhsStart + chunkSize * summary[lhsChunkIdx:rhsChunkIdx].count('S') + chunkRhsStart = min(len(parts), chunkRhsStart) + chunkRhsEnd = min(len(parts), chunkRhsStart + chunkSize) + + description = "chunk #" + str(lhsChunkIdx) + " & #" + str(rhsChunkIdx) + description += " of " + str(numChunks) + " chunks of size " + str(chunkSize) + + if interesting(parts[:chunkLhsStart] + parts[chunkLhsEnd:chunkRhsStart] + parts[chunkRhsEnd:]): + print "Yay, reduced it by removing " + description + " :)" + chunksRemoved += 2 + atomsRemoved += (chunkLhsEnd - chunkLhsStart) + atomsRemoved += (chunkRhsEnd - chunkRhsStart) + summary[lhsChunkIdx] = '-' + summary[rhsChunkIdx] = '-' + lhsChunkIdx = list_nindex(summary, lhsChunkIdx, 'S') + continue + + # Removing the braces make the failure disappear. 
As we are looking + # for removing chunk (braces), we need to make the content within + # the braces as minimal as possible, so let us try to see if we can + # move the chunks outside the braces. + print "Removing " + description + " made the file 'uninteresting'." + + # Moving chunks is still a bit experimental, and it can introduce reducing loops. + # If you want to try it, just replace this True by a False. + if True: + chunkStart += chunkSize + lhsChunkIdx = list_nindex(summary, lhsChunkIdx, 'S') + continue + + origChunkIdx = lhsChunkIdx + stayOnSameChunk = False + chunkMidStart = chunkLhsEnd + midChunkIdx = list_nindex(summary, lhsChunkIdx, 'S') + while chunkMidStart < chunkRhsStart: + assert summary[:midChunkIdx].count('S') * chunkSize == chunkMidStart, "the chunkMidStart should correspond to the midChunkIdx modulo the removed chunks." + description = "chunk #" + str(midChunkIdx) + "".join([" " for i in range(len(str(lhsChunkIdx)) + 4)]) + description += " of " + str(numChunks) + " chunks of size " + str(chunkSize) + + chunkMidEnd = chunkMidStart + chunkSize + p = list_fiveParts(parts, chunkSize, chunkLhsStart, chunkMidStart, chunkRhsStart) + + nCurly = curly[midChunkIdx] + nSquare = square[midChunkIdx] + nNormal = normal[midChunkIdx] + if nCurly != 0 or nSquare != 0 or nNormal != 0: + print "Keepping " + description + " because it is 'uninteresting'." + chunkMidStart += chunkSize + midChunkIdx = list_nindex(summary, midChunkIdx, 'S') + continue + + # Try moving the chunk after. + if interesting(p[0] + p[1] + p[3] + p[2] + p[4]): + print "->Moving " + description + " kept the file 'interesting'." 
+ chunkRhsStart -= chunkSize + chunkRhsEnd -= chunkSize + tS = list_fiveParts(summary, 1, lhsChunkIdx, midChunkIdx, rhsChunkIdx) + tc = list_fiveParts(curly , 1, lhsChunkIdx, midChunkIdx, rhsChunkIdx) + ts = list_fiveParts(square , 1, lhsChunkIdx, midChunkIdx, rhsChunkIdx) + tn = list_fiveParts(normal , 1, lhsChunkIdx, midChunkIdx, rhsChunkIdx) + summary = tS[0] + tS[1] + tS[3] + tS[2] + tS[4] + curly = tc[0] + tc[1] + tc[3] + tc[2] + tc[4] + square = ts[0] + ts[1] + ts[3] + ts[2] + ts[4] + normal = tn[0] + tn[1] + tn[3] + tn[2] + tn[4] + rhsChunkIdx -= 1 + midChunkIdx = summary[midChunkIdx:].index('S') + midChunkIdx + continue + + # Try moving the chunk before. + if interesting(p[0] + p[2] + p[1] + p[3] + p[4]): + print "<-Moving " + description + " kept the file 'interesting'." + chunkLhsStart += chunkSize + chunkLhsEnd += chunkSize + chunkMidStart += chunkSize + tS = list_fiveParts(summary, 1, lhsChunkIdx, midChunkIdx, rhsChunkIdx) + tc = list_fiveParts(curly , 1, lhsChunkIdx, midChunkIdx, rhsChunkIdx) + ts = list_fiveParts(square , 1, lhsChunkIdx, midChunkIdx, rhsChunkIdx) + tn = list_fiveParts(normal , 1, lhsChunkIdx, midChunkIdx, rhsChunkIdx) + summary = tS[0] + tS[2] + tS[1] + tS[3] + tS[4] + curly = tc[0] + tc[2] + tc[1] + tc[3] + tc[4] + square = ts[0] + ts[2] + ts[1] + ts[3] + ts[4] + normal = tn[0] + tn[2] + tn[1] + tn[3] + tn[4] + lhsChunkIdx += 1 + midChunkIdx = list_nindex(summary, midChunkIdx, 'S') + stayOnSameChunk = True + continue + + print "..Moving " + description + " made the file 'uninteresting'." + chunkMidStart += chunkSize + midChunkIdx = list_nindex(summary, midChunkIdx, 'S') + + lhsChunkIdx = origChunkIdx + if not stayOnSameChunk: + chunkStart += chunkSize + lhsChunkIdx = list_nindex(summary, lhsChunkIdx, 'S') + + + except ValueError: + # This is a valid loop exit point. 
+ chunkStart = len(parts) + + atomsSurviving = atomsInitial - atomsRemoved + printableSummary = " ".join(["".join(summary[(2 * i):min(2 * (i + 1), numChunks + 1)]) for i in range(numChunks / 2 + numChunks % 2)]) + print "" + print "Done with a round of chunk size " + str(chunkSize) + "!" + print quantity(summary.count('S'), "chunk") + " survived; " + \ + quantity(summary.count('-'), "chunk") + " removed." + print quantity(atomsSurviving, atom) + " survived; " + \ + quantity(atomsRemoved, atom) + " removed." + print "Which chunks survived: " + printableSummary + print "" + + writeTestcaseTemp("did-round-" + str(chunkSize), True); + + return (chunksRemoved > 0) + + + # Helpers def divideRoundingUp(n, d): From 3689310a4bb3263fa1e9f1799b627225a96b651a Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Thu, 23 Jul 2015 17:40:05 +0200 Subject: [PATCH 05/10] Add replace-properties-by-global strategy, to reduce dependencies on holder objects. --- lithium/lithium.py | 152 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 151 insertions(+), 1 deletion(-) diff --git a/lithium/lithium.py b/lithium/lithium.py index 018f4ba..178c984 100755 --- a/lithium/lithium.py +++ b/lithium/lithium.py @@ -7,6 +7,7 @@ import subprocess import time import sys +import re path0 = os.path.dirname(os.path.abspath(__file__)) path1 = os.path.abspath(os.path.join(path0, os.pardir, 'interestingness')) @@ -31,7 +32,7 @@ def usage(): * --char (-c). Don't treat lines as atomic units; treat the file as a sequence of characters rather than a sequence of lines. -* --strategy=[minimize, minimize-around, minimize-balanced]. +* --strategy=[minimize, minimize-around, minimize-balanced, replace-properties-by-globals]. default: minimize. * --testcase=filename. default: last thing on the command line, which can double as passing in. 
@@ -139,6 +140,7 @@ def main(): 'minimize': minimize, 'minimize-around': minimizeSurroundingPairs, 'minimize-balanced': minimizeBalancedPairs, + 'replace-properties-by-globals': replacePropertiesByGlobals, }.get(strategy, None) if not strategyFunction: @@ -807,6 +809,154 @@ def tryRemovingBalancedPairs(chunkSize): +# +# This Strategy attempt at removing members, such as other strategies can +# then move the lines out-side the functions. The goal is to rename +# variable at the same time, such as the program remains valid, while +# removing the dependency on the object on which the member is. +# +# function Foo() { +# this.list = []; +# } +# Foo.prototype.push = function(a) { +# this.list.push(a); +# } +# Foo.prototype.last = function() { +# return this.list.pop(); +# } +# +# Which might transform the previous example to something like: +# +# function Foo() { +# list = []; +# } +# push = function(a) { +# list.push(a); +# } +# last = function() { +# return list.pop(); +# } +# +def replacePropertiesByGlobals(): + origNumParts = len(parts) + chunkSize = min(minimizeMax, 2 * largestPowerOfTwoSmallerThan(origNumParts)) + finalChunkSize = max(minimizeMin, 1) + + origNumChars = 0 + for line in parts: + origNumChars += len(line) + + numChars = origNumChars + while 1: + numRemovedChars = tryMakingGlobals(chunkSize, numChars); + numChars -= numRemovedChars + + last = (chunkSize == finalChunkSize) + + if numRemovedChars and (minimizeRepeat == "always" or (minimizeRepeat == "last" and last)): + # Repeat with the same chunk size + pass + elif last: + # Done + break + else: + # Continue with the next smaller chunk size + chunkSize /= 2 + + writeTestcase(testcaseFilename) + + print "=== LITHIUM SUMMARY ===" + if finalChunkSize == 1 and minimizeRepeat != "never": + print " Removing any single " + atom + " from the final file makes it uninteresting!" 
+ + print " Initial size: " + quantity(origNumChars, "character") + print " Final size: " + quantity(numChars, "character") + print " Tests performed: " + str(testCount) + print " Test total: " + quantity(testTotal, atom) + + +def tryMakingGlobals(chunkSize, numChars): + """Make a single run through the testcase, trying to remove chunks of size chunkSize. + + Returns True iff any chunks were removed.""" + + global parts + + summary = "" + + numRemovedChars = 0 + numChunks = divideRoundingUp(len(parts), chunkSize) + finalChunkSize = max(minimizeMin, 1) + + # Map words to the chunk indexes in which they are present. + words = {} + for chunk, line in enumerate(parts): + for match in re.finditer(r'(?<=[\w\d_])\.(\w+)', line): + word = match.group(1) + if not word in words: + words[word] = [chunk] + else: + words[word] += [chunk] + + # All patterns have been removed sucessfully. + if len(words) == 0: + return 0 + + print "Starting a round with chunks of " + quantity(chunkSize, atom) + "." + summary = ['S' for i in range(numChunks)] + + for word, chunks in words.items(): + chunkIndexes = {} + for chunkStart in chunks: + chunkIdx = int(chunkStart / chunkSize) + if not chunkIdx in chunkIndexes: + chunkIndexes[chunkIdx] = [chunkStart] + else: + chunkIndexes[chunkIdx] += [chunkStart] + + for chunkIdx, chunkStarts in chunkIndexes.items(): + # Unless this is the final size, let's try to remove couple of + # prefixes, otherwise wait for the final size to remove each of them + # individually. + if len(chunkStarts) == 1 and finalChunkSize != chunkSize: + continue + + description = "'" + word + "' in " + description += "chunk #" + str(chunkIdx) + " of " + str(numChunks) + " chunks of size " + str(chunkSize) + + maybeRemoved = 0 + newParts = parts + for chunkStart in chunkStarts: + subst = re.sub("[\w_.]+\." 
+ word, word, newParts[chunkStart]) + maybeRemoved += len(newParts[chunkStart]) - len(subst) + newParts = newParts[:chunkStart] + [ subst ] + newParts[(chunkStart+1):] + + if interesting(newParts): + print "Yay, reduced it by removing prefixes of " + description + " :)" + numRemovedChars += maybeRemoved + summary[chunkIdx] = 's' + words[word] = [ c for c in chunks if c not in chunkIndexes ] + if len(words[word]) == 0: + del words[word] + else: + print "Removing prefixes of " + description + " made the file 'uninteresting'." + + numSurvivingChars = numChars - numRemovedChars + printableSummary = " ".join(["".join(summary[(2 * i):min(2 * (i + 1), numChunks + 1)]) for i in range(numChunks / 2 + numChunks % 2)]) + print "" + print "Done with a round of chunk size " + str(chunkSize) + "!" + print quantity(summary.count('S'), "chunk") + " survived; " + \ + quantity(summary.count('s'), "chunk") + " shortened." + print quantity(numSurvivingChars, "character") + " survived; " + \ + quantity(numRemovedChars, "character") + " removed." + print "Which chunks survived: " + printableSummary + print "" + + writeTestcaseTemp("did-round-" + str(chunkSize), True); + + return numRemovedChars + + # Helpers def divideRoundingUp(n, d): From a84f94bdabb537b102980d329d9b45774320564e Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Thu, 23 Jul 2015 17:42:42 +0200 Subject: [PATCH 06/10] Add replace-arguments-by-globals strategy, to later minimize arguments computations. --- lithium/lithium.py | 234 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 233 insertions(+), 1 deletion(-) diff --git a/lithium/lithium.py b/lithium/lithium.py index 178c984..c991543 100755 --- a/lithium/lithium.py +++ b/lithium/lithium.py @@ -8,6 +8,7 @@ import time import sys import re +import string path0 = os.path.dirname(os.path.abspath(__file__)) path1 = os.path.abspath(os.path.join(path0, os.pardir, 'interestingness')) @@ -32,7 +33,7 @@ def usage(): * --char (-c). 
Don't treat lines as atomic units; treat the file as a sequence of characters rather than a sequence of lines. -* --strategy=[minimize, minimize-around, minimize-balanced, replace-properties-by-globals]. +* --strategy=[minimize, minimize-around, minimize-balanced, replace-properties-by-globals, replace-arguments-by-globals]. default: minimize. * --testcase=filename. default: last thing on the command line, which can double as passing in. @@ -141,6 +142,7 @@ def main(): 'minimize-around': minimizeSurroundingPairs, 'minimize-balanced': minimizeBalancedPairs, 'replace-properties-by-globals': replacePropertiesByGlobals, + 'replace-arguments-by-globals': replaceArgumentsByGlobals, }.get(strategy, None) if not strategyFunction: @@ -957,6 +959,236 @@ def tryMakingGlobals(chunkSize, numChars): return numRemovedChars +# +# This Strategy attempt at replacing arguments by globals, for each named +# argument of a function we add a setter of the global of the same name before +# the function call. The goal is to remove functions by making empty arguments +# lists instead. +# +# function foo(a,b) { +# list = a + b; +# } +# foo(2, 3) +# +# becomes: +# +# function foo() { +# list = a + b; +# } +# a = 2; +# b = 3; +# foo() +# +# The next logical step is inlining the body of the function at the call-site. +# +def replaceArgumentsByGlobals(): + roundNum = 0 + while 1: + numRemovedArguments = tryArgumentsAsGlobals(roundNum) + roundNum += 1 + + if numRemovedArguments and (minimizeRepeat == "always" or minimizeRepeat == "last"): + # Repeat with the same chunk size + pass + else: + # Done + break + + writeTestcase(testcaseFilename) + + print "=== LITHIUM SUMMARY ===" + print " Tests performed: " + str(testCount) + print " Test total: " + quantity(testTotal, atom) + + +def tryArgumentsAsGlobals(roundNum): + """Make a single run through the testcase, trying to remove chunks of size chunkSize. 
+ + Returns True iff any chunks were removed.""" + + global parts + + numMovedArguments = 0 + numSurvivedArguments = 0 + + # Map words to the chunk indexes in which they are present. + functions = {} + anonymousQueue = [] + anonymousStack = [] + for chunk, line in enumerate(parts): + # Match function definition with at least one argument. + for match in re.finditer(r'(?:function\s+(\w+)|(\w+)\s*=\s*function)\s*\((\s*\w+\s*(?:,\s*\w+\s*)*)\)', line): + fun = match.group(1) + if fun is None: + fun = match.group(2) + + if match.group(3) == "": + args = [] + else: + args = match.group(3).split(',') + + if not fun in functions: + functions[fun] = { "defs": args, "argsPattern": match.group(3), "chunk": chunk, "uses": [] } + else: + functions[fun]["defs"] = args + functions[fun]["argsPattern"] = match.group(3) + functions[fun]["chunk"] = chunk + + + # Match anonymous function definition, which are surrounded by parentheses. + for match in re.finditer(r'\(function\s*\w*\s*\(((?:\s*\w+\s*(?:,\s*\w+\s*)*)?)\)\s*{', line): + if match.group(1) == "": + args = [] + else: + args = match.group(1).split(',') + anonymousStack += [{ "defs": args, "chunk": chunk, "use": None, "useChunk": 0 }] + + # Match calls of anonymous function. + for match in re.finditer(r'}\s*\)\s*\(((?:[^()]|\([^,()]*\))*)\)', line): + if len(anonymousStack) == 0: + continue + anon = anonymousStack[-1] + anonymousStack = anonymousStack[:-1] + if match.group(1) == "" and len(anon["defs"]) == 0: + continue + if match.group(1) == "": + args = [] + else: + args = match.group(1).split(',') + anon["use"] = args + anon["useChunk"] = chunk + anonymousQueue += [anon] + + # match function calls. 
(and some definitions) + for match in re.finditer(r'((\w+)\s*\(((?:[^()]|\([^,()]*\))*)\))', line): + pattern = match.group(1) + fun = match.group(2) + if match.group(3) == "": + args = [] + else: + args = match.group(3).split(',') + if not fun in functions: + functions[fun] = { "uses": [] } + functions[fun]["uses"] += [{ "values": args, "chunk": chunk, "pattern": pattern }] + + + # All patterns have been removed sucessfully. + if len(functions) == 0 and len(anonymousQueue) == 0: + return 0 + + print "Starting removing function arguments." + + for fun, argsMap in functions.items(): + description = "arguments of '" + fun + "'" + if "defs" not in argsMap or len(argsMap["uses"]) == 0: + print "Ignoring " + description + " because it is 'uninteresting'." + continue + + maybeMovedArguments = 0 + newParts = parts + + # Remove the function definition arguments + argDefs = argsMap["defs"] + defChunk = argsMap["chunk"] + subst = string.replace(newParts[defChunk], argsMap["argsPattern"], "", 1) + newParts = newParts[:defChunk] + [ subst ] + newParts[(defChunk+1):] + + # Copy callers arguments to globals. + for argUse in argsMap["uses"]: + values = argUse["values"] + chunk = argUse["chunk"] + if chunk == defChunk and values == argDefs: + continue + while len(values) < len(argDefs): + values = values + ["undefined"] + setters = "".join([ a + " = " + v + ";\n" for a, v in zip(argDefs, values) ]) + subst = setters + newParts[chunk] + newParts = newParts[:chunk] + [ subst ] + newParts[(chunk+1):] + maybeMovedArguments += len(argDefs); + + if interesting(newParts): + print "Yay, reduced it by removing " + description + " :)" + numMovedArguments += maybeMovedArguments + else: + numSurvivedArguments += maybeMovedArguments + print "Removing " + description + " made the file 'uninteresting'." 
+ + for argUse in argsMap["uses"]: + chunk = argUse["chunk"] + values = argUse["values"] + if chunk == defChunk and values == argDefs: + continue + + newParts = parts + subst = string.replace(newParts[chunk], argUse["pattern"], fun + "()", 1) + if newParts[chunk] == subst: + continue + newParts = newParts[:chunk] + [ subst ] + newParts[(chunk+1):] + maybeMovedArguments = len(values); + + descriptionChunk = description + " at " + atom + " #" + str(chunk) + if interesting(newParts): + print "Yay, reduced it by removing " + descriptionChunk + " :)" + numMovedArguments += maybeMovedArguments + else: + numSurvivedArguments += maybeMovedArguments + print "Removing " + descriptionChunk + " made the file 'uninteresting'." + + # Remove immediate anonymous function calls. + for anon in anonymousQueue: + noopChanges = 0 + maybeMovedArguments = 0 + newParts = parts + + argDefs = anon["defs"] + defChunk = anon["chunk"] + values = anon["use"] + chunk = anon["useChunk"] + description = "arguments of anonymous function at #" + atom + " " + str(defChunk) + + # Remove arguments of the function. + subst = string.replace(newParts[defChunk], ",".join(argDefs), "", 1) + if newParts[defChunk] == subst: + noopChanges += 1 + newParts = newParts[:defChunk] + [ subst ] + newParts[(defChunk+1):] + + # Replace arguments by their value in the scope of the function. + while len(values) < len(argDefs): + values = values + ["undefined"] + setters = "".join([ "var " + a + " = " + v + ";\n" for a, v in zip(argDefs, values) ]) + subst = newParts[defChunk] + "\n" + setters + if newParts[defChunk] == subst: + noopChanges += 1 + newParts = newParts[:defChunk] + [ subst ] + newParts[(defChunk+1):] + + # Remove arguments of the anonymous function call. 
+ subst = string.replace(newParts[chunk], ",".join(anon["use"]), "", 1) + if newParts[chunk] == subst: + noopChanges += 1 + newParts = newParts[:chunk] + [ subst ] + newParts[(chunk+1):] + maybeMovedArguments += len(values); + + if noopChanges == 3: + continue + + if interesting(newParts): + print "Yay, reduced it by removing " + description + " :)" + numMovedArguments += maybeMovedArguments + else: + numSurvivedArguments += maybeMovedArguments + print "Removing " + description + " made the file 'uninteresting'." + + + print "" + print "Done with this round!" + print quantity(numMovedArguments, "argument") + " moved;" + print quantity(numSurvivedArguments, "argument") + " survived." + + writeTestcaseTemp("did-round-" + str(roundNum), True); + + return numMovedArguments + + # Helpers def divideRoundingUp(n, d): From 4737990548073f84bb97bc6d887d9e1806faf5f5 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Thu, 23 Jul 2015 17:50:32 +0200 Subject: [PATCH 07/10] Add --symbols option to split by tokens, instead of splitting by lines / characters. --- lithium/lithium.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lithium/lithium.py b/lithium/lithium.py index c991543..10021c4 100755 --- a/lithium/lithium.py +++ b/lithium/lithium.py @@ -33,6 +33,10 @@ def usage(): * --char (-c). Don't treat lines as atomic units; treat the file as a sequence of characters rather than a sequence of lines. +* --symbols + Treat the file as a sequence of strings separated by tokens. The + characters by which the strings are delimited are defined by the + --cutBefore, and --cutAfter options. * --strategy=[minimize, minimize-around, minimize-balanced, replace-properties-by-globals, replace-arguments-by-globals]. default: minimize. * --testcase=filename. 
@@ -68,6 +72,8 @@ def usage(): minimizeRepeatFirstRound = False atom = "line" +cutAfter = "?=;{[" +cutBefore = "]}:" conditionScript = None conditionArgs = None @@ -95,7 +101,7 @@ def main(): try: # XXX Consider using optparse (with disable_interspersed_args) or argparse (with argparse.REMAINDER) - opts, args = getopt.getopt(sys.argv[1:], "hc", ["help", "char", "strategy=", "repeat=", "min=", "max=", "chunksize=", "chunkstart=", "testcase=", "tempdir=", "repeatfirstround", "maxruntime="]) + opts, args = getopt.getopt(sys.argv[1:], "hc", ["help", "char", "symbols", "cutBefore=", "cutAfter=", "strategy=", "repeat=", "min=", "max=", "chunksize=", "chunkstart=", "testcase=", "tempdir=", "repeatfirstround", "maxruntime="]) except getopt.GetoptError, exc: usageError(exc.msg) @@ -179,6 +185,8 @@ def processOptions(opts): tempDir = a elif o in ("-c", "--char"): atom = "char" + elif o in ("-s", "--symbols"): + atom = "symbol-delimiter" elif o == "--strategy": strategy = a elif o == "--min": @@ -283,6 +291,10 @@ def readTestcaseLine(line): elif atom == "char": for char in line: parts.append(char) + elif atom == "symbol-delimiter": + cutter = '[' + cutBefore + ']?[^' + cutBefore + cutAfter + ']*(?:[' + cutAfter + ']|$|(?=[' + cutBefore + ']))' + for statement in re.finditer(cutter, line): + parts.append(statement.group(0)) def writeTestcase(filename): with open(filename, "w") as file: From ce5eba62426cb7f6d6638764f07dcadb8a4a7085 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Mon, 3 Aug 2015 15:32:11 +0200 Subject: [PATCH 08/10] Add check-only in the list of strategies of the usage output. --- lithium/lithium.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lithium/lithium.py b/lithium/lithium.py index 10021c4..a6642fe 100755 --- a/lithium/lithium.py +++ b/lithium/lithium.py @@ -37,7 +37,7 @@ def usage(): Treat the file as a sequence of strings separated by tokens. 
The characters by which the strings are delimited are defined by the --cutBefore, and --cutAfter options. -* --strategy=[minimize, minimize-around, minimize-balanced, replace-properties-by-globals, replace-arguments-by-globals]. +* --strategy=[check-only, minimize, minimize-around, minimize-balanced, replace-properties-by-globals, replace-arguments-by-globals]. default: minimize. * --testcase=filename. default: last thing on the command line, which can double as passing in. From 2003269e22361e201f93bb1dba1ca4b4572d2968 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Thu, 17 Dec 2015 17:26:13 +0000 Subject: [PATCH 09/10] Add a new interesting test to check different behaviour based on command line switches. --- interestingness/diffTest.py | 48 +++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 interestingness/diffTest.py diff --git a/interestingness/diffTest.py b/interestingness/diffTest.py new file mode 100644 index 0000000..65477f9 --- /dev/null +++ b/interestingness/diffTest.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +from optparse import OptionParser + +import timedRun +import filecmp + + +def parseOptions(arguments): + parser = OptionParser() + parser.disable_interspersed_args() + parser.add_option('-t', '--timeout', type='int', action='store', dest='condTimeout', + default=120, + help='Optionally set the timeout. 
Defaults to "%default" seconds.') + parser.add_option('-a', '--a-arg', type='string', action='append', dest='aArgs', + default=[], + help='Set of extra arguments given to first run.') + parser.add_option('-b', '--b-arg', type='string', action='append', dest='bArgs', + default=[], + help='Set of extra arguments given to second run.') + + options, args = parser.parse_args(arguments) + + return options.condTimeout, options.aArgs, options.bArgs, args + +def interesting(cliArgs, tempPrefix): + (timeout, aArgs, bArgs, args) = parseOptions(cliArgs) + + aRuninfo = timedRun.timed_run(args[:1] + aArgs + args[1:], timeout, tempPrefix + "-a") + bRuninfo = timedRun.timed_run(args[:1] + bArgs + args[1:], timeout, tempPrefix + "-b") + timeString = " (1st Run: %.3f seconds) (2nd Run: %.3f seconds)" % (aRuninfo.elapsedtime, bRuninfo.elapsedtime) + + if aRuninfo.sta != timedRun.TIMED_OUT and bRuninfo.sta != timedRun.TIMED_OUT: + if aRuninfo.rc != bRuninfo.rc: + print ("[Interesting] Different return code. (%d, %d) " % (aRuninfo.rc, bRuninfo.rc)) + timeString + return True + if not filecmp.cmp(aRuninfo.out, bRuninfo.out): + print "[Interesting] Different output. " + timeString + return True + if not filecmp.cmp(aRuninfo.err, bRuninfo.err): + print "[Interesting] Different error output. " + timeString + return True + else: + print "[Uninteresting] At least one test timed out." + timeString + return False + + print "[Uninteresting] Identical behaviour." + timeString + return False From 9d08d1e03017fc7c76d7f3f4a958112609d49035 Mon Sep 17 00:00:00 2001 From: "Nicolas B. Pierron" Date: Fri, 18 Mar 2016 13:40:24 +0000 Subject: [PATCH 10/10] Exit early if the file is smaller than the final Chunk size. 
--- lithium/lithium.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lithium/lithium.py b/lithium/lithium.py index a6642fe..0aa8a76 100755 --- a/lithium/lithium.py +++ b/lithium/lithium.py @@ -393,7 +393,7 @@ def minimize(): if chunkStart >= len(parts): writeTestcaseTemp("did-round-" + str(chunkSize), True); - last = (chunkSize == finalChunkSize) + last = (chunkSize <= finalChunkSize) empty = (len(parts) == 0) print "" if not empty and anyChunksRemoved and (minimizeRepeat == "always" or (minimizeRepeat == "last" and last)): @@ -451,7 +451,7 @@ def minimizeSurroundingPairs(): while 1: anyChunksRemoved = tryRemovingSurroundingChunks(chunkSize); - last = (chunkSize == finalChunkSize) + last = (chunkSize <= finalChunkSize) if anyChunksRemoved and (minimizeRepeat == "always" or (minimizeRepeat == "last" and last)): # Repeat with the same chunk size @@ -599,7 +599,7 @@ def minimizeBalancedPairs(): while 1: anyChunksRemoved = tryRemovingBalancedPairs(chunkSize); - last = (chunkSize == finalChunkSize) + last = (chunkSize <= finalChunkSize) if anyChunksRemoved and (minimizeRepeat == "always" or (minimizeRepeat == "last" and last)): # Repeat with the same chunk size @@ -865,7 +865,7 @@ def replacePropertiesByGlobals(): numRemovedChars = tryMakingGlobals(chunkSize, numChars); numChars -= numRemovedChars - last = (chunkSize == finalChunkSize) + last = (chunkSize <= finalChunkSize) if numRemovedChars and (minimizeRepeat == "always" or (minimizeRepeat == "last" and last)): # Repeat with the same chunk size