Passing all tests

michaelweiser · Mar 19, 2018 · 8cc27b6 · 8cc27b6
1 parent 12c7b19
commit 8cc27b6
Show file tree

Hide file tree

Showing 11 changed files with 387 additions and 376 deletions.
diff --git a/peepdf/JSAnalysis.py b/peepdf/JSAnalysis.py
@@ -58,8 +58,8 @@ class Global(object):
 
 # Regex that matches any character that is <32 or >127 and is not whitespace (\n, \r, \t, \f).
 bad_chars_re = "|".join(re.escape(chr(ch)) for ch in (
-    [ch for ch in xrange(32) if chr(ch) not in "\n\r\t\f"] +
-    [ch for ch in xrange(128, 256)]
+    [ch for ch in range(32) if chr(ch) not in "\n\r\t\f"] +
+    [ch for ch in range(128, 256)]
 ))
 
 def analyseJS(code, context=None, manualAnalysis=False):
@@ -244,7 +244,7 @@ def searchObfuscatedFunctions(jsCode, function):
     return obfuscatedFunctionsInfo
 
 
-def unescape(escapedBytes, unicode=True):
+def unescape(escapedBytes, str=True):
     '''
         This method unescapes the given string
 
@@ -253,13 +253,13 @@ def unescape(escapedBytes, unicode=True):
     '''
     # TODO: modify to accept a list of escaped strings?
     unescapedBytes = ''
-    if unicode:
+    if str:
         unicodePadding = '\x00'
     else:
         unicodePadding = ''
     try:
-        if escapedBytes.lower().find('%u') != -1 or escapedBytes.lower().find('\u') != -1 or escapedBytes.find('%') != -1:
-            if escapedBytes.lower().find('\u') != -1:
+        if escapedBytes.lower().find('%u') != -1 or escapedBytes.lower().find('\\u') != -1 or escapedBytes.find('%') != -1:
+            if escapedBytes.lower().find('\\u') != -1:
                 splitBytes = escapedBytes.split('\\')
             else:
                 splitBytes = escapedBytes.split('%')

diff --git a/peepdf/PDFConsole.py b/peepdf/PDFConsole.py
diff --git a/peepdf/PDFCore.py b/peepdf/PDFCore.py
@@ -30,6 +30,8 @@
 import random
 import re
 import sys
+import six
+
 
 import peepdf.aes as AES
 from peepdf.PDFUtils import (
@@ -1266,8 +1268,8 @@ def update(self, decrypt=False):
         self.value = '<< '
         self.rawValue = '<< '
         self.encryptedValue = '<< '
-        keys = self.elements.keys()
-        values = self.elements.values()
+        keys = list(self.elements.keys())
+        values = list(self.elements.values())
         for i in range(len(keys)):
             if values[i] is None:
                 errorMessage = 'Non-existing value for key "'+str(keys[i])+'"'
@@ -1435,7 +1437,7 @@ def getElementByName(self, name, recursive=False):
             else:
                 return self.elements[name]
         if recursive:
-            for element in self.elements.values():
+            for element in list(self.elements.values()):
                 if element is not None and (element.getType() == 'dictionary' or element.getType() == 'array'):
                     retElements += element.getElementByName(name)
         return retElements
@@ -1688,8 +1690,8 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'):
         self.value = '<< '
         self.rawValue = '<< '
         self.encryptedValue = '<< '
-        keys = self.elements.keys()
-        values = self.elements.values()
+        keys = list(self.elements.keys())
+        values = list(self.elements.values())
         if not onlyElements:
             self.references = []
             self.errors = []
@@ -2645,7 +2647,7 @@ def setElement(self, name, value, update=True):
         return (0, '')
 
     def setElements(self, newElements):
-        oldElements = self.elements.keys()
+        oldElements = list(self.elements.keys())
         for oldElement in oldElements:
             if oldElement not in newElements:
                 if oldElement in ['/Filter', '/FFilter']:
@@ -2746,8 +2748,8 @@ def update(self, modifiedCompressedObjects=False, onlyElements=False, decrypt=Fa
         self.value = '<< '
         self.rawValue = '<< '
         self.encryptedValue = '<< '
-        keys = self.elements.keys()
-        values = self.elements.values()
+        keys = list(self.elements.keys())
+        values = list(self.elements.values())
         if not onlyElements:
             self.errors = []
             self.references = []
@@ -3616,7 +3618,7 @@ def getFreeObjectIds(self):
         return ids
 
     def getIndex(self, objectId):
-        objectIds = range(self.firstObject, self.firstObject+self.numObjects)
+        objectIds = list(range(self.firstObject, self.firstObject+self.numObjects))
         if objectId in objectIds:
             return objectIds.index(objectId)
         else:
@@ -4091,15 +4093,15 @@ def getObjects(self):
 
     def getObjectsByString(self, toSearch):
         matchedObjects = []
-        for indirectObject in self.objects.values():
+        for indirectObject in list(self.objects.values()):
             if indirectObject.contains(toSearch):
                 matchedObjects.append(indirectObject.getId())
         return matchedObjects
 
     def getObjectsIds(self):
         sortedIdsOffsets = []
         sortedIds = []
-        for indirectObject in self.objects.values():
+        for indirectObject in list(self.objects.values()):
             sortedIdsOffsets.append([indirectObject.getId(), indirectObject.getOffset()])
         sortedIdsOffsets = sorted(sortedIdsOffsets, key=lambda x: x[1])
         for i in range(len(sortedIdsOffsets)):
@@ -4260,7 +4262,7 @@ def updateObjects(self):
                 else:
                     return (-1, errorMessage)
             elementsToUpdate = object.getReferencesInElements()
-            keys = elementsToUpdate.keys()
+            keys = list(elementsToUpdate.keys())
             for key in keys:
                 ref = elementsToUpdate[key]
                 refId = ref[0]
@@ -5783,7 +5785,7 @@ def getCatalogObject(self, version=None, indirect=False):
         if version is None:
             catalogObjects = []
             catalogIds = self.getCatalogObjectId()
-            for i in xrange(len(catalogIds)):
+            for i in range(len(catalogIds)):
                 id = catalogIds[i]
                 if id is not None:
                     catalogObject = self.getObject(id, i, indirect)
@@ -5909,7 +5911,7 @@ def getInfoObject(self, version=None, indirect=False):
         if version is None:
             infoObjects = []
             infoIds = self.getInfoObjectId()
-            for i in xrange(len(infoIds)):
+            for i in range(len(infoIds)):
                 id = infoIds[i]
                 if id is not None:
                     infoObject = self.getObject(id, i, indirect)
@@ -6035,7 +6037,7 @@ def getOffsets(self, version=None):
         offsetsArray = []
 
         if version is None:
-            versions = range(self.updates+1)
+            versions = list(range(self.updates+1))
         else:
             versions = [version]
 
@@ -6137,7 +6139,7 @@ def getReferencesTo(self, id, version=None):
         if version is None:
             for i in range(self.updates + 1):
                 indirectObjectsDict = self.body[i].getObjects()
-                for indirectObject in indirectObjectsDict.values():
+                for indirectObject in list(indirectObjectsDict.values()):
                     if indirectObject is not None:
                         object = indirectObject.getObject()
                         if object is not None:
@@ -6148,7 +6150,7 @@ def getReferencesTo(self, id, version=None):
             if version > self.updates or version < 0:
                 return None
             indirectObjectsDict = self.body[version].getObjects()
-            for indirectObject in indirectObjectsDict.values():
+            for indirectObject in list(indirectObjectsDict.values()):
                 if indirectObject is not None:
                     object = indirectObject.getObject()
                     if object is not None:
@@ -6309,7 +6311,7 @@ def getTree(self, version=None):
         tree = []
 
         if version is None:
-            versions = range(self.updates+1)
+            versions = list(range(self.updates+1))
         else:
             versions = [version]
 
@@ -6346,7 +6348,7 @@ def getTree(self, version=None):
                                 type = dictType
                             else:
                                 if type == 'dictionary' and len(elements) == 1:
-                                    type = elements.keys()[0]
+                                    type = list(elements.keys())[0]
                     references = self.getReferencesIn(id, version)
                     for i in range(len(references)):
                         referencesIds.append(int(references[i].split()[0]))
@@ -6901,10 +6903,10 @@ def parse(self, fileName, forceMode=False, looseMode=False, manualAnalysis=False
         file = open(fileName, 'rb')
         for line in file:
             if versionLine == '':
-                pdfHeaderIndex = line.find('%PDF-')
-                psHeaderIndex = line.find('%!PS-Adobe-')
+                pdfHeaderIndex = line.find(b'%PDF-')
+                psHeaderIndex = line.find(b'%!PS-Adobe-')
                 if pdfHeaderIndex != -1 or psHeaderIndex != -1:
-                    index = line.find('\r')
+                    index = line.find(b'\r')
                     if index != -1 and index+1 < len(line) and line[index+1] != '\n':
                         index += 1
                         versionLine = line[:index]
@@ -6926,9 +6928,9 @@ def parse(self, fileName, forceMode=False, looseMode=False, manualAnalysis=False
         file.close()
 
         # Getting the specification version
-        versionLine = versionLine.replace('\r', '')
-        versionLine = versionLine.replace('\n', '')
-        matchVersion = re.findall('%(PDF-|!PS-Adobe-\d{1,2}\.\d{1,2}\sPDF-)(\d{1,2}\.\d{1,2})', versionLine)
+        versionLine = versionLine.replace(b'\r', b'')
+        versionLine = versionLine.replace(b'\n', b'')
+        matchVersion = re.findall(b'%(PDF-|!PS-Adobe-\d{1,2}\.\d{1,2}\sPDF-)(\d{1,2}\.\d{1,2})', versionLine)
         if matchVersion == []:
             if forceMode:
                 pdfFile.setVersion(versionLine)
@@ -6968,22 +6970,27 @@ def parse(self, fileName, forceMode=False, looseMode=False, manualAnalysis=False
         pdfFile.setSHA256(hashlib.sha256(fileContent).hexdigest())
 
         # Getting the number of updates in the file
-        while fileContent.find('%%EOF') != -1:
-            self.readUntilSymbol(fileContent, '%%EOF')
+        while fileContent.find(b'%%EOF') != -1:
+            self.charCounter = 0
+            self.readUntilSymbol(fileContent, b'%%EOF')
+
             self.readUntilEndOfLine(fileContent)
+
             self.fileParts.append(fileContent[:self.charCounter])
-            fileContent = fileContent[self.charCounter:]
-            self.charCounter = 0
+            if six.PY3:
+                fileContent = fileContent[self.charCounter + len(b'%%EOF'):]
+            else:
+                fileContent = fileContent[self.charCounter:]
         else:
             if self.fileParts == []:
-                errorMessage = '%%EOF not found'
+                errorMessage = b'%%EOF not found'
                 if forceMode:
                     pdfFile.addError(errorMessage)
                     self.fileParts.append(fileContent)
                 else:
                     sys.exit(errorMessage)
         pdfFile.setUpdates(len(self.fileParts) - 1)
-
+        #raise Exception(ccc)
         # Getting the body, cross reference table and trailer of each part of the file
         for i in range(len(self.fileParts)):
             bodyOffset = 0
@@ -7012,15 +7019,15 @@ def parse(self, fileName, forceMode=False, looseMode=False, manualAnalysis=False
             if xrefContent is not None:
                 xrefOffset = bodyOffset + len(bodyContent)
                 trailerOffset = xrefOffset + len(xrefContent)
-                bodyContent = bodyContent.strip('\r\n')
-                xrefContent = xrefContent.strip('\r\n')
-                trailerContent = trailerContent.strip('\r\n')
+                bodyContent = bodyContent.strip(b'\r\n')
+                xrefContent = xrefContent.strip(b'\r\n')
+                trailerContent = trailerContent.strip(b'\r\n')
             else:
                 if trailerContent is not None:
                     xrefOffset = -1
                     trailerOffset = bodyOffset + len(bodyContent)
-                    bodyContent = bodyContent.strip('\r\n')
-                    trailerContent = trailerContent.strip('\r\n')
+                    bodyContent = bodyContent.strip(b'\r\n')
+                    trailerContent = trailerContent.strip(b'\r\n')
                 else:
                     errorMessage = 'PDF sections not found'
                     if forceMode:
@@ -7183,16 +7190,16 @@ def parsePDFSections(self, content, forceMode=False, looseMode=False):
         xrefContent = None
         trailerContent = None
 
-        indexTrailer = content.find('trailer')
+        indexTrailer = content.find(b'trailer')
         if indexTrailer != -1:
             restContent = content[:indexTrailer]
             auxTrailer = content[indexTrailer:]
-            indexEOF = auxTrailer.find('%%EOF')
+            indexEOF = auxTrailer.find(b'%%EOF')
             if indexEOF == -1:
                 trailerContent = auxTrailer
             else:
                 trailerContent = auxTrailer[:indexEOF+5]
-            indexXref = restContent.find('xref')
+            indexXref = restContent.find(b'xref')
             if indexXref != -1:
                 bodyContent = restContent[:indexXref]
                 xrefContent = restContent[indexXref:]
@@ -7202,11 +7209,11 @@ def parsePDFSections(self, content, forceMode=False, looseMode=False):
                     pdfFile.addError('Xref section not found')
             return [bodyContent, xrefContent, trailerContent]
 
-        indexTrailer = content.find('startxref')
+        indexTrailer = content.find(b'startxref')
         if indexTrailer != -1:
             restContent = content[:indexTrailer]
             auxTrailer = content[indexTrailer:]
-            indexEOF = auxTrailer.find('%%EOF')
+            indexEOF = auxTrailer.find(b'%%EOF')
             if indexEOF == -1:
                 trailerContent = auxTrailer
             else:
@@ -8130,9 +8137,13 @@ def readUntilSymbol(self, string, symbol):
             @param symbol
             @return A tuple (status,statusContent), where statusContent is the characters read in case status = 0 or an error in case status = -1
         '''
-        if not isinstance(string, str):
+
+        if not isinstance(string, bytes):
             return (-1, 'Bad string')
+
         newString = string[self.charCounter:]
+
+        self.charCounter = 0
         index = newString.find(symbol)
         if index == -1:
             errorMessage = 'Symbol "'+symbol+'" not found'

diff --git a/peepdf/PDFCrypto.py b/peepdf/PDFCrypto.py
@@ -31,7 +31,7 @@
 import warnings
 import sys
 import peepdf.aes
-from itertools import cycle, izip
+from itertools import cycle
 warnings.filterwarnings("ignore")
 
 paddingString = '\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A'
@@ -337,4 +337,4 @@ def xor(bytes, key):
         @return: The xored bytes
     '''
     key = cycle(key)
-    return ''.join(chr(ord(x) ^ ord(y)) for (x, y) in izip(bytes, key))
+    return ''.join(chr(ord(x) ^ ord(y)) for (x, y) in zip(bytes, key))
diff --git a/peepdf/PDFFilters.py b/peepdf/PDFFilters.py
@@ -469,7 +469,7 @@ def pre_prediction(stream, predictor, columns, colors, bits):
     # PNG prediction
     if predictor >= 10 and predictor <= 15:
         # PNG prediction can vary from row to row
-        for row in xrange(len(stream) / columns):
+        for row in range(len(stream) / columns):
             rowdata = [ord(x) for x in stream[(row * columns):((row + 1) * columns)]]
             filterByte = predictor - 10
             rowdata = [filterByte] + rowdata
@@ -537,7 +537,7 @@ def post_prediction(decodedStream, predictor, columns, colors, bits):
         numSamplesPerRow = columns + 1
         bytesPerSample = (colors * bits + 7) / 8
         upRowdata = (0,) * numSamplesPerRow
-        for row in xrange(numRows):
+        for row in range(numRows):
             rowdata = [ord(x) for x in decodedStream[(row * bytesPerRow):((row + 1) * bytesPerRow)]]
             # PNG prediction can vary from row to row
             filterByte = rowdata[0]
@@ -787,12 +787,12 @@ def dctDecode(stream, parameters):
     decodedStream = ''
     try:
         from PIL import Image
-        import StringIO
+        import io
     except:
         return (-1, 'Python Imaging Library (PIL) not installed')
     # Quick implementation, assuming the library can detect the parameters
     try:
-        im = Image.open(StringIO.StringIO(stream))
+        im = Image.open(io.StringIO(stream))
         decodedStream = im.tostring()
         return (0, decodedStream)
     except: