Skip to content

Commit

Permalink
Passing all tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Evert0x committed Mar 19, 2018
1 parent 12c7b19 commit 8cc27b6
Show file tree
Hide file tree
Showing 11 changed files with 387 additions and 376 deletions.
12 changes: 6 additions & 6 deletions peepdf/JSAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ class Global(object):

# Regex that matches any character that's <32 (other than the whitespace characters \n, \r, \t, \f) or >127.
bad_chars_re = "|".join(re.escape(chr(ch)) for ch in (
[ch for ch in xrange(32) if chr(ch) not in "\n\r\t\f"] +
[ch for ch in xrange(128, 256)]
[ch for ch in range(32) if chr(ch) not in "\n\r\t\f"] +
[ch for ch in range(128, 256)]
))

def analyseJS(code, context=None, manualAnalysis=False):
Expand Down Expand Up @@ -244,7 +244,7 @@ def searchObfuscatedFunctions(jsCode, function):
return obfuscatedFunctionsInfo


def unescape(escapedBytes, unicode=True):
def unescape(escapedBytes, str=True):
'''
This method unescapes the given string
Expand All @@ -253,13 +253,13 @@ def unescape(escapedBytes, unicode=True):
'''
# TODO: modify to accept a list of escaped strings?
unescapedBytes = ''
if unicode:
if str:
unicodePadding = '\x00'
else:
unicodePadding = ''
try:
if escapedBytes.lower().find('%u') != -1 or escapedBytes.lower().find('\u') != -1 or escapedBytes.find('%') != -1:
if escapedBytes.lower().find('\u') != -1:
if escapedBytes.lower().find('%u') != -1 or escapedBytes.lower().find('\\u') != -1 or escapedBytes.find('%') != -1:
if escapedBytes.lower().find('\\u') != -1:
splitBytes = escapedBytes.split('\\')
else:
splitBytes = escapedBytes.split('%')
Expand Down
564 changes: 282 additions & 282 deletions peepdf/PDFConsole.py

Large diffs are not rendered by default.

95 changes: 53 additions & 42 deletions peepdf/PDFCore.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
import random
import re
import sys
import six


import peepdf.aes as AES
from peepdf.PDFUtils import (
Expand Down Expand Up @@ -1266,8 +1268,8 @@ def update(self, decrypt=False):
self.value = '<< '
self.rawValue = '<< '
self.encryptedValue = '<< '
keys = self.elements.keys()
values = self.elements.values()
keys = list(self.elements.keys())
values = list(self.elements.values())
for i in range(len(keys)):
if values[i] is None:
errorMessage = 'Non-existing value for key "'+str(keys[i])+'"'
Expand Down Expand Up @@ -1435,7 +1437,7 @@ def getElementByName(self, name, recursive=False):
else:
return self.elements[name]
if recursive:
for element in self.elements.values():
for element in list(self.elements.values()):
if element is not None and (element.getType() == 'dictionary' or element.getType() == 'array'):
retElements += element.getElementByName(name)
return retElements
Expand Down Expand Up @@ -1688,8 +1690,8 @@ def update(self, onlyElements=False, decrypt=False, algorithm='RC4'):
self.value = '<< '
self.rawValue = '<< '
self.encryptedValue = '<< '
keys = self.elements.keys()
values = self.elements.values()
keys = list(self.elements.keys())
values = list(self.elements.values())
if not onlyElements:
self.references = []
self.errors = []
Expand Down Expand Up @@ -2645,7 +2647,7 @@ def setElement(self, name, value, update=True):
return (0, '')

def setElements(self, newElements):
oldElements = self.elements.keys()
oldElements = list(self.elements.keys())
for oldElement in oldElements:
if oldElement not in newElements:
if oldElement in ['/Filter', '/FFilter']:
Expand Down Expand Up @@ -2746,8 +2748,8 @@ def update(self, modifiedCompressedObjects=False, onlyElements=False, decrypt=Fa
self.value = '<< '
self.rawValue = '<< '
self.encryptedValue = '<< '
keys = self.elements.keys()
values = self.elements.values()
keys = list(self.elements.keys())
values = list(self.elements.values())
if not onlyElements:
self.errors = []
self.references = []
Expand Down Expand Up @@ -3616,7 +3618,7 @@ def getFreeObjectIds(self):
return ids

def getIndex(self, objectId):
objectIds = range(self.firstObject, self.firstObject+self.numObjects)
objectIds = list(range(self.firstObject, self.firstObject+self.numObjects))
if objectId in objectIds:
return objectIds.index(objectId)
else:
Expand Down Expand Up @@ -4091,15 +4093,15 @@ def getObjects(self):

def getObjectsByString(self, toSearch):
matchedObjects = []
for indirectObject in self.objects.values():
for indirectObject in list(self.objects.values()):
if indirectObject.contains(toSearch):
matchedObjects.append(indirectObject.getId())
return matchedObjects

def getObjectsIds(self):
sortedIdsOffsets = []
sortedIds = []
for indirectObject in self.objects.values():
for indirectObject in list(self.objects.values()):
sortedIdsOffsets.append([indirectObject.getId(), indirectObject.getOffset()])
sortedIdsOffsets = sorted(sortedIdsOffsets, key=lambda x: x[1])
for i in range(len(sortedIdsOffsets)):
Expand Down Expand Up @@ -4260,7 +4262,7 @@ def updateObjects(self):
else:
return (-1, errorMessage)
elementsToUpdate = object.getReferencesInElements()
keys = elementsToUpdate.keys()
keys = list(elementsToUpdate.keys())
for key in keys:
ref = elementsToUpdate[key]
refId = ref[0]
Expand Down Expand Up @@ -5783,7 +5785,7 @@ def getCatalogObject(self, version=None, indirect=False):
if version is None:
catalogObjects = []
catalogIds = self.getCatalogObjectId()
for i in xrange(len(catalogIds)):
for i in range(len(catalogIds)):
id = catalogIds[i]
if id is not None:
catalogObject = self.getObject(id, i, indirect)
Expand Down Expand Up @@ -5909,7 +5911,7 @@ def getInfoObject(self, version=None, indirect=False):
if version is None:
infoObjects = []
infoIds = self.getInfoObjectId()
for i in xrange(len(infoIds)):
for i in range(len(infoIds)):
id = infoIds[i]
if id is not None:
infoObject = self.getObject(id, i, indirect)
Expand Down Expand Up @@ -6035,7 +6037,7 @@ def getOffsets(self, version=None):
offsetsArray = []

if version is None:
versions = range(self.updates+1)
versions = list(range(self.updates+1))
else:
versions = [version]

Expand Down Expand Up @@ -6137,7 +6139,7 @@ def getReferencesTo(self, id, version=None):
if version is None:
for i in range(self.updates + 1):
indirectObjectsDict = self.body[i].getObjects()
for indirectObject in indirectObjectsDict.values():
for indirectObject in list(indirectObjectsDict.values()):
if indirectObject is not None:
object = indirectObject.getObject()
if object is not None:
Expand All @@ -6148,7 +6150,7 @@ def getReferencesTo(self, id, version=None):
if version > self.updates or version < 0:
return None
indirectObjectsDict = self.body[version].getObjects()
for indirectObject in indirectObjectsDict.values():
for indirectObject in list(indirectObjectsDict.values()):
if indirectObject is not None:
object = indirectObject.getObject()
if object is not None:
Expand Down Expand Up @@ -6309,7 +6311,7 @@ def getTree(self, version=None):
tree = []

if version is None:
versions = range(self.updates+1)
versions = list(range(self.updates+1))
else:
versions = [version]

Expand Down Expand Up @@ -6346,7 +6348,7 @@ def getTree(self, version=None):
type = dictType
else:
if type == 'dictionary' and len(elements) == 1:
type = elements.keys()[0]
type = list(elements.keys())[0]
references = self.getReferencesIn(id, version)
for i in range(len(references)):
referencesIds.append(int(references[i].split()[0]))
Expand Down Expand Up @@ -6901,10 +6903,10 @@ def parse(self, fileName, forceMode=False, looseMode=False, manualAnalysis=False
file = open(fileName, 'rb')
for line in file:
if versionLine == '':
pdfHeaderIndex = line.find('%PDF-')
psHeaderIndex = line.find('%!PS-Adobe-')
pdfHeaderIndex = line.find(b'%PDF-')
psHeaderIndex = line.find(b'%!PS-Adobe-')
if pdfHeaderIndex != -1 or psHeaderIndex != -1:
index = line.find('\r')
index = line.find(b'\r')
if index != -1 and index+1 < len(line) and line[index+1] != '\n':
index += 1
versionLine = line[:index]
Expand All @@ -6926,9 +6928,9 @@ def parse(self, fileName, forceMode=False, looseMode=False, manualAnalysis=False
file.close()

# Getting the specification version
versionLine = versionLine.replace('\r', '')
versionLine = versionLine.replace('\n', '')
matchVersion = re.findall('%(PDF-|!PS-Adobe-\d{1,2}\.\d{1,2}\sPDF-)(\d{1,2}\.\d{1,2})', versionLine)
versionLine = versionLine.replace(b'\r', b'')
versionLine = versionLine.replace(b'\n', b'')
matchVersion = re.findall(b'%(PDF-|!PS-Adobe-\d{1,2}\.\d{1,2}\sPDF-)(\d{1,2}\.\d{1,2})', versionLine)
if matchVersion == []:
if forceMode:
pdfFile.setVersion(versionLine)
Expand Down Expand Up @@ -6968,22 +6970,27 @@ def parse(self, fileName, forceMode=False, looseMode=False, manualAnalysis=False
pdfFile.setSHA256(hashlib.sha256(fileContent).hexdigest())

# Getting the number of updates in the file
while fileContent.find('%%EOF') != -1:
self.readUntilSymbol(fileContent, '%%EOF')
while fileContent.find(b'%%EOF') != -1:
self.charCounter = 0
self.readUntilSymbol(fileContent, b'%%EOF')

self.readUntilEndOfLine(fileContent)

self.fileParts.append(fileContent[:self.charCounter])
fileContent = fileContent[self.charCounter:]
self.charCounter = 0
if six.PY3:
fileContent = fileContent[self.charCounter + len(b'%%EOF'):]
else:
fileContent = fileContent[self.charCounter:]
else:
if self.fileParts == []:
errorMessage = '%%EOF not found'
errorMessage = b'%%EOF not found'
if forceMode:
pdfFile.addError(errorMessage)
self.fileParts.append(fileContent)
else:
sys.exit(errorMessage)
pdfFile.setUpdates(len(self.fileParts) - 1)

#raise Exception(ccc)
# Getting the body, cross reference table and trailer of each part of the file
for i in range(len(self.fileParts)):
bodyOffset = 0
Expand Down Expand Up @@ -7012,15 +7019,15 @@ def parse(self, fileName, forceMode=False, looseMode=False, manualAnalysis=False
if xrefContent is not None:
xrefOffset = bodyOffset + len(bodyContent)
trailerOffset = xrefOffset + len(xrefContent)
bodyContent = bodyContent.strip('\r\n')
xrefContent = xrefContent.strip('\r\n')
trailerContent = trailerContent.strip('\r\n')
bodyContent = bodyContent.strip(b'\r\n')
xrefContent = xrefContent.strip(b'\r\n')
trailerContent = trailerContent.strip(b'\r\n')
else:
if trailerContent is not None:
xrefOffset = -1
trailerOffset = bodyOffset + len(bodyContent)
bodyContent = bodyContent.strip('\r\n')
trailerContent = trailerContent.strip('\r\n')
bodyContent = bodyContent.strip(b'\r\n')
trailerContent = trailerContent.strip(b'\r\n')
else:
errorMessage = 'PDF sections not found'
if forceMode:
Expand Down Expand Up @@ -7183,16 +7190,16 @@ def parsePDFSections(self, content, forceMode=False, looseMode=False):
xrefContent = None
trailerContent = None

indexTrailer = content.find('trailer')
indexTrailer = content.find(b'trailer')
if indexTrailer != -1:
restContent = content[:indexTrailer]
auxTrailer = content[indexTrailer:]
indexEOF = auxTrailer.find('%%EOF')
indexEOF = auxTrailer.find(b'%%EOF')
if indexEOF == -1:
trailerContent = auxTrailer
else:
trailerContent = auxTrailer[:indexEOF+5]
indexXref = restContent.find('xref')
indexXref = restContent.find(b'xref')
if indexXref != -1:
bodyContent = restContent[:indexXref]
xrefContent = restContent[indexXref:]
Expand All @@ -7202,11 +7209,11 @@ def parsePDFSections(self, content, forceMode=False, looseMode=False):
pdfFile.addError('Xref section not found')
return [bodyContent, xrefContent, trailerContent]

indexTrailer = content.find('startxref')
indexTrailer = content.find(b'startxref')
if indexTrailer != -1:
restContent = content[:indexTrailer]
auxTrailer = content[indexTrailer:]
indexEOF = auxTrailer.find('%%EOF')
indexEOF = auxTrailer.find(b'%%EOF')
if indexEOF == -1:
trailerContent = auxTrailer
else:
Expand Down Expand Up @@ -8130,9 +8137,13 @@ def readUntilSymbol(self, string, symbol):
@param symbol
@return A tuple (status,statusContent), where statusContent is the characters read in case status = 0 or an error in case status = -1
'''
if not isinstance(string, str):

if not isinstance(string, bytes):
return (-1, 'Bad string')

newString = string[self.charCounter:]

self.charCounter = 0
index = newString.find(symbol)
if index == -1:
errorMessage = 'Symbol "'+symbol+'" not found'
Expand Down
4 changes: 2 additions & 2 deletions peepdf/PDFCrypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
import warnings
import sys
import peepdf.aes
from itertools import cycle, izip
from itertools import cycle
warnings.filterwarnings("ignore")

paddingString = '\x28\xBF\x4E\x5E\x4E\x75\x8A\x41\x64\x00\x4E\x56\xFF\xFA\x01\x08\x2E\x2E\x00\xB6\xD0\x68\x3E\x80\x2F\x0C\xA9\xFE\x64\x53\x69\x7A'
Expand Down Expand Up @@ -337,4 +337,4 @@ def xor(bytes, key):
@return: The xored bytes
'''
key = cycle(key)
return ''.join(chr(ord(x) ^ ord(y)) for (x, y) in izip(bytes, key))
return ''.join(chr(ord(x) ^ ord(y)) for (x, y) in zip(bytes, key))
8 changes: 4 additions & 4 deletions peepdf/PDFFilters.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ def pre_prediction(stream, predictor, columns, colors, bits):
# PNG prediction
if predictor >= 10 and predictor <= 15:
# PNG prediction can vary from row to row
for row in xrange(len(stream) / columns):
for row in range(len(stream) / columns):
rowdata = [ord(x) for x in stream[(row * columns):((row + 1) * columns)]]
filterByte = predictor - 10
rowdata = [filterByte] + rowdata
Expand Down Expand Up @@ -537,7 +537,7 @@ def post_prediction(decodedStream, predictor, columns, colors, bits):
numSamplesPerRow = columns + 1
bytesPerSample = (colors * bits + 7) / 8
upRowdata = (0,) * numSamplesPerRow
for row in xrange(numRows):
for row in range(numRows):
rowdata = [ord(x) for x in decodedStream[(row * bytesPerRow):((row + 1) * bytesPerRow)]]
# PNG prediction can vary from row to row
filterByte = rowdata[0]
Expand Down Expand Up @@ -787,12 +787,12 @@ def dctDecode(stream, parameters):
decodedStream = ''
try:
from PIL import Image
import StringIO
import io
except:
return (-1, 'Python Imaging Library (PIL) not installed')
# Quick implementation, assuming the library can detect the parameters
try:
im = Image.open(StringIO.StringIO(stream))
im = Image.open(io.StringIO(stream))
decodedStream = im.tostring()
return (0, decodedStream)
except:
Expand Down
Loading

0 comments on commit 8cc27b6

Please sign in to comment.