Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Computation of section entropies for ELF, MACHO and PE (in progress) #501

Closed
wants to merge 39 commits into from
Closed
Changes from 1 commit
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
d4f448f
VB header parsing
pruzko Nov 16, 2018
e486337
project info header parsing I.
pruzko Nov 16, 2018
25cd4b9
fileinfo wrapper
pruzko Dec 12, 2018
0edf72e
fileinfo wrapper
pruzko Dec 12, 2018
8de01b1
string utils
pruzko Dec 12, 2018
fe1b65d
extern table plain presentation
pruzko Dec 13, 2018
68160c4
Visual Basic Objects presentation
pruzko Dec 13, 2018
0e73efe
some more presentation
pruzko Dec 18, 2018
2cff3fe
presentation of language and project related stuff
pruzko Jan 31, 2019
ff31384
extern and object table hashes
pruzko Jan 31, 2019
a533784
obj table hash presentation
pruzko Feb 1, 2019
ce2096a
doxygen fix
pruzko Feb 1, 2019
da87bfe
basic json presentation
pruzko Feb 1, 2019
5c64274
extern and object tables json presentation
pruzko Feb 1, 2019
eda798f
COM Registration Data extraction and presentation + object Table GUID
pruzko Feb 4, 2019
72fd3b9
prefix cut for project path
pruzko Feb 5, 2019
7fc8f94
modification for crawler needs
pruzko Feb 5, 2019
e4b2049
addresses fixed
pruzko Feb 8, 2019
c7138cb
export table extraction fixed
pruzko Feb 9, 2019
70431d8
COM Registration Info parsed
pruzko Feb 9, 2019
338de0e
COM Registration Info plain presentation
pruzko Feb 9, 2019
6b3e171
COM Registration Info json presentation
pruzko Feb 9, 2019
f748b61
modification for crawler only
pruzko Feb 11, 2019
6b54ce6
ascii parsing fixed and VB presentation fixed
pruzko Feb 11, 2019
aa53bc5
relicts removed
pruzko Feb 11, 2019
3121e8c
empty object table presentation fixed
pruzko Feb 12, 2019
26df133
entropy function
pruzko Feb 14, 2019
27a494c
elf sections
pruzko Feb 14, 2019
771ae2a
PE and fileformat
pruzko Feb 14, 2019
c7e5c34
more PE and macho
pruzko Feb 14, 2019
551fb3c
fileinfo and presentation
pruzko Feb 14, 2019
0a7d70b
undefined entropy fix
pruzko Feb 15, 2019
48e3078
section parsing bug fixed
pruzko Feb 15, 2019
8445769
section for coff and other formats added
pruzko Feb 15, 2019
89529ed
overlay entropy
pruzko Feb 16, 2019
20a8e7a
overlay entropy fix'd
pruzko Feb 16, 2019
2c340a8
rebased and fixed based on PR comments
pruzko Feb 21, 2019
b8dfaeb
reinterpret cast added
pruzko Feb 21, 2019
70d21b4
header for array added
pruzko Feb 21, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
addresses fixed
pruzko committed Feb 21, 2019
commit e4b20491fb9692edb1c72874822a4e3171caf2a5
12 changes: 5 additions & 7 deletions include/retdec/fileformat/file_format/pe/pe_format.h
Original file line number Diff line number Diff line change
@@ -98,13 +98,11 @@ class PeFormat : public FileFormat
/// @}
/// @name Visual Basic methods
/// @{
bool parseVisualBasicProjectInfo(std::size_t structureOffset, std::size_t baseAddress);
bool parseVisualBasicExternTable(std::size_t structureOffset, std::size_t baseAddress,
std::size_t nEntries);
bool parseVisualBasicObjectTable(std::size_t structureOffset, std::size_t baseAddress);
bool parseVisualBasicObjects(std::size_t structureOffset, std::size_t baseAddress,
std::size_t nObjects);
bool parseVisualBasicComRegistrationData(std::size_t structureOffset, std::size_t baseAddress);
bool parseVisualBasicProjectInfo(std::size_t structureOffset);
bool parseVisualBasicExternTable(std::size_t structureOffset, std::size_t nEntries);
bool parseVisualBasicObjectTable(std::size_t structureOffset);
bool parseVisualBasicObjects(std::size_t structureOffset, std::size_t nObjects);
bool parseVisualBasicComRegistrationData(std::size_t structureOffset);
/// @}
protected:
PeLib::PeFile *file; ///< PeLib representation of PE file
16 changes: 14 additions & 2 deletions src/fileformat/file_format/file_format.cpp
Original file line number Diff line number Diff line change
@@ -1121,7 +1121,13 @@ bool FileFormat::getOffsetFromAddress(unsigned long long &result, unsigned long
return false;
}

result = secSeg->getOffset() + (address - secSeg->getAddress());
auto secSegAddr = secSeg->getAddress();
if (secSegAddr > address)
{
return false;
}

result = secSeg->getOffset() + (address - secSegAddr);
return true;
}

@@ -1141,7 +1147,13 @@ bool FileFormat::getAddressFromOffset(unsigned long long &result, unsigned long
return false;
}

result = secSeg->getAddress() + (offset - secSeg->getOffset());
auto secSegOffset = secSeg->getOffset();
if (secSegOffset > offset)
{
return false;
}

result = secSeg->getAddress() + (offset - secSegOffset);
return true;
}

107 changes: 42 additions & 65 deletions src/fileformat/file_format/pe/pe_format.cpp
Original file line number Diff line number Diff line change
@@ -633,11 +633,10 @@ void PeFormat::loadVisualBasicHeader()
auto allBytes = getBytes();
std::vector<std::uint8_t> bytes;
unsigned long long version = 0;
unsigned long long baseAddress = 0;
std::size_t vbHeaderAddress = 0;
std::size_t vbHeaderOffset = 0;
std::size_t vbProjectInfoOffset = 0;
std::size_t vbComDataRegistrationOffset = 0;
unsigned long long vbHeaderAddress = 0;
unsigned long long vbHeaderOffset = 0;
unsigned long long vbProjectInfoOffset = 0;
unsigned long long vbComDataRegistrationOffset = 0;
std::string projLanguageDLL;
std::string projBackupLanguageDLL;
std::string projExeName;
@@ -658,20 +657,12 @@ void PeFormat::loadVisualBasicHeader()
return;
}

if (!getImageBaseAddress(baseAddress))
{
return;
}

vbHeaderAddress = bytes[4] << 24 | bytes[3] << 16 | bytes[2] << 8 | bytes[1];

if (vbHeaderAddress < baseAddress)
if (!getOffsetFromAddress(vbHeaderOffset, vbHeaderAddress))
{
return;
}

vbHeaderOffset = vbHeaderAddress - baseAddress;

if (!getBytes(bytes, vbHeaderOffset, vbh.headerSize()) || bytes.size() != vbh.headerSize())
{
return;
@@ -725,8 +716,6 @@ void PeFormat::loadVisualBasicHeader()
vbh.projNameOffset = byteSwap32(vbh.projNameOffset);
}

// TODO check VB header magic

if (vbh.projExeNameOffset != 0)
{
projExeName = retdec::utils::readNullTerminatedAscii(allBytes.data(), allBytes.size(),
@@ -765,26 +754,23 @@ void PeFormat::loadVisualBasicHeader()
visualBasicInfo.setLanguageDLLPrimaryLCID(vbh.LCID1);
visualBasicInfo.setLanguageDLLSecondaryLCID(vbh.LCID2);

if (vbh.projectInfoAddr >= baseAddress)
if (getOffsetFromAddress(vbProjectInfoOffset, vbh.projectInfoAddr))
{
vbProjectInfoOffset = vbh.projectInfoAddr - baseAddress;
parseVisualBasicProjectInfo(vbProjectInfoOffset, baseAddress);
parseVisualBasicProjectInfo(vbProjectInfoOffset);
}

if (vbh.COMRegisterDataAddr >= baseAddress)
if (getOffsetFromAddress(vbComDataRegistrationOffset, vbh.COMRegisterDataAddr))
{
vbComDataRegistrationOffset = vbh.COMRegisterDataAddr - baseAddress;
parseVisualBasicComRegistrationData(vbComDataRegistrationOffset, baseAddress);
parseVisualBasicComRegistrationData(vbComDataRegistrationOffset);
}
}

/**
* Parse visual basic COM registration data
* @param structureOffset Offset in file where the structure starts
* @param baseAddress Base address
* @return @c true if COM registration data was successfully parsed, @c false otherwise
*/
bool PeFormat::parseVisualBasicComRegistrationData(std::size_t structureOffset, std::size_t baseAddress)
bool PeFormat::parseVisualBasicComRegistrationData(std::size_t structureOffset)
{
auto allBytes = getBytes();
std::vector<std::uint8_t> bytes;
@@ -851,14 +837,13 @@ bool PeFormat::parseVisualBasicComRegistrationData(std::size_t structureOffset,
/**
* Parse visual basic project info
* @param structureOffset Offset in file where the structure starts
* @param baseAddress Base address
* @return @c true if project info was successfully parsed, @c false otherwise
*/
bool PeFormat::parseVisualBasicProjectInfo(std::size_t structureOffset, std::size_t baseAddress)
bool PeFormat::parseVisualBasicProjectInfo(std::size_t structureOffset)
{
std::vector<std::uint8_t> bytes;
std::size_t vbExternTableOffset = 0;
std::size_t vbObjectTableOffset = 0;
unsigned long long vbExternTableOffset = 0;
unsigned long long vbObjectTableOffset = 0;
std::string projPath;
std::size_t offset = 0;
struct VBProjInfo vbpi;
@@ -900,16 +885,14 @@ bool PeFormat::parseVisualBasicProjectInfo(std::size_t structureOffset, std::siz
visualBasicInfo.setProjectPath(projPath);
visualBasicInfo.setPcode(vbpi.nativeCodeAddr == 0);

if (vbpi.externalTableAddr >= baseAddress)
if (getOffsetFromAddress(vbExternTableOffset, vbpi.externalTableAddr))
{
vbExternTableOffset = vbpi.externalTableAddr - baseAddress;
parseVisualBasicExternTable(vbExternTableOffset, baseAddress, vbpi.nExternals);
parseVisualBasicExternTable(vbExternTableOffset, vbpi.nExternals);
}

if (vbpi.objectTableAddr >= baseAddress)
if (getOffsetFromAddress(vbObjectTableOffset, vbpi.objectTableAddr))
{
vbObjectTableOffset = vbpi.objectTableAddr - baseAddress;
parseVisualBasicObjectTable(vbObjectTableOffset, baseAddress);
parseVisualBasicObjectTable(vbObjectTableOffset);
}

return true;
@@ -918,18 +901,16 @@ bool PeFormat::parseVisualBasicProjectInfo(std::size_t structureOffset, std::siz
/**
* Parse visual basic extern table
* @param structureOffset Offset in file where the structure starts
* @param baseAddress Base address
* @param nEntries Number of entries in table
* @return @c true if extern table was successfully parsed, @c false otherwise
*/
bool PeFormat::parseVisualBasicExternTable(std::size_t structureOffset, std::size_t baseAddress,
std::size_t nEntries)
bool PeFormat::parseVisualBasicExternTable(std::size_t structureOffset, std::size_t nEntries)
{
auto allBytes = getBytes();
std::vector<std::uint8_t> bytes;
struct VBExternTableEntry entry;
struct VBExternTableEntryData entryData;
std::size_t vbExternEntryDataOffset = 0;
unsigned long long vbExternEntryDataOffset = 0;
std::size_t offset = 0;

for (std::size_t i = 0; i < nEntries; i++)
@@ -953,13 +934,15 @@ bool PeFormat::parseVisualBasicExternTable(std::size_t structureOffset, std::siz
entry.importDataAddr = byteSwap32(entry.importDataAddr);
}

if (entry.type != static_cast<std::uint32_t>(VBExternTableEntryType::external)
|| entry.importDataAddr < baseAddress)
if (entry.type != static_cast<std::uint32_t>(VBExternTableEntryType::external))
{
continue;
}

vbExternEntryDataOffset = entry.importDataAddr - baseAddress;
if (!getOffsetFromAddress(vbExternEntryDataOffset, entry.importDataAddr))
{
continue;
}

if (!getBytes(bytes, vbExternEntryDataOffset, entryData.structureSize())
|| bytes.size() != entryData.structureSize())
@@ -977,15 +960,15 @@ bool PeFormat::parseVisualBasicExternTable(std::size_t structureOffset, std::siz
entryData.apiNameAddr = byteSwap32(entryData.apiNameAddr);
}

if (entryData.moduleNameAddr >= baseAddress)
unsigned long long moduleNameOffset;
if (!getOffsetFromAddress(moduleNameOffset, entryData.moduleNameAddr))
{
std::size_t moduleNameOffset = entryData.moduleNameAddr - baseAddress;
moduleName = retdec::utils::readNullTerminatedAscii(allBytes.data(), allBytes.size(), moduleNameOffset);
}

if (entryData.apiNameAddr >= baseAddress)
unsigned long long apiNameOffset;
if (!getOffsetFromAddress(apiNameOffset, entryData.apiNameAddr))
{
std::size_t apiNameOffset = entryData.apiNameAddr - baseAddress;
apiName = retdec::utils::readNullTerminatedAscii(allBytes.data(), allBytes.size(), apiNameOffset);
}

@@ -1003,16 +986,15 @@ bool PeFormat::parseVisualBasicExternTable(std::size_t structureOffset, std::siz
/**
* Parse visual basic object table
* @param structureOffset Offset in file where the structure starts
* @param baseAddress Base address
* @return @c true if object table was successfully parsed, @c false otherwise
*/
bool PeFormat::parseVisualBasicObjectTable(std::size_t structureOffset, std::size_t baseAddress)
bool PeFormat::parseVisualBasicObjectTable(std::size_t structureOffset)
{
auto allBytes = getBytes();
std::vector<std::uint8_t> bytes;
std::size_t offset = 0;
std::size_t projectNameOffset = 0;
std::size_t objectDescriptorsOffset = 0;
unsigned long long projectNameOffset = 0;
unsigned long long objectDescriptorsOffset = 0;
struct VBObjectTable vbot;
std::string projName;

@@ -1069,17 +1051,15 @@ bool PeFormat::parseVisualBasicObjectTable(std::size_t structureOffset, std::siz
visualBasicInfo.setProjectSecondaryLCID(vbot.LCID2);
visualBasicInfo.setObjectTableGUID(vbot.objectGUID);

if (!visualBasicInfo.hasProjectName() && vbot.projectNameAddr >= baseAddress)
if (!visualBasicInfo.hasProjectName() && getOffsetFromAddress(projectNameOffset, vbot.projectNameAddr))
{
projectNameOffset = vbot.projectNameAddr - baseAddress;
projName = retdec::utils::readNullTerminatedAscii(allBytes.data(), allBytes.size(), projectNameOffset);
visualBasicInfo.setProjectName(projName);
}

if (vbot.objectDescriptorsAddr >= baseAddress)
if (getOffsetFromAddress(objectDescriptorsOffset, vbot.objectDescriptorsAddr))
{
objectDescriptorsOffset = vbot.objectDescriptorsAddr - baseAddress;
parseVisualBasicObjects(objectDescriptorsOffset, baseAddress, vbot.nObjects);
parseVisualBasicObjects(objectDescriptorsOffset, vbot.nObjects);
}

visualBasicInfo.computeObjectTableHashes();
@@ -1089,12 +1069,10 @@ bool PeFormat::parseVisualBasicObjectTable(std::size_t structureOffset, std::siz
/**
* Parse visual basic objects
* @param structureOffset Offset in file where the public object descriptors array starts
* @param baseAddress Base address
* @param nObjects Number of objects in array
* @return @c true if objects were successfully parsed, @c false otherwise
*/
bool PeFormat::parseVisualBasicObjects(std::size_t structureOffset, std::size_t baseAddress,
std::size_t nObjects)
bool PeFormat::parseVisualBasicObjects(std::size_t structureOffset, std::size_t nObjects)
{
auto allBytes = getBytes();
std::vector<std::uint8_t> bytes;
@@ -1140,20 +1118,19 @@ bool PeFormat::parseVisualBasicObjects(std::size_t structureOffset, std::size_t
vbpod.null = byteSwap32(vbpod.null);
}

if (vbpod.objectNameAddr < baseAddress)
unsigned long long objectNameOffset;
if (!getOffsetFromAddress(objectNameOffset, vbpod.objectNameAddr))
{
continue;
}


std::size_t objectNameOffset = vbpod.objectNameAddr - baseAddress;
std::string objectName = readNullTerminatedAscii(allBytes.data(), allBytes.size(), objectNameOffset);
object = std::make_unique<VisualBasicObject>();
object->setName(objectName);

if (vbpod.methodNamesAddr >= baseAddress)
unsigned long long methodAddrOffset;
if (getOffsetFromAddress(methodAddrOffset, vbpod.methodNamesAddr))
{
std::size_t methodAddrOffset = vbpod.methodNamesAddr - baseAddress;
for (std::size_t mIdx = 0; mIdx < vbpod.nMethods; mIdx++)
{
if (!getBytes(bytes, methodAddrOffset + mIdx * sizeof(std::uint32_t), sizeof(std::uint32_t))
@@ -1162,19 +1139,19 @@ bool PeFormat::parseVisualBasicObjects(std::size_t structureOffset, std::size_t
break;
}

std::uint32_t methodNameAddr = *reinterpret_cast<std::uint32_t *>(bytes.data());
auto methodNameAddr = *reinterpret_cast<std::uint32_t *>(bytes.data());

if (!isLittleEndian())
{
methodNameAddr = byteSwap32(methodNameAddr);
}

if (methodNameAddr < baseAddress)
unsigned long long methodNameOffset;
if (!getOffsetFromAddress(methodNameOffset, methodNameAddr))
{
continue;
}

std::size_t methodNameOffset = methodNameAddr - baseAddress;
std::string methodName = readNullTerminatedAscii(allBytes.data(), allBytes.size(), methodNameOffset);
object->addMethod(methodName);
}