Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to Capstone release 4.0.2 with patch #1086

Merged
merged 6 commits into from
Jul 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cmake/deps.cmake
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@

# URL is for Capstone release 4.0.2.
set(CAPSTONE_URL
"https://github.com/aquynh/capstone/archive/bc8a649b35188786754ea1b0bddd5cb48a039162.zip"
"https://github.com/capstone-engine/capstone/archive/1d230532840a37ac032c6ab80128238fc930c6c1.zip"
CACHE STRING "URL of Capstone archive to use."
)
set(CAPSTONE_ARCHIVE_SHA256
"7d3075bce1f5622279c16a6f62fe8c548d4544bfc82292f6bf43907d0317fd10"
"659097fcda59ce927937f73dd87a4606de6e768b352045a077ed8d2165b7e935"
CACHE STRING ""
)

Expand Down
11 changes: 9 additions & 2 deletions deps/capstone/patch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,16 @@ string(REPLACE
"${content}"
)

if("${new_content}" STREQUAL "${content}")
string(REPLACE
"static void printS16ImmOperand(MCInst *MI, unsigned OpNo, SStream *O)\n{\n\tif (MCOperand_isImm(MCInst_getOperand(MI, OpNo))) {\n\t\tunsigned short Imm = (unsigned short)MCOperand_getImm(MCInst_getOperand(MI, OpNo));\n if (Imm > HEX_THRESHOLD)\n SStream_concat(O, \"0x%x\", Imm);\n else\n SStream_concat(O, \"%u\", Imm);\n"
"static void printS16ImmOperand(MCInst *MI, unsigned OpNo, SStream *O)\n{\n\tif (MCOperand_isImm(MCInst_getOperand(MI, OpNo))) {\n// RetDec fix\n\t\tshort Imm = (short)MCOperand_getImm(MCInst_getOperand(MI, OpNo));\n\t\tSStream_concat(O, \"%d\", Imm);\n"
new_content2
"${new_content}"
)

if("${new_content2}" STREQUAL "${content}")
message(STATUS "-- Patching: ${full_path} skipped")
else()
message(STATUS "-- Patching: ${full_path} patched")
file(WRITE "${full_path}" "${new_content}")
file(WRITE "${full_path}" "${new_content2}")
endif()
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,34 @@ class ParamReturn : public llvm::ModulePass
// Modification of functions in IR.
//
private:
// Calls to dynamically-linked functions go through the procedure linkage
// table (PLT). RetDec turns a PLT entry into a function, say
// malloc@plt, that appears to do nothing but call the external function,
// say malloc (though the assembly code will do a jump rather than a
// call). User code that logically wants to call malloc instead calls
// malloc@plt (and sets up arguments as if calling malloc). The
// malloc@plt code first jumps to the dynamic linker which modifies it so
// that subsequent calls to malloc@plt will jump directly to malloc. We
// say that malloc@plt wraps malloc. The call to malloc in malloc@plt
// will not have any arguments set up, so malloc will appear to have
// no parameters or returns (unless that information is provided by
// link-time-information, debug information, or name demangling), but it
// needs to have the same parameter types and return type as
// malloc@plt. The propagateWrapped methods copy the argument information
// from the DataFlowEntry of the wrapping function to the wrapped
// function. Then, when the calls to the wrapping function are inlined
// (in connectWrappers), effectively the call to the wrapping function is
// changed into a call to the wrapped function.
//
// The motivation for this change is that programs that analyze the
// output of RetDec (either the C code, or the LLVM code) want to
// recognize library functions and treat them specially. This
// change makes it so that the library function names are used
// directly (rather than the plt version) and they are passed
// their parameters correctly.

void propagateWrapped();
void propagateWrapped(DataFlowEntry& de);
void applyToIr();
void applyToIr(DataFlowEntry& de);
void connectWrappers(const DataFlowEntry& de);
Expand Down
2 changes: 1 addition & 1 deletion include/retdec/common/range.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class InvalidRangeException : public std::exception

virtual const char* what() const noexcept override
{
return "Invalid Range: end is greater than start";
return "Invalid Range: start is greater than end";
}
};

Expand Down
57 changes: 55 additions & 2 deletions src/bin2llvmir/optimizations/param_return/param_return.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ bool ParamReturn::run()
collectAllCalls();
// dumpInfo();
filterCalls();
// dumpInfo();
propagateWrapped();
// dumpInfo();
applyToIr();

Expand Down Expand Up @@ -373,6 +375,7 @@ void ParamReturn::collectExtraData(DataFlowEntry* dataflow) const
//
if (CallInst* wrappedCall = getWrapper(fnc))
{
dataflow->setWrappedCall(wrappedCall);
auto* wf = wrappedCall->getCalledFunction();
auto* ltiFnc = _lti->getLlvmFunctionFree(wf->getName());
if (ltiFnc)
Expand All @@ -397,7 +400,6 @@ void ParamReturn::collectExtraData(DataFlowEntry* dataflow) const
dataflow->setVariadic();
}
dataflow->setRetType(ltiFnc->getReturnType());
dataflow->setWrappedCall(wrappedCall);

return;
}
Expand All @@ -407,7 +409,6 @@ void ParamReturn::collectExtraData(DataFlowEntry* dataflow) const
{
LOG << "wrapper: " << _demangler->demangleToString(wf->getName()) << std::endl;
modifyWithDemangledData(*dataflow, demFuncPair);
dataflow->setWrappedCall(wrappedCall);

return;
}
Expand Down Expand Up @@ -578,6 +579,7 @@ void ParamReturn::dumpInfo(const DataFlowEntry& de) const
auto wrappedCall = de.getWrappedCall();

LOG << "\n\t>|" << called->getName().str() << std::endl;
LOG << "\t>|&DataFlowEntry : " << &de << std::endl;
LOG << "\t>|fnc call : " << de.isFunction() << std::endl;
LOG << "\t>|val call : " << de.isValue() << std::endl;
LOG << "\t>|variadic : " << de.isVariadic() << std::endl;
Expand Down Expand Up @@ -921,6 +923,57 @@ void ParamReturn::modifyType(DataFlowEntry& de) const
de.setArgs(std::move(args));
}

void ParamReturn::propagateWrapped() {
for (auto& p : _fnc2calls)
{
propagateWrapped(p.second);
}
}

/**
 * Copies the argument types, argument names and return type recorded for a
 * wrapping function onto the data-flow entry of the function it wraps.
 *
 * Nothing happens when @p de has no function or no recorded wrapped call.
 * If the recorded wrapped call is not the first call in the function body
 * whose called function is known and is not an intrinsic, the wrapped call
 * stored in @p de is reset to @c nullptr and nothing is propagated.
 * An entry of a wrapped function that already has argument types is left
 * untouched.
 *
 * @param de Data-flow entry of the (potential) wrapping function.
 */
void ParamReturn::propagateWrapped(DataFlowEntry& de)
{
	auto* fnc = de.getFunction();
	auto* wrappedCall = de.getWrappedCall();
	if (fnc == nullptr || wrappedCall == nullptr)
	{
		return;
	}

	// Locate the first call in the body that has a known, non-intrinsic
	// callee; it must be the very call recorded as the wrapped call.
	llvm::CallInst* firstCall = nullptr;
	for (inst_iterator I = inst_begin(fnc), E = inst_end(fnc); I != E; ++I)
	{
		auto* c = dyn_cast<CallInst>(&*I);
		if (c == nullptr)
		{
			continue;
		}

		auto* cf = c->getCalledFunction();
		if (cf && !cf->isIntrinsic()) // && cf->isDeclaration())
		{
			firstCall = c;
			break;
		}
	}

	if (wrappedCall != firstCall)
	{
		// Something strange. Reset wrapped call and give up.
		de.setWrappedCall(nullptr);
		return;
	}

	auto* callee = wrappedCall->getCalledFunction();
	auto fIt = _fnc2calls.find(callee);
	assert(fIt != _fnc2calls.end());
	if (fIt == _fnc2calls.end())
	{
		// The assert above is compiled out when NDEBUG is defined; never
		// dereference the end iterator in such builds.
		return;
	}

	DataFlowEntry& wrapDe = fIt->second;
	// dumpInfo(de);
	// dumpInfo(wrapDe);

	if (!wrapDe.argTypes().empty())
	{
		// Types have already been supplied.
		return;
	}

	// Pass copies, so that the wrapper's own entry keeps its information.
	wrapDe.setArgTypes(std::vector(de.argTypes()), std::vector(de.argNames()));
	wrapDe.setRetType(de.getRetType());
	// dumpInfo(wrapDe);
}

void ParamReturn::applyToIr()
{
for (auto& p : _fnc2calls)
Expand Down
14 changes: 1 addition & 13 deletions src/capstone2llvmir/powerpc/powerpc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2145,19 +2145,7 @@ void Capstone2LlvmIrTranslatorPowerpc_impl::translateB(cs_insn* i, cs_ppc* pi, l
uint32_t crReg = PPC_REG_CR0;
ppc_bc crBc = pi->bc;

// TODO: Special handling because of Capstone bug:
// https://github.com/aquynh/capstone/issues/968
if (i->id == PPC_INS_BDZLA)
{
if (pi->op_count != 1
|| pi->operands[0].type != PPC_OP_IMM)
{
throw GenericError("unhandled PPC_INS_BDZLA format");
}

target = llvm::ConstantInt::get(getDefaultType(), pi->operands[0].imm - i->address);
}
else if (toLR)
if (toLR)
{
target = loadRegister(PPC_REG_LR, irb);

Expand Down
2 changes: 2 additions & 0 deletions src/capstone2llvmir/x86/x86_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2113,6 +2113,8 @@ Capstone2LlvmIrTranslatorX86_impl::_i2fm =
{X86_INS_VCMPGE_OQPD, nullptr},
{X86_INS_VCMPGT_OQPD, nullptr},
{X86_INS_VCMPTRUE_USPD, nullptr},
{X86_INS_ENDBR32, &Capstone2LlvmIrTranslatorX86_impl::translateNop},
{X86_INS_ENDBR64, &Capstone2LlvmIrTranslatorX86_impl::translateNop},

{X86_INS_ENDING, nullptr}, // mark the end of the list of insn
};
Expand Down
7 changes: 6 additions & 1 deletion src/loader/loader/pe/pe_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,12 @@ Segment* PeImage::addSingleSegment(std::uint64_t address, std::vector<std::uint8

bool PeImage::canAddSegment(std::uint64_t address, std::uint64_t memSize) const
{
retdec::common::Range<std::uint64_t> newSegRange(address, memSize ? address + memSize : address + 1);
std::uint64_t end = memSize ? address + memSize : address + 1;
// check for potential overflow - wrap around, memsize should be at most 32bit, so this could suffice
if (end < address)
end = std::numeric_limits<std::uint64_t>::max();

retdec::common::Range<std::uint64_t> newSegRange(address, end);
for (const auto& seg : getSegments())
{
auto overlapResult = OverlapResolver::resolve(retdec::common::Range<std::uint64_t>(seg->getAddress(), seg->getEndAddress()), newSegRange);
Expand Down