Skip to content

Commit

Permalink
Implement rebase blob support (#442)
Browse files Browse the repository at this point in the history
Co-authored-by: Florian Magin <[email protected]>
  • Loading branch information
fmagin and fmagin authored Nov 19, 2023
1 parent e2e2db1 commit 63ebfce
Show file tree
Hide file tree
Showing 5 changed files with 627 additions and 16 deletions.
111 changes: 104 additions & 7 deletions cle/backends/macho/binding.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@

import logging
import struct
from typing import TYPE_CHECKING, Callable, Dict, Tuple
from typing import TYPE_CHECKING, Callable, Dict, Optional, Tuple

from cle.address_translator import AT
from cle.backends.relocation import Relocation
from cle.errors import CLEInvalidBinaryError

from .macho_enums import RebaseOpcode, RebaseType
from .symbol import AbstractMachOSymbol, BindingSymbol, DyldBoundSymbol, SymbolTableSymbol

if TYPE_CHECKING:
Expand Down Expand Up @@ -201,6 +202,103 @@ def do_lazy_bind(self, blob):

log.debug("Done binding lazy symbols")

def do_rebases(self, blob: bytes):
    """
    Interpret the rebase opcode stream and record a relocation for every pointer it rebases.

    Implementation based closely on ImageLoaderMachOCompressed::rebase from dyld
    https://github.com/apple-opensource/dyld/blob/e3f88907bebb8421f50f0943595f6874de70ebe0/src/ImageLoaderMachOCompressed.cpp#L382-L463

    Each byte of the blob is split by RebaseOpcode.parse_byte into an opcode and an immediate;
    some opcodes are followed by ULEB128 operands. The interpreter keeps three pieces of state
    (rebase type, current segment, current address) and calls rebase_at for every "DO_REBASE"
    opcode.

    :param blob: The raw rebase blob, or None if there is nothing to rebase
    :raises CLEInvalidBinaryError: If the blob contains an unknown opcode or rebase type, refers to
                                   a segment that does not exist, rebases before the segment/offset
                                   or the type were set, or rebases an address outside of the
                                   current segment
    :return:
    """
    if blob is None:
        return

    # Loop invariant, used by every opcode that advances the address by one pointer
    pointer_size = self.binary.arch.bytes

    def rebase_checked(target, target_segment, target_type):
        # Every rebase gets the same validation, not only the ULEB_TIMES variants
        if target_type is None:
            raise CLEInvalidBinaryError("Rebase requested before a rebase type was set")
        if target >= target_segment.vaddr + target_segment.memsize:
            raise CLEInvalidBinaryError("Rebase address %#x is outside of the current segment" % target)
        self.rebase_at(target, target_type)

    # State variables
    reloc_type: Optional[RebaseType] = None
    segment = None
    address = None
    index = 0
    end = len(blob)
    while index < end:
        try:
            opcode, immediate = RebaseOpcode.parse_byte(blob[index])
        except ValueError as err:
            # parse_byte rejects opcodes that are not part of the enum
            raise CLEInvalidBinaryError("Invalid opcode in rebase blob: %#x" % blob[index]) from err
        index += 1

        if opcode == RebaseOpcode.DONE:
            break

        if opcode == RebaseOpcode.SET_TYPE_IMM:
            try:
                reloc_type = RebaseType(immediate)
            except ValueError as err:
                raise CLEInvalidBinaryError("Invalid rebase type: %#x" % immediate) from err
            continue

        if opcode == RebaseOpcode.SET_SEGMENT_AND_OFFSET_ULEB:
            try:
                segment = self.binary.segments[immediate]
            except IndexError as err:
                raise CLEInvalidBinaryError("Rebase blob refers to nonexistent segment %d" % immediate) from err
            offset, index = self.read_uleb(blob, index)
            address = segment.vaddr + offset
            continue

        # All remaining opcodes modify or use the current address, so it has to be set by now.
        # Without this check a malformed blob would surface as a TypeError on `None += int`.
        if address is None:
            raise CLEInvalidBinaryError("Rebase opcode %s used before segment and offset were set" % opcode.name)

        if opcode == RebaseOpcode.ADD_ADDR_ULEB:
            delta, index = self.read_uleb(blob, index)
            address += delta

        elif opcode == RebaseOpcode.ADD_ADDR_IMM_SCALED:
            address += immediate * pointer_size

        elif opcode == RebaseOpcode.DO_REBASE_IMM_TIMES:
            for _ in range(immediate):
                rebase_checked(address, segment, reloc_type)
                address += pointer_size

        elif opcode == RebaseOpcode.DO_REBASE_ULEB_TIMES:
            count, index = self.read_uleb(blob, index)
            for _ in range(count):
                rebase_checked(address, segment, reloc_type)
                address += pointer_size

        elif opcode == RebaseOpcode.DO_REBASE_ADD_ADDR_ULEB:
            rebase_checked(address, segment, reloc_type)
            delta, index = self.read_uleb(blob, index)
            address += delta + pointer_size

        elif opcode == RebaseOpcode.DO_REBASE_ULEB_TIMES_SKIPPING_ULEB:
            count, index = self.read_uleb(blob, index)
            skip, index = self.read_uleb(blob, index)
            for _ in range(count):
                rebase_checked(address, segment, reloc_type)
                address += skip + pointer_size

        else:
            # Defensive: only reachable if RebaseOpcode gains a member that is not handled above
            raise CLEInvalidBinaryError("Invalid opcode in rebase blob: %#x" % opcode)

@staticmethod
def read_uleb(blob, offset) -> Tuple[int, int]:
    """
    Decode one ULEB value from the blob and report where decoding should continue

    Thin wrapper around the module level read_uleb, which reports the number of bytes it
    consumed; this variant converts that length into the index following the value.

    :param blob: The buffer to decode from
    :param offset: Index in the blob at which the ULEB value starts
    :return: Tuple of (decoded value, index of the first byte after the value)
    """
    value, consumed = read_uleb(blob, offset)
    next_index = offset + consumed
    return value, next_index

def rebase_at(self, address: int, ty: RebaseType):
    """
    Create a MachOPointerRelocation for the pointer stored at the given address and add it to
    the relocations of the binary.

    :param address: Address of the pointer to rebase, converted to a relative address via AT.from_lva
    :param ty: The rebase type that is active in the rebase opcode stream
    :raises NotImplementedError: For RebaseType.TEXT_PCREL32, which is not supported
    :raises ValueError: If ty is not a known rebase type (including None)
    :return:
    """
    relative_rebase_location = AT.from_lva(address, self.binary).to_rva()
    # The value currently stored at the location is the pointer as the linker wrote it
    unslid_pointer = self.binary.memory.unpack_word(relative_rebase_location)
    relative_pointer = AT.from_lva(unslid_pointer, self.binary).to_rva()

    # NOTE(review): TEXT_ABSOLUTE32 is handled exactly like POINTER, although the value above is
    # read with unpack_word - confirm this is right for 32 bit values on 64 bit targets
    if ty in (RebaseType.POINTER, RebaseType.TEXT_ABSOLUTE32):
        reloc = MachOPointerRelocation(self.binary, relative_rebase_location, relative_pointer)
    elif ty == RebaseType.TEXT_PCREL32:
        raise NotImplementedError("TEXT_PCREL32 rebases are not supported")
    else:
        # "%#x" only works for integers; fall back to repr so that e.g. None yields the
        # intended ValueError instead of a TypeError from the string formatting
        shown = "%#x" % ty if isinstance(ty, int) else repr(ty)
        raise ValueError("Invalid rebase type: %s" % shown)
    self.binary.relocs.append(reloc)

def _do_bind_generic(
self,
blob,
Expand Down Expand Up @@ -387,7 +485,7 @@ def n_opcode_do_bind_uleb_times_skipping_uleb(s: BindingState, b: "MachO", _i: i
return s


class MachORelocation(Relocation):
class MachOSymbolRelocation(Relocation):
"""
Generic Relocation for MachO. It handles relocations that point to symbols
"""
Expand Down Expand Up @@ -423,11 +521,10 @@ def __repr__(self):
return f"<MachO Reloc for {self.symbol} at {hex(self.relative_addr)}>"


class MachOChainedFixup(Relocation):
class MachOPointerRelocation(Relocation):
"""
A special kind of relocation that handles internal pointers in the binary.
This was introduced with iOS15+ and is somewhat explained here
https://github.com/datatheorem/strongarm/blob/release/chained_fixup_pointers.md
A relocation for a pointer without any associated symbol
These are either generated while handling the rebase blob, or while parsing chained fixups
"""

def __init__(self, owner: "MachO", relative_addr: int, data):
Expand Down Expand Up @@ -487,7 +584,7 @@ def default_binding_handler(state: BindingState, binary: "MachO"):
log.debug("Updating address %#x with symobl %r @ %#x", location, state.sym_name, value)
addr = AT.from_lva(location, binary).to_rva()
data = struct.pack(binary.struct_byteorder + ("Q" if binary.arch.bits == 64 else "I"), value)
reloc = MachORelocation(binary, symbol, addr, data)
reloc = MachOSymbolRelocation(binary, symbol, addr, data)
binary.relocs.append(reloc)
symbol.bind_xrefs.append(location)
elif state.binding_type == 2: # ABSOLUTE32
Expand Down
7 changes: 4 additions & 3 deletions cle/backends/macho/macho.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from sortedcontainers import SortedKeyList

from cle.backends.backend import AT, Backend, register_backend
from cle.backends.macho.binding import BindingHelper, MachOChainedFixup, MachORelocation, read_uleb
from cle.backends.macho.binding import BindingHelper, MachOPointerRelocation, MachOSymbolRelocation, read_uleb
from cle.backends.regions import Regions
from cle.errors import CLECompatibilityError, CLEInvalidBinaryError, CLEOperationError

Expand Down Expand Up @@ -458,6 +458,7 @@ def do_binding(self):
bh = BindingHelper(self) # TODO: Make this configurable
bh.do_normal_bind(self.binding_blob)
bh.do_lazy_bind(self.lazy_binding_blob)
bh.do_rebases(self.rebase_blob)
if self.weak_binding_blob is not None and len(self.weak_binding_blob) > 0:
log.info(
"Found weak binding blob. According to current state of knowledge, weak binding "
Expand Down Expand Up @@ -990,15 +991,15 @@ def _parse_dyld_chained_fixups(self):
if bind is not None:
libOrdinal, _addend = bind
import_symbol = self._dyld_imports[libOrdinal]
reloc = MachORelocation(self, import_symbol, current_chain_addr, None)
reloc = MachOSymbolRelocation(self, import_symbol, current_chain_addr, None)
self.relocs.append(reloc)
# Legacy Code uses bind_xrefs, explicitly add this to make this compatible for now
import_symbol.bind_xrefs.append(reloc.dest_addr + self.linked_base)
log.debug("Binding for %s found at %x", import_symbol, current_chain_addr)
elif rebase is not None:
target = self.linked_base + rebase
location: MemoryPointer = self.linked_base + current_chain_addr
anon_reloc = MachOChainedFixup(owner=self, relative_addr=current_chain_addr, data=rebase)
anon_reloc = MachOPointerRelocation(owner=self, relative_addr=current_chain_addr, data=rebase)
self.relocs.append(anon_reloc)
log.debug("Rebase to %x found at %x", target, location)

Expand Down
49 changes: 49 additions & 0 deletions cle/backends/macho/macho_enums.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from enum import IntEnum
from typing import Tuple


class LoadCommands(IntEnum):
Expand Down Expand Up @@ -164,3 +165,51 @@ class MH_flags(IntEnum):
# Only for use on dylibs.
# When this bit is set, the dylib is part of the dyld shared cache, rather than loose in the filesystem
MH_DYLIB_IN_CACHE = 0x80000000


class RebaseType(IntEnum):
    """
    The kinds of rebase that a rebase opcode stream can select, from mach-o/loader.h

    The numeric value is what the immediate of a SET_TYPE_IMM rebase opcode carries.
    """

    # Names follow the REBASE_TYPE_* constants of the header
    POINTER = 1
    TEXT_ABSOLUTE32 = 2
    TEXT_PCREL32 = 3


class RebaseOpcode(IntEnum):
    """
    Opcodes of the rebase blob, from mach-o/loader.h

    Every opcode byte carries the opcode in the bits selected by REBASE_OPCODE_MASK and an
    immediate value in the bits selected by REBASE_IMMEDIATE_MASK; use parse_byte to split it.

    #define REBASE_OPCODE_MASK					0xF0
    #define REBASE_IMMEDIATE_MASK					0x0F
    #define REBASE_OPCODE_DONE					0x00
    #define REBASE_OPCODE_SET_TYPE_IMM				0x10
    #define REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB		0x20
    #define REBASE_OPCODE_ADD_ADDR_ULEB				0x30
    #define REBASE_OPCODE_ADD_ADDR_IMM_SCALED			0x40
    #define REBASE_OPCODE_DO_REBASE_IMM_TIMES			0x50
    #define REBASE_OPCODE_DO_REBASE_ULEB_TIMES			0x60
    #define REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB			0x70
    #define REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB	0x80
    """

    DONE = 0x00
    SET_TYPE_IMM = 0x10
    SET_SEGMENT_AND_OFFSET_ULEB = 0x20
    ADD_ADDR_ULEB = 0x30
    ADD_ADDR_IMM_SCALED = 0x40
    DO_REBASE_IMM_TIMES = 0x50
    DO_REBASE_ULEB_TIMES = 0x60
    DO_REBASE_ADD_ADDR_ULEB = 0x70
    DO_REBASE_ULEB_TIMES_SKIPPING_ULEB = 0x80

    @staticmethod
    def parse_byte(byte: int) -> Tuple["RebaseOpcode", int]:
        """
        Split a byte into the RebaseOpcode and the immediate value

        :param byte: A single byte of the rebase blob as an integer in the range 0..255
        :raises ValueError: If byte is outside of 0..255, or if its opcode bits do not
                            correspond to any known RebaseOpcode
        :return: Tuple of (opcode, immediate)
        """
        # Explicit check instead of an assert: asserts are stripped when running with -O, and an
        # out of range value would then be silently masked down to some valid looking opcode
        if not 0 <= byte <= 255:
            raise ValueError("Not a single byte: %r" % (byte,))
        return RebaseOpcode(byte & 0xF0), byte & 0x0F
6 changes: 3 additions & 3 deletions tests/test_macho_dyld.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import cle
from cle import MachO
from cle.backends.macho.binding import MachOChainedFixup, MachORelocation
from cle.backends.macho.binding import MachOPointerRelocation, MachOSymbolRelocation

TEST_BASE = Path(__file__).resolve().parent.parent.parent / "binaries"

Expand Down Expand Up @@ -39,7 +39,7 @@ def test_fixups():
0x10000C128: 0x1000079F0,
}

actual = {r.rebased_addr: r.value for r in binary.relocs if isinstance(r, MachOChainedFixup)}
actual = {r.rebased_addr: r.value for r in binary.relocs if isinstance(r, MachOPointerRelocation)}
assert actual == expected


Expand Down Expand Up @@ -147,7 +147,7 @@ def test_symbols():
result = [
(r.rebased_addr, r.resolvedby.name)
for r in binary.relocs
if isinstance(r, MachORelocation) # only relocs that deal with symbols
if isinstance(r, MachOSymbolRelocation) # only relocs that deal with symbols
]
assert expected == result

Expand Down
Loading

0 comments on commit 63ebfce

Please sign in to comment.