Capstone Disassembler Cheatsheet
Installation Instructions
# Using vcpkg
vcpkg install capstone
# Using Chocolatey
choco install capstone
# Debian/Ubuntu
sudo apt-get install libcapstone-dev python3-capstone
# RHEL/CentOS
sudo yum install capstone-devel python3-capstone
# Arch Linux
sudo pacman -S capstone python-capstone
# macOS (Homebrew)
brew install capstone
# Python bindings (pip)
pip3 install capstone
Initialize Capstone:
from capstone import *
# Create the disassembler handle for x86 in 64-bit mode.
md = Cs(CS_ARCH_X86, CS_MODE_64)
Basic disassembly:
# Machine code to decode. The escapes must be contiguous: a literal space
# inside the bytes literal would add a 0x20 byte to the instruction stream.
CODE = b"\x55\x48\x8b\x05\xb8\x13\x00\x00"

# Decode CODE with 0x1000 as the address of its first byte and print one
# line per instruction.
for i in md.disasm(CODE, 0x1000):
    print(f"0x{i.address:x} :\t {i.mnemonic} \t {i.op_str} ")
ARM disassembly:
# Disassembler handle for ARM in ARM mode.
md = Cs(CS_ARCH_ARM, CS_MODE_ARM)
MIPS disassembly:
# Disassembler handle for MIPS in MIPS32 mode.
md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32)
PowerPC disassembly:
# Disassembler handle for PowerPC in 32-bit mode.
md = Cs(CS_ARCH_PPC, CS_MODE_32)
32-bit mode:
# Disassembler handle for x86 in 32-bit mode.
md = Cs(CS_ARCH_X86, CS_MODE_32)
ARM thumb mode:
# Disassembler handle for ARM in Thumb mode.
md = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
Big endian mode:
# Disassembler handle for ARM with the big-endian mode flag.
md = Cs(CS_ARCH_ARM, CS_MODE_BIG_ENDIAN)
Get instruction details:
# Decode only the first instruction (count=1) and print its id, its size
# and its raw bytes.
for i in md.disasm(CODE, 0x1000, count=1):
    print(f"ID: {i.id} ")
    print(f"Size: {i.size} ")
    print(f"Bytes: {i.bytes} ")
Access operands:
# Operand data is only populated in detail mode; with detail off the
# binding raises CsError when `operands` is read.
md.detail = True

# Print the type code of every operand of every decoded instruction.
for i in md.disasm(CODE, 0x1000):
    for op in i.operands:
        print(f"Operand: {op.type} ")
Skip data:
# Turn on SKIPDATA mode, then configure it: "db" is the mnemonic reported
# for skipped bytes, and the two None entries leave the callback and its
# user data unset.
md.skipdata = True
md.skipdata_setup = ("db", None, None)
Set syntax:
# Switch the textual output to AT&T syntax.
md.syntax = CS_OPT_SYNTAX_ATT
Enable detail mode (needed before reading operands, groups, or regs_read):
md.detail = True
Analyze memory references:
# Text-level check: print the operand string of every instruction whose
# operands contain an opening bracket.
for i in md.disasm(CODE, 0x1000):
    if "[" in i.op_str:
        print(f"Memory reference: {i.op_str} ")
Get memory operands:
# Operand data is only populated in detail mode; with detail off the
# binding raises CsError when `operands` is read.
md.detail = True

# Print base, index and scale of every memory operand. Iterating an empty
# operand list is a no-op, so no separate length check is needed.
for i in md.disasm(CODE, 0x1000):
    for op in i.operands:
        if op.type != CS_OP_MEM:
            continue
        print(f"Base: {op.mem.base} ")
        print(f"Index: {op.mem.index} ")
        print(f"Scale: {op.mem.scale} ")
Get register name:
# Look up the name for the register id held in `reg_id`.
reg_name = md.reg_name(reg_id)
Get registers read by an instruction:
# `regs_read` is detail data; with detail off the binding raises CsError
# when it is accessed.
md.detail = True

# For every instruction that reports at least one read register, print the
# names of those registers.
for i in md.disasm(CODE, 0x1000):
    if i.regs_read:
        print("Registers read:", [md.reg_name(x) for x in i.regs_read])
Check for errors:
# `errno` is a method on the Cs object: it must be called to obtain the
# last error code. Comparing the uncalled method with CS_ERR_OK is always
# unequal, so the original check reported an error unconditionally.
status = md.errno()
if status != CS_ERR_OK:
    print(f"Error: {status} ")
Error handling wrapper:
# Disassemble CODE and print each instruction; a CsError raised by the
# binding is reported instead of propagating.
try:
    for i in md.disasm(CODE, 0x1000):
        print(f"{i.mnemonic} {i.op_str} ")
except CsError as e:
    print(f"Error: {e} ")
Get instruction groups:
# `groups` is detail data; with detail off the binding raises CsError when
# it is accessed.
md.detail = True

# Print the group ids of every instruction that belongs to at least one group.
for i in md.disasm(CODE, 0x1000):
    if i.groups:
        print("Groups:", i.groups)
Set option:
# The Python binding exposes engine options as properties on the Cs object
# (syntax, detail, skipdata, mode) rather than through a set_option()
# method; assigning `syntax` selects Intel output.
md.syntax = CS_OPT_SYNTAX_INTEL
Get option:
# There is no get_option() on the Cs object; the current syntax setting is
# read back through the `syntax` property.
syntax = md.syntax
Analyze function:
def analyze_function(code, address):
    """Print the address of every call instruction found in ``code``.

    Args:
        code: Raw machine code, decoded with the module-level ``md``.
        address: Address assigned to the first byte of ``code``.

    Note:
        Group membership is detail data, so ``md.detail`` has to be enabled
        before calling this function.
    """
    for i in md.disasm(code, address):
        if i.group(CS_GRP_CALL):
            print(f"Call at 0x{i.address:x} ")
Find specific instructions:
def find_instruction(code, mnemonic):
    """Return every decoded instruction whose mnemonic equals ``mnemonic``.

    Args:
        code: Raw machine code, decoded from address 0 with the module-level
            ``md``.
        mnemonic: Mnemonic string to match exactly.

    Returns:
        A list of the matching instruction objects, in decode order.
    """
    return [i for i in md.disasm(code, 0) if i.mnemonic == mnemonic]
Identify jumps:
def find_jumps(code, address):
    """Return the instructions in ``code`` that belong to the jump group.

    Args:
        code: Raw machine code, decoded with the module-level ``md``.
        address: Address assigned to the first byte of ``code``.

    Returns:
        A list of instruction objects for which ``group(CS_GRP_JUMP)`` is
        truthy, in decode order.

    Note:
        Group membership is detail data, so ``md.detail`` has to be enabled
        before calling this function.
    """
    return [i for i in md.disasm(code, address) if i.group(CS_GRP_JUMP)]
With Binary Ninja:
def capstone_to_binja(instruction):
    """Copy the basic fields of an instruction into a plain dict.

    Args:
        instruction: Object exposing ``address``, ``size``, ``mnemonic`` and
            ``op_str`` attributes.

    Returns:
        A dict with the keys ``'address'``, ``'size'``, ``'mnemonic'`` and
        ``'op_str'`` holding the corresponding attribute values.
    """
    return {
        'address': instruction.address,
        'size': instruction.size,
        'mnemonic': instruction.mnemonic,
        'op_str': instruction.op_str,
    }
With IDA Pro:
def ida_to_capstone(ea, size):
    """Decode the single instruction at ``ea`` from bytes fetched by ``get_bytes``.

    Args:
        ea: Address passed to ``get_bytes`` and used as the decode address.
        size: Number of bytes to fetch.

    Returns:
        The first decoded instruction, or ``None`` when no bytes were
        returned or nothing could be decoded.
    """
    # Local name chosen so the builtin `bytes` is not shadowed.
    raw = get_bytes(ea, size)
    if not raw:
        return None
    # Third argument is the instruction count: only one instruction is needed.
    # The None default keeps StopIteration from leaking to the caller.
    return next(md.disasm(raw, ea, 1), None)
Batch processing:
def process_batch(code, batch_size=1000):
    """Yield the instructions of ``code``, decoding at most ``batch_size`` bytes at a time.

    Addresses start at 0x1000 for the first byte of ``code``.

    Each window starts where the previous one stopped decoding, not at a
    fixed multiple of ``batch_size``. An instruction that straddles the end
    of a window is therefore decoded whole at the start of the next window.

    Args:
        code: Raw machine code, decoded with the module-level ``md``.
        batch_size: Maximum number of bytes handed to the disassembler per
            call. Must be positive.

    Yields:
        Instruction objects in address order.

    Raises:
        ValueError: If ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    base = 0x1000
    offset = 0
    total = len(code)
    while offset < total:
        window = code[offset:offset + batch_size]
        consumed = 0
        for insn in md.disasm(window, base + offset):
            consumed += insn.size
            yield insn
        # If nothing decoded at the start of the window, skip the whole
        # window so the loop always advances.
        offset += consumed if consumed else len(window)
Iterator usage:
# Lightweight variant of disasm(): decode CODE with 0x1000 as the address
# of its first byte.
iterator = md.disasm_lite(CODE, 0x1000)
Custom output format:
def format_instruction(insn):
    """Format one instruction as a single text line.

    Args:
        insn: Object exposing ``address`` (int), ``mnemonic`` and ``op_str``
            (strings).

    Returns:
        The address as 8 zero-padded hex digits, the mnemonic padded to 8
        columns, and the operand string.
    """
    return f"{insn.address:08x} : {insn.mnemonic:8} {insn.op_str} "
Print all details:
def print_instruction_details(insn):
    """Print address, mnemonic, operand string, size and bytes of ``insn``.

    Args:
        insn: Object exposing ``address``, ``mnemonic``, ``op_str``, ``size``
            and ``bytes``; ``bytes`` must provide a ``hex()`` method.
    """
    print(f"Address: 0x{insn.address:x} ")
    print(f"Mnemonic: {insn.mnemonic} ")
    print(f"Op str: {insn.op_str} ")
    print(f"Size: {insn.size} ")
    print(f"Bytes: {insn.bytes.hex()} ")
Analyze binary file:
def analyze_file(filename):
    """Disassemble the whole content of a file and print one line per instruction.

    Args:
        filename: Path of the file to read in binary mode. Its bytes are
            decoded from address 0 with the module-level ``md``.
    """
    with open(filename, 'rb') as f:
        code = f.read()
    # The file is closed before decoding starts; only `code` is needed here.
    for i in md.disasm(code, 0x0):
        print(f"0x{i.address:x} :\t {i.mnemonic} \t {i.op_str} ")
Instruction statistics:
def get_statistics(code):
    """Count how often each mnemonic occurs in ``code``.

    Args:
        code: Raw machine code, decoded from address 0 with the module-level
            ``md``.

    Returns:
        A plain dict mapping mnemonic to occurrence count, keyed in order of
        first appearance.
    """
    from collections import Counter

    # Converted back to dict so the return type stays what callers received
    # from the hand-written counting loop.
    return dict(Counter(i.mnemonic for i in md.disasm(code, 0)))
Function boundary detection:
def find_function_end(code, start):
    """Locate the first ``ret`` instruction at or after offset ``start``.

    Args:
        code: Raw machine code.
        start: Offset into ``code`` where decoding begins.

    Returns:
        The offset into ``code`` of the first instruction whose mnemonic is
        exactly ``'ret'``, or ``None`` when decoding ends without one.
    """
    # Decoding is relative to the slice (address 0), so `start` is added back
    # to express the result as an offset into `code`.
    for i in md.disasm(code[start:], 0):
        if i.mnemonic == 'ret':
            return start + i.address
    return None
Cross references:
def find_xrefs(code, target):
    """Return addresses of instructions whose operand text mentions ``target``.

    Args:
        code: Raw machine code, decoded from address 0 with the module-level
            ``md``.
        target: Integer value to look for among the hexadecimal literals of
            each operand string.

    Returns:
        A list of instruction addresses, in decode order.
    """
    import re

    # Compare whole hex literals by value. A plain substring search would
    # treat 0x10 as present in "0x1000" and report a false reference.
    hex_literal = re.compile(r"0x[0-9a-fA-F]+")
    return [
        i.address
        for i in md.disasm(code, 0)
        if any(int(tok, 16) == target for tok in hex_literal.findall(i.op_str))
    ]
Check for privileged instructions:
def check_privileged(code):
    """Return the instructions in ``code`` that belong to the privileged group.

    Args:
        code: Raw machine code, decoded from address 0 with the module-level
            ``md``.

    Returns:
        A list of instruction objects for which ``group(CS_GRP_PRIVILEGE)``
        is truthy, in decode order.

    Note:
        Group membership is detail data, so ``md.detail`` has to be enabled
        before calling this function.
    """
    return [i for i in md.disasm(code, 0) if i.group(CS_GRP_PRIVILEGE)]
Find system calls:
def find_syscalls(code):
    """Return every instruction in ``code`` whose mnemonic is ``'syscall'``.

    Args:
        code: Raw machine code, decoded from address 0 with the module-level
            ``md``.

    Returns:
        A list of the matching instruction objects, in decode order.
    """
    return [i for i in md.disasm(code, 0) if i.mnemonic == 'syscall']
Track stack operations:
def analyze_stack(code, word_size=8):
    """Sum the stack-pointer change caused by ``push`` and ``pop`` instructions.

    Only instructions whose mnemonic is exactly ``'push'`` or ``'pop'`` are
    counted; every other instruction is ignored.

    Args:
        code: Raw machine code, decoded from address 0 with the module-level
            ``md``.
        word_size: Bytes moved by one push or pop. Defaults to 8.

    Returns:
        The net delta in bytes: negative for each push, positive for each pop.
    """
    stack_delta = 0
    for i in md.disasm(code, 0):
        if i.mnemonic == 'push':
            stack_delta -= word_size
        elif i.mnemonic == 'pop':
            stack_delta += word_size
    return stack_delta
Find instruction patterns:
def find_pattern(code, pattern):
    """Return the instructions whose attributes match every entry of ``pattern``.

    Args:
        code: Raw machine code, decoded from address 0 with the module-level
            ``md``.
        pattern: Mapping of attribute name to required value. An instruction
            matches when ``getattr(insn, name) == value`` holds for every
            item; an empty mapping matches every instruction.

    Returns:
        A list of the matching instruction objects, in decode order.
    """
    return [
        i
        for i in md.disasm(code, 0)
        if all(getattr(i, attr) == value for attr, value in pattern.items())
    ]
Match instruction sequence:
def match_sequence(code, sequence):
    """Find every position where consecutive mnemonics equal ``sequence``.

    Args:
        code: Raw machine code, decoded from address 0 with the module-level
            ``md``.
        sequence: Mnemonic strings to match, in order.

    Returns:
        A list with the address of the first instruction of each match.
        Overlapping matches are all reported. An empty ``sequence`` yields
        an empty list.
    """
    from collections import deque

    wanted = list(sequence)
    if not wanted:
        # Nothing to match. Returning early also avoids accumulating every
        # decoded instruction in a buffer that could never match.
        return []

    # A bounded deque drops the oldest instruction automatically, replacing
    # the O(n) list.pop(0) per step.
    window = deque(maxlen=len(wanted))
    matches = []
    for i in md.disasm(code, 0):
        window.append(i)
        if len(window) < len(wanted):
            continue
        if all(b.mnemonic == s for b, s in zip(window, wanted)):
            matches.append(window[0].address)
    return matches
Export to JSON:
import json
def export_to_json(instructions):
    """Serialise instructions to a JSON array of objects.

    Args:
        instructions: Iterable of objects exposing ``address``, ``mnemonic``
            and ``op_str`` attributes.

    Returns:
        A JSON string holding one object per instruction with the keys
        ``address``, ``mnemonic`` and ``op_str``.
    """
    return json.dumps([
        {
            'address': i.address,
            'mnemonic': i.mnemonic,
            'op_str': i.op_str,
        }
        for i in instructions
    ])