From 726d66869e9fa1088acdc0c7df4f6ea57ea7fa62 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Sat, 23 Nov 2024 22:16:22 -0800 Subject: [PATCH 1/5] CHB:ARM: save cfg info --- CodeHawk/CHB/bchanalyze/bCHFileIO.ml | 63 +++++++++++++++++++ CodeHawk/CHB/bchanalyze/bCHFileIO.mli | 6 +- CodeHawk/CHB/bchcmdline/bCHXBinaryAnalyzer.ml | 8 +++ .../CHB/bchlibarm32/bCHARMAssemblyFunction.ml | 2 + .../bchlibarm32/bCHARMAssemblyFunctions.ml | 8 ++- .../bchlibarm32/bCHARMAssemblyInstructions.ml | 10 +++ CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli | 1 + .../bchlibarm32/bCHConstructARMFunction.ml | 13 +++- 8 files changed, 107 insertions(+), 4 deletions(-) diff --git a/CodeHawk/CHB/bchanalyze/bCHFileIO.ml b/CodeHawk/CHB/bchanalyze/bCHFileIO.ml index 442201d1..5ec422d5 100644 --- a/CodeHawk/CHB/bchanalyze/bCHFileIO.ml +++ b/CodeHawk/CHB/bchanalyze/bCHFileIO.ml @@ -59,6 +59,7 @@ open BCHMIPSDictionary (* bchlibarm32 *) open BCHARMAssemblyInstructions open BCHARMDictionary +open BCHARMLoopStructure (* bchlibpower32 *) open BCHPowerAssemblyInstructions @@ -92,6 +93,7 @@ let get_bch_root (info:string):xml_element_int = end +(* applies to x86 only *) let save_functions_list () = let filename = get_functions_filename () in let doc = xmlDocument () in @@ -125,6 +127,67 @@ let save_functions_list () = end +let save_arm_functions_list () = + let filename = get_functions_filename () in + let doc = xmlDocument () in + let root = get_bch_root "functions" in + let ffNode = xmlElement "functions" in + let subnodes = ref [] in + begin + BCHARMAssemblyFunctions.arm_assembly_functions#itera (fun faddr f -> + let fNode = xmlElement "fn" in + let jtc = f#get_jumptable_count in + let (translation, lc, ld, ujc) = + try + begin + BCHTranslateARMToCHIF.translate_arm_assembly_function f; + record_arm_loop_levels faddr; + ("ok", + get_arm_loop_count_from_table f, + get_arm_loop_depth_from_table f, + (-1)) + end + with + | BCH_failure p -> + let finfo = BCHFunctionInfo.get_function_info faddr in + let ujc = 
finfo#get_unknown_jumps_count in + (CHPrettyUtil.pretty_to_string p, (-1), (-1), ujc) in + let set = fNode#setAttribute in + let seti = fNode#setIntAttribute in + let setx t x = set t x#to_hex_string in + begin + (if functions_data#has_function_name faddr then + let name = (functions_data#get_function faddr)#get_function_name in + let name = + if has_control_characters name then + "__xx__" ^ (hex_string name) + else + name in + set "name" name); + setx "va" faddr; + seti "ic" f#get_instruction_count; + seti "bc" f#get_block_count; + (if jtc > 0 then seti "jtc" jtc); + (if translation = "ok" then + begin + (if lc > 0 then seti "lc" lc); + (if ld > 0 then seti "ld" ld); + end + else + begin + set "tr" "x"; + seti "ujc" ujc + end); + subnodes := fNode :: !subnodes + end); + ffNode#appendChildren !subnodes; + doc#setNode root; + root#appendChildren [ffNode]; + file_output#saveFile filename doc#toPretty + end + + + let save_global_state () = let filename = get_global_state_filename () in let doc = xmlDocument () in diff --git a/CodeHawk/CHB/bchanalyze/bCHFileIO.mli b/CodeHawk/CHB/bchanalyze/bCHFileIO.mli index fa2c2939..2e4b867e 100644 --- a/CodeHawk/CHB/bchanalyze/bCHFileIO.mli +++ b/CodeHawk/CHB/bchanalyze/bCHFileIO.mli @@ -36,8 +36,12 @@ open BCHLibTypes (* bchlibx86 *) open BCHLibx86Types - +(** save function cfg info (x86 only) *) val save_functions_list: unit -> unit + +(** save function cfg info for arm functions *) +val save_arm_functions_list: unit -> unit + val save_global_state: unit -> unit val save_system_info: unit -> unit val save_resultmetrics: xml_element_int -> unit diff --git a/CodeHawk/CHB/bchcmdline/bCHXBinaryAnalyzer.ml b/CodeHawk/CHB/bchcmdline/bCHXBinaryAnalyzer.ml index 3177a79b..6018c7c8 100644 --- a/CodeHawk/CHB/bchcmdline/bCHXBinaryAnalyzer.ml +++ b/CodeHawk/CHB/bchcmdline/bCHXBinaryAnalyzer.ml @@ -107,6 +107,7 @@ let export_directory = ref "" let savecfgs = ref false let save_xml = ref false (* save disassembly status in xml *) let save_asm 
= ref false +let save_asm_cfg_info = ref false (* save functions list with cfg info in xml *) let set_datablocks = ref false (* only supported for arm *) let construct_all_functions = ref false @@ -204,6 +205,8 @@ let speclist = "save disassembly status in xml for bulk evaluation"); ("-save_asm", Arg.Unit (fun () -> save_asm := true), "save assembly listing in the analysis directory"); + ("-save_asm_cfg_info", Arg.Unit (fun () -> save_asm_cfg_info := true), + "save list of functions with cfg info to xml file (may be slow)"); ("-construct_all_functions", Arg.Unit (fun () -> construct_all_functions := true), "construct all functions even if analyzing only a few of them"); @@ -588,6 +591,11 @@ let main () = (get_duplicate_coverage_filename ()) (STR (BCHARMAssemblyFunctions.arm_assembly_functions#duplicates_to_string)); pr_timing [STR "duplicates listing saved"]; + (if !save_asm_cfg_info then + begin + save_arm_functions_list (); + pr_timing [STR "function cfg info saved"] + end); save_system_info (); pr_timing [STR "system_info saved"]; save_arm_dictionary (); diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyFunction.ml b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyFunction.ml index 2d1deae2..048d78fb 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyFunction.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyFunction.ml @@ -106,6 +106,8 @@ object (self) method get_block_count = List.length blocks + method get_jumptable_count = List.length self#get_jumptables + method get_not_valid_instr_count = let c = ref 0 in let _ = diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyFunctions.ml b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyFunctions.ml index 0fc0d958..f3c17af6 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyFunctions.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyFunctions.ml @@ -915,6 +915,12 @@ let get_arm_disassembly_metrics () = let loaded_imports = [] in let imports = imported_imports @ loaded_imports in let numunknown = 
!arm_assembly_instructions#get_num_unknown_instructions in + let jumptables = + List.fold_left (fun acc (va, _) -> + if arm_assembly_functions#includes_instruction_address va then + acc + 1 + else + acc) 0 !arm_assembly_instructions#get_jumptables in { dm_unknown_instrs = numunknown; dm_instrs = instrs; dm_functions = arm_assembly_functions#get_num_functions; @@ -922,7 +928,7 @@ let get_arm_disassembly_metrics () = dm_pcoverage = 100.0 *. (float_of_int coverage) /. (float_of_int instrs) ; dm_overlap = overlap; dm_alloverlap = alloverlap; - dm_jumptables = List.length system_info#get_jumptables; + dm_jumptables = jumptables; dm_datablocks = List.length system_info#get_data_blocks; dm_imports = imports; dm_so_imports = system_info#dmso_metrics; diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.ml b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.ml index 5244fefb..b9eb1944 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMAssemblyInstructions.ml @@ -37,6 +37,7 @@ open CHXmlDocument (* bchlib *) open BCHBasicTypes open BCHByteUtilities +open BCHConstantDefinitions open BCHDataBlock open BCHDoubleword open BCHFunctionData @@ -702,6 +703,15 @@ object (self) ^ " Faddr:<" ^ v#to_hex_string ^ ">" + else if has_symbolic_address_name v then + let name = get_symbolic_address_name v in + " " + ^ (fixed_length_string addr 10) + ^ " Sym:<" + ^ v#to_hex_string + ^ ":" + ^ name + ^ ">" else if elf_header#is_code_address v then " " ^ (fixed_length_string addr 10) diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli b/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli index 75670906..9632a778 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli +++ b/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli @@ -1813,6 +1813,7 @@ class type arm_assembly_function_int = method get_function_md5: string method get_instruction_count: int method get_block_count: int + method get_jumptable_count: int method get_not_valid_instr_count: int 
method get_true_conditional_return: arm_assembly_block_int option diff --git a/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml b/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml index 09d97408..417cc1cb 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml @@ -254,13 +254,22 @@ let get_successors if !arm_assembly_instructions#is_code_address addr then [addr] else - []) + let floc = get_floc_by_address faddr instr#get_address in + begin + floc#f#set_unknown_jumptarget instr#get_address#to_hexstring; + [] + end) [] (numerical_to_doubleword tgt) | _ -> []) (* no information available, give up *) - | Branch _ | BranchExchange _ -> [] + | Branch _ | BranchExchange _ -> + let floc = get_floc_by_address faddr instr#get_address in + begin + floc#f#set_unknown_jumptarget instr#get_address#to_hex_string; + [] + end | _ -> next () in From 52487826548177befd6570887c84fb1a4e9e34b1 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 25 Nov 2024 17:11:18 -0800 Subject: [PATCH 2/5] CHB:ARM: add aggregate for BX indirect call --- .../bchlibarm32/bCHARMInstructionAggregate.ml | 70 ++++++++++++++++++- CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli | 2 + .../bchlibarm32/bCHConstructARMFunction.ml | 3 +- CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml | 3 +- .../CHB/bchlibarm32/bCHFnARMDictionary.ml | 28 +++++++- 5 files changed, 102 insertions(+), 4 deletions(-) diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.ml b/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.ml index 69b054a5..6ba8ea68 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHARMInstructionAggregate.ml @@ -56,6 +56,7 @@ let arm_aggregate_kind_to_string (k: arm_aggregate_kind_t) = ^ " target addresses" | ThumbITSequence it -> it#toString | LDMSTMSequence s -> s#toString + | BXCall (_, i2) -> "BXCall at " ^ i2#get_address#to_hex_string class arm_instruction_aggregate_t @@ -109,6 
+110,11 @@ object (self) | LDMSTMSequence _ -> true | _ -> false + method is_bx_call = + match self#kind with + | BXCall _ -> true + | _ -> false + method write_xml (_node: xml_element_int) = () method toCHIF (_faddr: doubleword_int) = [] @@ -173,6 +179,18 @@ let make_ldm_stm_sequence_aggregate ~anchor:(List.hd (List.tl ldmstmseq#instrs)) +let make_bx_call_aggregate + (movinstr: arm_assembly_instruction_int) + (bxinstr: arm_assembly_instruction_int): arm_instruction_aggregate_int = + let kind = BXCall (movinstr, bxinstr) in + make_arm_instruction_aggregate + ~kind + ~instrs:[movinstr; bxinstr] + ~entry:movinstr + ~exitinstr:bxinstr + ~anchor:bxinstr + + let disassemble_arm_instructions (ch: pushback_stream_int) (iaddr: doubleword_int) (n: int) = for _i = 1 to n do @@ -232,6 +250,52 @@ let identify_ldmstm_sequence | _ -> None +(* format of BX-Call (in ARM) + + An indirect jump combined with a MOV of the PC into the LR converts + into an indirect call (because PC holds the instruction-address + 8) + + MOV LR, PC + BX Rx + *) +let identify_bx_call + (ch: pushback_stream_int) + (instr: arm_assembly_instruction_int): + (arm_assembly_instruction_int * arm_assembly_instruction_int) option = + let disassemble (iaddr: doubleword_int) = + let instrpos = ch#pos in + let bytes = ch#read_doubleword in + let opcode = + try + disassemble_arm_instruction ch iaddr bytes + with + | _ -> + let _ = + chlog#add + "bx-call disassemble-instruction" + (LBLOCK [iaddr#toPretty]) in + OpInvalid in + let instrbytes = ch#sub instrpos 4 in + let instr = make_arm_assembly_instruction iaddr true opcode instrbytes in + begin + set_arm_assembly_instruction instr; + instr + end in + match instr#get_opcode with + | Move (_, ACCAlways, dst, src, _, _) + when src#is_register + && dst#get_register = ARLR + && src#get_register = ARPC -> + begin + let bxinstr = disassemble (instr#get_address#add_int 4) in + match bxinstr#get_opcode with + | BranchExchange (ACCAlways, op) when op#is_register -> + Some 
(instr, bxinstr) + | _ -> None + end + | _ -> None + + let identify_arm_aggregate (ch: pushback_stream_int) (instr: arm_assembly_instruction_int): @@ -254,4 +318,8 @@ let identify_arm_aggregate match identify_ldmstm_sequence ch instr with | Some ldmstmseq -> Some (make_ldm_stm_sequence_aggregate ldmstmseq) - | _ -> None + | _ -> + match identify_bx_call ch instr with + | Some (movinstr, bxinstr) -> + Some (make_bx_call_aggregate movinstr bxinstr) + | _ -> None diff --git a/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli b/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli index 9632a778..3c4cbd95 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli +++ b/CodeHawk/CHB/bchlibarm32/bCHARMTypes.mli @@ -1523,6 +1523,7 @@ type arm_aggregate_kind_t = | ARMJumptable of arm_jumptable_int | ThumbITSequence of thumb_it_sequence_int | LDMSTMSequence of ldm_stm_sequence_int + | BXCall of arm_assembly_instruction_int * arm_assembly_instruction_int class type arm_instruction_aggregate_int = @@ -1544,6 +1545,7 @@ class type arm_instruction_aggregate_int = method is_jumptable: bool method is_it_sequence: bool method is_ldm_stm_sequence: bool + method is_bx_call: bool (* i/o *) method write_xml: xml_element_int -> unit diff --git a/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml b/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml index 417cc1cb..8a56b0c6 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHConstructARMFunction.ml @@ -256,7 +256,8 @@ let get_successors else let floc = get_floc_by_address faddr instr#get_address in begin - floc#f#set_unknown_jumptarget instr#get_address#to_hexstring; + floc#f#set_unknown_jumptarget + instr#get_address#to_hex_string; [] end) [] diff --git a/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml b/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml index 70df91a6..1bcb11b5 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml @@ -719,7 +719,8 @@ let 
set_block_boundaries () = | Branch _ | BranchExchange _ -> (* Don't break up TBB/TBH and other jumptable sequences *) (match instr#is_in_aggregate with - | Some dw -> not (get_aggregate dw)#is_jumptable + | Some dw when (get_aggregate dw)#is_jumptable -> false + | Some dw when (get_aggregate dw)#is_bx_call -> false | _ -> true) | CompareBranchZero _ | CompareBranchNonzero _ -> true | LoadRegister (_, dst, _, _, _, _) diff --git a/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml b/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml index 4ec4ef47..53e83184 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml @@ -403,6 +403,20 @@ object (self) agginstr#get_address#to_hex_string :: acc) [] agg#instrs in tags @ ("subsumes" :: deps) in + let add_bx_call_defs + ?(xprs: xpr_t list = []) + ?(rdefs: int list = []) + (tags: string list) + (args: int list): (string list * int list) = + let tagstring = List.hd tags in + let xprcount = List.length xprs in + let rdefcount = List.length rdefs in + let tagstring = tagstring ^ (string_repeat "x" xprcount) in + let tagstring = tagstring ^ (string_repeat "r" rdefcount) in + let args = args @ (List.map xd#index_xpr xprs) @ rdefs in + let tags = (tagstring :: (List.tl tags)) @ ["bx-call"] in + (tags, args) in + let register_function_prototype (name: string) = if function_summary_library#has_so_function name then let fs = function_summary_library#get_so_function name in @@ -844,7 +858,7 @@ object (self) let tags = add_optional_subsumption [tagstring] in (tags, args) - | BranchExchange _ when instr#is_aggregate_anchor -> + | BranchExchange (_, tgt) when instr#is_aggregate_anchor -> let iaddr = instr#get_address in let agg = (!arm_assembly_instructions)#get_aggregate iaddr in if agg#is_jumptable then @@ -861,6 +875,18 @@ object (self) let tags = tagstring :: ["agg-jt"] in let tags = add_subsumption_dependents agg tags in (tags, args) + else if agg#is_bx_call then + let (tags, args) = 
callinstr_key() in + let xtgt = tgt#to_expr floc in + let xxtgt = rewrite_expr xtgt in + let rdefs = (get_rdef xtgt) :: (get_all_rdefs xxtgt) in + let (tags, args) = + add_bx_call_defs ~xprs:[xtgt; xxtgt] ~rdefs tags args in + (* let (tagstring, args) = + mk_instrx_data ~xprs:[xtgt; xxtgt] ~rdefs () in + let tags = tagstring :: ["agg-bxcall"] in *) + let tags = add_subsumption_dependents agg tags in + (tags, args) else raise (BCH_failure From ccb12b3f7143946d952d25cf2b05462413514101 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Mon, 25 Nov 2024 22:12:47 -0800 Subject: [PATCH 3/5] CHB: add support for arrays of function pointers --- CodeHawk/CHB/bchlib/bCHBCTypeUtil.ml | 16 +++++++++++++++ CodeHawk/CHB/bchlib/bCHBCTypeUtil.mli | 6 ++++++ CodeHawk/CHB/bchlib/bCHCallbackTables.ml | 26 +++++++++++++++++------- CodeHawk/CHB/bchlib/bCHVersion.ml | 4 ++-- CodeHawk/CHB/bchlibelf/bCHELFHeader.ml | 15 +++++++++++--- 5 files changed, 55 insertions(+), 12 deletions(-) diff --git a/CodeHawk/CHB/bchlib/bCHBCTypeUtil.ml b/CodeHawk/CHB/bchlib/bCHBCTypeUtil.ml index 6701ad2f..e27faeda 100644 --- a/CodeHawk/CHB/bchlib/bCHBCTypeUtil.ml +++ b/CodeHawk/CHB/bchlib/bCHBCTypeUtil.ml @@ -243,6 +243,19 @@ let get_element_type (t: btype_t) = raise (BCH_failure (LBLOCK [STR "Not an array type"])) +let get_array_length (t: btype_t): int traceresult = + match t with + | TArray (_, Some len, _) -> + (match len with + | Const (CInt (i64, _, _)) -> Ok (Int64.to_int i64) + | _ -> + Error ["Array does not have a constant length: " ^ (exp_to_string len)]) + | TArray _ -> + Error ["Array does not have a length"] + | _ -> + Error ["get_array_length: not an array: " ^ (btype_to_string t)] + + (* ======================================================= size and alignment *) let resolve_type (btype: btype_t) = bcfiles#resolve_type btype @@ -1177,6 +1190,7 @@ let struct_field_categories (ty: btype_t): string list = | Error e -> e | Ok ty -> match ty with + | TArray (TComp (ckey, _), _, _) | TPtr (TPtr 
(TComp (ckey, _), _), _) | TPtr (TComp (ckey, _), _) -> let compinfo = bcfiles#get_compinfo ckey in @@ -1187,6 +1201,8 @@ let struct_field_categories (ty: btype_t): string list = | TPtr (TFun _, _) -> "address" | _ -> "unknown") compinfo.bcfields + | TArray ((TFun _ | TPtr (TFun _, _)), _, _) -> ["address"] + | rty -> [btype_to_string ty; btype_to_string rty] diff --git a/CodeHawk/CHB/bchlib/bCHBCTypeUtil.mli b/CodeHawk/CHB/bchlib/bCHBCTypeUtil.mli index 9af37adc..9f47edb1 100644 --- a/CodeHawk/CHB/bchlib/bCHBCTypeUtil.mli +++ b/CodeHawk/CHB/bchlib/bCHBCTypeUtil.mli @@ -170,6 +170,12 @@ val size_of_int_ikind: ikind_t -> int val size_of_float_fkind: fkind_t -> int +(** [get_array_length ty] returns the length (number of elements) of an array. + + An error value is returned if the array does not have a constant length, or + does not have a length at all, or if the type is not an array. *) +val get_array_length: btype_t -> int traceresult + (** [size_of_btype ty] returns the size (in bytes) of type [ty]. An error value is returned if the size cannot be determined. 
This may diff --git a/CodeHawk/CHB/bchlib/bCHCallbackTables.ml b/CodeHawk/CHB/bchlib/bCHCallbackTables.ml index 0b3d6a17..363fea45 100644 --- a/CodeHawk/CHB/bchlib/bCHCallbackTables.ml +++ b/CodeHawk/CHB/bchlib/bCHCallbackTables.ml @@ -192,12 +192,21 @@ object (self) let compinfo = bcfiles#get_compinfo ckey in List.iteri (fun i fld -> H.add table (i * 4) fld.bfname) compinfo.bcfields + | TArray (TComp (ckey, _), _, _) -> + let compinfo = bcfiles#get_compinfo ckey in + List.iteri (fun i fld -> + H.add table (i * 4) fld.bfname) compinfo.bcfields + | TArray ((TFun _ | TPtr (TFun _, _)), _, _) -> + H.add table 0 ("cbp_" ^ cba) | _ -> - raise - (BCH_failure - (LBLOCK [ - STR "Unexpected type in creating callback table: "; - btype_to_pretty recty])) in + let msg = + LBLOCK [ + STR "Unexpected type in creating callback table: "; + btype_to_pretty recty] in + begin + ch_error_log#add "call-back-table problem" msg; + raise (BCH_failure msg) + end in table val offsettypes = @@ -214,9 +223,12 @@ object (self) let _ = match recty with | TFun _ -> H.add table 0 ty - | TPtr (TFun (rty, args, b, attr), _) -> + | TPtr (TFun (rty, args, b, attr), _) + | TArray (TFun (rty, args, b, attr), _, _) + | TArray (TPtr (TFun (rty, args, b, attr), _), _, _) -> H.add table 0 (TFun (rty, args, b, attr)) - | TPtr (TComp (ckey, _), _) -> + | TArray (TComp (ckey, _), _, _) + | TPtr (TComp (ckey, _), _) -> let compinfo = bcfiles#get_compinfo ckey in List.iteri (fun i fld -> let offset = i * 4 in diff --git a/CodeHawk/CHB/bchlib/bCHVersion.ml b/CodeHawk/CHB/bchlib/bCHVersion.ml index c6c19ac2..84b00f88 100644 --- a/CodeHawk/CHB/bchlib/bCHVersion.ml +++ b/CodeHawk/CHB/bchlib/bCHVersion.ml @@ -95,8 +95,8 @@ end let version = new version_info_t - ~version:"0.6.0_20241119" - ~date:"2024-11-19" + ~version:"0.6.0_20241125" + ~date:"2024-11-25" ~licensee: None ~maxfilesize: None () diff --git a/CodeHawk/CHB/bchlibelf/bCHELFHeader.ml b/CodeHawk/CHB/bchlibelf/bCHELFHeader.ml index e3b46363..4701e3c1 100644 
--- a/CodeHawk/CHB/bchlibelf/bCHELFHeader.ml +++ b/CodeHawk/CHB/bchlibelf/bCHELFHeader.ml @@ -558,12 +558,15 @@ object(self) system_info#initialize_jumptables system_info#is_code_address xstrings method private extract_call_back_table + ?(len=None) (callbacktable: call_back_table_int) (va: doubleword_int) (fieldkinds: string list) = let nullrecord = ref false in + let count = ref 0 in + let bound = match len with Some len -> len | _ -> BCHDoubleword.e15 in let currva = ref va in - while not !nullrecord do + while not !nullrecord && (!count < bound) do let cbvalues = ref [] in begin List.iteri (fun i s -> @@ -582,7 +585,8 @@ object(self) | _ -> CBTag "**unknown**") | "value" -> CBValue (mkNumerical pv#to_int) | _ -> CBValue numerical_zero in - cbvalues := ((i * 4), cbv) :: !cbvalues) fieldkinds; + cbvalues := ((i * 4), cbv) :: !cbvalues) fieldkinds; + count := !count + 1; (if List.for_all (fun (_, v) -> match v with | CBValue n -> n#equal numerical_zero || n#equal (mkNumerical (-1)) @@ -649,7 +653,12 @@ object(self) let fieldkinds = struct_field_categories varinfo.bvtype in let callbacktable = callbacktables#new_table addr varinfo.bvtype in let va = TR.tget_ok (string_to_doubleword addr) in - self#extract_call_back_table callbacktable va fieldkinds + if is_array_type varinfo.bvtype then + let len = get_array_length varinfo.bvtype in + self#extract_call_back_table + ~len:(TR.to_option len) callbacktable va fieldkinds + else + self#extract_call_back_table callbacktable va fieldkinds else chlog#add "call-back-table-variable" From f2525b1de94aa2d0a39f860fd3f54c8ab3792987 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Tue, 26 Nov 2024 12:13:37 -0800 Subject: [PATCH 4/5] CHB:ARM: result reporting for BX-call --- .../CHB/bchlibarm32/bCHFnARMDictionary.ml | 69 +++++++++---------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml b/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml index 53e83184..abd890fe 100644 --- 
a/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHFnARMDictionary.ml @@ -785,6 +785,39 @@ object (self) let (tags, args) = add_optional_instr_condition tagstring args c in (tags, args) + | BranchExchange (_, tgt) when instr#is_aggregate_anchor -> + let iaddr = instr#get_address in + let agg = (!arm_assembly_instructions)#get_aggregate iaddr in + if agg#is_jumptable then + let jt = agg#jumptable in + let indexregop = jt#index_operand in + let xrn = indexregop#to_expr floc in + let xxrn = rewrite_expr xrn in + let rdefs = (get_rdef xrn) :: (get_all_rdefs xxrn) in + let (tagstring, args) = + mk_instrx_data + ~xprs:[xrn; xxrn] + ~rdefs:rdefs + () in + let tags = tagstring :: ["agg-jt"] in + let tags = add_subsumption_dependents agg tags in + (tags, args) + else if agg#is_bx_call then + let (tags, args) = callinstr_key() in + let xtgt = tgt#to_expr floc in + let xxtgt = rewrite_expr xtgt in + let rdefs = (get_rdef xtgt) :: (get_all_rdefs xxtgt) in + let (tags, args) = + add_bx_call_defs ~xprs:[xtgt; xxtgt] ~rdefs tags args in + let tags = add_subsumption_dependents agg tags in + (tags, args) + else + raise + (BCH_failure + (LBLOCK [ + STR "Aggregate for BranchExchange not recognized at "; + iaddr#toPretty])) + | Branch _ | BranchExchange _ when floc#has_call_target -> callinstr_key () @@ -858,42 +891,6 @@ object (self) let tags = add_optional_subsumption [tagstring] in (tags, args) - | BranchExchange (_, tgt) when instr#is_aggregate_anchor -> - let iaddr = instr#get_address in - let agg = (!arm_assembly_instructions)#get_aggregate iaddr in - if agg#is_jumptable then - let jt = agg#jumptable in - let indexregop = jt#index_operand in - let xrn = indexregop#to_expr floc in - let xxrn = rewrite_expr xrn in - let rdefs = (get_rdef xrn) :: (get_all_rdefs xxrn) in - let (tagstring, args) = - mk_instrx_data - ~xprs:[xrn; xxrn] - ~rdefs:rdefs - () in - let tags = tagstring :: ["agg-jt"] in - let tags = add_subsumption_dependents agg tags in - 
(tags, args) - else if agg#is_bx_call then - let (tags, args) = callinstr_key() in - let xtgt = tgt#to_expr floc in - let xxtgt = rewrite_expr xtgt in - let rdefs = (get_rdef xtgt) :: (get_all_rdefs xxtgt) in - let (tags, args) = - add_bx_call_defs ~xprs:[xtgt; xxtgt] ~rdefs tags args in - (* let (tagstring, args) = - mk_instrx_data ~xprs:[xtgt; xxtgt] ~rdefs () in - let tags = tagstring :: ["agg-bxcall"] in *) - let tags = add_subsumption_dependents agg tags in - (tags, args) - else - raise - (BCH_failure - (LBLOCK [ - STR "Aggregate for BranchExchange not recognized at "; - iaddr#toPretty])) - | BranchExchange (c, tgt) when tgt#is_register && tgt#get_register = ARLR -> let r0_op = arm_register_op AR0 RD in From a2f4a427a621cb68eddad765d1e290cc18f54645 Mon Sep 17 00:00:00 2001 From: Henny Sipma Date: Wed, 27 Nov 2024 10:42:54 -0800 Subject: [PATCH 5/5] CHB:ARM: disable heuristic for finding non-returning functions --- CodeHawk/CHB/bchlib/bCHVersion.ml | 4 +- CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml | 17 +++- .../CHB/bchlibarm32/bCHTranslateARMToCHIF.ml | 93 ++++++++++--------- 3 files changed, 66 insertions(+), 48 deletions(-) diff --git a/CodeHawk/CHB/bchlib/bCHVersion.ml b/CodeHawk/CHB/bchlib/bCHVersion.ml index 84b00f88..bd42ea4a 100644 --- a/CodeHawk/CHB/bchlib/bCHVersion.ml +++ b/CodeHawk/CHB/bchlib/bCHVersion.ml @@ -95,8 +95,8 @@ end let version = new version_info_t - ~version:"0.6.0_20241125" - ~date:"2024-11-25" + ~version:"0.6.0_20241127" + ~date:"2024-11-27" ~licensee: None ~maxfilesize: None () diff --git a/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml b/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml index 1bcb11b5..b815f2ec 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHDisassembleARM.ml @@ -56,7 +56,6 @@ open BCHELFTypes open BCHARMAssemblyFunctions open BCHARMAssemblyInstruction open BCHARMAssemblyInstructions -open BCHARMCallSitesRecords open BCHARMInstructionAggregate open BCHARMPseudocode open 
BCHARMOpcodeRecords @@ -455,6 +454,18 @@ let get_so_target (tgtaddr:doubleword_int) (_instr:arm_assembly_instruction_int) None +let register_non_returning_functions () = + List.map (fun fndata -> + if fndata#is_non_returning then + () + else if fndata#has_name then + let fname = fndata#get_function_name in + if function_summary_library#has_so_function fname then + let fsum = function_summary_library#get_so_function fname in + if fsum#is_nonreturning then + fndata#set_non_returning) functions_data#get_functions + + (* can be used before functions have been constructed *) let is_nr_call_instruction (instr:arm_assembly_instruction_int) = match instr#get_opcode with @@ -947,9 +958,11 @@ let construct_functions_arm ?(construct_all_functions=false) () = List.iter (fun dw -> ignore (functions_data#add_function dw)) (List.map (fun s -> TR.tget_ok (string_to_doubleword s)) fns_included) in + let _ = register_non_returning_functions () in let _ = collect_call_targets () in let _ = set_block_boundaries () in let _ = pr_timing [STR "block boundaries set"] in + (* Disabled for now; it generates too many spurious non-returning functions. 
let _ = !arm_assembly_instructions#collect_callsites in let _ = pr_timing [STR "callsites collected"] in let _ = @@ -958,7 +971,7 @@ let construct_functions_arm ?(construct_all_functions=false) () = if functions_data#is_function_entry_point faddr then let fndata = functions_data#get_function faddr in fndata#set_non_returning) nonrfns in - let _ = pr_timing [STR "non-returning functions set"] in + let _ = pr_timing [STR "non-returning functions set"] in *) let fnentrypoints = if ((List.length fns_included) = 0) || construct_all_functions then functions_data#get_function_entry_points diff --git a/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.ml b/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.ml index 557ab13e..df8ebdd0 100644 --- a/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.ml +++ b/CodeHawk/CHB/bchlibarm32/bCHTranslateARMToCHIF.ml @@ -643,13 +643,14 @@ let translate_arm_instruction | Some dw -> (get_aggregate dw)#is_jumptable | _ -> false in let check_storage (_op: arm_operand_int) (v: variable_t) = - if (floc#env#is_unknown_memory_variable v) || v#isTemporary then - ch_error_log#add - "unknown storage location" - (LBLOCK [ - floc#l#toPretty; - STR " "; - STR (arm_opcode_to_string instr#get_opcode)]) in + if BCHSystemSettings.system_settings#collect_data then + if (floc#env#is_unknown_memory_variable v) || v#isTemporary then + ch_error_log#add + "unknown storage location" + (LBLOCK [ + floc#l#toPretty; + STR " "; + STR (arm_opcode_to_string instr#get_opcode)]) in let calltgt_cmds (_tgt: arm_operand_int): cmd_t list = let callargs = floc#get_call_arguments in @@ -2803,18 +2804,19 @@ let translate_arm_instruction let xrn = rewrite_expr floc (rn#to_expr floc) in let xrm = rewrite_expr floc (rm#to_expr floc) in begin - ch_error_log#add - "assignment to unknown memory" - (LBLOCK [ - floc#l#toPretty; - STR " STR ["; - rn#toPretty; - STR ", "; - rm#toPretty; - STR "]; base: "; - x2p xrn; - STR ", offset: "; - x2p xrm]); + (if 
BCHSystemSettings.system_settings#collect_data then + ch_error_log#add + "assignment to unknown memory" + (LBLOCK [ + floc#l#toPretty; + STR " STR ["; + rn#toPretty; + STR ", "; + rm#toPretty; + STR "]; base: "; + x2p xrn; + STR ", offset: "; + x2p xrm])); [] end else @@ -2870,18 +2872,19 @@ let translate_arm_instruction let xrn = rewrite_expr floc (rn#to_expr floc) in let xrm = rewrite_expr floc (rm#to_expr floc) in begin - ch_error_log#add - "assignment to unknown memory" - (LBLOCK [ - floc#l#toPretty; - STR " STRB ["; - rn#toPretty; - STR ", "; - rm#toPretty; - STR "]; base: "; - x2p xrn; - STR ", offset: "; - x2p xrm]); + (if BCHSystemSettings.system_settings#collect_data then + ch_error_log#add + "assignment to unknown memory" + (LBLOCK [ + floc#l#toPretty; + STR " STRB ["; + rn#toPretty; + STR ", "; + rm#toPretty; + STR "]; base: "; + x2p xrn; + STR ", offset: "; + x2p xrm])); [] end else @@ -3033,18 +3036,19 @@ let translate_arm_instruction let xrn = rewrite_expr floc (rn#to_expr floc) in let xrm = rewrite_expr floc (rm#to_expr floc) in begin - ch_error_log#add - "assignment to unknown memory" - (LBLOCK [ - floc#l#toPretty; - STR " STRH ["; - rn#toPretty; - STR ", "; - rm#toPretty; - STR "]; base: "; - x2p xrn; - STR ", offset: "; - x2p xrm]); + (if BCHSystemSettings.system_settings#collect_data then + ch_error_log#add + "assignment to unknown memory" + (LBLOCK [ + floc#l#toPretty; + STR " STRH ["; + rn#toPretty; + STR ", "; + rm#toPretty; + STR "]; base: "; + x2p xrn; + STR ", offset: "; + x2p xrm])); [] end else @@ -3069,7 +3073,8 @@ let translate_arm_instruction if mem#is_offset_address_writeback then let addr_r = mem#to_updated_offset_address floc in log_tfold_default - (log_error "invalid write-back address" ((p2s floc#l#toPretty) ^ ": STRH")) + (log_error + "invalid write-back address" ((p2s floc#l#toPretty) ^ ": STRH")) (fun (_, addr) -> let rnreg = rn#to_register in let (vrn, ucmds) =