Skip to content

Commit b6763ac

Browse files
committed
binja: retrieve the LLIL instruction itself without requesting the entire IL function
1 parent 5a284de commit b6763ac

File tree

4 files changed

+36
-50
lines changed

4 files changed

+36
-50
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
- ghidra: fix saving of base address @mr-tz
3232
- binja: support loading raw x86/x86_64 shellcode #2489 @xusheng6
3333
- binja: fix crash when the IL of certain functions is not available. #2249 @xusheng6
34+
- binja: major performance improvement on the binja extractor. #1414 @xusheng6
3435

3536
### capa Explorer Web
3637

capa/features/extractors/binja/function.py

Lines changed: 7 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,13 @@
77
# See the License for the specific language governing permissions and limitations under the License.
88
from typing import Iterator
99

10-
from binaryninja import Function, BinaryView, SymbolType, ILException, RegisterValueType, LowLevelILOperation
10+
from binaryninja import Function, BinaryView, SymbolType, LowLevelILOperation
1111

1212
from capa.features.file import FunctionName
1313
from capa.features.common import Feature, Characteristic
1414
from capa.features.address import Address, AbsoluteVirtualAddress
1515
from capa.features.extractors import loops
16+
from capa.features.extractors.binja.helpers import get_llil_instr_at_addr
1617
from capa.features.extractors.base_extractor import FunctionHandle
1718

1819

@@ -24,14 +25,7 @@ def extract_function_calls_to(fh: FunctionHandle):
2425
# Everything that is a code reference to the current function is considered a caller, which actually includes
2526
# many other references that are NOT a caller. For example, an instruction `push function_start` will also be
2627
# considered a caller to the function
27-
llil = None
28-
try:
29-
# Temporary fix for https://github.com/Vector35/binaryninja-api/issues/6020. Since `.llil` can throw an
30-
# exception rather than returning None
31-
llil = caller.llil
32-
except ILException:
33-
continue
34-
28+
llil = get_llil_instr_at_addr(func.view, caller.address)
3529
if (llil is None) or llil.operation not in [
3630
LowLevelILOperation.LLIL_CALL,
3731
LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
@@ -40,14 +34,13 @@ def extract_function_calls_to(fh: FunctionHandle):
4034
]:
4135
continue
4236

43-
if llil.dest.value.type not in [
44-
RegisterValueType.ImportedAddressValue,
45-
RegisterValueType.ConstantValue,
46-
RegisterValueType.ConstantPointerValue,
37+
if llil.dest.operation not in [
38+
LowLevelILOperation.LLIL_CONST,
39+
LowLevelILOperation.LLIL_CONST_PTR,
4740
]:
4841
continue
4942

50-
address = llil.dest.value.value
43+
address = llil.dest.constant
5144
if address != func.start:
5245
continue
5346

capa/features/extractors/binja/helpers.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,10 @@
66
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
77
# See the License for the specific language governing permissions and limitations under the License.
88
import re
9-
from typing import Callable
9+
from typing import Callable, Optional
1010
from dataclasses import dataclass
1111

12-
from binaryninja import BinaryView, LowLevelILInstruction
12+
from binaryninja import BinaryView, LowLevelILFunction, LowLevelILInstruction
1313
from binaryninja.architecture import InstructionTextToken
1414

1515

@@ -67,3 +67,13 @@ def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str:
6767
s.append(chr(c))
6868

6969
return "".join(s)
70+
71+
72+
def get_llil_instr_at_addr(bv: BinaryView, addr: int) -> Optional[LowLevelILInstruction]:
    """Lift the single instruction at `addr` to LLIL without requesting a whole IL function.

    The bytes at `addr` are read from `bv` and lifted by the view's architecture into a
    fresh, standalone `LowLevelILFunction`; the first IL instruction produced is returned.

    NOTE(review): the returned instruction belongs to a throwaway IL function that was not
    produced by function analysis, so callers should presumably inspect its structure
    (`operation`, constant operands) rather than analysis-derived values -- confirm.

    Args:
        bv: the view to read instruction bytes from.
        addr: the address of the instruction to lift.

    Returns:
        The first LLIL instruction lifted at `addr`, or None when the view has no
        architecture, no bytes can be read at `addr`, or the lifter produces nothing.
    """
    arch = bv.arch
    if arch is None:
        # Without an architecture there is nothing that can lift the bytes.
        return None

    buffer = bv.read(addr, arch.max_instr_length)
    if not buffer:
        # Unmapped/unreadable address: nothing to lift.
        return None

    llil = LowLevelILFunction(arch=arch)
    llil.current_address = addr
    # A falsy result (0, or None if the lifter reports failure that way) means
    # no instruction was lifted.
    if not arch.get_instruction_low_level_il(buffer, addr, llil):
        return None

    try:
        return llil[0]
    except IndexError:
        # The lifter reported success but appended no IL instruction.
        return None

capa/features/extractors/binja/insn.py

Lines changed: 16 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
BinaryView,
1414
ILRegister,
1515
SymbolType,
16-
ILException,
1716
BinaryReader,
1817
RegisterValueType,
1918
LowLevelILOperation,
@@ -24,7 +23,7 @@
2423
from capa.features.insn import API, MAX_STRUCTURE_SIZE, Number, Offset, Mnemonic, OperandNumber, OperandOffset
2524
from capa.features.common import MAX_BYTES_FEATURE_SIZE, Bytes, String, Feature, Characteristic
2625
from capa.features.address import Address, AbsoluteVirtualAddress
27-
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs
26+
from capa.features.extractors.binja.helpers import DisassemblyInstruction, visit_llil_exprs, get_llil_instr_at_addr
2827
from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle
2928

3029
# security cookie checks may perform non-zeroing XORs, these are expected within a certain
@@ -37,40 +36,23 @@
3736
# 2. The function must only make one call/jump to another address
3837
# If the function being checked is a stub function, returns the target address. Otherwise, return None.
3938
def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]:
    """Return the target of the call/jump at `addr` if it is a direct transfer, else None.

    Only the single instruction at `addr` is lifted (via `get_llil_instr_at_addr`); the
    function's full IL is never requested.

    Args:
        bv: the view containing the candidate stub.
        addr: the address of the candidate stub's first instruction.

    Returns:
        The constant destination address when the instruction at `addr` is a
        call / tail call / jump to a constant destination; None otherwise.
    """
    llil = get_llil_instr_at_addr(bv, addr)
    if llil is None or llil.operation not in [
        LowLevelILOperation.LLIL_CALL,
        LowLevelILOperation.LLIL_CALL_STACK_ADJUST,
        LowLevelILOperation.LLIL_JUMP,
        LowLevelILOperation.LLIL_TAILCALL,
    ]:
        return None

    # The instruction was lifted on its own rather than taken from an analyzed function,
    # so inspect the destination expression's structure directly instead of relying on
    # `llil.dest.value`, which presumably needs analysis results that a standalone lift
    # does not have. This mirrors the check used for caller detection in function.py.
    if llil.dest.operation not in [
        LowLevelILOperation.LLIL_CONST,
        LowLevelILOperation.LLIL_CONST_PTR,
    ]:
        return None

    return llil.dest.constant
7456

7557

7658
def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]:

0 commit comments

Comments
 (0)