From 0c3a095cf178bf7ba4125912709df61acc8609f7 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 26 Aug 2025 11:43:18 +0200 Subject: [PATCH 1/2] High-level disassembly API --- src/LLVM.jl | 1 + src/disasm.jl | 38 ++++++++++++++++++++++++++++++++++++++ test/disasm.jl | 10 ++++++++++ test/runtests.jl | 1 + 4 files changed, 50 insertions(+) create mode 100644 src/disasm.jl create mode 100644 test/disasm.jl diff --git a/src/LLVM.jl b/src/LLVM.jl index 5d59ba7e..d332ce4c 100644 --- a/src/LLVM.jl +++ b/src/LLVM.jl @@ -80,6 +80,7 @@ include("debuginfo.jl") include("utils.jl") include("orc.jl") include("newpm.jl") +include("disasm.jl") # high-level functionality include("state.jl") diff --git a/src/disasm.jl b/src/disasm.jl new file mode 100644 index 00000000..cc77135e --- /dev/null +++ b/src/disasm.jl @@ -0,0 +1,38 @@ +@checked struct DisasmContext + ref::API.LLVMDisasmContextRef +end + +function create_disasm(triple) + ctx = DisasmContext(API.LLVMCreateDisasm(triple, C_NULL, 0, C_NULL, C_NULL)) + return ctx +end + +function dispose(ctx::DisasmContext) + API.LLVMDisasmDispose(ctx.ref) + return nothing +end + +function disassemble_code(io, ctx::DisasmContext, native_code::Vector{UInt8}, code_addr::Csize_t = 0; bufsize=32) + pos = 1 + buf = Vector{UInt8}(undef, bufsize) + + while pos < length(native_code) + GC.@preserve native_code begin + bytes = pointer(native_code, pos) + pc = code_addr + pos - 1 + nbytes = length(native_code) - pos + 1 + nb = API.LLVMDisasmInstruction(ctx, bytes, nbytes, pc, buf, bufsize) + end + if nb == 0 + # Disassembly failed + break + end + pos += nb + for byte in buf + byte == 0x00 && break + write(io, byte) + end + write(io, '\n') + end + return nothing +end \ No newline at end of file diff --git a/test/disasm.jl b/test/disasm.jl new file mode 100644 index 00000000..a2d0607e --- /dev/null +++ b/test/disasm.jl @@ -0,0 +1,10 @@ +@testset "X86 Disassembly" begin + # Example from jitdump + native_code = UInt8[0x55, 0x48, 0x89, 0xe5, 0x49, 
0x8b, 0x45, 0x10, 0x48, 0x8b, 0x40, 0x10, 0x48, 0x8b, 0x00, 0x48, 0x8b, 0x16, 0x48, 0x83, 0xc6, 0x08, 0x48, 0xb8, 0xe0, 0x7c, 0xf5, 0x81, 0xe4, 0x7f, 0x00, 0x00, 0xff, 0xd0, 0x5d, 0xc3] + code_addr = 0x00007fe48befcde0 + + ctx = create_disasm("x86_64-pc-linux-gnu") + disassembled = sprint(disassemble_code, ctx, native_code, code_addr) + + @test disassembled == "\tpushq\t%rbp\n\tmovq\t%rsp, %rbp\n\tmovq\t16(%r13), %rax\n\tmovq\t16(%rax), %rax\n\tmovq\t(%rax), %rax\n\tmovq\t(%rsi), %rdx\n\taddq\t\$8, %rsi\n\tmovabsq\t\$140619409620192, %rax\n\tcallq\t*%rax\n\tpopq\t%rbp\n" +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index d97a0f1b..81ead7d5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -41,6 +41,7 @@ include("debuginfo.jl") include("util.jl") include("interop.jl") include("orc.jl") +include("disasm.jl") if !Sys.iswindows() # XXX: hangs on Windows include("jljit.jl") From 2fd8490a4c846df20ce1b463d713491ea5a03e04 Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 26 Aug 2025 12:00:54 +0200 Subject: [PATCH 2/2] fixup! 
High-level disassembly API

Make `DisasmContext` convertible to `LLVMDisasmContextRef`, pass the output
buffer to LLVM as a pointer and keep it rooted for the duration of the call.

Review fixes folded into this fixup:
- `while pos < length(native_code)` skipped an instruction that occupies only
  the final byte, so the trailing `retq` of the test snippet was never
  printed (and the expected output encoded that omission). Use `<=`.
- `code_addr::Csize_t = 0` made the default unusable: the literal `0` is an
  `Int`, so omitting `code_addr` raised a `MethodError`. Accept any `Integer`.
- Reject `bufsize < 1`; LLVM requires a non-empty output buffer.
- Keep `pos` an `Int` instead of letting it turn into a `Csize_t`.
- Add docstrings, end both files with a newline, and dispose the test
  context in a `finally` block.
---
Note: the `index` lines were dropped because the post-image blob ids changed
with the review fixes; plain `git am` / `git apply` do not need them.

 src/disasm.jl  | 47 +++++++++++++++++++++++++++++++++++++++++------
 test/disasm.jl | 19 +++++++++++++++----
 2 files changed, 56 insertions(+), 10 deletions(-)

diff --git a/src/disasm.jl b/src/disasm.jl
--- a/src/disasm.jl
+++ b/src/disasm.jl
@@ -1,38 +1,73 @@
+# Handle to an LLVM disassembler (an `LLVMDisasmContextRef`). Obtain one from
+# `create_disasm` and release it with `dispose`.
 @checked struct DisasmContext
     ref::API.LLVMDisasmContextRef
 end
+Base.unsafe_convert(::Type{API.LLVMDisasmContextRef}, dc::DisasmContext) = dc.ref
 
+"""
+    create_disasm(triple)
+
+Create a `DisasmContext` for the target `triple`, e.g. `"x86_64-pc-linux-gnu"`. No
+symbol-lookup or operand-info callbacks are registered. Release the context with
+`dispose` when done.
+"""
 function create_disasm(triple)
     ctx = DisasmContext(API.LLVMCreateDisasm(triple, C_NULL, 0, C_NULL, C_NULL))
     return ctx
 end
 
+"""
+    dispose(ctx::DisasmContext)
+
+Release the LLVM disassembler behind `ctx`. `ctx` must not be used afterwards.
+"""
 function dispose(ctx::DisasmContext)
     API.LLVMDisasmDispose(ctx.ref)
     return nothing
 end
 
-function disassemble_code(io, ctx::DisasmContext, native_code::Vector{UInt8}, code_addr::Csize_t = 0; bufsize=32)
+"""
+    disassemble_code(io, ctx::DisasmContext, native_code::Vector{UInt8}, code_addr=0;
+                     bufsize=32)
+
+Disassemble the machine code in `native_code` with `ctx` and write each instruction
+to `io`, one per line.
+
+`code_addr` is the non-negative address of the first byte of `native_code`; it is used
+to compute the program counter passed to LLVM for each instruction. `bufsize` is the
+size in bytes of the buffer that receives the text of a single instruction; it must be
+at least 1, and text that does not fit is truncated.
+
+Disassembly stops without an error at the first position LLVM fails to decode.
+"""
+function disassemble_code(io, ctx::DisasmContext, native_code::Vector{UInt8},
+                          code_addr::Integer = 0; bufsize=32)
+    bufsize >= 1 || throw(ArgumentError("bufsize must be at least 1, got $bufsize"))
+    start_pc = UInt64(code_addr)
     pos = 1
     buf = Vector{UInt8}(undef, bufsize)
 
-    while pos < length(native_code)
-        GC.@preserve native_code begin
+    # `<=`: an instruction occupying only the final byte must be decoded as well
+    while pos <= length(native_code)
+        GC.@preserve native_code buf begin
             bytes = pointer(native_code, pos)
-            pc = code_addr + pos - 1
+            pc = start_pc + (pos - 1)
             nbytes = length(native_code) - pos + 1
-            nb = API.LLVMDisasmInstruction(ctx, bytes, nbytes, pc, buf, bufsize)
+            # Need to use pointer(buf) here since the API is annotated as Cstring
+            nb = Int(API.LLVMDisasmInstruction(ctx, bytes, nbytes, pc, pointer(buf),
+                                               bufsize))
         end
         if nb == 0
             # Disassembly failed
             break
         end
         pos += nb
         for byte in buf
             byte == 0x00 && break
             write(io, byte)
         end
         write(io, '\n')
     end
     return nothing
-end
\ No newline at end of file
+end
diff --git a/test/disasm.jl b/test/disasm.jl
--- a/test/disasm.jl
+++ b/test/disasm.jl
@@ -1,10 +1,21 @@
+import LLVM: create_disasm, disassemble_code
+
 @testset "X86 Disassembly" begin
     # Example from jitdump
     native_code = UInt8[0x55, 0x48, 0x89, 0xe5, 0x49, 0x8b, 0x45, 0x10, 0x48, 0x8b, 0x40, 0x10, 0x48, 0x8b, 0x00, 0x48, 0x8b, 0x16, 0x48, 0x83, 0xc6, 0x08, 0x48, 0xb8, 0xe0, 0x7c, 0xf5, 0x81, 0xe4, 0x7f, 0x00, 0x00, 0xff, 0xd0, 0x5d, 0xc3]
     code_addr = 0x00007fe48befcde0
+    expected = "\tpushq\t%rbp\n\tmovq\t%rsp, %rbp\n\tmovq\t16(%r13), %rax\n" *
+               "\tmovq\t16(%rax), %rax\n\tmovq\t(%rax), %rax\n\tmovq\t(%rsi), %rdx\n" *
+               "\taddq\t\$8, %rsi\n\tmovabsq\t\$140619409620192, %rax\n" *
+               "\tcallq\t*%rax\n\tpopq\t%rbp\n\tretq\n"
 
     ctx = create_disasm("x86_64-pc-linux-gnu")
-    disassembled = sprint(disassemble_code, ctx, native_code, code_addr)
-
-    @test disassembled == "\tpushq\t%rbp\n\tmovq\t%rsp, %rbp\n\tmovq\t16(%r13), %rax\n\tmovq\t16(%rax), %rax\n\tmovq\t(%rax), %rax\n\tmovq\t(%rsi), %rdx\n\taddq\t\$8, %rsi\n\tmovabsq\t\$140619409620192, %rax\n\tcallq\t*%rax\n\tpopq\t%rbp\n"
+    try
+        # The trailing single-byte `retq` must be decoded as well
+        @test sprint(disassemble_code, ctx, native_code, code_addr) == expected
+        # `code_addr` is optional; this snippet has no pc-relative operands
+        @test sprint(disassemble_code, ctx, native_code) == expected
+    finally
+        LLVM.dispose(ctx)
+    end
-end
\ No newline at end of file
+end