From 5b35813de4a4463e53ea2b027e7c377f52ecf0fa Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 06:53:15 +0200 Subject: [PATCH 01/97] armv6m: initial commit of the assembler module Signed-off-by: Paul Guyot --- libs/jit/include/jit.hrl | 1 + libs/jit/src/CMakeLists.txt | 1 + libs/jit/src/jit_armv6m_asm.erl | 705 ++++++++++++++++++++++++ src/libAtomVM/jit.h | 1 + tests/libs/jit/CMakeLists.txt | 1 + tests/libs/jit/jit_armv6m_asm_tests.erl | 546 ++++++++++++++++++ 6 files changed, 1255 insertions(+) create mode 100644 libs/jit/src/jit_armv6m_asm.erl create mode 100644 tests/libs/jit/jit_armv6m_asm_tests.erl diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index 427fa40aec..6c08a80661 100644 --- a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -22,6 +22,7 @@ -define(JIT_ARCH_X86_64, 1). -define(JIT_ARCH_AARCH64, 2). +-define(JIT_ARCH_ARMV6M, 3). -define(JIT_VARIANT_PIC, 1). diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index a5810feff9..450e8048d1 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -29,6 +29,7 @@ set(ERLANG_MODULES jit_stream_mmap jit_aarch64 jit_aarch64_asm + jit_armv6m_asm jit_x86_64 jit_x86_64_asm ) diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl new file mode 100644 index 0000000000..44c8cbc1cf --- /dev/null +++ b/libs/jit/src/jit_armv6m_asm.erl @@ -0,0 +1,705 @@ +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + +-module(jit_armv6m_asm). + +-export([ + adds/2, + adds/3, + subs/2, + subs/3, + muls/2, + b/1, + bcc/2, + blx/1, + bx/1, + cmp/2, + ands/2, + orrs/2, + ldr/2, + lsls/2, + lsls/3, + lsrs/2, + lsrs/3, + mov/2, + movs/2, + str/2, + tst/2, + stp/4, + ldp/4, + adr/2, + push/1, + pop/1 +]). + +-export_type([ + cc/0 +]). + +-type arm_gpr_register() :: + r0 + | r1 + | r2 + | r3 + | r4 + | r5 + | r6 + | r7 + | r8 + | r9 + | r10 + | r11 + | r12 + | r13 + | r14 + | r15 + | sp + | lr + | pc. + +-type cc() :: eq | ne | cs | cc | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al. + +%%----------------------------------------------------------------------------- +%% Helper functions +%%----------------------------------------------------------------------------- + +%% Convert register atoms to register numbers for assembly generation +%% for r0 to r30 +reg_to_num(r0) -> 0; +reg_to_num(r1) -> 1; +reg_to_num(r2) -> 2; +reg_to_num(r3) -> 3; +reg_to_num(r4) -> 4; +reg_to_num(r5) -> 5; +reg_to_num(r6) -> 6; +reg_to_num(r7) -> 7; +reg_to_num(r8) -> 8; +reg_to_num(r9) -> 9; +reg_to_num(r10) -> 10; +reg_to_num(r11) -> 11; +reg_to_num(r12) -> 12; +reg_to_num(r13) -> 13; +reg_to_num(r14) -> 14; +reg_to_num(r15) -> 15; +%% Stack pointer (SP) is r13 +reg_to_num(sp) -> 13; +%% Link register (LR) is r14 +reg_to_num(lr) -> 14; +%% Program counter (PC) is r15 +reg_to_num(pc) -> 15. + +%% Convert condition atom to condition code number +-spec cond_to_num(atom()) -> 0..15. +% Equal (Z set) +cond_to_num(eq) -> 0; +% Not equal (Z clear) +cond_to_num(ne) -> 1; +% Carry set +cond_to_num(cs) -> 2; +% Carry clear +cond_to_num(cc) -> 3; +% Minus (N set) +cond_to_num(mi) -> 4; +% Plus (N clear) +cond_to_num(pl) -> 5; +% Overflow set +cond_to_num(vs) -> 6; +% Overflow clear +cond_to_num(vc) -> 7; +% Higher (unsigned) +cond_to_num(hi) -> 8; +% Lower or same (unsigned) +cond_to_num(ls) -> 9; +% Greater than or equal (signed) +cond_to_num(ge) -> 10; +% Less than (signed) +cond_to_num(lt) -> 11; +% Greater than (signed) +cond_to_num(gt) -> 12; +% Less than or equal (signed) +cond_to_num(le) -> 13; +% Always +cond_to_num(al) -> 14; +% Never +cond_to_num(nv) -> 15. + +%% Emit an ADDS instruction (Thumb encoding) +%% ADDS Rd, #imm - adds immediate value to register and sets flags (2-operand form) +-spec adds(arm_gpr_register(), integer()) -> binary(). +adds(Rd, Imm) when is_atom(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> + adds(Rd, Rd, Imm); +adds(Rd, Imm) when is_atom(Rd), is_integer(Imm) -> + error({unencodable_immediate, Imm}). + +%% ADDS Rd, Rn, #imm - adds immediate value to register and sets flags (3-operand form) +-spec adds(arm_gpr_register(), arm_gpr_register(), integer()) -> binary(). + +adds(Rd, Rd, Imm) when is_atom(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> + %% Thumb ADDS (immediate, 8-bit) encoding: 00110dddiiiiiiii (Rd = Rn) + RdNum = reg_to_num(Rd), + <<(16#3000 bor ((RdNum band 7) bsl 8) bor (Imm band 255)):16/little>>; +adds(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 7 -> + %% Thumb ADDS (immediate, 3-bit) encoding: 0001110iiinnnddd + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + <<(16#1C00 bor ((Imm band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little>>; +adds(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) -> + error({unencodable_immediate, Imm}); +adds(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> + %% Thumb ADDS (register) encoding: 0001100mmmnnnddd + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + << + (16#1800 bor ((RmNum band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little + >>. + +%% Emit an unconditional branch (B) instruction (Thumb encoding, ARMv6-M) +%% offset is in bytes, relative to the PC+4 (next instruction) +%% ARMv6-M only supports 16-bit Thumb B with ±2KB range +-spec b(integer()) -> binary(). +b(Offset) when is_integer(Offset), Offset >= -2044, Offset =< 2050, (Offset rem 2) =:= 0 -> + %% Thumb B (unconditional) encoding: 11100iiiiiiiiiii + %% imm11 is (Offset / 2) signed, fits in 11 bits + %% Adjust offset by -4 to match assembler behavior (PC+4 relative) + AdjustedOffset = Offset - 4, + Offset11 = AdjustedOffset div 2, + <<(16#E000 bor (Offset11 band 16#7FF)):16/little>>; +b(Offset) when is_integer(Offset) -> + error({unencodable_offset, Offset}). + +%% Emit a branch with link register (BLR) instruction (Thumb encoding) +%% Register is the register atom (r0-r15) +-spec blx(arm_gpr_register()) -> binary(). +blx(Reg) when is_atom(Reg) -> + RegNum = reg_to_num(Reg), + %% Thumb BLX (register) encoding: 010001111mmmm000 + %% This branches to register and sets LR + <<(16#4780 bor (RegNum bsl 3)):16/little>>. + +%% Emit a branch register (BR) instruction (Thumb encoding) +%% Register is the register atom (r0-r15) +-spec bx(arm_gpr_register()) -> binary(). +bx(Reg) when is_atom(Reg) -> + RegNum = reg_to_num(Reg), + %% Thumb BX (branch exchange) encoding: 010001110mmmm000 + %% This branches to register without setting LR + <<(16#4700 bor (RegNum bsl 3)):16/little>>. + +%% Emit a load register (LDR) instruction +-spec ldr(arm_gpr_register(), {arm_gpr_register(), integer()}) -> binary(). +%% LDR Rt, [Rn, #imm5*4] - 16-bit immediate offset (0-124, multiple of 4) +ldr(Rt, {Rn, Imm}) when + is_atom(Rt), + is_atom(Rn), + is_integer(Imm), + Imm >= 0, + Imm =< 124, + (Imm rem 4) =:= 0, + Rt =/= sp, + Rt =/= pc, + Rn =/= sp, + Rn =/= pc +-> + RtNum = reg_to_num(Rt), + RnNum = reg_to_num(Rn), + Imm5 = Imm div 4, + %% Thumb LDR immediate: 01101iiiiinnnttt + <<(16#6800 bor (Imm5 bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>; +%% LDR Rt, [SP, #imm8*4] - SP-relative load (0-1020, multiple of 4) +ldr(Rt, {sp, Imm}) when + is_atom(Rt), + is_integer(Imm), + Imm >= 0, + Imm =< 1020, + (Imm rem 4) =:= 0 +-> + RtNum = reg_to_num(Rt), + Imm8 = Imm div 4, + %% Thumb LDR SP-relative: 10011tttiiiiiiii + <<(16#9800 bor (RtNum bsl 8) bor Imm8):16/little>>; +%% LDR Rt, [PC, #imm8*4] - PC-relative load (0-1020, multiple of 4) +ldr(Rt, {pc, Imm}) when + is_atom(Rt), + is_integer(Imm), + Imm >= 0, + Imm =< 1020, + (Imm rem 4) =:= 0 +-> + RtNum = reg_to_num(Rt), + Imm8 = Imm div 4, + %% Thumb LDR PC-relative: 01001tttiiiiiiii + <<(16#4800 bor (RtNum bsl 8) bor Imm8):16/little>>; +%% LDR Rt, [Rn, Rm] - register offset +ldr(Rt, {Rn, Rm}) when + is_atom(Rt), + is_atom(Rn), + is_atom(Rm), + Rt =/= sp, + Rt =/= pc, + Rn =/= sp, + Rn =/= pc, + Rm =/= sp, + Rm =/= pc +-> + RtNum = reg_to_num(Rt), + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + %% Thumb LDR register: 0101100mmmnnntttt + <<(16#5800 bor (RmNum bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>. + +%% ARMv6-M Thumb MOVS instruction (sets flags) +-spec movs(arm_gpr_register(), integer() | arm_gpr_register()) -> binary(). +%% MOVS immediate - 8-bit immediates only (0-255) +movs(Rd, Imm) when + is_atom(Rd), + is_integer(Imm), + Imm >= 0, + Imm =< 255 +-> + RdNum = reg_to_num(Rd), + %% Thumb MOVS immediate: 00100dddiiiiiiii + <<(16#2000 bor (RdNum bsl 8) bor Imm):16/little>>; +%% MOVS register - low registers only (both must be r0-r7) +movs(Rd, Rm) when + is_atom(Rd), is_atom(Rm) +-> + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + case RdNum =< 7 andalso RmNum =< 7 of + true -> + %% Thumb MOVS register: 0000000000mmmdddd + <<(16#0000 bor (RmNum bsl 3) bor RdNum):16/little>>; + false -> + error({movs_requires_low_registers, {Rd, Rm}}) + end. + +%% ARMv6-M Thumb MOV instruction (no flags, for high registers) +-spec mov(arm_gpr_register(), arm_gpr_register()) -> binary(). +%% MOV register - at least one register must be high (r8-r15) +mov(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + case RdNum >= 8 orelse RmNum >= 8 of + true -> + %% Thumb MOV high register: 01000110DMmmmdddd + D = + if + RdNum >= 8 -> 1; + true -> 0 + end, + M = + if + RmNum >= 8 -> 1; + true -> 0 + end, + RdLow = RdNum band 7, + RmLow = RmNum band 7, + <<(16#4600 bor (D bsl 7) bor (M bsl 6) bor (RmLow bsl 3) bor RdLow):16/little>>; + false -> + error({mov_requires_high_register, {Rd, Rm}}) + end. + +%% ARMv6-M Thumb STR immediate offset (0-124, multiple of 4) +str(Rt, {Rn, Imm}) when + is_atom(Rt), + is_atom(Rn), + is_integer(Imm), + Rt =/= sp, + Rt =/= pc, + Rn =/= sp, + Rn =/= pc, + Imm >= 0, + Imm =< 124, + (Imm rem 4) =:= 0 +-> + RtNum = reg_to_num(Rt), + RnNum = reg_to_num(Rn), + Imm5 = Imm div 4, + %% Thumb STR immediate: 01100iiiiinnnttt + <<(16#6000 bor (Imm5 bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>; +%% SP-relative STR (0-1020, multiple of 4) +str(Rt, {sp, Imm}) when + is_atom(Rt), + is_integer(Imm), + Rt =/= sp, + Rt =/= pc, + Imm >= 0, + Imm =< 1020, + (Imm rem 4) =:= 0 +-> + RtNum = reg_to_num(Rt), + Imm8 = Imm div 4, + %% Thumb STR SP relative: 1001ttttiiiiiiiii + <<(16#9000 bor (RtNum bsl 8) bor Imm8):16/little>>; +%% STR Rt, [Rn, Rm] - register offset +str(Rt, {Rn, Rm}) when + is_atom(Rt), + is_atom(Rn), + is_atom(Rm), + Rt =/= sp, + Rt =/= pc, + Rn =/= sp, + Rn =/= pc, + Rm =/= sp, + Rm =/= pc +-> + RtNum = reg_to_num(Rt), + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + %% Thumb STR register: 0101000mmmnnntttt + <<(16#5000 bor (RmNum bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>. + +%% Emit a store pair (STP) instruction for 64-bit registers +%% stp(Rn, Rm, {Base}, Imm) -> binary() +%% stp(Rn, Rm, {Base, Imm}, '!') -> binary() (store-update) +-spec stp( + arm_gpr_register(), + arm_gpr_register(), + {arm_gpr_register()} | {arm_gpr_register(), integer()}, + integer() | '!' +) -> binary(). +stp(Rn, Rm, {Base}, Imm) when + is_atom(Rn), + is_atom(Rm), + is_atom(Base), + is_integer(Imm), + Imm >= -512, + Imm =< 504, + (Imm rem 8) =:= 0 +-> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + BaseNum = reg_to_num(Base), + %% STP encoding: 1010100010|imm7|base|rm|rn + %% 0xa9bf0000 | ((Imm div 8) band 0x7f) << 15 | Base << 5 | Rm << 10 | Rn + << + (16#A8800000 bor ((Imm div 8) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor RnNum):32/little + >>; +stp(Rn, Rm, {Base, Imm}, '!') when + is_atom(Rn), + is_atom(Rm), + is_atom(Base), + is_integer(Imm), + Imm >= -512, + Imm =< 504, + (Imm rem 8) =:= 0 +-> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + BaseNum = reg_to_num(Base), + << + (16#A9800000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor + RnNum):32/little + >>. + +%% Emit a load pair (LDP) instruction for 64-bit registers +%% ldp(Rn, Rm, {Base}, Imm) -> binary() +-spec ldp(arm_gpr_register(), arm_gpr_register(), {arm_gpr_register()}, integer()) -> + binary(). +ldp(Rn, Rm, {Base}, Imm) when + is_atom(Rn), + is_atom(Rm), + is_atom(Base), + is_integer(Imm), + Imm >= -512, + Imm =< 504, + (Imm rem 8) =:= 0 +-> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + BaseNum = reg_to_num(Base), + %% LDP encoding: 1010100011|imm7|base|rm|rn + << + (16#A8C00000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor + RnNum):32/little + >>. + +%% Emit a conditional branch instruction +-spec bcc(cc(), integer()) -> binary(). +%% Special case: 'al' (always) condition uses unconditional branch for efficiency +bcc(al, Offset) when is_integer(Offset) -> + b(Offset); +bcc(Cond, Offset) when + is_atom(Cond), is_integer(Offset), Offset >= -252, Offset =< 258, (Offset rem 2) =:= 0 +-> + CondNum = cond_to_num(Cond), + %% Thumb conditional branch encoding (ARMv6-M): 1101cccciiiiiiiii + %% imm8 is (Offset / 2) signed, fits in 8 bits + %% ARMv6-M only supports 16-bit Thumb conditional branches with ±256B range + %% Adjust offset by -4 to match assembler behavior (PC+4 relative) + AdjustedOffset = Offset - 4, + Offset8 = AdjustedOffset div 2, + <<(16#D000 bor (CondNum bsl 8) bor (Offset8 band 16#FF)):16/little>>; +bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) -> + error({unencodable_offset, Offset}). + +%% ARMv6-M Thumb CMP instruction +-spec cmp(arm_gpr_register(), arm_gpr_register() | integer()) -> binary(). +%% CMP register-register form (low registers only) +cmp(Rn, Rm) when + is_atom(Rn), + is_atom(Rm), + Rn =/= sp, + Rn =/= pc, + Rm =/= sp, + Rm =/= pc +-> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + case RnNum =< 7 andalso RmNum =< 7 of + true -> + %% Thumb CMP register: 0100001010mmmnnn + <<(16#4280 bor (RmNum bsl 3) bor RnNum):16/little>>; + false -> + error({cmp_requires_low_registers, {Rn, Rm}}) + end; +%% CMP register-immediate form (8-bit immediate 0-255) +cmp(Rn, Imm) when + is_atom(Rn), + is_integer(Imm), + Rn =/= sp, + Rn =/= pc, + Imm >= 0, + Imm =< 255 +-> + RnNum = reg_to_num(Rn), + case RnNum =< 7 of + true -> + %% Thumb CMP immediate: 00101nnniiiiiiiii + <<(16#2800 bor (RnNum bsl 8) bor Imm):16/little>>; + false -> + error({cmp_immediate_requires_low_register, Rn}) + end; +cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> + error({unencodable_immediate, Imm}). + +%% Emit an AND instruction (bitwise AND) +%% ARMv6-M Thumb ANDS instruction (register only - no immediate support) +-spec ands(arm_gpr_register(), arm_gpr_register()) -> binary(). +ands(Rd, Rm) when + is_atom(Rd), + is_atom(Rm), + Rd =/= sp, + Rd =/= pc, + Rm =/= sp, + Rm =/= pc +-> + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + %% Thumb ANDS (2-operand): 0100000000mmmddd + <<(16#4000 bor (RmNum bsl 3) bor RdNum):16/little>>. + +%% ARMv6-M Thumb ORRS instruction (register only - sets flags) +-spec orrs(arm_gpr_register(), arm_gpr_register()) -> binary(). +orrs(Rd, Rm) when + is_atom(Rd), + is_atom(Rm), + Rd =/= sp, + Rd =/= pc, + Rm =/= sp, + Rm =/= pc +-> + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + %% Thumb ORRS (2-operand): 0100001100mmmddd + <<(16#4300 bor (RmNum bsl 3) bor RdNum):16/little>>. + +%% ARMv6-M Thumb logical shift left (LSLS) instructions +-spec lsls(arm_gpr_register(), arm_gpr_register(), integer()) -> binary(). +%% LSLS Rd, Rm, #imm5 - immediate shift (1-31) +lsls(Rd, Rm, Imm) when + is_atom(Rd), + is_atom(Rm), + is_integer(Imm), + Imm >= 1, + Imm =< 31, + Rd =/= sp, + Rd =/= pc, + Rm =/= sp, + Rm =/= pc +-> + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + %% Thumb LSLS immediate: 00000iiiiimmmddd + <<(16#0000 bor (Imm bsl 6) bor (RmNum bsl 3) bor RdNum):16/little>>. + +-spec lsls(arm_gpr_register(), arm_gpr_register()) -> binary(). +%% LSLS Rdn, Rm - register shift (Rdn = Rdn << Rm) +lsls(Rdn, Rm) when + is_atom(Rdn), + is_atom(Rm), + Rdn =/= sp, + Rdn =/= pc, + Rm =/= sp, + Rm =/= pc +-> + RdnNum = reg_to_num(Rdn), + RmNum = reg_to_num(Rm), + %% Thumb LSLS register: 0100000010mmmddd + <<(16#4080 bor (RmNum bsl 3) bor RdnNum):16/little>>. + +%% ARMv6-M Thumb logical shift right (LSRS) instructions +-spec lsrs(arm_gpr_register(), arm_gpr_register(), integer()) -> binary(). +%% LSRS Rd, Rm, #imm5 - immediate shift (1-32) +lsrs(Rd, Rm, Imm) when + is_atom(Rd), + is_atom(Rm), + is_integer(Imm), + Imm >= 1, + Imm =< 32, + Rd =/= sp, + Rd =/= pc, + Rm =/= sp, + Rm =/= pc +-> + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + %% Thumb LSRS immediate: 00001iiiiimmmddd (imm5=0 means shift by 32) + Imm5 = + if + Imm =:= 32 -> 0; + true -> Imm + end, + <<(16#0800 bor (Imm5 bsl 6) bor (RmNum bsl 3) bor RdNum):16/little>>. + +-spec lsrs(arm_gpr_register(), arm_gpr_register()) -> binary(). +%% LSRS Rdn, Rm - register shift (Rdn = Rdn >> Rm) +lsrs(Rdn, Rm) when + is_atom(Rdn), + is_atom(Rm), + Rdn =/= sp, + Rdn =/= pc, + Rm =/= sp, + Rm =/= pc +-> + RdnNum = reg_to_num(Rdn), + RmNum = reg_to_num(Rm), + %% Thumb LSRS register: 0100000011mmmddd + <<(16#40C0 bor (RmNum bsl 3) bor RdnNum):16/little>>. + +%% ARMv6-M Thumb TST instruction (register only) +-spec tst(arm_gpr_register(), arm_gpr_register()) -> binary(). +%% TST Rn, Rm - test bits (performs Rn & Rm, updates flags, low registers only) +tst(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + case RnNum =< 7 andalso RmNum =< 7 of + true -> + %% Thumb TST register: 0100001000mmmnnn + <<(16#4200 bor (RmNum bsl 3) bor RnNum):16/little>>; + false -> + error({tst_requires_low_registers, {Rn, Rm}}) + end. + +%% Emit a SUBS instruction (Thumb encoding) +%% SUBS Rd, #imm - subtracts immediate value from register and sets flags (2-operand form) +-spec subs(arm_gpr_register(), integer()) -> binary(). +subs(Rd, Imm) when is_atom(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> + subs(Rd, Rd, Imm); +subs(Rd, Imm) when is_atom(Rd), is_integer(Imm) -> + error({unencodable_immediate, Imm}). + +%% SUBS Rd, Rn, #imm - subtracts immediate value from register and sets flags (3-operand form) +-spec subs(arm_gpr_register(), arm_gpr_register(), integer()) -> binary(). +subs(Rd, Rd, Imm) when is_atom(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> + %% Thumb SUBS (immediate, 8-bit) encoding: 00111dddiiiiiiii (Rd = Rn) + RdNum = reg_to_num(Rd), + <<(16#3800 bor ((RdNum band 7) bsl 8) bor (Imm band 255)):16/little>>; +subs(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 7 -> + %% Thumb SUBS (immediate, 3-bit) encoding: 0001111iiinnnddd + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + <<(16#1E00 bor ((Imm band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little>>; +subs(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) -> + error({unencodable_immediate, Imm}); +subs(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> + %% Thumb SUBS (register) encoding: 0001101mmmnnnddd + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + RmNum = reg_to_num(Rm), + << + (16#1A00 bor ((RmNum band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little + >>. + +%% ARMv6-M Thumb address calculation (ADR) instruction +%% ADR is implemented as ADD Rd, PC, #imm8*4 in Thumb +%% In Thumb, PC = current_instruction_address + 4, so adr(Rd, N) means: +%% Rd = (current_pc + 4) + immediate = current_pc + (N - 4) + 4 = current_pc + N +-spec adr(arm_gpr_register(), integer()) -> binary(). +adr(Rd, Offset) when + is_atom(Rd), + is_integer(Offset), + Offset >= 4, + Offset =< 1024, + (Offset rem 4) =:= 0 +-> + RdNum = reg_to_num(Rd), + %% PC-relative offset in Thumb is (PC+4) + immediate + %% So for adr(Rd, N): immediate = N - 4 + Immediate = Offset - 4, + Imm8 = Immediate div 4, + %% Thumb ADR (ADD PC-relative): 10100dddiiiiiiii + <<(16#A000 bor (RdNum bsl 8) bor Imm8):16/little>>. + +%% Emit a MULS instruction (Thumb encoding) +%% MULS Rd, Rm - multiply Rd by Rm, store result in Rd (sets flags) +-spec muls(arm_gpr_register(), arm_gpr_register()) -> binary(). +muls(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> + %% Thumb MULS encoding: 0100001101mmmrrr (Rd is both source and destination) + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + <<(16#4340 bor (RmNum bsl 3) bor RdNum):16/little>>. + +%% ARMv6-M Thumb PUSH instruction +%% PUSH {register_list} - push registers to stack (low registers + optional LR) +-spec push([arm_gpr_register()]) -> binary(). +push(RegList) when is_list(RegList) -> + %% Process register list and build bitmask + {LowRegMask, LRBit} = process_reglist(RegList, lr), + %% Thumb PUSH encoding: 1011010Rlllllll where R=LR bit, lllllll=low register mask + <<(16#B400 bor (LRBit bsl 8) bor LowRegMask):16/little>>. + +%% ARMv6-M Thumb POP instruction +%% POP {register_list} - pop registers from stack (low registers + optional PC) +-spec pop([arm_gpr_register()]) -> binary(). +pop(RegList) when is_list(RegList) -> + %% Process register list and build bitmask + {LowRegMask, PCBit} = process_reglist(RegList, pc), + %% Thumb POP encoding: 1011110Plllllll where P=PC bit, lllllll=low register mask + <<(16#BC00 bor (PCBit bsl 8) bor LowRegMask):16/little>>. + +%% Generic helper function to process register lists for PUSH/POP +process_reglist(RegList, SpecialReg) -> + RegBits = lists:foldl( + fun(Reg, Acc) -> + Acc + (1 bsl reg_to_num(Reg)) + end, + 0, + RegList + ), + LowRegsBits = RegBits band 2#1111111, + SpecialRegBit = RegBits band (1 bsl reg_to_num(SpecialReg)), + if + RegBits =/= LowRegsBits + SpecialRegBit -> + error({invalid_register, RegBits - LowRegsBits - SpecialRegBit}); + SpecialRegBit =/= 0 -> + {LowRegsBits, 1}; + true -> + {LowRegsBits, 0} + end. diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index 77caa9d578..63c72493af 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -172,6 +172,7 @@ enum TrapAndLoadResult #define JIT_ARCH_X86_64 1 #define JIT_ARCH_AARCH64 2 +#define JIT_ARCH_ARMV6M 3 #define JIT_VARIANT_PIC 1 diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 70f46ccc09..6bd6323af8 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -28,6 +28,7 @@ set(ERLANG_MODULES jit_tests_common jit_aarch64_tests jit_aarch64_asm_tests + jit_armv6m_asm_tests jit_x86_64_tests jit_x86_64_asm_tests ) diff --git a/tests/libs/jit/jit_armv6m_asm_tests.erl b/tests/libs/jit/jit_armv6m_asm_tests.erl new file mode 100644 index 0000000000..c8e27b6327 --- /dev/null +++ b/tests/libs/jit/jit_armv6m_asm_tests.erl @@ -0,0 +1,546 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_armv6m_asm_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +adds_test_() -> + [ + ?_assertEqual( + asm(<<16#3038:16/little>>, "adds r0, #56"), jit_armv6m_asm:adds(r0, 56) + ), + ?_assertEqual( + asm(<<16#3038:16/little>>, "adds r0, r0, #56"), jit_armv6m_asm:adds(r0, r0, 56) + ), + ?_assertEqual( + asm(<<16#3000:16/little>>, "adds r0, #0"), jit_armv6m_asm:adds(r0, 0) + ), + ?_assertEqual( + asm(<<16#3101:16/little>>, "adds r1, #1"), jit_armv6m_asm:adds(r1, 1) + ), + ?_assertEqual( + asm(<<16#1C42:16/little>>, "adds r2, r0, #1"), jit_armv6m_asm:adds(r2, r0, 1) + ), + ?_assertEqual( + asm(<<16#18c9:16/little>>, "adds r1, r1, r3"), jit_armv6m_asm:adds(r1, r1, r3) + ), + ?_assertEqual( + asm(<<16#1850:16/little>>, "adds r0, r2, r1"), jit_armv6m_asm:adds(r0, r2, r1) + ) + ]. + +subs_test_() -> + [ + ?_assertEqual( + asm(<<16#3f38:16/little>>, "subs r7, #56"), jit_armv6m_asm:subs(r7, 56) + ), + ?_assertEqual( + asm(<<16#3f38:16/little>>, "subs r7, r7, #56"), jit_armv6m_asm:subs(r7, r7, 56) + ), + ?_assertEqual( + asm(<<16#3800:16/little>>, "subs r0, #0"), jit_armv6m_asm:subs(r0, 0) + ), + ?_assertEqual( + asm(<<16#1e42:16/little>>, "subs r2, r0, #1"), jit_armv6m_asm:subs(r2, r0, 1) + ), + ?_assertEqual( + asm(<<16#1ad1:16/little>>, "subs r1, r2, r3"), jit_armv6m_asm:subs(r1, r2, r3) + ) + ]. + +muls_test_() -> + [ + ?_assertEqual( + asm(<<16#4359:16/little>>, "muls r1, r3"), jit_armv6m_asm:muls(r1, r3) + ), + ?_assertEqual( + asm(<<16#4348:16/little>>, "muls r0, r1"), jit_armv6m_asm:muls(r0, r1) + ) + ]. + +b_test_() -> + [ + %% Thumb B (unconditional) encoding tests - ARMv6-M 16-bit only + ?_assertEqual( + asm(<<16#E7FE:16/little>>, "b .+0"), jit_armv6m_asm:b(0) + ), + ?_assertEqual( + asm(<<16#E006:16/little>>, "b .+16"), jit_armv6m_asm:b(16) + ), + ?_assertEqual( + asm(<<16#E7DE:16/little>>, "b .-64"), jit_armv6m_asm:b(-64) + ), + ?_assertEqual( + asm(<<16#E000:16/little>>, "b .+4"), jit_armv6m_asm:b(4) + ), + ?_assertEqual( + asm(<<16#E3FF:16/little>>, "b .+2050"), jit_armv6m_asm:b(2050) + ), + ?_assertEqual( + asm(<<16#E400:16/little>>, "b .-2044"), jit_armv6m_asm:b(-2044) + ), + %% Test error cases for offsets too large for ARMv6-M + ?_assertError({unencodable_offset, 2052}, jit_armv6m_asm:b(2052)), + ?_assertError({unencodable_offset, -2046}, jit_armv6m_asm:b(-2046)) + ]. + +blx_test_() -> + [ + %% Thumb BLX (register) encoding tests + ?_assertEqual( + asm(<<16#4780:16/little>>, "blx r0"), jit_armv6m_asm:blx(r0) + ), + ?_assertEqual( + asm(<<16#4788:16/little>>, "blx r1"), jit_armv6m_asm:blx(r1) + ), + ?_assertEqual( + asm(<<16#47E8:16/little>>, "blx r13"), jit_armv6m_asm:blx(r13) + ) + ]. + +bx_test_() -> + [ + %% Thumb BX (branch exchange) encoding tests + ?_assertEqual( + asm(<<16#4700:16/little>>, "bx r0"), jit_armv6m_asm:bx(r0) + ), + ?_assertEqual( + asm(<<16#4708:16/little>>, "bx r1"), jit_armv6m_asm:bx(r1) + ), + ?_assertEqual( + asm(<<16#4768:16/little>>, "bx r13"), jit_armv6m_asm:bx(r13) + ) + ]. + +ldr_test_() -> + [ + %% ARMv6-M Thumb LDR immediate offset (0-124, multiple of 4) + ?_assertEqual( + asm(<<16#6889:16/little>>, "ldr r1, [r1, #8]"), + jit_armv6m_asm:ldr(r1, {r1, 8}) + ), + ?_assertEqual( + asm(<<16#6982:16/little>>, "ldr r2, [r0, #24]"), + jit_armv6m_asm:ldr(r2, {r0, 24}) + ), + %% SP-relative load (0-1020, multiple of 4) + ?_assertEqual( + asm(<<16#9f00:16/little>>, "ldr r7, [sp, #0]"), + jit_armv6m_asm:ldr(r7, {sp, 0}) + ), + ?_assertEqual( + asm(<<16#9801:16/little>>, "ldr r0, [sp, #4]"), + jit_armv6m_asm:ldr(r0, {sp, 4}) + ), + %% PC-relative load (0-1020, multiple of 4) + ?_assertEqual( + asm(<<16#4a18:16/little>>, "ldr r2, [pc, #96]"), + jit_armv6m_asm:ldr(r2, {pc, 96}) + ), + %% Register offset + ?_assertEqual( + asm(<<16#58d1:16/little>>, "ldr r1, [r2, r3]"), + jit_armv6m_asm:ldr(r1, {r2, r3}) + ) + ]. + +movs_test_() -> + [ + %% ARMv6-M Thumb MOVS instructions (sets flags) + %% MOVS immediate (8-bit only, 0-255) + ?_assertEqual( + asm(<<16#2000:16/little>>, "movs r0, #0"), + jit_armv6m_asm:movs(r0, 0) + ), + ?_assertEqual( + asm(<<16#2101:16/little>>, "movs r1, #1"), + jit_armv6m_asm:movs(r1, 1) + ), + ?_assertEqual( + asm(<<16#22ff:16/little>>, "movs r2, #255"), + jit_armv6m_asm:movs(r2, 255) + ), + %% MOVS register - low registers only (r0-r7) + ?_assertEqual( + asm(<<16#0008:16/little>>, "movs r0, r1"), + jit_armv6m_asm:movs(r0, r1) + ), + ?_assertEqual( + asm(<<16#001a:16/little>>, "movs r2, r3"), + jit_armv6m_asm:movs(r2, r3) + ) + ]. + +mov_test_() -> + [ + %% ARMv6-M Thumb MOV instructions (no flags, for high registers) + %% MOV register - requires at least one high register (r8-r15) + ?_assertEqual( + asm(<<16#4680:16/little>>, "mov r8, r0"), + jit_armv6m_asm:mov(r8, r0) + ), + ?_assertEqual( + asm(<<16#4640:16/little>>, "mov r0, r8"), + jit_armv6m_asm:mov(r0, r8) + ), + ?_assertEqual( + asm(<<16#46c8:16/little>>, "mov r8, r9"), + jit_armv6m_asm:mov(r8, r9) + ) + ]. + +str_test_() -> + [ + %% ARMv6-M Thumb STR immediate offset (0-124, multiple of 4) + ?_assertEqual( + asm(<<16#6089:16/little>>, "str r1, [r1, #8]"), + jit_armv6m_asm:str(r1, {r1, 8}) + ), + ?_assertEqual( + asm(<<16#6182:16/little>>, "str r2, [r0, #24]"), + jit_armv6m_asm:str(r2, {r0, 24}) + ), + %% SP-relative store (0-1020, multiple of 4) + ?_assertEqual( + asm(<<16#9700:16/little>>, "str r7, [sp, #0]"), + jit_armv6m_asm:str(r7, {sp, 0}) + ), + ?_assertEqual( + asm(<<16#9001:16/little>>, "str r0, [sp, #4]"), + jit_armv6m_asm:str(r0, {sp, 4}) + ), + %% Register offset + ?_assertEqual( + asm(<<16#50d1:16/little>>, "str r1, [r2, r3]"), + jit_armv6m_asm:str(r1, {r2, r3}) + ) + ]. + +cmp_test_() -> + [ + %% ARMv6-M Thumb CMP register (low registers only) + ?_assertEqual( + asm(<<16#4288:16/little>>, "cmp r0, r1"), + jit_armv6m_asm:cmp(r0, r1) + ), + ?_assertEqual( + asm(<<16#42bb:16/little>>, "cmp r3, r7"), + jit_armv6m_asm:cmp(r3, r7) + ), + %% ARMv6-M Thumb CMP immediate (8-bit, 0-255, low registers only) + ?_assertEqual( + asm(<<16#2800:16/little>>, "cmp r0, #0"), + jit_armv6m_asm:cmp(r0, 0) + ), + ?_assertEqual( + asm(<<16#2805:16/little>>, "cmp r0, #5"), + jit_armv6m_asm:cmp(r0, 5) + ), + ?_assertEqual( + asm(<<16#2fff:16/little>>, "cmp r7, #255"), + jit_armv6m_asm:cmp(r7, 255) + ) + ]. + +ands_test_() -> + [ + %% ARMv6-M Thumb ANDS register (2-operand: Rd = Rd AND Rm) + ?_assertEqual( + asm(<<16#4008:16/little>>, "ands r0, r1"), + jit_armv6m_asm:ands(r0, r1) + ), + ?_assertEqual( + asm(<<16#4011:16/little>>, "ands r1, r2"), + jit_armv6m_asm:ands(r1, r2) + ), + ?_assertEqual( + asm(<<16#401a:16/little>>, "ands r2, r3"), + jit_armv6m_asm:ands(r2, r3) + ) + ]. + +orrs_test_() -> + [ + %% ARMv6-M Thumb ORRS register (2-operand: Rd = Rd OR Rm, sets flags) + ?_assertEqual( + asm(<<16#4308:16/little>>, "orrs r0, r1"), + jit_armv6m_asm:orrs(r0, r1) + ), + ?_assertEqual( + asm(<<16#4311:16/little>>, "orrs r1, r2"), + jit_armv6m_asm:orrs(r1, r2) + ), + ?_assertEqual( + asm(<<16#431a:16/little>>, "orrs r2, r3"), + jit_armv6m_asm:orrs(r2, r3) + ) + ]. + +lsls_test_() -> + [ + %% ARMv6-M Thumb LSLS immediate shift (1-31) + ?_assertEqual( + asm(<<16#0148:16/little>>, "lsls r0, r1, #5"), + jit_armv6m_asm:lsls(r0, r1, 5) + ), + ?_assertEqual( + asm(<<16#0212:16/little>>, "lsls r2, r2, #8"), + jit_armv6m_asm:lsls(r2, r2, 8) + ), + %% LSLS register shift + ?_assertEqual( + asm(<<16#409a:16/little>>, "lsls r2, r3"), + jit_armv6m_asm:lsls(r2, r3) + ) + ]. + +lsrs_test_() -> + [ + %% ARMv6-M Thumb LSRS immediate shift (1-32) + ?_assertEqual( + asm(<<16#0948:16/little>>, "lsrs r0, r1, #5"), + jit_armv6m_asm:lsrs(r0, r1, 5) + ), + ?_assertEqual( + asm(<<16#0a12:16/little>>, "lsrs r2, r2, #8"), + jit_armv6m_asm:lsrs(r2, r2, 8) + ), + %% LSRS register shift + ?_assertEqual( + asm(<<16#40da:16/little>>, "lsrs r2, r3"), + jit_armv6m_asm:lsrs(r2, r3) + ) + ]. + +tst_test_() -> + [ + %% ARMv6-M Thumb TST instructions (register only, low registers) + %% TST Rn, Rm - test bits (performs Rn & Rm, updates flags) + ?_assertEqual( + asm(<<16#4208:16/little>>, "tst r0, r1"), + jit_armv6m_asm:tst(r0, r1) + ), + ?_assertEqual( + asm(<<16#421a:16/little>>, "tst r2, r3"), + jit_armv6m_asm:tst(r2, r3) + ), + ?_assertEqual( + asm(<<16#4239:16/little>>, "tst r1, r7"), + jit_armv6m_asm:tst(r1, r7) + ) + ]. + +bcc_test_() -> + [ + %% Thumb conditional branch encoding tests - ARMv6-M 16-bit only + ?_assertEqual( + asm(<<16#D0FE:16/little>>, "beq .+0"), jit_armv6m_asm:bcc(eq, 0) + ), + ?_assertEqual( + asm(<<16#D1FE:16/little>>, "bne .+0"), jit_armv6m_asm:bcc(ne, 0) + ), + ?_assertEqual( + asm(<<16#D1DE:16/little>>, "bne .-64"), jit_armv6m_asm:bcc(ne, -64) + ), + ?_assertEqual( + asm(<<16#D03E:16/little>>, "beq .+128"), jit_armv6m_asm:bcc(eq, 128) + ), + ?_assertEqual( + asm(<<16#D23E:16/little>>, "bcs .+128"), jit_armv6m_asm:bcc(cs, 128) + ), + ?_assertEqual( + asm(<<16#D33E:16/little>>, "bcc .+128"), jit_armv6m_asm:bcc(cc, 128) + ), + ?_assertEqual( + asm(<<16#D43E:16/little>>, "bmi .+128"), jit_armv6m_asm:bcc(mi, 128) + ), + ?_assertEqual( + asm(<<16#D53E:16/little>>, "bpl .+128"), jit_armv6m_asm:bcc(pl, 128) + ), + ?_assertEqual( + asm(<<16#D63E:16/little>>, "bvs .+128"), jit_armv6m_asm:bcc(vs, 128) + ), + ?_assertEqual( + asm(<<16#D83E:16/little>>, "bhi .+128"), jit_armv6m_asm:bcc(hi, 128) + ), + ?_assertEqual( + asm(<<16#D93E:16/little>>, "bls .+128"), jit_armv6m_asm:bcc(ls, 128) + ), + ?_assertEqual( + asm(<<16#DA3E:16/little>>, "bge .+128"), jit_armv6m_asm:bcc(ge, 128) + ), + ?_assertEqual( + asm(<<16#DB3E:16/little>>, "blt .+128"), jit_armv6m_asm:bcc(lt, 128) + ), + ?_assertEqual( + asm(<<16#DC3E:16/little>>, "bgt .+128"), jit_armv6m_asm:bcc(gt, 128) + ), + ?_assertEqual( + asm(<<16#DD3E:16/little>>, "ble .+128"), jit_armv6m_asm:bcc(le, 128) + ), + ?_assertEqual( + asm(<<16#E03E:16/little>>, "bal .+128"), jit_armv6m_asm:bcc(al, 128) + ), + ?_assertEqual( + asm(<<16#D07F:16/little>>, "beq .+258"), jit_armv6m_asm:bcc(eq, 258) + ), + ?_assertEqual( + asm(<<16#D180:16/little>>, "bne .-252"), jit_armv6m_asm:bcc(ne, -252) + ), + %% Test error cases for offsets too large for ARMv6-M + ?_assertError({unencodable_offset, 260}, jit_armv6m_asm:bcc(eq, 260)), + ?_assertError({unencodable_offset, -254}, jit_armv6m_asm:bcc(ne, -254)) + ]. + +adr_test_() -> + [ + %% ARMv6-M Thumb ADR (PC-relative address) - implemented as ADD Rd, PC, #imm + %% adr(Rd, N) means "Rd = current_PC + N" where PC is instruction address + %% Range: 4-1024, must be multiple of 4 + ?_assertEqual( + asm(<<16#a000:16/little>>, "adr r0, .+4"), + jit_armv6m_asm:adr(r0, 4) + ), + ?_assertEqual( + asm(<<16#a101:16/little>>, "adr r1, .+8"), + jit_armv6m_asm:adr(r1, 8) + ), + ?_assertEqual( + asm(<<16#a202:16/little>>, "adr r2, .+12"), + jit_armv6m_asm:adr(r2, 12) + ), + ?_assertEqual( + asm(<<16#a708:16/little>>, "adr r7, .+36"), + jit_armv6m_asm:adr(r7, 36) + ), + %% Test maximum offset value (1024 bytes) + ?_assertEqual( + asm(<<16#a0ff:16/little>>, "adr r0, .+1024"), + jit_armv6m_asm:adr(r0, 1024) + ) + ]. + +push_test_() -> + [ + %% ARMv6-M Thumb PUSH instruction (low registers + optional LR) + %% Single register push + ?_assertEqual( + asm(<<16#b401:16/little>>, "push {r0}"), + jit_armv6m_asm:push([r0]) + ), + %% Multiple register push + ?_assertEqual( + asm(<<16#b407:16/little>>, "push {r0, r1, r2}"), + jit_armv6m_asm:push([r0, r1, r2]) + ), + %% Push with LR + ?_assertEqual( + asm(<<16#b500:16/little>>, "push {lr}"), + jit_armv6m_asm:push([lr]) + ), + %% Push registers + LR + ?_assertEqual( + asm(<<16#b507:16/little>>, "push {r0, r1, r2, lr}"), + jit_armv6m_asm:push([r0, r1, r2, lr]) + ) + ]. + +pop_test_() -> + [ + %% ARMv6-M Thumb POP instruction (low registers + optional PC) + %% Single register pop + ?_assertEqual( + asm(<<16#bc01:16/little>>, "pop {r0}"), + jit_armv6m_asm:pop([r0]) + ), + %% Multiple register pop + ?_assertEqual( + asm(<<16#bc07:16/little>>, "pop {r0, r1, r2}"), + jit_armv6m_asm:pop([r0, r1, r2]) + ), + %% Pop with PC + ?_assertEqual( + asm(<<16#bd00:16/little>>, "pop {pc}"), + jit_armv6m_asm:pop([pc]) + ), + %% Pop registers + PC + ?_assertEqual( + asm(<<16#bd07:16/little>>, "pop {r0, r1, r2, pc}"), + jit_armv6m_asm:pop([r0, r1, r2, pc]) + ) + ]. + +asm(Bin, Str) -> + case erlang:system_info(machine) of + "ATOM" -> + Bin; + "BEAM" -> + case os:cmd("which arm-elf-as") of + [] -> + Bin; + _ -> + ok = file:write_file( + "test.S", ".arch armv6-m\n.thumb\n.syntax unified\n" ++ Str ++ "\n" + ), + Dump = os:cmd( + "arm-elf-as -c test.S -o test.o && arm-elf-objdump -j .text -D test.o" + ), + DumpBin = list_to_binary(Dump), + DumpLines = binary:split(DumpBin, <<"\n">>, [global]), + AsmBin = asm_lines(DumpLines, <<>>), + if + AsmBin =:= Bin -> + ok; + true -> + io:format( + "-------------------------------------------\n" + "~s\n" + "-------------------------------------------\n", + [Dump] + ) + end, + ?assertEqual(AsmBin, Bin), + Bin + end + end. + +asm_lines([<<" ", Tail/binary>> | T], Acc) -> + [_Offset, HexStr0] = binary:split(Tail, <<":\t">>), + [HexStr, _] = binary:split(HexStr0, <<"\t">>), + AssembledBin = hex_to_bin(HexStr, <<>>), + asm_lines(T, <>); +asm_lines([_OtherLine | T], Acc) -> + asm_lines(T, Acc); +asm_lines([], Acc) -> + Acc. + +hex_to_bin(<<>>, Acc) -> + Acc; +hex_to_bin(<<" ", Tail/binary>>, Acc) -> + hex_to_bin(Tail, Acc); +hex_to_bin(HexStr, Acc) -> + [HexChunk, Rest] = binary:split(HexStr, <<" ">>), + NumBits = byte_size(HexChunk) * 4, + HexVal = binary_to_integer(HexChunk, 16), + NewAcc = <>, + hex_to_bin(Rest, NewAcc). From 7796e5cbf44c6a2505f9636406fb8a25c6d044de Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 13:18:23 +0200 Subject: [PATCH 02/97] armv6m: initial backend commit Signed-off-by: Paul Guyot --- libs/jit/src/CMakeLists.txt | 1 + libs/jit/src/jit_armv6m.erl | 2060 +++++++++++++++++++++++++++ libs/jit/src/jit_armv6m_asm.erl | 21 +- tests/libs/jit/CMakeLists.txt | 1 + tests/libs/jit/jit_armv6m_tests.erl | 1806 +++++++++++++++++++++++ tests/libs/jit/tests.erl | 2 + 6 files changed, 3887 insertions(+), 4 deletions(-) create mode 100644 libs/jit/src/jit_armv6m.erl create mode 100644 tests/libs/jit/jit_armv6m_tests.erl diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 450e8048d1..7aad016575 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -29,6 +29,7 @@ set(ERLANG_MODULES jit_stream_mmap jit_aarch64 jit_aarch64_asm + jit_armv6m jit_armv6m_asm jit_x86_64 jit_x86_64_asm diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl new file mode 100644 index 0000000000..61ff78670e --- /dev/null +++ b/libs/jit/src/jit_armv6m.erl @@ -0,0 +1,2060 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_armv6m). + +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/2, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + continuation_entry_point/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/3 +]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). + +-define(ASSERT(Expr), true = Expr). + +%% ARMv6-M AAPCS32 ABI: r0-r3 are used for argument passing and return value. +%% r0-r1 form a double-word for 64-bit returns, additional args passed on stack. +%% r4-r11 are callee-saved registers (must be preserved across calls), +%% r12 (IP) is intra-procedure-call scratch register, +%% r13 (SP) is stack pointer, +%% r14 (LR) is link register, +%% r15 (PC) is program counter. +%% ARMv6-M has no floating-point unit, so no FP registers available. +%% +%% See: Arm® Architecture Procedure Call Standard (AAPCS32) +%% https://developer.arm.com/documentation/ihi0042/latest/ +%% +%% Registers used by the JIT backend (ARMv6-M Thumb): +%% - Argument/return: r0-r3 +%% - Callee-saved: r4-r11 (must preserve) +%% - Scratch: r12 (IP) - intra-procedure call +%% - Stack pointer: r13 (SP) +%% - Link register: r14 (LR) +%% - Program counter: r15 (PC) +%% - Available for JIT scratch: r12 only (very limited!) +%% +%% Note: ARMv6-M Thumb instructions are mostly 16-bit with limited +%% register access (many instructions only work with r0-r7). +%% +%% For more details, refer to the AAPCS32 Procedure Call Standard. + +-type armv6m_register() :: + r0 + | r1 + | r2 + | r3 + | r4 + | r5 + | r6 + | r7 + | r8 + | r9 + | r10 + | r11 + | r12 + | r13 + | r14 + | r15. + +-define(IS_GPR(Reg), + (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse + Reg =:= r5 orelse Reg =:= r6 orelse Reg =:= r7 orelse Reg =:= r8 orelse Reg =:= r9 orelse + Reg =:= r10 orelse Reg =:= r11 orelse Reg =:= r12 orelse Reg =:= r13 orelse Reg =:= r14 orelse + Reg =:= r15) +). +%% ARMv6-M has no FPU, so no FP registers +-define(IS_FPR(_Reg), false). + +-type stream() :: any(). + +-record(state, { + stream_module :: module(), + stream :: stream(), + offset :: non_neg_integer(), + branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + available_regs :: [armv6m_register()], + available_fpregs :: [armv6m_register()], + used_regs :: [armv6m_register()] +}). + +-type state() :: #state{}. +-type immediate() :: non_neg_integer(). +-type vm_register() :: + {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, armv6m_register()}. +-type value() :: immediate() | vm_register() | armv6m_register() | {ptr, armv6m_register()}. +-type arg() :: ctx | jit_state | offset | value() | {free, value()}. + +-type maybe_free_armv6m_register() :: + {free, armv6m_register()} | armv6m_register(). + +-type condition() :: + {armv6m_register(), '<', integer()} + | {maybe_free_armv6m_register(), '<', armv6m_register()} + | {maybe_free_armv6m_register(), '==', integer()} + | {maybe_free_armv6m_register(), '!=', armv6m_register() | integer()} + | {'(int)', maybe_free_armv6m_register(), '==', integer()} + | {'(int)', maybe_free_armv6m_register(), '!=', armv6m_register() | integer()} + | {'(bool)', maybe_free_armv6m_register(), '==', false} + | {'(bool)', maybe_free_armv6m_register(), '!=', false} + | {maybe_free_armv6m_register(), '&', non_neg_integer(), '!=', integer()}. + +% ctx->e is 0x28 +% ctx->x is 0x30 +-define(CTX_REG, r0). +-define(JITSTATE_REG, r1). +-define(NATIVE_INTERFACE_REG, r2). +%% ARMv6-M uses 4-byte word size, so adjust all offsets +-define(Y_REGS, {?CTX_REG, 16#28}). +-define(X_REG(N), {?CTX_REG, 16#30 + (N * 4)}). +-define(CP, {?CTX_REG, 16#B8}). +-define(FP_REGS, {?CTX_REG, 16#C0}). +-define(BS, {?CTX_REG, 16#C8}). +-define(BS_OFFSET, {?CTX_REG, 16#D0}). +-define(JITSTATE_MODULE, {?JITSTATE_REG, 0}). +-define(JITSTATE_CONTINUATION, {?JITSTATE_REG, 16#4}). +-define(JITSTATE_REDUCTIONCOUNT, {?JITSTATE_REG, 16#8}). +-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}). +-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}). + +% aarch64 ABI specific +%% ARMv6-M register mappings + +%% Link register +-define(LR_REG, r14). +%% Intra-procedure call scratch register +-define(IP_REG, r12). + +-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). +-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). +-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). +-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). + +%% ARMv6-M register allocation: +%% - r0: context pointer (reserved) +%% - r1, r3: available (r1 saved/restored, r3 can be parameter) +%% - r2: parameter register (not available for scratch) +%% - r4-r7: callee-saved (saved/restored on entry/exit) +%% - r8-r11: high registers, limited Thumb access +%% - r12: intra-procedure call scratch +%% - r13 (SP), r14 (LR), r15 (PC): special purpose +%% Reorder to match AArch64 test expectations (r7 first) +-define(AVAILABLE_REGS, [r7, r6, r5, r4, r3, r1, r12]). +-define(AVAILABLE_FPREGS, []). +-define(PARAMETER_REGS, [r0, r1, r2, r3]). +%% ARMv6-M has no FPU +-define(PARAMETER_FPREGS, []). + +%%----------------------------------------------------------------------------- +%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. +%% sizeof(uintptr_t) +%% +%% C code equivalent is: +%% #if UINTPTR_MAX == UINT32_MAX +%% #define TERM_BYTES 4 +%% #elif UINTPTR_MAX == UINT64_MAX +%% #define TERM_BYTES 8 +%% #else +%% #error "Term size must be either 32 bit or 64 bit." +%% #endif +%% +%% @end +%% @return Word size in bytes +%%----------------------------------------------------------------------------- +-spec word_size() -> 4 | 8. +word_size() -> 4. + +%%----------------------------------------------------------------------------- +%% @doc Create a new backend state for provided variant, module and stream. +%% @end +%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC) +%% @param StreamModule module to stream instructions +%% @param Stream stream state +%% @return New backend state +%%----------------------------------------------------------------------------- +-spec new(any(), module(), stream()) -> state(). +new(_Variant, StreamModule, Stream) -> + #state{ + stream_module = StreamModule, + stream = Stream, + branches = [], + offset = StreamModule:offset(Stream), + available_regs = ?AVAILABLE_REGS, + available_fpregs = ?AVAILABLE_FPREGS, + used_regs = [] + }. + +%%----------------------------------------------------------------------------- +%% @doc Access the stream object. +%% @end +%% @param State current backend state +%% @return The stream object +%%----------------------------------------------------------------------------- +-spec stream(state()) -> stream(). +stream(#state{stream = Stream}) -> + Stream. + +%%----------------------------------------------------------------------------- +%% @doc Get the current offset in the stream +%% @end +%% @param State current backend state +%% @return The current offset +%%----------------------------------------------------------------------------- +-spec offset(state()) -> non_neg_integer(). +offset(#state{stream_module = StreamModule, stream = Stream}) -> + StreamModule:offset(Stream). + +%%----------------------------------------------------------------------------- +%% @doc Emit a debugger of breakpoint instruction. This is used for debugging +%% and not in production. +%% @end +%% @param State current backend state +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec debugger(state()) -> state(). +debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> + Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:brk(0)), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently used native registers. This is used for +%% debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of used registers +%%----------------------------------------------------------------------------- +-spec used_regs(state()) -> [armv6m_register()]. +used_regs(#state{used_regs = Used}) -> Used. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently available native scratch registers. This +%% is used for debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of available registers +%%----------------------------------------------------------------------------- +-spec available_regs(state()) -> [armv6m_register()]. +available_regs(#state{available_regs = Available}) -> Available. + +%%----------------------------------------------------------------------------- +%% @doc Free native registers. The passed list of registers can contain +%% registers, pointer to registers or other values that are ignored. +%% @end +%% @param State current backend state +%% @param Regs list of registers or other values +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec free_native_registers(state(), [value()]) -> state(). +free_native_registers(State, []) -> + State; +free_native_registers(State, [Reg | Rest]) -> + State1 = free_native_register(State, Reg), + free_native_registers(State1, Rest). + +-spec free_native_register(state(), value()) -> state(). +free_native_register( + #state{available_regs = Available0, available_fpregs = AvailableFP0, used_regs = Used0} = State, + Reg +) when + is_atom(Reg) +-> + {Available1, AvailableFP1, Used1} = free_reg(Available0, AvailableFP0, Used0, Reg), + State#state{available_regs = Available1, available_fpregs = AvailableFP1, used_regs = Used1}; +free_native_register(State, {ptr, Reg}) -> + free_native_register(State, Reg); +free_native_register(State, _Other) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Assert that all native scratch registers are available. This is used +%% for debugging and not in production. +%% @end +%% @param State current backend state +%% @return ok +%%----------------------------------------------------------------------------- +-spec assert_all_native_free(state()) -> ok. +assert_all_native_free(#state{ + available_regs = ?AVAILABLE_REGS, available_fpregs = ?AVAILABLE_FPREGS, used_regs = [] +}) -> + ok. + +%%----------------------------------------------------------------------------- +%% @doc Emit the jump table at the beginning of the module. Branches will be +%% updated afterwards with update_branches/2. Emit branches for labels from +%% 0 (special entry for lines and labels information) to LabelsCount included +%% (special entry for OP_INT_CALL_END). +%% @end +%% @param State current backend state +%% @param LabelsCount number of labels in the module. +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec jump_table(state(), pos_integer()) -> state(). +jump_table(State, LabelsCount) -> + jump_table0(State, 0, LabelsCount). + +jump_table0(State, N, LabelsCount) when N > LabelsCount -> + State; +jump_table0( + #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + N, + LabelsCount +) -> + Offset = StreamModule:offset(Stream0), + BranchInstr = jit_armv6m_asm:b(0), + Reloc = {N, Offset, b}, + Stream1 = StreamModule:append(Stream0, BranchInstr), + jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @param Labels list of tuples with label, offset and size of the branch in bits +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state(), [{non_neg_integer(), non_neg_integer()}]) -> state(). +update_branches(#state{branches = []} = State, _Labels) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT] + } = State, + Labels +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Rel = LabelOffset - Offset, + NewInstr = + case Type of + {bcc, CC} -> jit_armv6m_asm:bcc(CC, Rel); + {adr, Reg} -> jit_armv6m_asm:adr(Reg, Rel); + b -> jit_armv6m_asm:b(Rel) + end, + Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + update_branches(State#state{stream = Stream1, branches = BranchesT}, Labels). + +%%----------------------------------------------------------------------------- +%% @doc Emit a call (call with return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. It also saves scratch registers we need to preserve. +%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), armv6m_register()}. +call_primitive( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + Primitive, + Args +) -> + PrepCall = + case Primitive of + 0 -> + jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, 0}); + N -> + jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, N * 8}) + end, + Stream1 = StreamModule:append(Stream0, PrepCall), + StateCall = State#state{stream = Stream1}, + call_func_ptr(StateCall, {free, ?IP_REG}, Args). + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump (call without return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. +%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +call_primitive_last( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + Primitive, + Args +) -> + % We need a register for the function pointer that should not be used as a parameter + % Since we're not returning, we can use all scratch registers except + % registers used for parameters + ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + ArgsRegs = args_regs(Args), + ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, + [Temp | AvailableRegs1] = ScratchRegs, + UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + PrepCall = + case Primitive of + 0 -> + jit_armv6m_asm:ldr(Temp, {?NATIVE_INTERFACE_REG, 0}); + N -> + jit_armv6m_asm:ldr(Temp, {?NATIVE_INTERFACE_REG, N * 8}) + end, + Stream1 = StreamModule:append(Stream0, PrepCall), + State1 = set_args( + State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + Args + ), + #state{stream = Stream2} = State1, + Call = jit_armv6m_asm:br(Temp), + Stream3 = StreamModule:append(Stream2, Call), + State1#state{stream = Stream3, available_regs = ?AVAILABLE_REGS, used_regs = []}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a return of a value if it's not equal to ctx. +%% This logic is used to break out to the scheduler, typically after signal +%% messages have been processed. +%% @end +%% @param State current backend state +%% @param Reg register to compare to (should be {free, Reg} as it's always freed) +%% @return Updated backend state +%%----------------------------------------------------------------------------- +return_if_not_equal_to_ctx( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + available_fpregs = AvailableFPRegs0, + used_regs = UsedRegs0 + } = State, + {free, Reg} +) -> + I1 = jit_armv6m_asm:cmp(Reg, ?CTX_REG), + I3 = + case Reg of + % Return value is already in r0 + r0 -> <<>>; + % Move to r0 (return register) + _ -> jit_armv6m_asm:mov(r0, Reg) + end, + I4 = jit_armv6m_asm:ret(), + I2 = jit_armv6m_asm:bcc(eq, 4 + byte_size(I3) + byte_size(I4)), + Stream1 = StreamModule:append(Stream0, <>), + {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( + AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg + ), + State#state{ + stream = Stream1, + available_regs = AvailableRegs1, + available_fpregs = AvailableFPRegs1, + used_regs = UsedRegs1 + }. + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump to a label. The offset of the relocation is saved and will +%% be updated with `update_branches/2`. +%% @end +%% @param State current backend state +%% @param Label to jump to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_label( + #state{stream_module = StreamModule, stream = Stream0, branches = AccBranches} = State, Label +) -> + Offset = StreamModule:offset(Stream0), + % Placeholder offset, will be patched + I1 = jit_armv6m_asm:b(0), + Reloc = {Label, Offset, b}, + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1, branches = [Reloc | AccBranches]}. + +%% @private +-spec rewrite_branch_instruction( + jit_armv6m_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, integer() +) -> binary(). +rewrite_branch_instruction({cbnz, Reg}, Offset) -> + jit_armv6m_asm:cbnz(Reg, Offset); +rewrite_branch_instruction({cbnz_w, Reg}, Offset) -> + jit_armv6m_asm:cbnz_w(Reg, Offset); +rewrite_branch_instruction({tbz, Reg, Bit}, Offset) -> + jit_armv6m_asm:tbz(Reg, Bit, Offset); +rewrite_branch_instruction({tbnz, Reg, Bit}, Offset) -> + jit_armv6m_asm:tbnz(Reg, Bit, Offset); +rewrite_branch_instruction(CC, Offset) when is_atom(CC) -> + jit_armv6m_asm:bcc(CC, Offset). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if block, i.e. emit a test of a condition and conditionnally +%% execute a block. +%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockFn function to emit the block that may be executed +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state(). +if_block( + #state{stream_module = StreamModule} = State0, + {'and', CondList}, + BlockFn +) -> + {Replacements, State1} = lists:foldl( + fun(Cond, {AccReplacements, AccState}) -> + Offset = StreamModule:offset(AccState#state.stream), + {NewAccState, CC, ReplaceDelta} = if_block_cond(AccState, Cond), + {[{Offset + ReplaceDelta, CC} | AccReplacements], NewAccState} + end, + {[], State0}, + CondList + ), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + Stream3 = lists:foldl( + fun({ReplacementOffset, CC}, AccStream) -> + BranchOffset = OffsetAfter - ReplacementOffset, + NewBranchInstr = jit_armv6m_asm:bcc(CC, BranchOffset), + StreamModule:replace(AccStream, ReplacementOffset, NewBranchInstr) + end, + Stream2, + Replacements + ), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs); +if_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, CC, BranchInstrOffset} = if_block_cond(State0, Cond), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + %% Patch the conditional branch instruction to jump to the end of the block + BranchOffset = OffsetAfter - (Offset + BranchInstrOffset), + NewBranchInstr = rewrite_branch_instruction(CC, BranchOffset), + Stream3 = StreamModule:replace(Stream2, Offset + BranchInstrOffset, NewBranchInstr), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if else block, i.e. emit a test of a condition and +%% conditionnally execute a block or another block. +%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockTrueFn function to emit the block that is executed if condition is true +%% @param BlockFalseFn function to emit the block that is executed if condition is false +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) -> + state(). +if_else_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockTrueFn, + BlockFalseFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, CC, BranchInstrOffset} = if_block_cond(State0, Cond), + State2 = BlockTrueFn(State1), + Stream2 = State2#state.stream, + %% Emit unconditional branch to skip the else block (will be replaced) + ElseJumpOffset = StreamModule:offset(Stream2), + ElseJumpInstr = jit_armv6m_asm:b(0), + Stream3 = StreamModule:append(Stream2, ElseJumpInstr), + %% Else block starts here. + OffsetAfter = StreamModule:offset(Stream3), + %% Patch the conditional branch to jump to the else block + ElseBranchOffset = OffsetAfter - (Offset + BranchInstrOffset), + NewBranchInstr = rewrite_branch_instruction(CC, ElseBranchOffset), + Stream4 = StreamModule:replace(Stream3, Offset + BranchInstrOffset, NewBranchInstr), + %% Build the else block + StateElse = State2#state{ + stream = Stream4, + used_regs = State1#state.used_regs, + available_regs = State1#state.available_regs, + available_fpregs = State1#state.available_fpregs + }, + State3 = BlockFalseFn(StateElse), + Stream5 = State3#state.stream, + OffsetFinal = StreamModule:offset(Stream5), + %% Patch the unconditional branch to jump to the end + FinalJumpOffset = OffsetFinal - ElseJumpOffset, + NewElseJumpInstr = jit_armv6m_asm:b(FinalJumpOffset), + Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), + merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs). + +-spec if_block_cond(state(), condition()) -> + { + state(), + jit_armv6m_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, + non_neg_integer() + }. +if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> + I = jit_armv6m_asm:tbz(Reg, 63, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = State0#state{stream = Stream1}, + {State1, {tbz, Reg, 63}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {Reg, '<', Val} +) when is_atom(Reg), is_integer(Val) -> + I1 = jit_armv6m_asm:cmp(Reg, Val), + % ge = greater than or equal + I2 = jit_armv6m_asm:bcc(ge, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1}, + {State1, ge, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '<', RegB} +) when is_atom(RegB) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_armv6m_asm:cmp(Reg, RegB), + % ge = greater than or equal + I2 = jit_armv6m_asm:bcc(ge, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1}, + {State1, ge, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I = jit_armv6m_asm:cbnz(Reg, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {cbnz, Reg}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {'(int)', RegOrTuple, '==', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I = jit_armv6m_asm:cbnz_w(Reg, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {cbnz_w, Reg}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(int)', RegOrTuple, '==', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_armv6m_asm:cmp_w(Reg, Val), + I2 = jit_armv6m_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ne, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '!=', Val} +) when is_integer(Val) orelse ?IS_GPR(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_armv6m_asm:cmp(Reg, Val), + I2 = jit_armv6m_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, eq, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(int)', RegOrTuple, '!=', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_armv6m_asm:cmp_w(Reg, Val), + I2 = jit_armv6m_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, eq, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '==', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + I1 = jit_armv6m_asm:cmp(Reg, Val), + I2 = jit_armv6m_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ne, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(bool)', RegOrTuple, '==', false} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test lowest bit + I = jit_armv6m_asm:tbnz(Reg, 0, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {tbnz, Reg, 0}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {'(bool)', RegOrTuple, '!=', false} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test lowest bit + I = jit_armv6m_asm:tbz(Reg, 0, 0), + Stream1 = StreamModule:append(Stream0, I), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {tbz, Reg, 0}, 0}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {RegOrTuple, '&', Val, '!=', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % Test bits + TestCode = + try + jit_armv6m_asm:tst(Reg, Val) + catch + error:{unencodable_immediate, Val} -> + TestCode0 = jit_armv6m_asm:mov(Temp, Val), + TestCode1 = jit_armv6m_asm:tst(Reg, Temp), + <> + end, + I2 = jit_armv6m_asm:bcc(eq, 0), + Code = << + TestCode/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, eq, byte_size(TestCode)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {Reg, '&', Mask, '!=', Val} +) when ?IS_GPR(Reg) -> + % AND with mask + OffsetBefore = StreamModule:offset(Stream0), + State1 = op_imm(State0, and_, Temp, Reg, Mask), + Stream1 = State1#state.stream, + % Compare with value + I2 = jit_armv6m_asm:cmp(Temp, Val), + Stream2 = StreamModule:append(Stream1, I2), + OffsetAfter = StreamModule:offset(Stream2), + I3 = jit_armv6m_asm:bcc(eq, 0), + Stream3 = StreamModule:append(Stream2, I3), + State2 = State1#state{stream = Stream3}, + {State2, eq, OffsetAfter - OffsetBefore}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + {{free, Reg} = RegTuple, '&', Mask, '!=', Val} +) when ?IS_GPR(Reg) -> + % AND with mask + OffsetBefore = StreamModule:offset(Stream0), + State1 = and_(State0, Reg, Mask), + Stream1 = State1#state.stream, + % Compare with value + I2 = jit_armv6m_asm:cmp(Reg, Val), + Stream2 = StreamModule:append(Stream1, I2), + OffsetAfter = StreamModule:offset(Stream2), + I3 = jit_armv6m_asm:bcc(eq, 0), + Stream3 = StreamModule:append(Stream2, I3), + State3 = State1#state{stream = Stream3}, + State4 = if_block_free_reg(RegTuple, State3), + {State4, eq, OffsetAfter - OffsetBefore}. + +-spec if_block_free_reg(armv6m_register() | {free, armv6m_register()}, state()) -> state(). +if_block_free_reg({free, Reg}, State0) -> + #state{available_regs = AvR0, available_fpregs = AvFR0, used_regs = UR0} = State0, + {AvR1, AvFR1, UR1} = free_reg(AvR0, AvFR0, UR0, Reg), + State0#state{ + available_regs = AvR1, + available_fpregs = AvFR1, + used_regs = UR1 + }; +if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> + State0. + +-spec merge_used_regs(state(), [armv6m_register()]) -> state(). +merge_used_regs(#state{used_regs = UR0, available_regs = AvR0, available_fpregs = AvFR0} = State, [ + Reg | T +]) -> + case lists:member(Reg, UR0) of + true -> + merge_used_regs(State, T); + false -> + AvR1 = lists:delete(Reg, AvR0), + AvFR1 = lists:delete(Reg, AvFR0), + UR1 = [Reg | UR0], + merge_used_regs( + State#state{used_regs = UR1, available_regs = AvR1, available_fpregs = AvFR1}, T + ) + end; +merge_used_regs(State, []) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Emit a shift register right by a fixed number of bits, effectively +%% dividing it by 2^Shift +%% @param State current state +%% @param Reg register to shift +%% @param Shift number of bits to shift +%% @return new state +%%----------------------------------------------------------------------------- +shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I = jit_armv6m_asm:lsr(Reg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a shift register left by a fixed number of bits, effectively +%% multiplying it by 2^Shift +%% @param State current state +%% @param Reg register to shift +%% @param Shift number of bits to shift +%% @return new state +%%----------------------------------------------------------------------------- +shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when + is_atom(Reg) +-> + I = jit_armv6m_asm:lsl(Reg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a call to a function pointer with arguments. This function converts +%% arguments and passes them following the backend ABI convention. +%% @end +%% @param State current backend state +%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex} +%% @param Args arguments to pass to the function +%% @return Updated backend state and return register +%%----------------------------------------------------------------------------- +-spec call_func_ptr(state(), {free, armv6m_register()} | {primitive, non_neg_integer()}, [arg()]) -> + {state(), armv6m_register()}. +call_func_ptr( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + available_fpregs = AvailableFP0, + used_regs = UsedRegs0 + } = State0, + FuncPtrTuple, + Args +) -> + FreeRegs = lists:flatmap( + fun + ({free, ?IP_REG}) -> []; + ({free, {ptr, Reg}}) -> [Reg]; + ({free, Reg}) when is_atom(Reg) -> [Reg]; + (_) -> [] + end, + [FuncPtrTuple | Args] + ), + UsedRegs1 = UsedRegs0 -- FreeRegs, + SavedRegs = [?LR_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + {SavedRegsOdd, Stream1} = push_registers(SavedRegs, StreamModule, Stream0), + + % Set up arguments following AArch64 calling convention + State1 = set_args(State0#state{stream = Stream1}, Args), + #state{stream = Stream2} = State1, + + {FuncPtrReg, Stream3} = + case FuncPtrTuple of + {free, Reg} -> + {Reg, Stream2}; + {primitive, Primitive} -> + % We use r16 for the address. + PrepCall = + case Primitive of + 0 -> + jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, 0}); + N -> + jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, N * 8}) + end, + {?IP_REG, StreamModule:append(Stream2, PrepCall)} + end, + + % Call the function pointer (using BLR for call with return) + Call = jit_armv6m_asm:blr(FuncPtrReg), + Stream4 = StreamModule:append(Stream3, Call), + + % If r0 is in used regs, save it to another temporary register + FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), + AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, + {Stream5, ResultReg} = + case lists:member(r0, SavedRegs) of + true -> + [Temp | _] = AvailableRegs1, + {StreamModule:append(Stream4, jit_armv6m_asm:mov(Temp, r0)), Temp}; + false -> + {Stream4, r0} + end, + + Stream6 = pop_registers(SavedRegsOdd, lists:reverse(SavedRegs), StreamModule, Stream5), + + AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), + AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), + AvailableFP1 = FreeRegs ++ AvailableFP0, + AvailableFP2 = lists:delete(ResultReg, AvailableFP1), + AvailableFP3 = ?AVAILABLE_FPREGS -- (?AVAILABLE_FPREGS -- AvailableFP2), + UsedRegs2 = [ResultReg | UsedRegs1], + { + State1#state{ + stream = Stream6, + available_regs = AvailableRegs3, + available_fpregs = AvailableFP3, + used_regs = UsedRegs2 + }, + ResultReg + }. + +push_registers([RegA, RegB | Tail], StreamModule, Stream0) -> + Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:stp(RegA, RegB, {sp, -16}, '!')), + push_registers(Tail, StreamModule, Stream1); +push_registers([], _StreamModule, Stream0) -> + {false, Stream0}; +push_registers([RegA], StreamModule, Stream0) -> + Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:str(RegA, {sp, -16}, '!')), + {true, Stream1}. + +pop_registers(true, [Reg | Tail], StreamModule, Stream0) -> + % Odd number of registers, pop the last one first + Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:ldr(Reg, {sp}, 16)), + pop_registers(false, Tail, StreamModule, Stream1); +pop_registers(false, [], _StreamModule, Stream0) -> + Stream0; +pop_registers(false, [RegB, RegA | Tail], StreamModule, Stream0) -> + Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:ldp(RegA, RegB, {sp}, 16)), + pop_registers(false, Tail, StreamModule, Stream1). + +-spec set_args(state(), [arg()]) -> state(). +set_args( + #state{stream = Stream0, stream_module = StreamModule, used_regs = UsedRegs} = State0, Args +) -> + ParamRegs = parameter_regs(Args), + ArgsRegs = args_regs(Args), + AvailableScratchGP = + [rdi, rsi, rdx, rcx, r8, r9, r10, r11] -- ParamRegs -- ArgsRegs -- UsedRegs, + AvailableScratchFP = ?AVAILABLE_FPREGS -- ParamRegs -- ArgsRegs -- UsedRegs, + Offset = StreamModule:offset(Stream0), + Args1 = [ + case Arg of + offset -> Offset; + _ -> Arg + end + || Arg <- Args + ], + SetArgsCode = set_args0(Args1, ArgsRegs, ParamRegs, AvailableScratchGP, AvailableScratchFP, []), + Stream1 = StreamModule:append(Stream0, SetArgsCode), + NewUsedRegs = lists:foldl( + fun + ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed); + ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed); + (_, AccUsed) -> AccUsed + end, + UsedRegs, + Args + ), + State0#state{ + stream = Stream1, + available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, + available_fpregs = ?AVAILABLE_FPREGS -- ParamRegs -- NewUsedRegs, + used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) + }. + +parameter_regs(Args) -> + parameter_regs0(Args, ?PARAMETER_REGS, ?PARAMETER_FPREGS, []). + +parameter_regs0([], _, _, Acc) -> + lists:reverse(Acc); +parameter_regs0([Special | T], [GPReg | GPRegsT], FPRegs, Acc) when + Special =:= ctx orelse Special =:= jit_state orelse Special =:= offset +-> + parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); +parameter_regs0([{free, Free} | T], GPRegs, FPRegs, Acc) -> + parameter_regs0([Free | T], GPRegs, FPRegs, Acc); +parameter_regs0([{ptr, Reg} | T], [GPReg | GPRegsT], FPRegs, Acc) when ?IS_GPR(Reg) -> + parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); +parameter_regs0([Reg | T], [GPReg | GPRegsT], FPRegs, Acc) when ?IS_GPR(Reg) -> + parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); +parameter_regs0([Reg | T], GPRegs, [FPReg | FPRegsT], Acc) when ?IS_FPR(Reg) -> + parameter_regs0(T, GPRegs, FPRegsT, [FPReg | Acc]); +parameter_regs0([{x_reg, _} | T], [GPReg | GPRegsT], FPRegs, Acc) -> + parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); +parameter_regs0([{y_reg, _} | T], [GPReg | GPRegsT], FPRegs, Acc) -> + parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); +parameter_regs0([{fp_reg, _} | T], GPRegs, [FPReg | FPRegsT], Acc) -> + parameter_regs0(T, GPRegs, FPRegsT, [FPReg | Acc]); +parameter_regs0([Int | T], [GPReg | GPRegsT], FPRegs, Acc) when is_integer(Int) -> + parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]). + +replace_reg(Args, Reg1, Reg2) -> + replace_reg0(Args, Reg1, Reg2, []). + +replace_reg0([Reg | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([Other | T], Reg, Replacement, Acc) -> + replace_reg0(T, Reg, Replacement, [Other | Acc]). + +set_args0([], [], [], _AvailGP, _AvailFP, Acc) -> + list_to_binary(lists:reverse(Acc)); +set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc) -> + set_args0([FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc); +set_args0([ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, AvailFP, Acc) -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc); +set_args0( + [jit_state | ArgsT], + [?JITSTATE_REG | ArgsRegs], + [?JITSTATE_REG | ParamRegs], + AvailGP, + AvailFP, + Acc +) -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc); +set_args0( + [jit_state | ArgsT], [?JITSTATE_REG | ArgsRegs], [ParamReg | ParamRegs], AvailGP, AvailFP, Acc +) -> + false = lists:member(ParamReg, ArgsRegs), + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [ + jit_armv6m_asm:mov(ParamReg, ?JITSTATE_REG) | Acc + ]); +% ctx is special as we need it to access x_reg/y_reg/fp_reg +set_args0([Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, AvailFP, Acc) -> + false = lists:member(?CTX_REG, ArgsRegs), + J = set_args1(Arg, ?CTX_REG), + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [J | Acc]); +set_args0( + [Arg | ArgsT], + [_ArgReg | ArgsRegs], + [ParamReg | ParamRegs], + [Avail | AvailGPT] = AvailGP, + AvailFP, + Acc +) -> + J = set_args1(Arg, ParamReg), + case lists:member(ParamReg, ArgsRegs) of + false -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [J | Acc]); + true -> + I = jit_armv6m_asm:mov(Avail, ParamReg), + NewArgsT = replace_reg(ArgsT, ParamReg, Avail), + set_args0(NewArgsT, ArgsRegs, ParamRegs, AvailGPT, AvailFP, [J, I | Acc]) + end. + +set_args1(Reg, Reg) -> + []; +set_args1({x_reg, extra}, Reg) -> + jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)); +set_args1({x_reg, X}, Reg) -> + jit_armv6m_asm:ldr(Reg, ?X_REG(X)); +set_args1({ptr, Source}, Reg) -> + jit_armv6m_asm:ldr(Reg, {Source, 0}); +set_args1({y_reg, X}, Reg) -> + [ + jit_armv6m_asm:ldr(Reg, ?Y_REGS), + jit_armv6m_asm:ldr(Reg, {Reg, X * 8}) + ]; +set_args1(ArgReg, Reg) when ?IS_GPR(ArgReg) -> + jit_armv6m_asm:mov(Reg, ArgReg); +set_args1(Arg, Reg) when is_integer(Arg) -> + jit_armv6m_asm:mov(Reg, Arg). + +%%----------------------------------------------------------------------------- +%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg) +%% from an immediate, a native register or another vm register. +%% @end +%% @param State current backend state +%% @param Src value to move to vm register +%% @param Dest vm register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) -> + state(). +% Native register to VM register +move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) -> + I1 = jit_armv6m_asm:str(Src, ?X_REG(?MAX_REG)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) -> + I1 = jit_armv6m_asm:str(Src, ?X_REG(X)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) -> + I1 = jit_armv6m_asm:str(Src, {Reg, 0}), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(#state{available_regs = [Temp | _]} = State0, Src, {y_reg, Y}) when + is_atom(Src) +-> + I1 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), + I2 = jit_armv6m_asm:str(Src, {Temp, Y * 4}), + Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), + State0#state{stream = Stream1}; +% Source is an integer to y_reg (optimized: ldr first, then movs) +move_to_vm_register(#state{available_regs = [Temp1, Temp2 | _]} = State0, N, {y_reg, Y}) when + is_integer(N), N >= 0, N =< 255 +-> + I1 = jit_armv6m_asm:ldr(Temp1, ?Y_REGS), + I2 = jit_armv6m_asm:movs(Temp2, N), + I3 = jit_armv6m_asm:str(Temp2, {Temp1, Y * 4}), + Stream1 = (State0#state.stream_module):append( + State0#state.stream, <> + ), + State0#state{stream = Stream1}; +% Source is an integer (0-255 for movs, negative values need different handling) +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when + is_integer(N), N >= 0, N =< 255 +-> + I1 = jit_armv6m_asm:movs(Temp, N), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +%% Handle large values using simple literal pool (branch-over pattern) +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when + is_integer(N) +-> + StreamModule = State0#state.stream_module, + Stream0 = State0#state.stream, + CurrentOffset = StreamModule:offset(Stream0), + + %% Calculate where literal will be placed (must be word-aligned) + %% After LDR (2 bytes) + Branch (2 bytes) = 4 bytes from current position + OffsetAfterInstructions = CurrentOffset + 4, + %% Find next word-aligned position for literal + LiteralPosition = + case OffsetAfterInstructions rem 4 of + % Already aligned + 0 -> OffsetAfterInstructions; + % Add 2 bytes padding to align + _ -> OffsetAfterInstructions + 2 + end, + PaddingNeeded = LiteralPosition - OffsetAfterInstructions, + + %% Calculate LDR PC-relative offset + %% PC = (current_instruction_address & ~3) + 4 + LdrInstructionAddr = CurrentOffset, + LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, + LiteralOffset = LiteralPosition - LdrPC, + + %% Generate: ldr rTemp, [pc, #LiteralOffset] ; Load from literal + I1 = jit_armv6m_asm:ldr(Temp, {pc, LiteralOffset}), + %% Calculate branch offset + %% Branch is at CurrentOffset + 2, need to jump past literal + BranchPosition = CurrentOffset + 2, + % After the 4-byte literal + TargetPosition = LiteralPosition + 4, + BranchOffset = TargetPosition - BranchPosition, + I2 = jit_armv6m_asm:b(BranchOffset), + %% Generate padding if needed (just zeros) + Padding = + case PaddingNeeded of + 0 -> <<>>; + % 2 bytes of padding + 2 -> <<0:16>> + end, + %% Generate: .word N ; The 32-bit literal + I3 = <>, + + Stream1 = StreamModule:append(Stream0, <>), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +% Source is a VM register +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) -> + I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(?MAX_REG)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) -> + I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(X)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) -> + I1 = jit_armv6m_asm:ldr(Temp, {Reg, 0}), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) -> + I1 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(Temp, {Temp, Y * 4}), + Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register( + #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State, + Reg, + {fp_reg, F} +) when is_atom(Reg) -> + I1 = jit_armv6m_asm:ldr(Temp, ?FP_REGS), + I2 = jit_armv6m_asm:str(Reg, {Temp, F * 4}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a move of an array element (reg[x]) to a vm or a native register. +%% @end +%% @param State current backend state +%% @param Reg base register of the array +%% @param Index index in the array, as an integer or a native register +%% @param Dest vm or native register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_array_element( + state(), + armv6m_register(), + non_neg_integer() | armv6m_register(), + vm_register() | armv6m_register() +) -> state(). +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + Reg, + Index, + {x_reg, X} +) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}), + I2 = jit_armv6m_asm:str(Temp, ?X_REG(X)), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + Reg, + Index, + {ptr, Dest} +) when is_atom(Reg) andalso is_integer(Index) -> + I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}), + I2 = jit_armv6m_asm:str(Temp, {Dest, 0}), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} = + State, + Reg, + Index, + {y_reg, Y} +) when is_atom(Reg) andalso is_integer(Index) -> + I1 = jit_armv6m_asm:ldr(Temp1, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(Temp2, {Reg, Index * 4}), + I3 = jit_armv6m_asm:str(Temp2, {Temp1, Y * 4}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = + State, + {free, Reg}, + Index, + {y_reg, Y} +) when is_integer(Index) -> + I1 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}), + I3 = jit_armv6m_asm:str(Reg, {Temp, Y * 4}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest +) when is_atom(Dest) andalso is_integer(Index) -> + I1 = jit_armv6m_asm:ldr(Dest, {Reg, Index * 4}), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0, + available_fpregs = AvailableFPRegs0 + } = State, + Reg, + {free, IndexReg}, + {x_reg, X} +) when X < ?MAX_REG andalso is_atom(IndexReg) -> + I1 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 2}), + I2 = jit_armv6m_asm:str(IndexReg, ?X_REG(X)), + {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( + AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg + ), + Stream1 = StreamModule:append(Stream0, <>), + State#state{ + available_regs = AvailableRegs1, + available_fpregs = AvailableFPRegs1, + used_regs = UsedRegs1, + stream = Stream1 + }; +move_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0, + available_fpregs = AvailableFPRegs0 + } = State, + Reg, + {free, IndexReg}, + {ptr, PtrReg} +) when is_atom(IndexReg) -> + I1 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 2}), + I2 = jit_armv6m_asm:str(IndexReg, {PtrReg, 0}), + {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( + AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg + ), + Stream1 = StreamModule:append(Stream0, <>), + State#state{ + available_regs = AvailableRegs1, + available_fpregs = AvailableFPRegs1, + used_regs = UsedRegs1, + stream = Stream1 + }; +move_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] = AvailableRegs0, + used_regs = UsedRegs0, + available_fpregs = AvailableFPRegs0 + } = State, + Reg, + {free, IndexReg}, + {y_reg, Y} +) when ?IS_GPR(IndexReg) -> + I1 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 2}), + I3 = jit_armv6m_asm:str(IndexReg, {Temp, Y * 4}), + {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( + AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg + ), + Stream1 = StreamModule:append( + Stream0, <> + ), + State#state{ + available_regs = AvailableRegs1, + available_fpregs = AvailableFPRegs1, + used_regs = UsedRegs1, + stream = Stream1 + }. + +%% @doc move reg[x] to a vm or native register +-spec get_array_element(state(), armv6m_register(), non_neg_integer()) -> + {state(), armv6m_register()}. +get_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [ElemReg | AvailableT], + used_regs = UsedRegs0 + } = State, + Reg, + Index +) -> + I1 = jit_armv6m_asm:ldr(ElemReg, {Reg, Index * 4}), + Stream1 = StreamModule:append(Stream0, <>), + { + State#state{ + stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0] + }, + ElemReg + }. + +%% @doc move an integer, a vm or native register to reg[x] +-spec move_to_array_element( + state(), integer() | vm_register() | armv6m_register(), armv6m_register(), non_neg_integer() +) -> state(). +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State0, + ValueReg, + Reg, + Index +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + I1 = jit_armv6m_asm:str(ValueReg, {Reg, Index * 4}), + Stream1 = StreamModule:append(Stream0, I1), + State0#state{stream = Stream1}; +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State0, + ValueReg, + Reg, + IndexReg +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + I1 = jit_armv6m_asm:str(ValueReg, {Reg, IndexReg, lsl, 2}), + Stream1 = StreamModule:append(Stream0, I1), + State0#state{stream = Stream1}; +move_to_array_element( + State0, + Value, + Reg, + Index +) -> + {State1, Temp} = copy_to_native_register(State0, Value), + State2 = move_to_array_element(State1, Temp, Reg, Index), + free_native_register(State2, Temp). + +move_to_array_element( + State, + Value, + BaseReg, + IndexReg, + Offset +) when is_integer(IndexReg) andalso is_integer(Offset) andalso Offset div 8 =:= 0 -> + move_to_array_element(State, Value, BaseReg, IndexReg + (Offset div 8)); +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + ValueReg, + BaseReg, + IndexReg, + Offset +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + I1 = jit_armv6m_asm:add(Temp, IndexReg, Offset), + I2 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp, lsl, 2}), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; +move_to_array_element( + State0, + Value, + BaseReg, + IndexReg, + Offset +) -> + {State1, ValueReg} = copy_to_native_register(State0, Value), + [Temp | _] = State1#state.available_regs, + I1 = jit_armv6m_asm:add(Temp, IndexReg, Offset), + I2 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp, lsl, 2}), + Stream1 = (State1#state.stream_module):append(State1#state.stream, <>), + State2 = State1#state{stream = Stream1}, + free_native_register(State2, ValueReg). + +-spec move_to_native_register(state(), value()) -> {state(), armv6m_register()}. +move_to_native_register(State, Reg) when is_atom(Reg) -> + {State, Reg}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg} +) when is_atom(Reg) -> + I1 = jit_armv6m_asm:ldr(Reg, {Reg, 0}), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + Imm +) when + is_integer(Imm) +-> + I1 = jit_armv6m_asm:mov(Reg, Imm), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {x_reg, X} +) when + X < ?MAX_REG +-> + I1 = jit_armv6m_asm:ldr(Reg, ?X_REG(X)), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {y_reg, Y} +) -> + I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(Reg, {Reg, Y * 4}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + available_fpregs = [FPReg | AvailFT], + used_regs = Used + } = State, + {fp_reg, F} +) -> + I1 = jit_armv6m_asm:ldr(Temp, ?FP_REGS), + I2 = jit_armv6m_asm:ldr_d(FPReg, {Temp, F * 4}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + {State#state{stream = Stream1, available_fpregs = AvailFT, used_regs = [FPReg | Used]}, FPReg}. + +-spec move_to_native_register(state(), value(), armv6m_register()) -> state(). +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst +) when is_atom(RegSrc) orelse is_integer(RegSrc) -> + I = jit_armv6m_asm:mov(RegDst, RegSrc), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst +) when ?IS_GPR(Reg) -> + I1 = jit_armv6m_asm:ldr(RegDst, {Reg, 0}), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst +) when + X < ?MAX_REG +-> + I1 = jit_armv6m_asm:ldr(RegDst, ?X_REG(X)), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {y_reg, Y}, RegDst +) -> + I1 = jit_armv6m_asm:ldr(RegDst, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(RegDst, {RegDst, Y * 4}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State, + {fp_reg, F}, + RegDst +) -> + I1 = jit_armv6m_asm:ldr(Temp, ?FP_REGS), + I2 = jit_armv6m_asm:ldr_d(RegDst, {Temp, F * 4}), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +-spec copy_to_native_register(state(), value()) -> {state(), armv6m_register()}. +copy_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [SaveReg | AvailT], + used_regs = Used + } = State, + Reg +) when is_atom(Reg) -> + I1 = jit_armv6m_asm:mov(SaveReg, Reg), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; +copy_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [SaveReg | AvailT], + used_regs = Used + } = State, + {ptr, Reg} +) when is_atom(Reg) -> + I1 = jit_armv6m_asm:ldr(SaveReg, {Reg, 0}), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; +copy_to_native_register(State, Reg) -> + move_to_native_register(State, Reg). + +move_to_cp( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + {y_reg, Y} +) -> + I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(Reg, {Reg, Y * 4}), + I3 = jit_armv6m_asm:str(Reg, ?CP), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +increment_sp( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + Offset +) -> + I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS), + I2 = jit_armv6m_asm:add(Reg, Reg, Offset * 4), + I3 = jit_armv6m_asm:str(Reg, ?Y_REGS), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +set_continuation_to_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + branches = Branches + } = State, + Label +) -> + Offset = StreamModule:offset(Stream0), + I1 = jit_armv6m_asm:adr(Temp, 0), + Reloc = {Label, Offset, {adr, Temp}}, + I2 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]}. + +set_continuation_to_offset( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + branches = Branches + } = State +) -> + OffsetRef = make_ref(), + Offset = StreamModule:offset(Stream0), + I1 = jit_armv6m_asm:adr(Temp, 0), + Reloc = {OffsetRef, Offset, {adr, Temp}}, + I2 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}. + +%% @doc Implement a continuation entry point. +%% TODO: push r4-r7 and lr +-spec continuation_entry_point(#state{}) -> #state{}. +continuation_entry_point(State) -> + State. + +get_module_index( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailableT], + used_regs = UsedRegs0 + } = State +) -> + I1 = jit_armv6m_asm:ldr(Reg, ?JITSTATE_MODULE), + I2 = jit_armv6m_asm:ldr_w(Reg, ?MODULE_INDEX(Reg)), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + { + State#state{stream = Stream1, available_regs = AvailableT, used_regs = [Reg | UsedRegs0]}, + Reg + }. + +op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, Reg, Reg, Val) -> + Stream1 = + try + I = jit_armv6m_asm:Op(Reg, Reg, Val), + StreamModule:append(Stream0, I) + catch + error:{unencodable_immediate, Val} -> + [Temp | _] = State#state.available_regs, + I1 = jit_armv6m_asm:mov(Temp, Val), + I2 = jit_armv6m_asm:Op(Reg, Reg, Temp), + StreamModule:append(Stream0, <>) + end, + State#state{stream = Stream1}; +op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA, RegB, Val) -> + Stream1 = + try + I = jit_armv6m_asm:Op(RegA, RegB, Val), + StreamModule:append(Stream0, I) + catch + error:{unencodable_immediate, Val} -> + MoveI = jit_armv6m_asm:mov(RegA, Val), + AndI = jit_armv6m_asm:Op(RegA, RegB, RegA), + StreamModule:append(Stream0, <>) + end, + State#state{stream = Stream1}. + +and_(State, Reg, Val) -> + op_imm(State, and_, Reg, Reg, Val). + +or_(State, Reg, Val) -> + op_imm(State, orr, Reg, Reg, Val). + +add(State, Reg, Val) -> + op_imm(State, add, Reg, Reg, Val). + +sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> + I1 = jit_armv6m_asm:sub(Reg, Reg, Val), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + +mul(State, _Reg, 1) -> + State; +mul(State, Reg, 2) -> + shift_left(State, Reg, 1); +mul(#state{available_regs = [Temp | _]} = State, Reg, 3) -> + I1 = jit_armv6m_asm:lsl(Temp, Reg, 1), + I2 = jit_armv6m_asm:add(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State, Reg, 4) -> + shift_left(State, Reg, 2); +mul(#state{available_regs = [Temp | _]} = State, Reg, 5) -> + I1 = jit_armv6m_asm:lsl(Temp, Reg, 2), + I2 = jit_armv6m_asm:add(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State0, Reg, 6) -> + State1 = mul(State0, Reg, 3), + mul(State1, Reg, 2); +mul(#state{available_regs = [Temp | _]} = State, Reg, 7) -> + I1 = jit_armv6m_asm:lsl(Temp, Reg, 3), + I2 = jit_armv6m_asm:sub(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State, Reg, 8) -> + shift_left(State, Reg, 3); +mul(#state{available_regs = [Temp | _]} = State, Reg, 9) -> + I1 = jit_armv6m_asm:lsl(Temp, Reg, 3), + I2 = jit_armv6m_asm:add(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State0, Reg, 10) -> + State1 = mul(State0, Reg, 5), + mul(State1, Reg, 2); +mul(#state{available_regs = [Temp | _]} = State, Reg, 15) -> + I1 = jit_armv6m_asm:lsl(Temp, Reg, 4), + I2 = jit_armv6m_asm:sub(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State, Reg, 16) -> + shift_left(State, Reg, 4); +mul(State, Reg, 32) -> + shift_left(State, Reg, 5); +mul(State, Reg, 64) -> + shift_left(State, Reg, 6); +mul( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + Reg, + Val +) -> + % multiply by decomposing by power of 2 + I1 = jit_armv6m_asm:mov(Temp, Val), + I2 = jit_armv6m_asm:mul(Reg, Reg, Temp), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}. + +-spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). +decrement_reductions_and_maybe_schedule_next( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0 +) -> + % Load reduction count + I1 = jit_armv6m_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT), + % Decrement reduction count + I2 = jit_armv6m_asm:subs(Temp, Temp, 1), + % Store back the decremented value + I3 = jit_armv6m_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT), + Stream1 = StreamModule:append(Stream0, <>), + BNEOffset = StreamModule:offset(Stream1), + % Branch if reduction count is not zero + I4 = jit_armv6m_asm:bcc(ne, 0), + % Set continuation to the next instruction + ADROffset = BNEOffset + byte_size(I4), + I5 = jit_armv6m_asm:adr(Temp, 0), + I6 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION), + % Append the instructions to the stream + Stream2 = StreamModule:append(Stream1, <>), + State1 = State0#state{stream = Stream2}, + State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]), + % Rewrite the branch and adr instructions + #state{stream = Stream3} = State2, + NewOffset = StreamModule:offset(Stream3), + NewI4 = jit_armv6m_asm:bcc(ne, NewOffset - BNEOffset), + NewI5 = jit_armv6m_asm:adr(Temp, NewOffset - ADROffset), + Stream4 = StreamModule:replace( + Stream3, BNEOffset, <> + ), + merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs). + +-spec call_or_schedule_next(state(), non_neg_integer()) -> state(). +call_or_schedule_next(State0, Label) -> + {State1, RewriteOffset, RewriteSize} = set_cp(State0), + State2 = call_only_or_schedule_next(State1, Label), + rewrite_cp_offset(State2, RewriteOffset, RewriteSize). + +call_only_or_schedule_next( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = Branches, + available_regs = [Temp | _] + } = State0, + Label +) -> + % Load reduction count + I1 = jit_armv6m_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT), + % Decrement reduction count + I2 = jit_armv6m_asm:subs(Temp, Temp, 1), + % Store back the decremented value + I3 = jit_armv6m_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT), + Stream1 = StreamModule:append(Stream0, <>), + BNEOffset = StreamModule:offset(Stream1), + % Branch to label if reduction count is not zero + I4 = jit_armv6m_asm:bcc(ne, 0), + Reloc1 = {Label, BNEOffset, {bcc, ne}}, + Stream2 = StreamModule:append(Stream1, I4), + State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]}, + State2 = set_continuation_to_label(State1, Label), + call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). + +call_primitive_with_cp(State0, Primitive, Args) -> + {State1, RewriteOffset, RewriteSize} = set_cp(State0), + State2 = call_primitive_last(State1, Primitive, Args), + rewrite_cp_offset(State2, RewriteOffset, RewriteSize). + +-spec set_cp(state()) -> {state(), non_neg_integer(), 4 | 8}. +set_cp(State0) -> + % get module index (dynamically) + {#state{stream_module = StreamModule, stream = Stream0} = State1, Reg} = get_module_index( + State0 + ), + Offset = StreamModule:offset(Stream0), + % build cp with module_index << 24 + I1 = jit_armv6m_asm:lsl(Reg, Reg, 24), + if + Offset >= 16250 -> + I2 = jit_armv6m_asm:nop(), + I3 = jit_armv6m_asm:nop(), + RewriteSize = 8; + true -> + I2 = jit_armv6m_asm:nop(), + I3 = <<>>, + RewriteSize = 4 + end, + MOVOffset = Offset + byte_size(I1), + I4 = jit_armv6m_asm:orr(Reg, Reg, ?IP_REG), + I5 = jit_armv6m_asm:str(Reg, ?CP), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State2 = State1#state{stream = Stream1}, + State3 = free_native_register(State2, Reg), + {State3, MOVOffset, RewriteSize}. + +-spec rewrite_cp_offset(state(), non_neg_integer(), 4 | 8) -> state(). +rewrite_cp_offset( + #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0, + RewriteOffset, + _RewriteSize +) -> + NewOffset = StreamModule:offset(Stream0) - CodeOffset, + NewMoveInstr = jit_armv6m_asm:mov(?IP_REG, NewOffset bsl 2), + ?ASSERT(byte_size(NewMoveInstr) =< _RewriteSize), + Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr), + State0#state{stream = Stream1}. + +set_bs( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + TermReg +) -> + I1 = jit_armv6m_asm:str(TermReg, ?BS), + I2 = jit_armv6m_asm:movs(Temp, 0), + I3 = jit_armv6m_asm:str(Temp, ?BS_OFFSET), + Stream1 = StreamModule:append(Stream0, <>), + State0#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @param State current state +%% @param SortedLabels labels information, sorted by offset +%% @param SortedLines line information, sorted by offset +%% @doc Build labels and line tables and encode a function that returns it. +%% In this case, the function returns the effective address of what immediately +%% follows. +%% @end +%% @return New state +%%----------------------------------------------------------------------------- +return_labels_and_lines( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + SortedLabels, + SortedLines +) -> + I1 = jit_armv6m_asm:adr(r0, 8), + I2 = jit_armv6m_asm:ret(), + LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, + LinesTable = <<<> || {Line, Offset} <- SortedLines>>, + Stream1 = StreamModule:append( + Stream0, + <> + ), + State#state{stream = Stream1}. + +free_reg(AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> + AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), + true = lists:member(Reg, UsedRegs0), + UsedRegs1 = lists:delete(Reg, UsedRegs0), + {AvailableRegs1, AvailableFPRegs0, UsedRegs1}; +free_reg(AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg) when ?IS_FPR(Reg) -> + AvailableFPRegs1 = free_reg0(?AVAILABLE_FPREGS, AvailableFPRegs0, Reg, []), + true = lists:member(Reg, UsedRegs0), + UsedRegs1 = lists:delete(Reg, UsedRegs0), + {AvailableRegs0, AvailableFPRegs1, UsedRegs1}. + +free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) -> + lists:reverse(Acc, [Reg | PrevRegs0]); +free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) -> + free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]); +free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) -> + free_reg0(SortedT, PrevRegs, Reg, Acc). + +args_regs(Args) -> + lists:map( + fun + ({free, {ptr, Reg}}) -> Reg; + ({free, Reg}) when is_atom(Reg) -> Reg; + ({free, Imm}) when is_integer(Imm) -> imm; + (offset) -> imm; + (ctx) -> ?CTX_REG; + (jit_state) -> ?JITSTATE_REG; + (Reg) when is_atom(Reg) -> Reg; + (Imm) when is_integer(Imm) -> imm; + ({ptr, Reg}) -> Reg; + ({x_reg, _}) -> ?CTX_REG; + ({y_reg, _}) -> ?CTX_REG; + ({fp_reg, _}) -> ?CTX_REG; + ({free, {x_reg, _}}) -> ?CTX_REG; + ({free, {y_reg, _}}) -> ?CTX_REG; + ({free, {fp_reg, _}}) -> ?CTX_REG + end, + Args + ). diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl index 44c8cbc1cf..b569140a62 100644 --- a/libs/jit/src/jit_armv6m_asm.erl +++ b/libs/jit/src/jit_armv6m_asm.erl @@ -289,9 +289,21 @@ movs(Rd, Rm) when error({movs_requires_low_registers, {Rd, Rm}}) end. -%% ARMv6-M Thumb MOV instruction (no flags, for high registers) --spec mov(arm_gpr_register(), arm_gpr_register()) -> binary(). -%% MOV register - at least one register must be high (r8-r15) +%% ARMv6-M Thumb MOV instruction - handle both immediate and register moves +-spec mov(arm_gpr_register(), arm_gpr_register() | integer()) -> binary(). +%% MOV immediate (using MOVS for low registers with immediate 0-255) +mov(Rd, Imm) when is_atom(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> + RdNum = reg_to_num(Rd), + case RdNum =< 7 of + true -> + %% Use MOVS for low registers with immediate + movs(Rd, Imm); + false -> + %% For high registers, need to use a different approach + %% ARMv6-M doesn't support immediate moves to high registers directly + error({unsupported_immediate_to_high_register, Rd, Imm}) + end; +%% MOV register - handle both high and low register cases mov(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), @@ -312,7 +324,8 @@ mov(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> RmLow = RmNum band 7, <<(16#4600 bor (D bsl 7) bor (M bsl 6) bor (RmLow bsl 3) bor RdLow):16/little>>; false -> - error({mov_requires_high_register, {Rd, Rm}}) + %% For low registers, use ADDS Rd, Rm, #0 (ARMv6-M standard practice) + adds(Rd, Rm, 0) end. %% ARMv6-M Thumb STR immediate offset (0-124, multiple of 4) diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 6bd6323af8..26ab6b4ecc 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -28,6 +28,7 @@ set(ERLANG_MODULES jit_tests_common jit_aarch64_tests jit_aarch64_asm_tests + jit_armv6m_tests jit_armv6m_asm_tests jit_x86_64_tests jit_x86_64_asm_tests diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl new file mode 100644 index 0000000000..6f90b15866 --- /dev/null +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -0,0 +1,1806 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_armv6m_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-include("jit/include/jit.hrl"). +-include("jit/src/term.hrl"). +-include("jit/src/default_atoms.hrl"). +-include("jit/src/primitives.hrl"). + +-define(BACKEND, jit_armv6m). + +% disassembly obtained with: +% arm-elf-objdump -b binary -D dump.bin -M arm + +call_primitive_0_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 0, [ctx, jit_state]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400050 ldr x16, [x2]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_1_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 1, [ctx, jit_state]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400450 ldr x16, [x2, #8]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_2_args_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 2, [ctx, 42, 43, 44]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400850 ldr x16, [x2, #16]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d2800541 mov x1, #0x2a // #42\n" + " 10: d2800562 mov x2, #0x2b // #43\n" + " 14: d2800583 mov x3, #0x2c // #44\n" + " 18: d63f0200 blr x16\n" + " 1c: aa0003e7 mov x7, x0\n" + " 20: a8c10be1 ldp x1, x2, [sp], #16\n" + " 24: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_extended_regs_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), + {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]), + {State3, RegC} = ?BACKEND:call_primitive(State2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), + {State4, ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_PUT_LIST, [ + ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}} + ]), + State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {ptr, RegC}), + State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]), + ?BACKEND:assert_all_native_free(State6), + Stream = ?BACKEND:stream(State6), + Dump = + << + "\n" + " 0: f9404850 ldr x16, [x2, #144]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d2800261 mov x1, #0x13 // #19\n" + " 10: d63f0200 blr x16\n" + " 14: aa0003e7 mov x7, x0\n" + " 18: a8c10be1 ldp x1, x2, [sp], #16\n" + " 1c: a8c103fe ldp x30, x0, [sp], #16\n" + " 20: f9404850 ldr x16, [x2, #144]\n" + " 24: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 28: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 2c: f81f0fe7 str x7, [sp, #-16]!\n" + " 30: d2800281 mov x1, #0x14 // #20\n" + " 34: d63f0200 blr x16\n" + " 38: aa0003e8 mov x8, x0\n" + " 3c: f84107e7 ldr x7, [sp], #16\n" + " 40: a8c10be1 ldp x1, x2, [sp], #16\n" + " 44: a8c103fe ldp x30, x0, [sp], #16\n" + " 48: f9404850 ldr x16, [x2, #144]\n" + " 4c: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 50: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 54: a9bf1fe8 stp x8, x7, [sp, #-16]!\n" + " 58: d2800261 mov x1, #0x13 // #19\n" + " 5c: d63f0200 blr x16\n" + " 60: aa0003e9 mov x9, x0\n" + " 64: a8c11fe8 ldp x8, x7, [sp], #16\n" + " 68: a8c10be1 ldp x1, x2, [sp], #16\n" + " 6c: a8c103fe ldp x30, x0, [sp], #16\n" + " 70: f9403450 ldr x16, [x2, #104]\n" + " 74: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 78: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 7c: f81f0fe9 str x9, [sp, #-16]!\n" + " 80: f94000e1 ldr x1, [x7]\n" + " 84: f9400102 ldr x2, [x8]\n" + " 88: d63f0200 blr x16\n" + " 8c: aa0003e7 mov x7, x0\n" + " 90: f84107e9 ldr x9, [sp], #16\n" + " 94: a8c10be1 ldp x1, x2, [sp], #16\n" + " 98: a8c103fe ldp x30, x0, [sp], #16\n" + " 9c: f9000127 str x7, [x9]\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_ext_only_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, -1]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: b9401027 ldr w7, [x1, #16]\n" + " 4: f10004e7 subs x7, x7, #0x1\n" + " 8: b9001027 str w7, [x1, #16]\n" + " c: 540000a1 b.ne 0x20 // b.any\n" + " 10: 10000087 adr x7, 0x20\n" + " 14: f9000427 str x7, [x1, #8]\n" + " 18: f9400847 ldr x7, [x2, #16]\n" + " 1c: d61f00e0 br x7\n" + " 20: f9401047 ldr x7, [x2, #32]\n" + " 24: d2800042 mov x2, #0x2 // #2\n" + " 28: d2800043 mov x3, #0x2 // #2\n" + " 2c: 92800004 mov x4, #0xffffffffffffffff // #-1\n" + " 30: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_ext_last_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, 10]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: b9401027 ldr w7, [x1, #16]\n" + " 4: f10004e7 subs x7, x7, #0x1\n" + " 8: b9001027 str w7, [x1, #16]\n" + " c: 540000a1 b.ne 0x20 // b.any\n" + " 10: 10000087 adr x7, 0x20\n" + " 14: f9000427 str x7, [x1, #8]\n" + " 18: f9400847 ldr x7, [x2, #16]\n" + " 1c: d61f00e0 br x7\n" + " 20: f9401047 ldr x7, [x2, #32]\n" + " 24: d2800042 mov x2, #0x2 // #2\n" + " 28: d2800043 mov x3, #0x2 // #2\n" + " 2c: d2800144 mov x4, #0xa // #10\n" + " 30: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_last_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:call_primitive_last(State0, 0, [ctx, jit_state, 42]), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9400047 ldr x7, [x2]\n" + " 4: d2800542 mov x2, #0x2a // #42\n" + " 8: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +return_if_not_equal_to_ctx_test_DISABLED_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + {State1, ResultReg} = ?BACKEND:call_primitive( + State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ] + ), + ?assertEqual(r7, ResultReg), + State2 = ?BACKEND:return_if_not_equal_to_ctx(State1, {free, ResultReg}), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9405450 ldr x16, [x2, #168]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + " 1c: eb0000ff cmp x7, x0\n" + " 20: 54000060 b.eq 0x2c // b.none\n" + " 24: aa0703e0 mov x0, x7\n" + " 28: d65f03c0 ret" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State1, ResultReg} = ?BACKEND:call_primitive( + State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ] + ), + ?assertEqual(r7, ResultReg), + {State2, OtherReg} = ?BACKEND:copy_to_native_register(State1, ResultReg), + ?assertEqual(r8, OtherReg), + State3 = ?BACKEND:return_if_not_equal_to_ctx(State2, {free, OtherReg}), + Stream = ?BACKEND:stream(State3), + Dump = + << + " 0: f9405450 ldr x16, [x2, #168]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: d63f0200 blr x16\n" + " 10: aa0003e7 mov x7, x0\n" + " 14: a8c10be1 ldp x1, x2, [sp], #16\n" + " 18: a8c103fe ldp x30, x0, [sp], #16\n" + " 1c: aa0703e8 mov x8, x7\n" + " 20: eb00011f cmp x8, x0\n" + " 24: 54000060 b.eq 0x30 // b.none\n" + " 28: aa0803e0 mov x0, x8\n" + " 2c: d65f03c0 ret" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +move_to_cp_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f94000e7 ldr x7, [x7]\n" + " 8: f9005c07 str x7, [x0, #184]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +increment_sp_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:increment_sp(State0, 7), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: 9100e0e7 add x7, x7, #0x38\n" + " 8: f9001407 str x7, [x0, #40]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +if_block_test_DISABLED_() -> + {setup, + fun() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State2, RegA, RegB} + end, + fun({State0, RegA, RegB}) -> + [ + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: b6f80047 tbz x7, #63, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', RegB}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: eb0800ff cmp x7, x8\n" + " c: 5400004a b.ge 0x14 // b.tcont\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: b5000047 cbnz x7, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: b5000047 cbnz x7, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 35000047 cbnz w7, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 35000047 cbnz w7, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '!=', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f100ecff cmp x7, #0x3b\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '!=', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f100ecff cmp x7, #0x3b\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '!=', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 7100a8ff cmp w7, #0x2a\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '!=', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 7100a8ff cmp w7, #0x2a\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f100ecff cmp x7, #0x3b\n" + " c: 54000041 b.ne 0x14 // b.any\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f100ecff cmp x7, #0x3b\n" + " c: 54000041 b.ne 0x14 // b.any\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '==', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 7100a8ff cmp w7, #0x2a\n" + " c: 54000041 b.ne 0x14 // b.any\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '==', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 7100a8ff cmp w7, #0x2a\n" + " c: 54000041 b.ne 0x14 // b.any\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', RegA, '==', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 37000047 tbnz w7, #0, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', {free, RegA}, '==', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 37000047 tbnz w7, #0, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', RegA, '!=', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 36000047 tbz w7, #0, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', {free, RegA}, '!=', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 36000047 tbz w7, #0, 0x10\n" + " c: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', 16#7, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f24008ff tst x7, #0x7\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', 16#5, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: d28000a9 mov x9, #0x5 // #5\n" + " c: ea0900ff tst x7, x9\n" + " 10: 54000040 b.eq 0x18 // b.none\n" + " 14: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '&', 16#7, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f24008ff tst x7, #0x7\n" + " c: 54000040 b.eq 0x14 // b.none\n" + " 10: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 92400ce9 and x9, x7, #0xf\n" + " c: f1003d3f cmp x9, #0xf\n" + " 10: 54000040 b.eq 0x18 // b.none\n" + " 14: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: 92400ce7 and x7, x7, #0xf\n" + " c: f1003cff cmp x7, #0xf\n" + " 10: 54000040 b.eq 0x18 // b.none\n" + " 14: 91000908 add x8, x8, #0x2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end) + ] + end}. + +if_else_block_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg1} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg2} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + State3 = ?BACKEND:if_else_block( + State2, + {Reg1, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, Reg2, 2) + end, + fun(BSt0) -> + ?BACKEND:add(BSt0, Reg2, 4) + end + ), + Stream = ?BACKEND:stream(State3), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9401c08 ldr x8, [x0, #56]\n" + " 8: f100ecff cmp x7, #0x3b\n" + " c: 54000061 b.ne 0x18 // b.any\n" + " 10: 91000908 add x8, x8, #0x2\n" + " 14: 14000002 b 0x1c\n" + " 18: 91001108 add x8, x8, #0x4" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +shift_right_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:shift_right(State1, Reg, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: d343fce7 lsr x7, x7, #3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +shift_left_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:shift_left(State1, Reg, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: d37df0e7 lsl x7, x7, #3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_only_or_schedule_next_and_label_relocation_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + Offset1 = ?BACKEND:offset(State1), + State2 = ?BACKEND:call_only_or_schedule_next(State1, 2), + Offset2 = ?BACKEND:offset(State2), + State3 = ?BACKEND:call_primitive_last(State2, 0, [ctx, jit_state]), + % OP_INT_CALL_END + Offset0 = ?BACKEND:offset(State3), + State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), + State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), + Stream = ?BACKEND:stream(State5), + Dump = + << + " 0: 1400000d b 0x34\n" + " 4: 14000002 b 0xc\n" + " 8: 14000009 b 0x2c\n" + " c: b9401027 ldr w7, [x1, #16]\n" + " 10: f10004e7 subs x7, x7, #0x1\n" + " 14: b9001027 str w7, [x1, #16]\n" + " 18: 540000a1 b.ne 0x2c // b.any\n" + " 1c: 10000087 adr x7, 0x2c\n" + " 20: f9000427 str x7, [x1, #8]\n" + " 24: f9400847 ldr x7, [x2, #16]\n" + " 28: d61f00e0 br x7\n" + " 2c: f9400047 ldr x7, [x2]\n" + " 30: d61f00e0 br x7\n" + " 34: f9400447 ldr x7, [x2, #8]\n" + " 38: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_bif_with_large_literal_integer_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), + {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 9208452466117618637]), + {State3, ResultReg} = ?BACKEND:call_func_ptr(State2, {free, FuncPtr}, [ + ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg} + ]), + State4 = ?BACKEND:if_block(State3, {ResultReg, '==', 0}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {x_reg, 0}), + State6 = ?BACKEND:free_native_registers(State5, [ResultReg]), + ?BACKEND:assert_all_native_free(State6), + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: f9402050 ldr x16, [x2, #64]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: aa0103e0 mov x0, x1\n" + " 10: d2800041 mov x1, #0x2 // #2\n" + " 14: d63f0200 blr x16\n" + " 18: aa0003e7 mov x7, x0\n" + " 1c: a8c10be1 ldp x1, x2, [sp], #16\n" + " 20: a8c103fe ldp x30, x0, [sp], #16\n" + " 24: f9403c50 ldr x16, [x2, #120]\n" + " 28: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 2c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 30: f81f0fe7 str x7, [sp, #-16]!\n" + " 34: d29579a1 mov x1, #0xabcd // #43981\n" + " 38: f2b7c041 movk x1, #0xbe02, lsl #16\n" + " 3c: f2dfd741 movk x1, #0xfeba, lsl #32\n" + " 40: f2eff941 movk x1, #0x7fca, lsl #48\n" + " 44: d63f0200 blr x16\n" + " 48: aa0003e8 mov x8, x0\n" + " 4c: f84107e7 ldr x7, [sp], #16\n" + " 50: a8c10be1 ldp x1, x2, [sp], #16\n" + " 54: a8c103fe ldp x30, x0, [sp], #16\n" + " 58: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 5c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 60: d2800001 mov x1, #0x0 // #0\n" + " 64: d2800022 mov x2, #0x1 // #1\n" + " 68: f9401803 ldr x3, [x0, #48]\n" + " 6c: aa0803e4 mov x4, x8\n" + " 70: d63f00e0 blr x7\n" + " 74: aa0003e7 mov x7, x0\n" + " 78: a8c10be1 ldp x1, x2, [sp], #16\n" + " 7c: a8c103fe ldp x30, x0, [sp], #16\n" + " 80: b5000087 cbnz x7, 0x90\n" + " 84: f9401847 ldr x7, [x2, #48]\n" + " 88: d2801102 mov x2, #0x88 // #136\n" + " 8c: d61f00e0 br x7\n" + " 90: f9001807 str x7, [x0, #48]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +get_list_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:and_(State1, Reg, -4), + State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), + State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 8: f9401408 ldr x8, [x0, #40]\n" + " c: f94004e9 ldr x9, [x7, #8]\n" + " 10: f9000509 str x9, [x8, #8]\n" + " 14: f9401408 ldr x8, [x0, #40]\n" + " 18: f94000e9 ldr x9, [x7]\n" + " 1c: f9000109 str x9, [x8]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_integer_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + Arg1 = {x_reg, 0}, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), + State2 = ?BACKEND:if_block( + State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + MSt1 = ?BACKEND:if_block( + MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ), + MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), + ?BACKEND:if_block( + MSt3, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ) + end + ), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + Offset = ?BACKEND:offset(State3), + Labels = [{Label, Offset + 16#100}], + State4 = ?BACKEND:update_branches(State3, Labels), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 92400ce8 and x8, x7, #0xf\n" + " 8: f1003d1f cmp x8, #0xf\n" + " c: 54000160 b.eq 0x38 // b.none\n" + " 10: 924004e8 and x8, x7, #0x3\n" + " 14: f100091f cmp x8, #0x2\n" + " 18: 54000040 b.eq 0x20 // b.none\n" + " 1c: 14000047 b 0x138\n" + " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 24: f94000e7 ldr x7, [x7]\n" + " 28: 924014e7 and x7, x7, #0x3f\n" + " 2c: f10020ff cmp x7, #0x8\n" + " 30: 54000040 b.eq 0x38 // b.none\n" + " 34: 14000041 b 0x138" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +cond_jump_to_label(Cond, Label, MMod, MSt0) -> + MMod:if_block(MSt0, Cond, fun(BSt0) -> + MMod:jump_to_label(BSt0, Label) + end). + +is_number_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + Arg1 = {x_reg, 0}, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), + State2 = ?BACKEND:if_block( + State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + BSt1 = cond_jump_to_label( + {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 + ), + BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), + cond_jump_to_label( + {'and', [ + {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT} + ]}, + Label, + ?BACKEND, + BSt3 + ) + end + ), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + Offset = ?BACKEND:offset(State3), + Labels = [{Label, Offset + 16#100}], + State4 = ?BACKEND:update_branches(State3, Labels), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 92400ce8 and x8, x7, #0xf\n" + " 8: f1003d1f cmp x8, #0xf\n" + " c: 540001c0 b.eq 0x44 // b.none\n" + " 10: 924004e8 and x8, x7, #0x3\n" + " 14: f100091f cmp x8, #0x2\n" + " 18: 54000040 b.eq 0x20 // b.none\n" + " 1c: 1400004a b 0x144\n" + " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 24: f94000e7 ldr x7, [x7]\n" + " 28: 924014e8 and x8, x7, #0x3f\n" + " 2c: f100211f cmp x8, #0x8\n" + " 30: 540000a0 b.eq 0x44 // b.none\n" + " 34: 924014e7 and x7, x7, #0x3f\n" + " 38: f10060ff cmp x7, #0x18\n" + " 3c: 54000040 b.eq 0x44 // b.none\n" + " 40: 14000041 b 0x144" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_boolean_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + Offset = ?BACKEND:offset(State3), + Labels = [{Label, Offset + 16#100}], + ?BACKEND:assert_all_native_free(State3), + State4 = ?BACKEND:update_branches(State3, Labels), + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f1012cff cmp x7, #0x4b\n" + " 8: 54000080 b.eq 0x18 // b.none\n" + " c: f1002cff cmp x7, #0xb\n" + " 10: 54000040 b.eq 0x18 // b.none\n" + " 14: 14000041 b 0x118" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_ext_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]), + ?BACKEND:assert_all_native_free(State2), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: b9401027 ldr w7, [x1, #16]\n" + " 4: f10004e7 subs x7, x7, #0x1\n" + " 8: b9001027 str w7, [x1, #16]\n" + " c: 540000a1 b.ne 0x20 // b.any\n" + " 10: 10000087 adr x7, 0x20\n" + " 14: f9000427 str x7, [x1, #8]\n" + " 18: f9400847 ldr x7, [x2, #16]\n" + " 1c: d61f00e0 br x7\n" + " 20: f9400027 ldr x7, [x1]\n" + " 24: b94000e7 ldr w7, [x7]\n" + " 28: d3689ce7 lsl x7, x7, #24\n" + " 2c: d2802610 mov x16, #0x130 // #304\n" + " 30: aa1000e7 orr x7, x7, x16\n" + " 34: f9005c07 str x7, [x0, #184]\n" + " 38: f9401047 ldr x7, [x2, #32]\n" + " 3c: d2800042 mov x2, #0x2 // #2\n" + " 40: d28000a3 mov x3, #0x5 // #5\n" + " 44: 92800004 mov x4, #0xffffffffffffffff // #-1\n" + " 48: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_fun_test_DISABLED() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + FuncReg = {x_reg, 0}, + ArgsCount = 0, + {State2, Reg} = ?BACKEND:move_to_native_register(State1, FuncReg), + {State3, RegCopy} = ?BACKEND:copy_to_native_register(State2, Reg), + State4 = ?BACKEND:if_block( + State3, {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy + ]) + end + ), + State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), + State7 = ?BACKEND:if_block( + State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy + ]) + end + ), + State8 = ?BACKEND:free_native_registers(State7, [RegCopy]), + State9 = ?BACKEND:call_primitive_with_cp(State8, ?PRIM_CALL_FUN, [ + ctx, jit_state, Reg, ArgsCount + ]), + ?BACKEND:assert_all_native_free(State9), + Stream = ?BACKEND:stream(State9), + Dump = << + " 0: b9401027 ldr w7, [x1, #16]\n" + " 4: f10004e7 subs x7, x7, #0x1\n" + " 8: b9001027 str w7, [x1, #16]\n" + " c: 540000a1 b.ne 0x20 // b.any\n" + " 10: 10000087 adr x7, 0x20\n" + " 14: f9000427 str x7, [x1, #8]\n" + " 18: f9400847 ldr x7, [x2, #16]\n" + " 1c: d61f00e0 br x7\n" + " 20: f9401807 ldr x7, [x0, #48]\n" + " 24: aa0703e8 mov x8, x7\n" + " 28: 92400509 and x9, x8, #0x3\n" + " 2c: f100093f cmp x9, #0x2\n" + " 30: 540000c0 b.eq 0x48 // b.none\n" + " 34: f9404c47 ldr x7, [x2, #152]\n" + " 38: d2800702 mov x2, #0x38 // #56\n" + " 3c: d2804163 mov x3, #0x20b // #523\n" + " 40: aa0803e4 mov x4, x8\n" + " 44: d61f00e0 br x7\n" + " 48: 927ef508 and x8, x8, #0xfffffffffffffffc\n" + " 4c: f9400108 ldr x8, [x8]\n" + " 50: 92401509 and x9, x8, #0x3f\n" + " 54: f100513f cmp x9, #0x14\n" + " 58: 540000c0 b.eq 0x70 // b.none\n" + " 5c: f9404c47 ldr x7, [x2, #152]\n" + " 60: d2800c02 mov x2, #0x60 // #96\n" + " 64: d2804163 mov x3, #0x20b // #523\n" + " 68: aa0803e4 mov x4, x8\n" + " 6c: d61f00e0 br x7\n" + " 70: f9400028 ldr x8, [x1]\n" + " 74: b9400108 ldr w8, [x8]\n" + " 78: d3689d08 lsl x8, x8, #24\n" + " 7c: d2804c10 mov x16, #0x260 // #608\n" + " 80: aa100108 orr x8, x8, x16\n" + " 84: f9005c08 str x8, [x0, #184]\n" + " 88: f9408048 ldr x8, [x2, #256]\n" + " 8c: aa0703e2 mov x2, x7\n" + " 90: d2800003 mov x3, #0x0 // #0\n" + " 94: d61f0100 br x8" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +move_to_vm_register_test0(State, Source, Dest, Dump) -> + State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +move_to_vm_register_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + move_to_vm_register_test0(State0, 0, {x_reg, 0}, << + " 0: 2700 movs r7, #0\n" + " 2: 6307 str r7, [r0, #48] ; 0x30" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {x_reg, extra}, << + " 0: 2700 movs r7, #0\n" + " 2: 6707 str r7, [r0, #112] ; 0x70" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {ptr, r6}, << + " 0: 2700 movs r7, #0\n" + " 2: 6037 str r7, [r6, #0]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {y_reg, 2}, << + " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 2: 2600 movs r6, #0\n" + " 4: 60be str r6, [r7, #8]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {y_reg, 20}, << + " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 2: 2600 movs r6, #0\n" + " 4: 653e str r6, [r7, #80] ; 0x50" + >>) + end), + %% Test: Immediate to x_reg + ?_test(begin + move_to_vm_register_test0(State0, 42, {x_reg, 0}, << + " 0: 272a movs r7, #42 ; 0x2a\n" + " 2: 6307 str r7, [r0, #48] ; 0x30" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {x_reg, extra}, << + " 0: 272a movs r7, #42 ; 0x2a\n" + " 2: 6707 str r7, [r0, #112] ; 0x70" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 2}, << + " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 2: 262a movs r6, #42 ; 0x2a\n" + " 4: 60be str r6, [r7, #8]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 20}, << + " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 2: 262a movs r6, #42 ; 0x2a\n" + " 4: 653e str r6, [r7, #80] ; 0x50" + >>) + end), + %% Test: Immediate to ptr + ?_test(begin + move_to_vm_register_test0(State0, 99, {ptr, r3}, << + " 0: 2763 movs r7, #99 ; 0x63\n" + " 2: 601f str r7, [r3, #0]" + >>) + end), + %% Test: x_reg to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << + " 0: 6b47 ldr r7, [r0, #52] ; 0x34\n" + " 2: 6387 str r7, [r0, #56] ; 0x38" + >>) + end), + %% Test: x_reg to ptr + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r1}, << + " 0: 6b47 ldr r7, [r0, #52] ; 0x34\n" + " 2: 600f str r7, [r1, #0]" + >>) + end), + %% Test: ptr to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {ptr, r4}, {x_reg, 3}, << + " 0: 6827 ldr r7, [r4, #0]\n" + " 2: 63c7 str r7, [r0, #60] ; 0x3c" + >>) + end), + %% Test: x_reg to y_reg + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << + " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" + " 2: 6a86 ldr r6, [r0, #40] ; 0x28\n" + " 4: 6077 str r7, [r6, #4]" + >>) + end), + %% Test: y_reg to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << + " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 2: 683f ldr r7, [r7, #0]\n" + " 4: 63c7 str r7, [r0, #60] ; 0x3c" + >>) + end), + %% Test: y_reg to y_reg + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << + " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 2: 687f ldr r7, [r7, #4]\n" + " 4: 63c7 str r7, [r0, #60] ; 0x3c" + >>) + end), + %% Test: Native register to x_reg + ?_test(begin + move_to_vm_register_test0(State0, r5, {x_reg, 0}, << + " 0: 6305 str r5, [r0, #48] ; 0x30" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, r6, {x_reg, extra}, << + " 0: 6706 str r6, [r0, #112] ; 0x70" + >>) + end), + %% Test: Native register to ptr + ?_test(begin + move_to_vm_register_test0(State0, r4, {ptr, r3}, << + " 0: 601c str r4, [r3, #0]" + >>) + end), + %% Test: Native register to y_reg + ?_test(begin + move_to_vm_register_test0(State0, r1, {y_reg, 0}, << + " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 2: 6039 str r1, [r7, #0]" + >>) + end), + %% Test: Large immediate to x_reg (32-bit literal pool, aligned case) + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << + " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" + " 2: e001 b.n 0x8\n" + " 4: 5678 .word 0x12345678\n" + " 6: 1234 \n" + " 8: 6307 str r7, [r0, #48] ; 0x30" + >>) + end), + %% Test: Large immediate to x_reg (32-bit literal pool, unaligned case) + ?_test(begin + %% First do a 2-byte instruction to create unaligned start + State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}), + %% Then do large immediate which should handle unaligned case + State2 = ?BACKEND:move_to_vm_register(State1, 16#12345678, {x_reg, 0}), + Stream = ?BACKEND:stream(State2), + Expected = dump_to_bin(<< + " 0: 6019 str r1, [r3, #0]\n" + " 2: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 4: e002 b.n 0xc\n" + " 6: 0000 movs r0, r0\n" + " 8: 5678 .word 0x12345678\n" + " a: 1234 \n" + " c: 6307 str r7, [r0, #48] ; 0x30" + >>), + ?assertEqual(Expected, Stream) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, << + " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" + " 2: e001 b.n 0x8\n" + " 4: 5678 .word 0x12345678\n" + " 6: 1234 \n" + " 8: 6707 str r7, [r0, #112] ; 0x70" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << + " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" + " 2: e001 b.n 0x8\n" + " 4: 5678 .word 0x12345678\n" + " 6: 1234 \n" + " 8: 6a86 ldr r6, [r0, #40] ; 0x28\n" + " a: 60b7 str r7, [r6, #8]" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << + " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" + " 2: e001 b.n 0x8\n" + " 4: 5678 .word 0x12345678\n" + " 6: 1234 \n" + " 8: 6a86 ldr r6, [r0, #40] ; 0x28\n" + " a: 6537 str r7, [r6, #80] ; 0x50" + >>) + end), + %% Test: Large immediate to ptr + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {ptr, r3}, << + " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" + " 2: e001 b.n 0x8\n" + " 4: 5678 .word 0x12345678\n" + " 6: 1234 \n" + " 8: 601f str r7, [r3, #0]" + >>) + end), + %% Test: x_reg to y_reg (high index) + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << + " 0: 6ec7 ldr r7, [r0, #108] ; 0x6c\n" + " 2: 6a86 ldr r6, [r0, #40] ; 0x28\n" + " 4: 67f7 str r7, [r6, #124] ; 0x7c" + >>) + end), + %% Test: y_reg to x_reg (high index) + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << + " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 2: 6fff ldr r7, [r7, #124] ; 0x7c\n" + " 4: 66c7 str r7, [r0, #108] ; 0x6c" + >>) + end), + %% Test: Negative immediate to x_reg + ?_test(begin + move_to_vm_register_test0(State0, -1, {x_reg, 0}, << + " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" + " 2: e001 b.n 0x8\n" + " 4: ffff ffff ; instruction: 0xffffffff\n" + " 8: 6307 str r7, [r0, #48] ; 0x30" + >>) + end) + ] + end}. + +move_array_element_test0(State, Reg, Index, Dest, Dump) -> + State1 = ?BACKEND:move_array_element(State, Reg, Index, Dest), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +move_array_element_test_DISABLED_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_array_element: reg[x] to x_reg + ?_test(begin + move_array_element_test0(State0, r8, 2, {x_reg, 0}, << + " 0: f9400907 ldr x7, [x8, #16]\n" + " 4: f9001807 str x7, [x0, #48]" + >>) + end), + %% move_array_element: reg[x] to ptr + ?_test(begin + move_array_element_test0(State0, r8, 3, {ptr, r10}, << + " 0: f9400d07 ldr x7, [x8, #24]\n" + " 4: f9000147 str x7, [x10]" + >>) + end), + %% move_array_element: reg[x] to y_reg + ?_test(begin + move_array_element_test0(State0, r8, 1, {y_reg, 2}, << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f9400508 ldr x8, [x8, #8]\n" + " 8: f90008e8 str x8, [x7, #16]" + >>) + end), + %% move_array_element: reg[x] to native reg (r10) + ?_test(begin + move_array_element_test0(State0, r8, 1, r10, << + " 0: f940050a ldr x10, [x8, #8]" + >>) + end), + %% move_array_element: reg[x] to y_reg + ?_test(begin + move_array_element_test0(State0, r8, 7, {y_reg, 31}, << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f9401d08 ldr x8, [x8, #56]\n" + " 8: f9007ce8 str x8, [x7, #248]" + >>) + end), + %% move_array_element: reg[x] to x_reg + ?_test(begin + move_array_element_test0(State0, r8, 7, {x_reg, 15}, << + " 0: f9401d07 ldr x7, [x8, #56]\n" + " 4: f9005407 str x7, [x0, #168]" + >>) + end), + %% move_array_element: reg_x[reg_y] to x_reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), + move_array_element_test0(State1, r8, {free, Reg}, {x_reg, 2}, << + " 0: f9401107 ldr x7, [x8, #32]\n" + " 4: f8677907 ldr x7, [x8, x7, lsl #3]\n" + " 8: f9002007 str x7, [x0, #64]" + >>) + end), + %% move_array_element: reg_x[reg_y] to pointer (large x reg) + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), + move_array_element_test0(State1, r8, {free, Reg}, {ptr, r10}, << + " 0: f9401107 ldr x7, [x8, #32]\n" + " 4: f8677907 ldr x7, [x8, x7, lsl #3]\n" + " 8: f9000147 str x7, [x10]" + >>) + end), + %% move_array_element: reg_x[reg_y] to y_reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), + move_array_element_test0(State1, r8, {free, Reg}, {y_reg, 31}, << + " 0: f9401107 ldr x7, [x8, #32]\n" + " 4: f9401408 ldr x8, [x0, #40]\n" + " 8: f8677907 ldr x7, [x8, x7, lsl #3]\n" + " c: f9007d07 str x7, [x8, #248]" + >>) + end) + ] + end}. + +get_array_element_test_DISABLED_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% get_array_element: reg[x] to new native reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401107 ldr x7, [x8, #32]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(r7, Reg) + end) + ] + end}. + +move_to_array_element_test_DISABLED_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_to_array_element/4: x_reg to reg[x] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9000907 str x7, [x8, #16]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: x_reg to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, r9), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f8297907 str x7, [x8, x9, lsl #3]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: ptr to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {ptr, r7}, r8, r9), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f94000e7 ldr x7, [x7]\n" + " 4: f8297907 str x7, [x8, x9, lsl #3]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: y_reg to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, r8, r9), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401407 ldr x7, [x0, #40]\n" + " 4: f94008e7 ldr x7, [x7, #16]\n" + " 8: f8297907 str x7, [x8, x9, lsl #3]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: x_reg to reg[x+offset] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2, 1), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9000907 str x7, [x8, #16]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: x_reg to reg[x+offset] + ?_test(begin + State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), + State2 = setelement(8, State1, [r8, r9]), + [r8, r9] = ?BACKEND:used_regs(State2), + State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 9100052a add x10, x9, #0x1\n" + " 8: f82a7907 str x7, [x8, x10, lsl #3]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: imm to reg[x+offset] + ?_test(begin + State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), + State2 = setelement(8, State1, [r8, r9]), + [r8, r9] = ?BACKEND:used_regs(State2), + State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: d2800547 mov x7, #0x2a // #42\n" + " 4: 9100052a add x10, x9, #0x1\n" + " 8: f82a7907 str x7, [x8, x10, lsl #3]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +move_to_native_register_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_to_native_register/2: imm + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, 42), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r7, Reg), + Dump = << + " 0: 272a movs r7, #42 ; 0x2a" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {ptr, reg} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, r6}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r6, Reg), + Dump = << + " 0: 6836 ldr r6, [r6, #0]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {x_reg, N} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 3}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r7, Reg), + Dump = << + " 0: 6bc7 ldr r7, [r0, #60] ; 0x3c" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {y_reg, N} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 3}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r7, Reg), + Dump = << + " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 2: 68ff ldr r7, [r7, #12]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {fp_reg, N} - DISABLED for ARMv6-M (no FPU) + %% ?_test(begin + %% {State1, Reg} = ?BACKEND:move_to_native_register(State0, {fp_reg, 3}), + %% Stream = ?BACKEND:stream(State1), + %% ?assertEqual(v0, Reg), + %% Dump = << + %% " 0: f9406007 ldr x7, [x0, #192]\n" + %% " 4: fd400ce0 ldr d0, [x7, #24]" + %% >>, + %% ?assertEqual(dump_to_bin(Dump), Stream) + %% end), + %% move_to_native_register/3: imm to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, 42, r6), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 262a movs r6, #42 ; 0x2a" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: reg to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, r7, r5), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 1c3d adds r5, r7, #0" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {ptr, reg} to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {ptr, r7}, r4), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 683c ldr r4, [r7, #0]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {x_reg, x} to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6b83 ldr r3, [r0, #56] ; 0x38" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {y_reg, y} to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r1), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6a81 ldr r1, [r0, #40] ; 0x28\n" + " 2: 6889 ldr r1, [r1, #8]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + %% move_to_native_register/3: {fp_reg, N} - DISABLED for ARMv6-M (no FPU) + %% ?_test(begin + %% State1 = ?BACKEND:move_to_native_register(State0, {fp_reg, 3}, v0), + %% Stream = ?BACKEND:stream(State1), + %% Dump = << + %% " 0: f9406007 ldr x7, [x0, #192]\n" + %% " 4: fd400ce0 ldr d0, [x7, #24]" + %% >>, + %% ?assertEqual(dump_to_bin(Dump), Stream) + %% end) + ] + end}. + +mul_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:mul(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +mul_test_DISABLED_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + mul_test0(State0, r2, 2, << + "0: d37ff842 lsl x2, x2, #1" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 3, << + " 0: d37ff847 lsl x7, x2, #1\n" + " 4: 8b0200e2 add x2, x7, x2" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 4, << + "0: d37ef442 lsl x2, x2, #2" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 5, << + " 0: d37ef447 lsl x7, x2, #2\n" + " 4: 8b0200e2 add x2, x7, x2" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 6, << + " 0: d37ff847 lsl x7, x2, #1\n" + " 4: 8b0200e2 add x2, x7, x2\n" + " 8: d37ff842 lsl x2, x2, #1" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 7, << + " 0: d37df047 lsl x7, x2, #3\n" + " 4: cb0200e2 sub x2, x7, x2" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 8, << + "0: d37df042 lsl x2, x2, #3" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 9, << + " 0: d37df047 lsl x7, x2, #3\n" + " 4: 8b0200e2 add x2, x7, x2" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 10, << + " 0: d37ef447 lsl x7, x2, #2\n" + " 4: 8b0200e2 add x2, x7, x2\n" + " 8: d37ff842 lsl x2, x2, #1" + >>) + end), + ?_test(begin + mul_test0(State0, r2, 11, << + " 0: d2800167 mov x7, #0xb // #11\n" + " 4: 9b077c42 mul x2, x2, x7" + >>) + end) + ] + end}. + +dump_to_bin(Dump) -> + dump_to_bin0(Dump, addr, []). + +-define(IS_HEX_DIGIT(C), + ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) +). + +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +%% Handle 32-bits undefined instruction +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + InstrA = list_to_integer([H1, H2, H3, H4], 16), + InstrB = list_to_integer([H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<>, <> | Acc]); +%% Handle 16-bit ARM32 Thumb instructions (4 hex digits) +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) +-> + %% Parse 4 hex digits (ARM32 Thumb 16-bit instruction) + Instr = list_to_integer([H1, H2, H3, H4], 16), + dump_to_bin0(Rest, instr, [<> | Acc]); +dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, instr, Acc); +dump_to_bin0(<<>>, _, Acc) -> + list_to_binary(lists:reverse(Acc)). diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index a435ab17e0..ff272f6eac 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -29,6 +29,8 @@ start() -> jit_tests, jit_aarch64_tests, jit_aarch64_asm_tests, + jit_armv6m_tests, + jit_armv6m_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests ]). From 380d9aea39d3e5c88fe4d133f99dfdb2ad9fa4d5 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 14:42:46 +0200 Subject: [PATCH 03/97] armv6m: implement shift left & shift right Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 4 ++-- tests/libs/jit/jit_armv6m_tests.erl | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 61ff78670e..4f0464d00a 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -934,7 +934,7 @@ merge_used_regs(State, []) -> shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when ?IS_GPR(Reg) andalso is_integer(Shift) -> - I = jit_armv6m_asm:lsr(Reg, Reg, Shift), + I = jit_armv6m_asm:lsrs(Reg, Reg, Shift), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}. @@ -949,7 +949,7 @@ shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when is_atom(Reg) -> - I = jit_armv6m_asm:lsl(Reg, Reg, Shift), + I = jit_armv6m_asm:lsls(Reg, Reg, Shift), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}. diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 6f90b15866..2a315ed461 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -760,27 +760,27 @@ if_else_block_test_DISABLED() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -shift_right_test_DISABLED() -> +shift_right_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), State2 = ?BACKEND:shift_right(State1, Reg, 3), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: d343fce7 lsr x7, x7, #3" + " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" + " 2: 08ff lsrs r7, r7, #3" >>, ?assertEqual(dump_to_bin(Dump), Stream). -shift_left_test_DISABLED() -> +shift_left_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), State2 = ?BACKEND:shift_left(State1, Reg, 3), Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: d37df0e7 lsl x7, x7, #3" + " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" + " 2: 00ff lsls r7, r7, #3" >>, ?assertEqual(dump_to_bin(Dump), Stream). From a60ced47ef717abfef667e1ee3ebb1bd4d33217c Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 14:43:12 +0200 Subject: [PATCH 04/97] armv6m: remove references to fpu registers Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 171 ++++++++++++++---------------------- 1 file changed, 68 insertions(+), 103 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 4f0464d00a..e62c1b155c 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -124,8 +124,6 @@ Reg =:= r10 orelse Reg =:= r11 orelse Reg =:= r12 orelse Reg =:= r13 orelse Reg =:= r14 orelse Reg =:= r15) ). -%% ARMv6-M has no FPU, so no FP registers --define(IS_FPR(_Reg), false). -type stream() :: any(). @@ -135,7 +133,6 @@ offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], available_regs :: [armv6m_register()], - available_fpregs :: [armv6m_register()], used_regs :: [armv6m_register()] }). @@ -201,10 +198,7 @@ %% - r13 (SP), r14 (LR), r15 (PC): special purpose %% Reorder to match AArch64 test expectations (r7 first) -define(AVAILABLE_REGS, [r7, r6, r5, r4, r3, r1, r12]). --define(AVAILABLE_FPREGS, []). -define(PARAMETER_REGS, [r0, r1, r2, r3]). -%% ARMv6-M has no FPU --define(PARAMETER_FPREGS, []). %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. @@ -241,7 +235,6 @@ new(_Variant, StreamModule, Stream) -> branches = [], offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, - available_fpregs = ?AVAILABLE_FPREGS, used_regs = [] }. @@ -314,13 +307,13 @@ free_native_registers(State, [Reg | Rest]) -> -spec free_native_register(state(), value()) -> state(). free_native_register( - #state{available_regs = Available0, available_fpregs = AvailableFP0, used_regs = Used0} = State, + #state{available_regs = Available0, used_regs = Used0} = State, Reg ) when is_atom(Reg) -> - {Available1, AvailableFP1, Used1} = free_reg(Available0, AvailableFP0, Used0, Reg), - State#state{available_regs = Available1, available_fpregs = AvailableFP1, used_regs = Used1}; + {Available1, Used1} = free_reg(Available0, Used0, Reg), + State#state{available_regs = Available1, used_regs = Used1}; free_native_register(State, {ptr, Reg}) -> free_native_register(State, Reg); free_native_register(State, _Other) -> @@ -335,7 +328,7 @@ free_native_register(State, _Other) -> %%----------------------------------------------------------------------------- -spec assert_all_native_free(state()) -> ok. assert_all_native_free(#state{ - available_regs = ?AVAILABLE_REGS, available_fpregs = ?AVAILABLE_FPREGS, used_regs = [] + available_regs = ?AVAILABLE_REGS, used_regs = [] }) -> ok. @@ -484,7 +477,6 @@ return_if_not_equal_to_ctx( stream_module = StreamModule, stream = Stream0, available_regs = AvailableRegs0, - available_fpregs = AvailableFPRegs0, used_regs = UsedRegs0 } = State, {free, Reg} @@ -500,13 +492,12 @@ return_if_not_equal_to_ctx( I4 = jit_armv6m_asm:ret(), I2 = jit_armv6m_asm:bcc(eq, 4 + byte_size(I3) + byte_size(I4)), Stream1 = StreamModule:append(Stream0, <>), - {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( - AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg + {AvailableRegs1, UsedRegs1} = free_reg( + AvailableRegs0, UsedRegs0, Reg ), State#state{ stream = Stream1, available_regs = AvailableRegs1, - available_fpregs = AvailableFPRegs1, used_regs = UsedRegs1 }. @@ -632,8 +623,7 @@ if_else_block( StateElse = State2#state{ stream = Stream4, used_regs = State1#state.used_regs, - available_regs = State1#state.available_regs, - available_fpregs = State1#state.available_fpregs + available_regs = State1#state.available_regs }, State3 = BlockFalseFn(StateElse), Stream5 = State3#state.stream, @@ -895,18 +885,17 @@ if_block_cond( -spec if_block_free_reg(armv6m_register() | {free, armv6m_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> - #state{available_regs = AvR0, available_fpregs = AvFR0, used_regs = UR0} = State0, - {AvR1, AvFR1, UR1} = free_reg(AvR0, AvFR0, UR0, Reg), + #state{available_regs = AvR0, used_regs = UR0} = State0, + {AvR1, UR1} = free_reg(AvR0, UR0, Reg), State0#state{ available_regs = AvR1, - available_fpregs = AvFR1, used_regs = UR1 }; if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. -spec merge_used_regs(state(), [armv6m_register()]) -> state(). -merge_used_regs(#state{used_regs = UR0, available_regs = AvR0, available_fpregs = AvFR0} = State, [ +merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [ Reg | T ]) -> case lists:member(Reg, UR0) of @@ -914,10 +903,9 @@ merge_used_regs(#state{used_regs = UR0, available_regs = AvR0, available_fpregs merge_used_regs(State, T); false -> AvR1 = lists:delete(Reg, AvR0), - AvFR1 = lists:delete(Reg, AvFR0), UR1 = [Reg | UR0], merge_used_regs( - State#state{used_regs = UR1, available_regs = AvR1, available_fpregs = AvFR1}, T + State#state{used_regs = UR1, available_regs = AvR1}, T ) end; merge_used_regs(State, []) -> @@ -969,7 +957,6 @@ call_func_ptr( stream_module = StreamModule, stream = Stream0, available_regs = AvailableRegs0, - available_fpregs = AvailableFP0, used_regs = UsedRegs0 } = State0, FuncPtrTuple, @@ -1028,15 +1015,11 @@ call_func_ptr( AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), - AvailableFP1 = FreeRegs ++ AvailableFP0, - AvailableFP2 = lists:delete(ResultReg, AvailableFP1), - AvailableFP3 = ?AVAILABLE_FPREGS -- (?AVAILABLE_FPREGS -- AvailableFP2), UsedRegs2 = [ResultReg | UsedRegs1], { State1#state{ stream = Stream6, available_regs = AvailableRegs3, - available_fpregs = AvailableFP3, used_regs = UsedRegs2 }, ResultReg @@ -1069,7 +1052,6 @@ set_args( ArgsRegs = args_regs(Args), AvailableScratchGP = [rdi, rsi, rdx, rcx, r8, r9, r10, r11] -- ParamRegs -- ArgsRegs -- UsedRegs, - AvailableScratchFP = ?AVAILABLE_FPREGS -- ParamRegs -- ArgsRegs -- UsedRegs, Offset = StreamModule:offset(Stream0), Args1 = [ case Arg of @@ -1078,7 +1060,7 @@ set_args( end || Arg <- Args ], - SetArgsCode = set_args0(Args1, ArgsRegs, ParamRegs, AvailableScratchGP, AvailableScratchFP, []), + SetArgsCode = set_args0(Args1, ArgsRegs, ParamRegs, AvailableScratchGP, []), Stream1 = StreamModule:append(Stream0, SetArgsCode), NewUsedRegs = lists:foldl( fun @@ -1092,35 +1074,32 @@ set_args( State0#state{ stream = Stream1, available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, - available_fpregs = ?AVAILABLE_FPREGS -- ParamRegs -- NewUsedRegs, used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) }. parameter_regs(Args) -> - parameter_regs0(Args, ?PARAMETER_REGS, ?PARAMETER_FPREGS, []). + parameter_regs0(Args, ?PARAMETER_REGS, []). -parameter_regs0([], _, _, Acc) -> +parameter_regs0([], _, Acc) -> lists:reverse(Acc); -parameter_regs0([Special | T], [GPReg | GPRegsT], FPRegs, Acc) when +parameter_regs0([Special | T], [GPReg | GPRegsT], Acc) when Special =:= ctx orelse Special =:= jit_state orelse Special =:= offset -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); -parameter_regs0([{free, Free} | T], GPRegs, FPRegs, Acc) -> - parameter_regs0([Free | T], GPRegs, FPRegs, Acc); -parameter_regs0([{ptr, Reg} | T], [GPReg | GPRegsT], FPRegs, Acc) when ?IS_GPR(Reg) -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); -parameter_regs0([Reg | T], [GPReg | GPRegsT], FPRegs, Acc) when ?IS_GPR(Reg) -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); -parameter_regs0([Reg | T], GPRegs, [FPReg | FPRegsT], Acc) when ?IS_FPR(Reg) -> - parameter_regs0(T, GPRegs, FPRegsT, [FPReg | Acc]); -parameter_regs0([{x_reg, _} | T], [GPReg | GPRegsT], FPRegs, Acc) -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); -parameter_regs0([{y_reg, _} | T], [GPReg | GPRegsT], FPRegs, Acc) -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]); -parameter_regs0([{fp_reg, _} | T], GPRegs, [FPReg | FPRegsT], Acc) -> - parameter_regs0(T, GPRegs, FPRegsT, [FPReg | Acc]); -parameter_regs0([Int | T], [GPReg | GPRegsT], FPRegs, Acc) when is_integer(Int) -> - parameter_regs0(T, GPRegsT, FPRegs, [GPReg | Acc]). + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{free, Free} | T], GPRegs, Acc) -> + parameter_regs0([Free | T], GPRegs, Acc); +parameter_regs0([{ptr, Reg} | T], [GPReg | GPRegsT], Acc) when ?IS_GPR(Reg) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([Reg | T], [GPReg | GPRegsT], Acc) when ?IS_GPR(Reg) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{x_reg, _} | T], [GPReg | GPRegsT], Acc) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{y_reg, _} | T], [GPReg | GPRegsT], Acc) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +parameter_regs0([{fp_reg, _} | T], [GPRegA, GPRegB | GPRegsT], Acc) -> + parameter_regs0(T, GPRegsT, [GPRegB, GPRegA | Acc]); +parameter_regs0([Int | T], [GPReg | GPRegsT], Acc) when is_integer(Int) -> + parameter_regs0(T, GPRegsT, [GPReg | Acc]). replace_reg(Args, Reg1, Reg2) -> replace_reg0(Args, Reg1, Reg2, []). @@ -1132,49 +1111,47 @@ replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> replace_reg0([Other | T], Reg, Replacement, Acc) -> replace_reg0(T, Reg, Replacement, [Other | Acc]). -set_args0([], [], [], _AvailGP, _AvailFP, Acc) -> +set_args0([], [], [], _AvailGP, Acc) -> list_to_binary(lists:reverse(Acc)); -set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc) -> - set_args0([FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc); -set_args0([ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, AvailFP, Acc) -> - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc); +set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc) -> + set_args0([FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc); +set_args0([ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, Acc) -> + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, Acc); set_args0( [jit_state | ArgsT], [?JITSTATE_REG | ArgsRegs], [?JITSTATE_REG | ParamRegs], AvailGP, - AvailFP, Acc ) -> - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, Acc); + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, Acc); set_args0( - [jit_state | ArgsT], [?JITSTATE_REG | ArgsRegs], [ParamReg | ParamRegs], AvailGP, AvailFP, Acc + [jit_state | ArgsT], [?JITSTATE_REG | ArgsRegs], [ParamReg | ParamRegs], AvailGP, Acc ) -> false = lists:member(ParamReg, ArgsRegs), - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [ + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [ jit_armv6m_asm:mov(ParamReg, ?JITSTATE_REG) | Acc ]); % ctx is special as we need it to access x_reg/y_reg/fp_reg -set_args0([Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, AvailFP, Acc) -> +set_args0([Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, Acc) -> false = lists:member(?CTX_REG, ArgsRegs), J = set_args1(Arg, ?CTX_REG), - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [J | Acc]); + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); set_args0( [Arg | ArgsT], [_ArgReg | ArgsRegs], [ParamReg | ParamRegs], [Avail | AvailGPT] = AvailGP, - AvailFP, Acc ) -> J = set_args1(Arg, ParamReg), case lists:member(ParamReg, ArgsRegs) of false -> - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, AvailFP, [J | Acc]); + set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); true -> I = jit_armv6m_asm:mov(Avail, ParamReg), NewArgsT = replace_reg(ArgsT, ParamReg, Avail), - set_args0(NewArgsT, ArgsRegs, ParamRegs, AvailGPT, AvailFP, [J, I | Acc]) + set_args0(NewArgsT, ArgsRegs, ParamRegs, AvailGPT, [J, I | Acc]) end. set_args1(Reg, Reg) -> @@ -1318,11 +1295,12 @@ move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, State1#state{available_regs = AR0}; move_to_vm_register( #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State, - Reg, + {fp, RegA, RegB}, {fp_reg, F} -) when is_atom(Reg) -> +) -> I1 = jit_armv6m_asm:ldr(Temp, ?FP_REGS), - I2 = jit_armv6m_asm:str(Reg, {Temp, F * 4}), + I2 = jit_armv6m_asm:str(RegA, {Temp, F * 8}), + I2 = jit_armv6m_asm:str(RegB, {Temp, F * 8 + 4}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}. @@ -1399,8 +1377,7 @@ move_array_element( stream_module = StreamModule, stream = Stream0, available_regs = AvailableRegs0, - used_regs = UsedRegs0, - available_fpregs = AvailableFPRegs0 + used_regs = UsedRegs0 } = State, Reg, {free, IndexReg}, @@ -1408,13 +1385,10 @@ move_array_element( ) when X < ?MAX_REG andalso is_atom(IndexReg) -> I1 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 2}), I2 = jit_armv6m_asm:str(IndexReg, ?X_REG(X)), - {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( - AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg - ), + {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, - available_fpregs = AvailableFPRegs1, used_regs = UsedRegs1, stream = Stream1 }; @@ -1423,8 +1397,7 @@ move_array_element( stream_module = StreamModule, stream = Stream0, available_regs = AvailableRegs0, - used_regs = UsedRegs0, - available_fpregs = AvailableFPRegs0 + used_regs = UsedRegs0 } = State, Reg, {free, IndexReg}, @@ -1432,13 +1405,12 @@ move_array_element( ) when is_atom(IndexReg) -> I1 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 2}), I2 = jit_armv6m_asm:str(IndexReg, {PtrReg, 0}), - {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( - AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg + {AvailableRegs1, UsedRegs1} = free_reg( + AvailableRegs0, UsedRegs0, IndexReg ), Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, - available_fpregs = AvailableFPRegs1, used_regs = UsedRegs1, stream = Stream1 }; @@ -1447,8 +1419,7 @@ move_array_element( stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _] = AvailableRegs0, - used_regs = UsedRegs0, - available_fpregs = AvailableFPRegs0 + used_regs = UsedRegs0 } = State, Reg, {free, IndexReg}, @@ -1457,15 +1428,14 @@ move_array_element( I1 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 2}), I3 = jit_armv6m_asm:str(IndexReg, {Temp, Y * 4}), - {AvailableRegs1, AvailableFPRegs1, UsedRegs1} = free_reg( - AvailableRegs0, AvailableFPRegs0, UsedRegs0, IndexReg + {AvailableRegs1, UsedRegs1} = free_reg( + AvailableRegs0, UsedRegs0, IndexReg ), Stream1 = StreamModule:append( Stream0, <> ), State#state{ available_regs = AvailableRegs1, - available_fpregs = AvailableFPRegs1, used_regs = UsedRegs1, stream = Stream1 }. @@ -1613,17 +1583,17 @@ move_to_native_register( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], - available_fpregs = [FPReg | AvailFT], + available_regs = [RegA, RegB | AvailT], used_regs = Used } = State, {fp_reg, F} ) -> - I1 = jit_armv6m_asm:ldr(Temp, ?FP_REGS), - I2 = jit_armv6m_asm:ldr_d(FPReg, {Temp, F * 4}), - Code = <>, + I1 = jit_armv6m_asm:ldr(RegB, ?FP_REGS), + I2 = jit_armv6m_asm:ldr(RegA, {RegB, F * 8}), + I3 = jit_armv6m_asm:ldr(RegB, {RegB, F * 8 + 4}), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), - {State#state{stream = Stream1, available_fpregs = AvailFT, used_regs = [FPReg | Used]}, FPReg}. + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]}, {fp, RegA, RegB}}. -spec move_to_native_register(state(), value(), armv6m_register()) -> state(). move_to_native_register( @@ -1657,15 +1627,15 @@ move_to_native_register( move_to_native_register( #state{ stream_module = StreamModule, - stream = Stream0, - available_regs = [Temp | _] + stream = Stream0 } = State, {fp_reg, F}, - RegDst + {fp, RegA, RegB} ) -> - I1 = jit_armv6m_asm:ldr(Temp, ?FP_REGS), - I2 = jit_armv6m_asm:ldr_d(RegDst, {Temp, F * 4}), - Code = <>, + I1 = jit_armv6m_asm:ldr(RegB, ?FP_REGS), + I2 = jit_armv6m_asm:ldr(RegA, {RegB, F * 8}), + I3 = jit_armv6m_asm:ldr(RegB, {RegB, F * 8 + 4}), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}. @@ -2019,16 +1989,11 @@ return_labels_and_lines( ), State#state{stream = Stream1}. -free_reg(AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> +free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), true = lists:member(Reg, UsedRegs0), UsedRegs1 = lists:delete(Reg, UsedRegs0), - {AvailableRegs1, AvailableFPRegs0, UsedRegs1}; -free_reg(AvailableRegs0, AvailableFPRegs0, UsedRegs0, Reg) when ?IS_FPR(Reg) -> - AvailableFPRegs1 = free_reg0(?AVAILABLE_FPREGS, AvailableFPRegs0, Reg, []), - true = lists:member(Reg, UsedRegs0), - UsedRegs1 = lists:delete(Reg, UsedRegs0), - {AvailableRegs0, AvailableFPRegs1, UsedRegs1}. + {AvailableRegs1, UsedRegs1}. free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) -> lists:reverse(Acc, [Reg | PrevRegs0]); From 54d370e7974b7ac0ca906bcfa0b44f7f9df08efc Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 15:19:34 +0200 Subject: [PATCH 05/97] armv6m: fix offsets to Context and JITState structs Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 13 ++-- src/libAtomVM/jit.c | 11 ++++ src/libAtomVM/jit.h | 5 ++ tests/libs/jit/jit_armv6m_tests.erl | 98 ++++++++++++++--------------- 4 files changed, 71 insertions(+), 56 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index e62c1b155c..7c125142e9 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -162,13 +162,12 @@ -define(CTX_REG, r0). -define(JITSTATE_REG, r1). -define(NATIVE_INTERFACE_REG, r2). -%% ARMv6-M uses 4-byte word size, so adjust all offsets --define(Y_REGS, {?CTX_REG, 16#28}). --define(X_REG(N), {?CTX_REG, 16#30 + (N * 4)}). --define(CP, {?CTX_REG, 16#B8}). --define(FP_REGS, {?CTX_REG, 16#C0}). --define(BS, {?CTX_REG, 16#C8}). --define(BS_OFFSET, {?CTX_REG, 16#D0}). +-define(Y_REGS, {?CTX_REG, 16#14}). +-define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). +-define(CP, {?CTX_REG, 16#5C}). +-define(FP_REGS, {?CTX_REG, 16#60}). +-define(BS, {?CTX_REG, 16#64}). +-define(BS_OFFSET, {?CTX_REG, 16#68}). -define(JITSTATE_MODULE, {?JITSTATE_REG, 0}). -define(JITSTATE_CONTINUATION, {?JITSTATE_REG, 16#4}). -define(JITSTATE_REDUCTIONCOUNT, {?JITSTATE_REG, 16#8}). diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 1d63f4b836..accf990802 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -83,6 +83,17 @@ _Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_aarch64.erl"); _Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_aarch64.erl"); _Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_aarch64.erl"); +#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M +_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x30 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in jit/src/jit_armv6m.erl"); + +_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_armv6m.erl"); #else #error Unknown jit target #endif diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index 63c72493af..eae3672ec5 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -188,6 +188,11 @@ enum TrapAndLoadResult #define JIT_JUMPTABLE_ENTRY_SIZE 4 #endif +#ifdef __arm__ +#define JIT_ARCH_TARGET JIT_ARCH_ARMV6M +#define JIT_JUMPTABLE_ENTRY_SIZE 8 +#endif + #ifndef JIT_ARCH_TARGET #error Unknown JIT target #endif diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 2a315ed461..0f7cb89cd6 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -767,7 +767,7 @@ shift_right_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" + " 0: 6987 ldr r7, [r0, #24]\n" " 2: 08ff lsrs r7, r7, #3" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -779,7 +779,7 @@ shift_left_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" + " 0: 6987 ldr r7, [r0, #24]\n" " 2: 00ff lsls r7, r7, #3" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1135,13 +1135,13 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, 0}, << " 0: 2700 movs r7, #0\n" - " 2: 6307 str r7, [r0, #48] ; 0x30" + " 2: 6187 str r7, [r0, #24]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, extra}, << " 0: 2700 movs r7, #0\n" - " 2: 6707 str r7, [r0, #112] ; 0x70" + " 2: 6587 str r7, [r0, #88] ; 0x58" >>) end), ?_test(begin @@ -1152,14 +1152,14 @@ move_to_vm_register_test_() -> end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << - " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 0: 6947 ldr r7, [r0, #20]\n" " 2: 2600 movs r6, #0\n" " 4: 60be str r6, [r7, #8]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << - " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 0: 6947 ldr r7, [r0, #20]\n" " 2: 2600 movs r6, #0\n" " 4: 653e str r6, [r7, #80] ; 0x50" >>) @@ -1168,25 +1168,25 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << " 0: 272a movs r7, #42 ; 0x2a\n" - " 2: 6307 str r7, [r0, #48] ; 0x30" + " 2: 6187 str r7, [r0, #24]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, extra}, << " 0: 272a movs r7, #42 ; 0x2a\n" - " 2: 6707 str r7, [r0, #112] ; 0x70" + " 2: 6587 str r7, [r0, #88] ; 0x58" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 2}, << - " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 0: 6947 ldr r7, [r0, #20]\n" " 2: 262a movs r6, #42 ; 0x2a\n" " 4: 60be str r6, [r7, #8]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << - " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 0: 6947 ldr r7, [r0, #20]\n" " 2: 262a movs r6, #42 ; 0x2a\n" " 4: 653e str r6, [r7, #80] ; 0x50" >>) @@ -1201,14 +1201,14 @@ move_to_vm_register_test_() -> %% Test: x_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << - " 0: 6b47 ldr r7, [r0, #52] ; 0x34\n" - " 2: 6387 str r7, [r0, #56] ; 0x38" + " 0: 69c7 ldr r7, [r0, #28]\n" + " 2: 6207 str r7, [r0, #32]" >>) end), %% Test: x_reg to ptr ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r1}, << - " 0: 6b47 ldr r7, [r0, #52] ; 0x34\n" + " 0: 69c7 ldr r7, [r0, #28]\n" " 2: 600f str r7, [r1, #0]" >>) end), @@ -1216,42 +1216,42 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, {ptr, r4}, {x_reg, 3}, << " 0: 6827 ldr r7, [r4, #0]\n" - " 2: 63c7 str r7, [r0, #60] ; 0x3c" + " 2: 6247 str r7, [r0, #36] ; 0x24" >>) end), %% Test: x_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << - " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" - " 2: 6a86 ldr r6, [r0, #40] ; 0x28\n" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 6946 ldr r6, [r0, #20]\n" " 4: 6077 str r7, [r6, #4]" >>) end), %% Test: y_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << - " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 0: 6947 ldr r7, [r0, #20]\n" " 2: 683f ldr r7, [r7, #0]\n" - " 4: 63c7 str r7, [r0, #60] ; 0x3c" + " 4: 6247 str r7, [r0, #36] ; 0x24" >>) end), %% Test: y_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << - " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 0: 6947 ldr r7, [r0, #20]\n" " 2: 687f ldr r7, [r7, #4]\n" - " 4: 63c7 str r7, [r0, #60] ; 0x3c" + " 4: 6247 str r7, [r0, #36] ; 0x24" >>) end), %% Test: Native register to x_reg ?_test(begin move_to_vm_register_test0(State0, r5, {x_reg, 0}, << - " 0: 6305 str r5, [r0, #48] ; 0x30" + " 0: 6185 str r5, [r0, #24]" >>) end), ?_test(begin move_to_vm_register_test0(State0, r6, {x_reg, extra}, << - " 0: 6706 str r6, [r0, #112] ; 0x70" + " 0: 6586 str r6, [r0, #88] ; 0x58" >>) end), %% Test: Native register to ptr @@ -1263,7 +1263,7 @@ move_to_vm_register_test_() -> %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, r1, {y_reg, 0}, << - " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 0: 6947 ldr r7, [r0, #20]\n" " 2: 6039 str r1, [r7, #0]" >>) end), @@ -1272,9 +1272,9 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" " 2: e001 b.n 0x8\n" - " 4: 5678 .word 0x12345678\n" - " 6: 1234 \n" - " 8: 6307 str r7, [r0, #48] ; 0x30" + " 4: 5678 ldrsb r0, [r7, r1]\n" + " 6: 1234 asrs r4, r6, #8\n" + " 8: 6187 str r7, [r0, #24]" >>) end), %% Test: Large immediate to x_reg (32-bit literal pool, unaligned case) @@ -1289,9 +1289,9 @@ move_to_vm_register_test_() -> " 2: 4f01 ldr r7, [pc, #4] ; (0x8)\n" " 4: e002 b.n 0xc\n" " 6: 0000 movs r0, r0\n" - " 8: 5678 .word 0x12345678\n" - " a: 1234 \n" - " c: 6307 str r7, [r0, #48] ; 0x30" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8\n" + " c: 6187 str r7, [r0, #24]" >>), ?assertEqual(Expected, Stream) end), @@ -1299,18 +1299,18 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, << " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" " 2: e001 b.n 0x8\n" - " 4: 5678 .word 0x12345678\n" - " 6: 1234 \n" - " 8: 6707 str r7, [r0, #112] ; 0x70" + " 4: 5678 ldrsb r0, [r7, r1]\n" + " 6: 1234 asrs r4, r6, #8\n" + " 8: 6587 str r7, [r0, #88] ; 0x58" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" " 2: e001 b.n 0x8\n" - " 4: 5678 .word 0x12345678\n" - " 6: 1234 \n" - " 8: 6a86 ldr r6, [r0, #40] ; 0x28\n" + " 4: 5678 ldrsb r0, [r7, r1]\n" + " 6: 1234 asrs r4, r6, #8\n" + " 8: 6946 ldr r6, [r0, #20]\n" " a: 60b7 str r7, [r6, #8]" >>) end), @@ -1318,9 +1318,9 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" " 2: e001 b.n 0x8\n" - " 4: 5678 .word 0x12345678\n" - " 6: 1234 \n" - " 8: 6a86 ldr r6, [r0, #40] ; 0x28\n" + " 4: 5678 ldrsb r0, [r7, r1]\n" + " 6: 1234 asrs r4, r6, #8\n" + " 8: 6946 ldr r6, [r0, #20]\n" " a: 6537 str r7, [r6, #80] ; 0x50" >>) end), @@ -1329,25 +1329,25 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, 16#12345678, {ptr, r3}, << " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" " 2: e001 b.n 0x8\n" - " 4: 5678 .word 0x12345678\n" - " 6: 1234 \n" + " 4: 5678 ldrsb r0, [r7, r1]\n" + " 6: 1234 asrs r4, r6, #8\n" " 8: 601f str r7, [r3, #0]" >>) end), %% Test: x_reg to y_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << - " 0: 6ec7 ldr r7, [r0, #108] ; 0x6c\n" - " 2: 6a86 ldr r6, [r0, #40] ; 0x28\n" + " 0: 6d47 ldr r7, [r0, #84] ; 0x54\n" + " 2: 6946 ldr r6, [r0, #20]\n" " 4: 67f7 str r7, [r6, #124] ; 0x7c" >>) end), %% Test: y_reg to x_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << - " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 0: 6947 ldr r7, [r0, #20]\n" " 2: 6fff ldr r7, [r7, #124] ; 0x7c\n" - " 4: 66c7 str r7, [r0, #108] ; 0x6c" + " 4: 6547 str r7, [r0, #84] ; 0x54" >>) end), %% Test: Negative immediate to x_reg @@ -1356,7 +1356,7 @@ move_to_vm_register_test_() -> " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" " 2: e001 b.n 0x8\n" " 4: ffff ffff ; instruction: 0xffffffff\n" - " 8: 6307 str r7, [r0, #48] ; 0x30" + " 8: 6187 str r7, [r0, #24]" >>) end) ] @@ -1590,7 +1590,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(r7, Reg), Dump = << - " 0: 6bc7 ldr r7, [r0, #60] ; 0x3c" + " 0: 6a47 ldr r7, [r0, #36] ; 0x24" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -1600,7 +1600,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(r7, Reg), Dump = << - " 0: 6a87 ldr r7, [r0, #40] ; 0x28\n" + " 0: 6947 ldr r7, [r0, #20]\n" " 2: 68ff ldr r7, [r7, #12]" >>, ?assertEqual(dump_to_bin(Dump), Stream) @@ -1648,7 +1648,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6b83 ldr r3, [r0, #56] ; 0x38" + " 0: 6a03 ldr r3, [r0, #32]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -1657,7 +1657,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6a81 ldr r1, [r0, #40] ; 0x28\n" + " 0: 6941 ldr r1, [r0, #20]\n" " 2: 6889 ldr r1, [r1, #8]" >>, ?assertEqual(dump_to_bin(Dump), Stream) From cc72f963ce0863cd705841ee0b673a7e59b63be4 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 15:21:49 +0200 Subject: [PATCH 06/97] armv6m: enable and fix move_to_cp_test Signed-off-by: Paul Guyot --- tests/libs/jit/jit_armv6m_tests.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 0f7cb89cd6..b8f61422bf 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -265,15 +265,15 @@ return_if_not_equal_to_ctx_test_DISABLED_() -> ] end}. -move_to_cp_test_DISABLED() -> +move_to_cp_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401407 ldr x7, [x0, #40]\n" - " 4: f94000e7 ldr x7, [x7]\n" - " 8: f9005c07 str x7, [x0, #184]" + " 0: 6947 ldr r7, [r0, #20]\n" + " 2: 683f ldr r7, [r7, #0]\n" + " 4: 65c7 str r7, [r0, #92] ; 0x5c" >>, ?assertEqual(dump_to_bin(Dump), Stream). From 0caf0d56b287354fbfd569f57b22a6ce829c495e Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 15:24:04 +0200 Subject: [PATCH 07/97] armv6m: add jit_armv6m to nif_jit_backend_module Signed-off-by: Paul Guyot --- src/libAtomVM/defaultatoms.def | 1 + src/libAtomVM/nifs.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 04aff1f840..35330fdecc 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -208,3 +208,4 @@ X(CODE_SERVER_ATOM, "\xB", "code_server") X(LOAD_ATOM, "\x4", "load") X(JIT_X86_64_ATOM, "\xA", "jit_x86_64") X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64") +X(JIT_ARMV6M_ATOM, "\xA", "jit_armv6m") diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 2fe0b12948..0b0d560dc5 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -5680,6 +5680,8 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) return JIT_X86_64_ATOM; #elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 return JIT_AARCH64_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M + return JIT_ARMV6M_ATOM; #else #error Unknown JIT target #endif From 05e993e9d5fa3bc326d595838833f45be76abbe2 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 15:24:47 +0200 Subject: [PATCH 08/97] armv6m: enable on Unix builds Signed-off-by: Paul Guyot --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ba351373d..d23b067683 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,6 +57,8 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH) set(AVM_JIT_TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64|aarch64$") set(AVM_JIT_TARGET_ARCH "aarch64") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$") + set(AVM_JIT_TARGET_ARCH "armv6m") else() message(FATAL "JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") endif() From b727f82d69904e846684a57af9e93e6989300f7a Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 15:25:02 +0200 Subject: [PATCH 09/97] armv6m: enable on Pico builds Signed-off-by: Paul Guyot --- src/platforms/rp2/CMakeLists.txt | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/platforms/rp2/CMakeLists.txt b/src/platforms/rp2/CMakeLists.txt index 9b9eb582df..6dbcdf7bb8 100644 --- a/src/platforms/rp2/CMakeLists.txt +++ b/src/platforms/rp2/CMakeLists.txt @@ -63,9 +63,20 @@ option(AVM_WAIT_FOR_USB_CONNECT "Wait for USB connection before starting" OFF) option(AVM_WAIT_BOOTSEL_ON_EXIT "Wait in BOOTSEL rather than shutdown on exit" ON) option(AVM_REBOOT_ON_NOT_OK "Reboot Pico if result is not ok" OFF) option(AVM_CREATE_STACKTRACES "Create stacktraces" ON) - -# JIT is not available yet on rp2 -set(AVM_DISABLE_JIT ON FORCE) +option(AVM_DISABLE_JIT "Disable just in time compilation." ON) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$") + # We only have armv6m for now, which all cortex-m should support + if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "armv6m") + endif() +else() + # Typically riscv is not supported yet + if (NOT AVM_DISABLE_JIT) + message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") + set(AVM_DISABLE_JIT ON CACHE BOOL "Disable just in time compilation." FORCE) + set(AVM_ENABLE_PRECOMPILED OFF CACHE BOOL "Enable execution of precompiled code, even if JIT is disabled." FORCE) + endif() +endif() set(AVM_DISABLE_TASK_DRIVER ON FORCE) From 4457f95d170224d659217e8692e380cd4b3d80c4 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 15:41:40 +0200 Subject: [PATCH 10/97] armv6m: fix increment_sp Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 2 +- tests/libs/jit/jit_armv6m_tests.erl | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 7c125142e9..9ece7425d0 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1682,7 +1682,7 @@ increment_sp( Offset ) -> I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS), - I2 = jit_armv6m_asm:add(Reg, Reg, Offset * 4), + I2 = jit_armv6m_asm:adds(Reg, Offset * 4), I3 = jit_armv6m_asm:str(Reg, ?Y_REGS), Code = <>, Stream1 = StreamModule:append(Stream0, Code), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index b8f61422bf..5fa8a49305 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -277,15 +277,15 @@ move_to_cp_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -increment_sp_test_DISABLED() -> +increment_sp_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:increment_sp(State0, 7), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401407 ldr x7, [x0, #40]\n" - " 4: 9100e0e7 add x7, x7, #0x38\n" - " 8: f9001407 str x7, [x0, #40]" + " 0: 6947 ldr r7, [r0, #20]\n" + " 2: 371c adds r7, #28\n" + " 4: 6147 str r7, [r0, #20]" >>, ?assertEqual(dump_to_bin(Dump), Stream). From e43ede48bb7a9c6a919d51ca3f9ddfaaf3f4ea8c Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 16:00:32 +0200 Subject: [PATCH 11/97] armv6m: move_array_element and get_array_element Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 30 ++++++----- tests/libs/jit/jit_armv6m_tests.erl | 83 +++++++++++++++-------------- 2 files changed, 61 insertions(+), 52 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 9ece7425d0..5666080300 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1382,10 +1382,11 @@ move_array_element( {free, IndexReg}, {x_reg, X} ) when X < ?MAX_REG andalso is_atom(IndexReg) -> - I1 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 2}), - I2 = jit_armv6m_asm:str(IndexReg, ?X_REG(X)), + I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2), + I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), + I3 = jit_armv6m_asm:str(IndexReg, ?X_REG(X)), {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), - Stream1 = StreamModule:append(Stream0, <>), + Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, used_regs = UsedRegs1, @@ -1402,12 +1403,13 @@ move_array_element( {free, IndexReg}, {ptr, PtrReg} ) when is_atom(IndexReg) -> - I1 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 2}), - I2 = jit_armv6m_asm:str(IndexReg, {PtrReg, 0}), + I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2), + I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), + I3 = jit_armv6m_asm:str(IndexReg, {PtrReg, 0}), {AvailableRegs1, UsedRegs1} = free_reg( AvailableRegs0, UsedRegs0, IndexReg ), - Stream1 = StreamModule:append(Stream0, <>), + Stream1 = StreamModule:append(Stream0, <>), State#state{ available_regs = AvailableRegs1, used_regs = UsedRegs1, @@ -1423,15 +1425,16 @@ move_array_element( Reg, {free, IndexReg}, {y_reg, Y} -) when ?IS_GPR(IndexReg) -> - I1 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), - I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg, lsl, 2}), - I3 = jit_armv6m_asm:str(IndexReg, {Temp, Y * 4}), +) when is_atom(IndexReg) -> + I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2), + I2 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), + I3 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), + I4 = jit_armv6m_asm:str(IndexReg, {Temp, Y * 4}), {AvailableRegs1, UsedRegs1} = free_reg( AvailableRegs0, UsedRegs0, IndexReg ), Stream1 = StreamModule:append( - Stream0, <> + Stream0, <> ), State#state{ available_regs = AvailableRegs1, @@ -1592,7 +1595,10 @@ move_to_native_register( I3 = jit_armv6m_asm:ldr(RegB, {RegB, F * 8 + 4}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), - {State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]}, {fp, RegA, RegB}}. + { + State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]}, + {fp, RegA, RegB} + }. -spec move_to_native_register(state(), value(), armv6m_register()) -> state(). move_to_native_register( diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 5fa8a49305..d20b1576c6 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1367,7 +1367,7 @@ move_array_element_test0(State, Reg, Index, Dest, Dump) -> Stream = ?BACKEND:stream(State1), ?assertEqual(dump_to_bin(Dump), Stream). -move_array_element_test_DISABLED_() -> +move_array_element_test_() -> {setup, fun() -> ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) @@ -1376,79 +1376,82 @@ move_array_element_test_DISABLED_() -> [ %% move_array_element: reg[x] to x_reg ?_test(begin - move_array_element_test0(State0, r8, 2, {x_reg, 0}, << - " 0: f9400907 ldr x7, [x8, #16]\n" - " 4: f9001807 str x7, [x0, #48]" + move_array_element_test0(State0, r3, 2, {x_reg, 0}, << + " 0: 689f ldr r7, [r3, #8]\n" + " 2: 6187 str r7, [r0, #24]" >>) end), %% move_array_element: reg[x] to ptr ?_test(begin - move_array_element_test0(State0, r8, 3, {ptr, r10}, << - " 0: f9400d07 ldr x7, [x8, #24]\n" - " 4: f9000147 str x7, [x10]" + move_array_element_test0(State0, r3, 3, {ptr, r5}, << + " 0: 68df ldr r7, [r3, #12]\n" + " 2: 602f str r7, [r5, #0]" >>) end), %% move_array_element: reg[x] to y_reg ?_test(begin - move_array_element_test0(State0, r8, 1, {y_reg, 2}, << - " 0: f9401407 ldr x7, [x0, #40]\n" - " 4: f9400508 ldr x8, [x8, #8]\n" - " 8: f90008e8 str x8, [x7, #16]" + move_array_element_test0(State0, r3, 1, {y_reg, 2}, << + " 0: 6947 ldr r7, [r0, #20]\n" + " 2: 685e ldr r6, [r3, #4]\n" + " 4: 60be str r6, [r7, #8]" >>) end), - %% move_array_element: reg[x] to native reg (r10) + %% move_array_element: reg[x] to native reg (r5) ?_test(begin - move_array_element_test0(State0, r8, 1, r10, << - " 0: f940050a ldr x10, [x8, #8]" + move_array_element_test0(State0, r3, 1, r5, << + " 0: 685d ldr r5, [r3, #4]" >>) end), %% move_array_element: reg[x] to y_reg ?_test(begin - move_array_element_test0(State0, r8, 7, {y_reg, 31}, << - " 0: f9401407 ldr x7, [x0, #40]\n" - " 4: f9401d08 ldr x8, [x8, #56]\n" - " 8: f9007ce8 str x8, [x7, #248]" + move_array_element_test0(State0, r3, 7, {y_reg, 31}, << + " 0: 6947 ldr r7, [r0, #20]\n" + " 2: 69de ldr r6, [r3, #28]\n" + " 4: 67fe str r6, [r7, #124] ; 0x7c" >>) end), %% move_array_element: reg[x] to x_reg ?_test(begin - move_array_element_test0(State0, r8, 7, {x_reg, 15}, << - " 0: f9401d07 ldr x7, [x8, #56]\n" - " 4: f9005407 str x7, [x0, #168]" + move_array_element_test0(State0, r3, 7, {x_reg, 15}, << + " 0: 69df ldr r7, [r3, #28]\n" + " 2: 6547 str r7, [r0, #84] ; 0x54" >>) end), %% move_array_element: reg_x[reg_y] to x_reg ?_test(begin - {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), - move_array_element_test0(State1, r8, {free, Reg}, {x_reg, 2}, << - " 0: f9401107 ldr x7, [x8, #32]\n" - " 4: f8677907 ldr x7, [x8, x7, lsl #3]\n" - " 8: f9002007 str x7, [x0, #64]" + {State1, Reg} = ?BACKEND:get_array_element(State0, r3, 4), + move_array_element_test0(State1, r3, {free, Reg}, {x_reg, 2}, << + " 0: 691f ldr r7, [r3, #16]\n" + " 2: 00bf lsls r7, r7, #2\n" + " 4: 59df ldr r7, [r3, r7]\n" + " 6: 6207 str r7, [r0, #32]" >>) end), %% move_array_element: reg_x[reg_y] to pointer (large x reg) ?_test(begin - {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), - move_array_element_test0(State1, r8, {free, Reg}, {ptr, r10}, << - " 0: f9401107 ldr x7, [x8, #32]\n" - " 4: f8677907 ldr x7, [x8, x7, lsl #3]\n" - " 8: f9000147 str x7, [x10]" + {State1, Reg} = ?BACKEND:get_array_element(State0, r3, 4), + move_array_element_test0(State1, r3, {free, Reg}, {ptr, r5}, << + " 0: 691f ldr r7, [r3, #16]\n" + " 2: 00bf lsls r7, r7, #2\n" + " 4: 59df ldr r7, [r3, r7]\n" + " 6: 602f str r7, [r5, #0]" >>) end), %% move_array_element: reg_x[reg_y] to y_reg ?_test(begin - {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), - move_array_element_test0(State1, r8, {free, Reg}, {y_reg, 31}, << - " 0: f9401107 ldr x7, [x8, #32]\n" - " 4: f9401408 ldr x8, [x0, #40]\n" - " 8: f8677907 ldr x7, [x8, x7, lsl #3]\n" - " c: f9007d07 str x7, [x8, #248]" + {State1, Reg} = ?BACKEND:get_array_element(State0, r3, 4), + move_array_element_test0(State1, r3, {free, Reg}, {y_reg, 31}, << + " 0: 691f ldr r7, [r3, #16]\n" + " 2: 00bf lsls r7, r7, #2\n" + " 4: 6946 ldr r6, [r0, #20]\n" + " 6: 59df ldr r7, [r3, r7]\n" + " 8: 67f7 str r7, [r6, #124] ; 0x7c" >>) end) ] end}. -get_array_element_test_DISABLED_() -> +get_array_element_test_() -> {setup, fun() -> ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) @@ -1457,10 +1460,10 @@ get_array_element_test_DISABLED_() -> [ %% get_array_element: reg[x] to new native reg ?_test(begin - {State1, Reg} = ?BACKEND:get_array_element(State0, r8, 4), + {State1, Reg} = ?BACKEND:get_array_element(State0, r4, 4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401107 ldr x7, [x8, #32]" + " 0: 6927 ldr r7, [r4, #16]" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(r7, Reg) From 416fdde48a9f773a688f6122952ef605dcf3a472 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 16:08:30 +0200 Subject: [PATCH 12/97] armv6m: move_to_array_element Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 24 ++++++---- tests/libs/jit/jit_armv6m_tests.erl | 70 ++++++++++++++++------------- 2 files changed, 54 insertions(+), 40 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 5666080300..e23295fcce 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1478,13 +1478,15 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State0#state{stream = Stream1}; move_to_array_element( - #state{stream_module = StreamModule, stream = Stream0} = State0, + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, ValueReg, Reg, IndexReg ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> - I1 = jit_armv6m_asm:str(ValueReg, {Reg, IndexReg, lsl, 2}), - Stream1 = StreamModule:append(Stream0, I1), + I1 = jit_armv6m_asm:mov(Temp, IndexReg), + I2 = jit_armv6m_asm:lsls(Temp, Temp, 2), + I3 = jit_armv6m_asm:str(ValueReg, {Reg, Temp}), + Stream1 = StreamModule:append(Stream0, <>), State0#state{stream = Stream1}; move_to_array_element( State0, @@ -1511,9 +1513,10 @@ move_to_array_element( IndexReg, Offset ) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> - I1 = jit_armv6m_asm:add(Temp, IndexReg, Offset), - I2 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp, lsl, 2}), - Stream1 = StreamModule:append(Stream0, <>), + I1 = jit_armv6m_asm:adds(Temp, IndexReg, Offset), + I2 = jit_armv6m_asm:lsls(Temp, Temp, 2), + I3 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp}), + Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_to_array_element( State0, @@ -1524,9 +1527,12 @@ move_to_array_element( ) -> {State1, ValueReg} = copy_to_native_register(State0, Value), [Temp | _] = State1#state.available_regs, - I1 = jit_armv6m_asm:add(Temp, IndexReg, Offset), - I2 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp, lsl, 2}), - Stream1 = (State1#state.stream_module):append(State1#state.stream, <>), + I1 = jit_armv6m_asm:adds(Temp, IndexReg, Offset), + I2 = jit_armv6m_asm:lsls(Temp, Temp, 2), + I3 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp}), + Stream1 = (State1#state.stream_module):append( + State1#state.stream, <> + ), State2 = State1#state{stream = Stream1}, free_native_register(State2, ValueReg). diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index d20b1576c6..0d0e19c769 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1471,7 +1471,7 @@ get_array_element_test_() -> ] end}. -move_to_array_element_test_DISABLED_() -> +move_to_array_element_test_() -> {setup, fun() -> ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) @@ -1480,80 +1480,88 @@ move_to_array_element_test_DISABLED_() -> [ %% move_to_array_element/4: x_reg to reg[x] ?_test(begin - State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2), + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, 2), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9000907 str x7, [x8, #16]" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 609f str r7, [r3, #8]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), %% move_to_array_element/4: x_reg to reg[reg] ?_test(begin - State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, r9), + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, r4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f8297907 str x7, [x8, x9, lsl #3]" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 1c26 adds r6, r4, #0\n" + " 4: 00b6 lsls r6, r6, #2\n" + " 6: 519f str r7, [r3, r6]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), %% move_to_array_element/4: ptr to reg[reg] ?_test(begin - State1 = ?BACKEND:move_to_array_element(State0, {ptr, r7}, r8, r9), + State1 = ?BACKEND:move_to_array_element(State0, {ptr, r7}, r3, r4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f94000e7 ldr x7, [x7]\n" - " 4: f8297907 str x7, [x8, x9, lsl #3]" + " 0: 683f ldr r7, [r7, #0]\n" + " 2: 1c26 adds r6, r4, #0\n" + " 4: 00b6 lsls r6, r6, #2\n" + " 6: 519f str r7, [r3, r6]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), %% move_to_array_element/4: y_reg to reg[reg] ?_test(begin - State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, r8, r9), + State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, r3, r4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401407 ldr x7, [x0, #40]\n" - " 4: f94008e7 ldr x7, [x7, #16]\n" - " 8: f8297907 str x7, [x8, x9, lsl #3]" + " 0: 6947 ldr r7, [r0, #20]\n" + " 2: 68bf ldr r7, [r7, #8]\n" + " 4: 1c26 adds r6, r4, #0\n" + " 6: 00b6 lsls r6, r6, #2\n" + " 8: 519f str r7, [r3, r6]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r8, 2, 1), + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, 2, 1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9000907 str x7, [x8, #16]" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 609f str r7, [r3, #8]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), - State2 = setelement(8, State1, [r8, r9]), - [r8, r9] = ?BACKEND:used_regs(State2), - State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1), + State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), + State2 = setelement(7, State1, [r3, r4]), + [r3, r4] = ?BACKEND:used_regs(State2), + State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r3, r4, 1), Stream = ?BACKEND:stream(State3), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: 9100052a add x10, x9, #0x1\n" - " 8: f82a7907 str x7, [x8, x10, lsl #3]" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 1c66 adds r6, r4, #1\n" + " 4: 00b6 lsls r6, r6, #2\n" + " 6: 519f str r7, [r3, r6]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), - State2 = setelement(8, State1, [r8, r9]), - [r8, r9] = ?BACKEND:used_regs(State2), - State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1), + State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), + State2 = setelement(7, State1, [r3, r4]), + [r3, r4] = ?BACKEND:used_regs(State2), + State3 = ?BACKEND:move_to_array_element(State2, 42, r3, r4, 1), Stream = ?BACKEND:stream(State3), Dump = << - " 0: d2800547 mov x7, #0x2a // #42\n" - " 4: 9100052a add x10, x9, #0x1\n" - " 8: f82a7907 str x7, [x8, x10, lsl #3]" + " 0: 272a movs r7, #42 ; 0x2a\n" + " 2: 1c66 adds r6, r4, #1\n" + " 4: 00b6 lsls r6, r6, #2\n" + " 6: 519f str r7, [r3, r6]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) From df06fe0cce9fa6d51193e1234bb68c1286b90952 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 18:07:54 +0200 Subject: [PATCH 13/97] armv6m: if_block Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 211 +++++++++--------- tests/libs/jit/jit_armv6m_tests.erl | 320 ++++++++++++++++++---------- 2 files changed, 312 insertions(+), 219 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index e23295fcce..a82a6caae7 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -640,10 +640,13 @@ if_else_block( non_neg_integer() }. if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> - I = jit_armv6m_asm:tbz(Reg, 63, 0), - Stream1 = StreamModule:append(Stream0, I), + %% Compare register with 0 + I1 = jit_armv6m_asm:cmp(Reg, 0), + %% Branch if positive (N flag clear) + I2 = jit_armv6m_asm:bcc(pl, 0), + Stream1 = StreamModule:append(Stream0, <>), State1 = State0#state{stream = Stream1}, - {State1, {tbz, Reg, 63}, 0}; + {State1, pl, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', Val} @@ -685,43 +688,19 @@ if_block_cond( {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - I = jit_armv6m_asm:cbnz(Reg, 0), - Stream1 = StreamModule:append(Stream0, I), - State1 = if_block_free_reg(RegOrTuple, State0), - State2 = State1#state{stream = Stream1}, - {State2, {cbnz, Reg}, 0}; -if_block_cond( - #state{stream_module = StreamModule, stream = Stream0} = State0, {'(int)', RegOrTuple, '==', 0} -) -> - Reg = - case RegOrTuple of - {free, Reg0} -> Reg0; - RegOrTuple -> RegOrTuple - end, - I = jit_armv6m_asm:cbnz_w(Reg, 0), - Stream1 = StreamModule:append(Stream0, I), - State1 = if_block_free_reg(RegOrTuple, State0), - State2 = State1#state{stream = Stream1}, - {State2, {cbnz_w, Reg}, 0}; -if_block_cond( - #state{stream_module = StreamModule, stream = Stream0} = State0, - {'(int)', RegOrTuple, '==', Val} -) when is_integer(Val) -> - Reg = - case RegOrTuple of - {free, Reg0} -> Reg0; - RegOrTuple -> RegOrTuple - end, - I1 = jit_armv6m_asm:cmp_w(Reg, Val), + %% Compare register with 0 + I1 = jit_armv6m_asm:cmp(Reg, 0), + %% Branch if not equal I2 = jit_armv6m_asm:bcc(ne, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, {State2, ne, byte_size(I1)}; +%% Delegate (int) forms to regular forms since we only have 32-bit words +if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) -> + if_block_cond(State, {RegOrTuple, '==', 0}); +if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) -> + if_block_cond(State, {RegOrTuple, '==', Val}); if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '!=', Val} @@ -741,25 +720,8 @@ if_block_cond( State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, {State2, eq, byte_size(I1)}; -if_block_cond( - #state{stream_module = StreamModule, stream = Stream0} = State0, - {'(int)', RegOrTuple, '!=', Val} -) when is_integer(Val) -> - Reg = - case RegOrTuple of - {free, Reg0} -> Reg0; - RegOrTuple -> RegOrTuple - end, - I1 = jit_armv6m_asm:cmp_w(Reg, Val), - I2 = jit_armv6m_asm:bcc(eq, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), - State1 = if_block_free_reg(RegOrTuple, State0), - State2 = State1#state{stream = Stream1}, - {State2, eq, byte_size(I1)}; +if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) -> + if_block_cond(State, {RegOrTuple, '!=', Val}); if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', Val} @@ -780,7 +742,11 @@ if_block_cond( State2 = State1#state{stream = Stream1}, {State2, ne, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0} = State0, + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, {'(bool)', RegOrTuple, '==', false} ) -> Reg = @@ -788,14 +754,21 @@ if_block_cond( {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - % Test lowest bit - I = jit_armv6m_asm:tbnz(Reg, 0, 0), - Stream1 = StreamModule:append(Stream0, I), + % Test bit 0: shift bit 0 to MSB and branch if positive (bit was 0/false) + I1 = jit_armv6m_asm:lsls(Temp, Reg, 31), + % branch if negative (bit was 1/true) + I2 = jit_armv6m_asm:bcc(mi, 0), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, {tbnz, Reg, 0}, 0}; + {State2, mi, byte_size(I1)}; if_block_cond( - #state{stream_module = StreamModule, stream = Stream0} = State0, + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, {'(bool)', RegOrTuple, '!=', false} ) -> Reg = @@ -803,12 +776,15 @@ if_block_cond( {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - % Test lowest bit - I = jit_armv6m_asm:tbz(Reg, 0, 0), - Stream1 = StreamModule:append(Stream0, I), + % Test bit 0: shift bit 0 to MSB and branch if negative (bit was 1/true) + I1 = jit_armv6m_asm:lsls(Temp, Reg, 31), + % branch if positive (bit was 0/false) + I2 = jit_armv6m_asm:bcc(pl, 0), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, {tbz, Reg, 0}, 0}; + {State2, pl, byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, @@ -822,25 +798,27 @@ if_block_cond( {free, Reg0} -> Reg0; RegOrTuple -> RegOrTuple end, - % Test bits - TestCode = - try - jit_armv6m_asm:tst(Reg, Val) - catch - error:{unencodable_immediate, Val} -> + % Test bits - optimize for low bits masks that can use lsls + {TestCode, BranchCond} = + case bit_test_optimization(Val) of + {low_bits_mask, BitCount} -> + % Low bits mask: use lsls to shift high bits away + ShiftAmount = 32 - BitCount, + TestCode0 = jit_armv6m_asm:lsls(Temp, Reg, ShiftAmount), + % branch if not zero (any low bit was set) + {TestCode0, ne}; + no_optimization -> + % General case: use mov+tst TestCode0 = jit_armv6m_asm:mov(Temp, Val), TestCode1 = jit_armv6m_asm:tst(Reg, Temp), - <> + {<>, eq} end, - I2 = jit_armv6m_asm:bcc(eq, 0), - Code = << - TestCode/binary, - I2/binary - >>, + I2 = jit_armv6m_asm:bcc(BranchCond, 0), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, eq, byte_size(TestCode)}; + {State2, BranchCond, byte_size(TestCode)}; if_block_cond( #state{ stream_module = StreamModule, @@ -851,16 +829,19 @@ if_block_cond( ) when ?IS_GPR(Reg) -> % AND with mask OffsetBefore = StreamModule:offset(Stream0), - State1 = op_imm(State0, and_, Temp, Reg, Mask), - Stream1 = State1#state.stream, + I1 = jit_armv6m_asm:mov(Temp, Reg), + Stream1 = StreamModule:append(Stream0, I1), + State1 = State0#state{stream = Stream1}, + State2 = and_(State0, Temp, Mask), + Stream2 = State2#state.stream, % Compare with value I2 = jit_armv6m_asm:cmp(Temp, Val), - Stream2 = StreamModule:append(Stream1, I2), - OffsetAfter = StreamModule:offset(Stream2), + Stream3 = StreamModule:append(Stream2, I2), + OffsetAfter = StreamModule:offset(Stream3), I3 = jit_armv6m_asm:bcc(eq, 0), - Stream3 = StreamModule:append(Stream2, I3), - State2 = State1#state{stream = Stream3}, - {State2, eq, OffsetAfter - OffsetBefore}; + Stream4 = StreamModule:append(Stream3, I3), + State3 = State1#state{stream = Stream4}, + {State3, eq, OffsetAfter - OffsetBefore}; if_block_cond( #state{ stream_module = StreamModule, @@ -893,6 +874,19 @@ if_block_free_reg({free, Reg}, State0) -> if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> State0. +%% Helper function to determine if a bit test can be optimized using lsls +-spec bit_test_optimization(non_neg_integer()) -> + {low_bits_mask, non_neg_integer()} | no_optimization. +% ?TERM_PRIMARY_MASK +bit_test_optimization(16#3) -> {low_bits_mask, 2}; +% +bit_test_optimization(16#7) -> {low_bits_mask, 3}; +% ?TERM_IMMED_TAG_MASK +bit_test_optimization(16#F) -> {low_bits_mask, 4}; +% ?TERM_BOXED_TAG_MASK or ?TERM_IMMED2_TAG_MASK +bit_test_optimization(16#3F) -> {low_bits_mask, 6}; +bit_test_optimization(_) -> no_optimization. + -spec merge_used_regs(state(), [armv6m_register()]) -> state(). merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [ Reg | T @@ -1757,41 +1751,40 @@ get_module_index( Reg }. -op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, Reg, Reg, Val) -> +and_( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + Reg, + Val +) -> + I1 = jit_armv6m_asm:mov(Temp, Val), + I2 = jit_armv6m_asm:ands(Reg, Temp), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}. + +or_( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + Reg, + Val +) -> + I1 = jit_armv6m_asm:mov(Temp, Val), + I2 = jit_armv6m_asm:orrs(Reg, Temp), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}. + +add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> Stream1 = try - I = jit_armv6m_asm:Op(Reg, Reg, Val), + I = jit_armv6m_asm:adds(Reg, Val), StreamModule:append(Stream0, I) catch error:{unencodable_immediate, Val} -> [Temp | _] = State#state.available_regs, I1 = jit_armv6m_asm:mov(Temp, Val), - I2 = jit_armv6m_asm:Op(Reg, Reg, Temp), + I2 = jit_armv6m_asm:adds(Reg, Temp), StreamModule:append(Stream0, <>) end, - State#state{stream = Stream1}; -op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA, RegB, Val) -> - Stream1 = - try - I = jit_armv6m_asm:Op(RegA, RegB, Val), - StreamModule:append(Stream0, I) - catch - error:{unencodable_immediate, Val} -> - MoveI = jit_armv6m_asm:mov(RegA, Val), - AndI = jit_armv6m_asm:Op(RegA, RegB, RegA), - StreamModule:append(Stream0, <>) - end, State#state{stream = Stream1}. -and_(State, Reg, Val) -> - op_imm(State, and_, Reg, Reg, Val). - -or_(State, Reg, Val) -> - op_imm(State, orr, Reg, Reg, Val). - -add(State, Reg, Val) -> - op_imm(State, add, Reg, Reg, Val). - sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> I1 = jit_armv6m_asm:sub(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I1), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 0d0e19c769..46caf10c9d 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -289,7 +289,7 @@ increment_sp_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -if_block_test_DISABLED_() -> +if_block_test_() -> {setup, fun() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -309,10 +309,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: b6f80047 tbz x7, #63, 0x10\n" - " c: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f00 cmp r7, #0\n" + " 6: d500 bpl.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -327,11 +328,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: eb0800ff cmp x7, x8\n" - " c: 5400004a b.ge 0x14 // b.tcont\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 42b7 cmp r7, r6\n" + " 6: da00 bge.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -346,10 +347,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: b5000047 cbnz x7, 0x10\n" - " c: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f00 cmp r7, #0\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -364,10 +366,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: b5000047 cbnz x7, 0x10\n" - " c: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f00 cmp r7, #0\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -382,10 +385,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 35000047 cbnz w7, 0x10\n" - " c: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f00 cmp r7, #0\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -400,10 +404,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 35000047 cbnz w7, 0x10\n" - " c: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f00 cmp r7, #0\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -418,11 +423,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: f100ecff cmp x7, #0x3b\n" - " c: 54000040 b.eq 0x14 // b.none\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f3b cmp r7, #59 ; 0x3b\n" + " 6: d000 beq.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -437,11 +442,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: f100ecff cmp x7, #0x3b\n" - " c: 54000040 b.eq 0x14 // b.none\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f3b cmp r7, #59 ; 0x3b\n" + " 6: d000 beq.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -456,11 +461,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 7100a8ff cmp w7, #0x2a\n" - " c: 54000040 b.eq 0x14 // b.none\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f2a cmp r7, #42 ; 0x2a\n" + " 6: d000 beq.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -475,11 +480,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 7100a8ff cmp w7, #0x2a\n" - " c: 54000040 b.eq 0x14 // b.none\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f2a cmp r7, #42 ; 0x2a\n" + " 6: d000 beq.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -494,11 +499,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: f100ecff cmp x7, #0x3b\n" - " c: 54000041 b.ne 0x14 // b.any\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f3b cmp r7, #59 ; 0x3b\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -513,11 +518,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: f100ecff cmp x7, #0x3b\n" - " c: 54000041 b.ne 0x14 // b.any\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f3b cmp r7, #59 ; 0x3b\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -532,11 +537,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 7100a8ff cmp w7, #0x2a\n" - " c: 54000041 b.ne 0x14 // b.any\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f2a cmp r7, #42 ; 0x2a\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -551,11 +556,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 7100a8ff cmp w7, #0x2a\n" - " c: 54000041 b.ne 0x14 // b.any\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f2a cmp r7, #42 ; 0x2a\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -570,10 +575,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 37000047 tbnz w7, #0, 0x10\n" - " c: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 07fd lsls r5, r7, #31\n" + " 6: d400 bmi.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -588,10 +594,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 37000047 tbnz w7, #0, 0x10\n" - " c: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 07fd lsls r5, r7, #31\n" + " 6: d400 bmi.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -606,10 +613,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 36000047 tbz w7, #0, 0x10\n" - " c: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 07fd lsls r5, r7, #31\n" + " 6: d500 bpl.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -624,10 +632,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 36000047 tbz w7, #0, 0x10\n" - " c: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 07fd lsls r5, r7, #31\n" + " 6: d500 bpl.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -642,11 +651,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: f24008ff tst x7, #0x7\n" - " c: 54000040 b.eq 0x14 // b.none\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 077d lsls r5, r7, #29\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -661,12 +670,12 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: d28000a9 mov x9, #0x5 // #5\n" - " c: ea0900ff tst x7, x9\n" - " 10: 54000040 b.eq 0x18 // b.none\n" - " 14: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2505 movs r5, #5\n" + " 6: 422f tst r7, r5\n" + " 8: d000 beq.n 0xc\n" + " a: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -681,11 +690,11 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: f24008ff tst x7, #0x7\n" - " c: 54000040 b.eq 0x14 // b.none\n" - " 10: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 077d lsls r5, r7, #29\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -700,12 +709,13 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 92400ce9 and x9, x7, #0xf\n" - " c: f1003d3f cmp x9, #0xf\n" - " 10: 54000040 b.eq 0x18 // b.none\n" - " 14: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 250f movs r5, #15\n" + " 6: 402d ands r5, r5\n" + " 8: 2d0f cmp r5, #15\n" + " a: d000 beq.n 0xe\n" + " c: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -720,12 +730,13 @@ if_block_test_DISABLED_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: 92400ce7 and x7, x7, #0xf\n" - " c: f1003cff cmp x7, #0xf\n" - " 10: 54000040 b.eq 0x18 // b.none\n" - " 14: 91000908 add x8, x8, #0x2" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 250f movs r5, #15\n" + " 6: 402f ands r7, r5\n" + " 8: 2f0f cmp r7, #15\n" + " a: d000 beq.n 0xe\n" + " c: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -733,6 +744,95 @@ if_block_test_DISABLED_() -> ] end}. +%% Test coverage for bitwise AND optimization paths +bitwise_and_optimization_test_() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 6}), + {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 7}), + [ + %% Test optimized case: 16#3 (low bits mask, 2 bits) - lsls r5, r7, #30 + ?_test(begin + State3 = ?BACKEND:if_block( + State2, + {RegA, '&', 16#3, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" + " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" + " 4: 07bd lsls r5, r7, #30\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State3)) + end), + %% Test optimized case: 16#F (low bits mask, 4 bits) - lsls r5, r7, #28 + ?_test(begin + State3 = ?BACKEND:if_block( + State2, + {RegA, '&', 16#F, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" + " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" + " 4: 073d lsls r5, r7, #28\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State3)) + end), + %% Test optimized case: 16#3F (low bits mask, 6 bits) - lsls r5, r7, #26 + ?_test(begin + State3 = ?BACKEND:if_block( + State2, + {RegA, '&', 16#3F, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" + " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" + " 4: 06bd lsls r5, r7, #26\n" + " 6: d100 bne.n 0xa\n" + " 8: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State3)) + end), + %% Test non-optimized case: 5 (neither single bit nor low bits mask) - mov+tst + ?_test(begin + State3 = ?BACKEND:if_block( + State2, + {RegA, '&', 5, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" + " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" + " 4: 2505 movs r5, #5\n" + " 6: 422f tst r7, r5\n" + " 8: d000 beq.n 0xc\n" + " a: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State3)) + end) + ]. + if_else_block_test_DISABLED() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg1} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), From 229aa66d534ab618421c8c7a180837cf641f94c6 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 18:20:19 +0200 Subject: [PATCH 14/97] armv6m: fix is_boolean_test Signed-off-by: Paul Guyot --- tests/libs/jit/jit_armv6m_tests.erl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 46caf10c9d..72aca4515c 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1095,7 +1095,7 @@ is_number_test_DISABLED() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -is_boolean_test_DISABLED() -> +is_boolean_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), Label = 1, {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), @@ -1111,12 +1111,12 @@ is_boolean_test_DISABLED() -> State4 = ?BACKEND:update_branches(State3, Labels), Stream = ?BACKEND:stream(State4), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f1012cff cmp x7, #0x4b\n" - " 8: 54000080 b.eq 0x18 // b.none\n" - " c: f1002cff cmp x7, #0xb\n" - " 10: 54000040 b.eq 0x18 // b.none\n" - " 14: 14000041 b 0x118" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 2f4b cmp r7, #75 ; 0x4b\n" + " 4: d002 beq.n 0xc\n" + " 6: 2f0b cmp r7, #11\n" + " 8: d000 beq.n 0xc\n" + " a: e07f b.n 0x10c" >>, ?assertEqual(dump_to_bin(Dump), Stream). From 31e2d8dbcb6e0ced78464e53ee5bea4d1d916fd7 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 18:49:37 +0200 Subject: [PATCH 15/97] armv6m: fix if_block & fix is_number_test Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 146 +++++++++++++++------------- tests/libs/jit/jit_armv6m_tests.erl | 59 ++++++----- 2 files changed, 112 insertions(+), 93 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index a82a6caae7..175f1690f6 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -823,7 +823,7 @@ if_block_cond( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] + available_regs = [Temp | AT] } = State0, {Reg, '&', Mask, '!=', Val} ) when ?IS_GPR(Reg) -> @@ -832,7 +832,7 @@ if_block_cond( I1 = jit_armv6m_asm:mov(Temp, Reg), Stream1 = StreamModule:append(Stream0, I1), State1 = State0#state{stream = Stream1}, - State2 = and_(State0, Temp, Mask), + State2 = and_(State1#state{available_regs = AT}, Temp, Mask), Stream2 = State2#state.stream, % Compare with value I2 = jit_armv6m_asm:cmp(Temp, Val), @@ -840,7 +840,7 @@ if_block_cond( OffsetAfter = StreamModule:offset(Stream3), I3 = jit_armv6m_asm:bcc(eq, 0), Stream4 = StreamModule:append(Stream3, I3), - State3 = State1#state{stream = Stream4}, + State3 = State2#state{stream = Stream4, available_regs = [Temp | State2#state.available_regs]}, {State3, eq, OffsetAfter - OffsetBefore}; if_block_cond( #state{ @@ -1219,51 +1219,9 @@ move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when is_integer(N) -> - StreamModule = State0#state.stream_module, - Stream0 = State0#state.stream, - CurrentOffset = StreamModule:offset(Stream0), - - %% Calculate where literal will be placed (must be word-aligned) - %% After LDR (2 bytes) + Branch (2 bytes) = 4 bytes from current position - OffsetAfterInstructions = CurrentOffset + 4, - %% Find next word-aligned position for literal - LiteralPosition = - case OffsetAfterInstructions rem 4 of - % Already aligned - 0 -> OffsetAfterInstructions; - % Add 2 bytes padding to align - _ -> OffsetAfterInstructions + 2 - end, - PaddingNeeded = LiteralPosition - OffsetAfterInstructions, - - %% Calculate LDR PC-relative offset - %% PC = (current_instruction_address & ~3) + 4 - LdrInstructionAddr = CurrentOffset, - LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, - LiteralOffset = LiteralPosition - LdrPC, - - %% Generate: ldr rTemp, [pc, #LiteralOffset] ; Load from literal - I1 = jit_armv6m_asm:ldr(Temp, {pc, LiteralOffset}), - %% Calculate branch offset - %% Branch is at CurrentOffset + 2, need to jump past literal - BranchPosition = CurrentOffset + 2, - % After the 4-byte literal - TargetPosition = LiteralPosition + 4, - BranchOffset = TargetPosition - BranchPosition, - I2 = jit_armv6m_asm:b(BranchOffset), - %% Generate padding if needed (just zeros) - Padding = - case PaddingNeeded of - 0 -> <<>>; - % 2 bytes of padding - 2 -> <<0:16>> - end, - %% Generate: .word N ; The 32-bit literal - I3 = <>, - - Stream1 = StreamModule:append(Stream0, <>), - State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), - State1#state{available_regs = AR0}; + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), + State2 = move_to_vm_register(State1, Temp, Dest), + State2#state{available_regs = AR0}; % Source is a VM register move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) -> I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(?MAX_REG)), @@ -1752,37 +1710,87 @@ get_module_index( }. and_( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val ) -> - I1 = jit_armv6m_asm:mov(Temp, Val), - I2 = jit_armv6m_asm:ands(Reg, Temp), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}. + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_armv6m_asm:ands(Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{available_regs = [Temp | AT], stream = Stream2}. or_( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val ) -> - I1 = jit_armv6m_asm:mov(Temp, Val), - I2 = jit_armv6m_asm:orrs(Reg, Temp), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}. + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_armv6m_asm:orrs(Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{available_regs = [Temp | AT], stream = Stream2}. + +add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) -> + try jit_armv6m_asm:adds(Reg, Val) of + I -> + Stream1 = StreamModule:append(Stream0, I), + State0#state{stream = Stream1} + catch + error:{unencodable_immediate, Val} -> + [Temp | AT] = State0#state.available_regs, + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_armv6m_asm:adds(Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{available_regs = [Temp | AT], stream = Stream2} + end. + +mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when + Val >= 0 andalso Val =< 255 +-> + I = jit_armv6m_asm:mov(Reg, Val), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> + %% Use a literal pool with a branch instruction (branch-over pattern) + %% Calculate where literal will be placed (must be word-aligned) + %% After LDR (2 bytes) + Branch (2 bytes) = 4 bytes from current position + CurrentOffset = StreamModule:offset(Stream0), + OffsetAfterInstructions = CurrentOffset + 4, + %% Find next word-aligned position for literal + LiteralPosition = + case OffsetAfterInstructions rem 4 of + % Already aligned + 0 -> OffsetAfterInstructions; + % Add 2 bytes padding to align + _ -> OffsetAfterInstructions + 2 + end, + PaddingNeeded = LiteralPosition - OffsetAfterInstructions, -add(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - Stream1 = - try - I = jit_armv6m_asm:adds(Reg, Val), - StreamModule:append(Stream0, I) - catch - error:{unencodable_immediate, Val} -> - [Temp | _] = State#state.available_regs, - I1 = jit_armv6m_asm:mov(Temp, Val), - I2 = jit_armv6m_asm:adds(Reg, Temp), - StreamModule:append(Stream0, <>) + %% Calculate LDR PC-relative offset + %% PC = (current_instruction_address & ~3) + 4 + LdrInstructionAddr = CurrentOffset, + LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, + LiteralOffset = LiteralPosition - LdrPC, + + %% Generate: ldr rTemp, [pc, #LiteralOffset] ; Load from literal + I1 = jit_armv6m_asm:ldr(Reg, {pc, LiteralOffset}), + %% Calculate branch offset + %% Branch is at CurrentOffset + 2, need to jump past literal + BranchPosition = CurrentOffset + 2, + % After the 4-byte literal + TargetPosition = LiteralPosition + 4, + BranchOffset = TargetPosition - BranchPosition, + I2 = jit_armv6m_asm:b(BranchOffset), + %% Generate padding if needed (just zeros) + Padding = + case PaddingNeeded of + 0 -> <<>>; + % 2 bytes of padding + 2 -> <<0:16>> end, + Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}. sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 72aca4515c..94eed3d683 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -711,11 +711,12 @@ if_block_test_() -> Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 250f movs r5, #15\n" - " 6: 402d ands r5, r5\n" - " 8: 2d0f cmp r5, #15\n" - " a: d000 beq.n 0xe\n" - " c: 3602 adds r6, #2" + " 4: 1c3d adds r5, r7, #0\n" + " 6: 240f movs r4, #15\n" + " 8: 4025 ands r5, r4\n" + " a: 2d0f cmp r5, #15\n" + " c: d000 beq.n 0x10\n" + " e: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -1045,7 +1046,7 @@ cond_jump_to_label(Cond, Label, MMod, MSt0) -> MMod:jump_to_label(BSt0, Label) end). -is_number_test_DISABLED() -> +is_number_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), Label = 1, Arg1 = {x_reg, 0}, @@ -1075,23 +1076,33 @@ is_number_test_DISABLED() -> State4 = ?BACKEND:update_branches(State3, Labels), Stream = ?BACKEND:stream(State4), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: 92400ce8 and x8, x7, #0xf\n" - " 8: f1003d1f cmp x8, #0xf\n" - " c: 540001c0 b.eq 0x44 // b.none\n" - " 10: 924004e8 and x8, x7, #0x3\n" - " 14: f100091f cmp x8, #0x2\n" - " 18: 54000040 b.eq 0x20 // b.none\n" - " 1c: 1400004a b 0x144\n" - " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" - " 24: f94000e7 ldr x7, [x7]\n" - " 28: 924014e8 and x8, x7, #0x3f\n" - " 2c: f100211f cmp x8, #0x8\n" - " 30: 540000a0 b.eq 0x44 // b.none\n" - " 34: 924014e7 and x7, x7, #0x3f\n" - " 38: f10060ff cmp x7, #0x18\n" - " 3c: 54000040 b.eq 0x44 // b.none\n" - " 40: 14000041 b 0x144" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 1c3e adds r6, r7, #0\n" + " 4: 250f movs r5, #15\n" + " 6: 402e ands r6, r5\n" + " 8: 2e0f cmp r6, #15\n" + " a: d015 beq.n 0x38\n" + " c: 1c3e adds r6, r7, #0\n" + " e: 2503 movs r5, #3\n" + " 10: 402e ands r6, r5\n" + " 12: 2e02 cmp r6, #2\n" + " 14: d000 beq.n 0x18\n" + " 16: e08f b.n 0x138\n" + " 18: 4e00 ldr r6, [pc, #0] ; (0x1c)\n" + " 1a: e001 b.n 0x20\n" + " 1c: fffc ffff ; instruction: 0xfffcffff\n" + " 20: 4037 ands r7, r6\n" + " 22: 683f ldr r7, [r7, #0]\n" + " 24: 1c3e adds r6, r7, #0\n" + " 26: 253f movs r5, #63 ; 0x3f\n" + " 28: 402e ands r6, r5\n" + " 2a: 2e08 cmp r6, #8\n" + " 2c: d004 beq.n 0x38\n" + " 2e: 263f movs r6, #63 ; 0x3f\n" + " 30: 4037 ands r7, r6\n" + " 32: 2f18 cmp r7, #24\n" + " 34: d000 beq.n 0x38\n" + " 36: e07f b.n 0x138" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1895,7 +1906,7 @@ dump_to_bin0(<>, hex, Acc) -> InstrA = list_to_integer([H1, H2, H3, H4], 16), InstrB = list_to_integer([H5, H6, H7, H8], 16), - dump_to_bin0(Rest, instr, [<>, <> | Acc]); + dump_to_bin0(Rest, instr, [<>, <> | Acc]); %% Handle 16-bit ARM32 Thumb instructions (4 hex digits) dump_to_bin0(<>, hex, Acc) when (Sp =:= $\t orelse Sp =:= $\s) andalso From 933834bd91471463010079ae469b8defe1fdf8b3 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 20:41:28 +0200 Subject: [PATCH 16/97] armv6m: fix is_integer_test and optimize implementation Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 46 ++++++++++ libs/jit/src/jit_armv6m_asm.erl | 32 +++++++ tests/libs/jit/jit_armv6m_tests.erl | 138 ++++++++++++++++++---------- 3 files changed, 170 insertions(+), 46 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 175f1690f6..ad9a329b81 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -819,6 +819,38 @@ if_block_cond( State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, {State2, BranchCond, byte_size(TestCode)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {Reg, '&', 16#F, '!=', 16#F} +) when ?IS_GPR(Reg) -> + % Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG + I1 = jit_armv6m_asm:mvns(Temp, Reg), + % 32 - 4 + I2 = jit_armv6m_asm:lsls(Temp, Temp, 28), + I3 = jit_armv6m_asm:bcc(eq, 0), + Stream1 = StreamModule:append(Stream0, <>), + State1 = State0#state{stream = Stream1}, + {State1, eq, byte_size(I1) + byte_size(I2)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F} +) when ?IS_GPR(Reg) -> + % Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG + I1 = jit_armv6m_asm:mvns(Reg, Reg), + % 32 - 4 + I2 = jit_armv6m_asm:lsls(Reg, Reg, 28), + I3 = jit_armv6m_asm:bcc(eq, 0), + Stream1 = StreamModule:append(Stream0, <>), + State1 = State0#state{stream = Stream1}, + State2 = if_block_free_reg(RegTuple, State1), + {State2, eq, byte_size(I1) + byte_size(I2)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1709,6 +1741,20 @@ get_module_index( Reg }. +%% @doc Perform an AND of a register with an immediate. +%% JIT currentl calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to +%% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool +%% by using BICS for -4. +and_( + #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + Reg, + Val +) when Val < 0 andalso Val >= -256 -> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)), + Stream1 = State1#state.stream, + I = jit_armv6m_asm:bics(Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{available_regs = [Temp | AT], stream = Stream2}; and_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl index b569140a62..21834c9c8c 100644 --- a/libs/jit/src/jit_armv6m_asm.erl +++ b/libs/jit/src/jit_armv6m_asm.erl @@ -30,6 +30,7 @@ bx/1, cmp/2, ands/2, + bics/2, orrs/2, ldr/2, lsls/2, @@ -38,6 +39,7 @@ lsrs/3, mov/2, movs/2, + mvns/2, str/2, tst/2, stp/4, @@ -289,6 +291,21 @@ movs(Rd, Rm) when error({movs_requires_low_registers, {Rd, Rm}}) end. +%% MVNS bitwise NOT +-spec mvns(arm_gpr_register(), arm_gpr_register()) -> binary(). +mvns(Rd, Rm) when + is_atom(Rd), is_atom(Rm) +-> + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + case RdNum =< 7 andalso RmNum =< 7 of + true -> + %% Thumb MOVS register: 0000000000mmmdddd + <<(16#43D0 bor (RmNum bsl 3) bor RdNum):16/little>>; + false -> + error({mvns_requires_low_registers, {Rd, Rm}}) + end. + %% ARMv6-M Thumb MOV instruction - handle both immediate and register moves -spec mov(arm_gpr_register(), arm_gpr_register() | integer()) -> binary(). %% MOV immediate (using MOVS for low registers with immediate 0-255) @@ -518,6 +535,21 @@ ands(Rd, Rm) when %% Thumb ANDS (2-operand): 0100000000mmmddd <<(16#4000 bor (RmNum bsl 3) bor RdNum):16/little>>. +%% Emit an BICS instruction (bitwise AND with complement) +-spec bics(arm_gpr_register(), arm_gpr_register()) -> binary(). +bics(Rd, Rm) when + is_atom(Rd), + is_atom(Rm), + Rd =/= sp, + Rd =/= pc, + Rm =/= sp, + Rm =/= pc +-> + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + %% Thumb ANDS (2-operand): 0100000000mmmddd + <<(16#4380 bor (RmNum bsl 3) bor RdNum):16/little>>. + %% ARMv6-M Thumb ORRS instruction (register only - sets flags) -spec orrs(arm_gpr_register(), arm_gpr_register()) -> binary(). orrs(Rd, Rm) when diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 94eed3d683..a2ede1bbae 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -708,13 +708,53 @@ if_block_test_() -> end ), Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 43fd mvns r5, r7\n" + " 6: 072d lsls r5, r5, #28\n" + " 8: d000 beq.n 0xc\n" + " a: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 43ff mvns r7, r7\n" + " 6: 073f lsls r7, r7, #28\n" + " 8: d000 beq.n 0xc\n" + " a: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" " 4: 1c3d adds r5, r7, #0\n" - " 6: 240f movs r4, #15\n" + " 6: 243f movs r4, #63 ; 0x3f\n" " 8: 4025 ands r5, r4\n" - " a: 2d0f cmp r5, #15\n" + " a: 2d08 cmp r5, #8\n" " c: d000 beq.n 0x10\n" " e: 3602 adds r6, #2" >>, @@ -724,7 +764,13 @@ if_block_test_() -> ?_test(begin State1 = ?BACKEND:if_block( State0, - {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, + { + {free, RegA}, + '&', + ?TERM_BOXED_TAG_MASK, + '!=', + ?TERM_BOXED_POSITIVE_INTEGER + }, fun(BSt0) -> ?BACKEND:add(BSt0, RegB, 2) end @@ -733,9 +779,9 @@ if_block_test_() -> Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 250f movs r5, #15\n" + " 4: 253f movs r5, #63 ; 0x3f\n" " 6: 402f ands r7, r5\n" - " 8: 2f0f cmp r7, #15\n" + " 8: 2f08 cmp r7, #8\n" " a: d000 beq.n 0xe\n" " c: 3602 adds r6, #2" >>, @@ -994,7 +1040,7 @@ get_list_test_DISABLED() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -is_integer_test_DISABLED() -> +is_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), Label = 1, Arg1 = {x_reg, 0}, @@ -1024,20 +1070,24 @@ is_integer_test_DISABLED() -> State4 = ?BACKEND:update_branches(State3, Labels), Stream = ?BACKEND:stream(State4), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: 92400ce8 and x8, x7, #0xf\n" - " 8: f1003d1f cmp x8, #0xf\n" - " c: 54000160 b.eq 0x38 // b.none\n" - " 10: 924004e8 and x8, x7, #0x3\n" - " 14: f100091f cmp x8, #0x2\n" - " 18: 54000040 b.eq 0x20 // b.none\n" - " 1c: 14000047 b 0x138\n" - " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" - " 24: f94000e7 ldr x7, [x7]\n" - " 28: 924014e7 and x7, x7, #0x3f\n" - " 2c: f10020ff cmp x7, #0x8\n" - " 30: 54000040 b.eq 0x38 // b.none\n" - " 34: 14000041 b 0x138" + "0: 6987 ldr r7, [r0, #24]\n" + " 2: 43fe mvns r6, r7\n" + " 4: 0736 lsls r6, r6, #28\n" + " 6: d00d beq.n 0x24\n" + " 8: 1c3e adds r6, r7, #0\n" + " a: 2503 movs r5, #3\n" + " c: 402e ands r6, r5\n" + " e: 2e02 cmp r6, #2\n" + " 10: d000 beq.n 0x14\n" + " 12: e087 b.n 0x124\n" + " 14: 2603 movs r6, #3\n" + " 16: 43b7 bics r7, r6\n" + " 18: 683f ldr r7, [r7, #0]\n" + " 1a: 263f movs r6, #63 ; 0x3f\n" + " 1c: 4037 ands r7, r6\n" + " 1e: 2f08 cmp r7, #8\n" + " 20: d000 beq.n 0x24\n" + " 22: e07f b.n 0x124" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1077,32 +1127,28 @@ is_number_test() -> Stream = ?BACKEND:stream(State4), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 1c3e adds r6, r7, #0\n" - " 4: 250f movs r5, #15\n" - " 6: 402e ands r6, r5\n" - " 8: 2e0f cmp r6, #15\n" - " a: d015 beq.n 0x38\n" - " c: 1c3e adds r6, r7, #0\n" - " e: 2503 movs r5, #3\n" - " 10: 402e ands r6, r5\n" - " 12: 2e02 cmp r6, #2\n" - " 14: d000 beq.n 0x18\n" - " 16: e08f b.n 0x138\n" - " 18: 4e00 ldr r6, [pc, #0] ; (0x1c)\n" - " 1a: e001 b.n 0x20\n" - " 1c: fffc ffff ; instruction: 0xfffcffff\n" - " 20: 4037 ands r7, r6\n" - " 22: 683f ldr r7, [r7, #0]\n" - " 24: 1c3e adds r6, r7, #0\n" - " 26: 253f movs r5, #63 ; 0x3f\n" - " 28: 402e ands r6, r5\n" - " 2a: 2e08 cmp r6, #8\n" - " 2c: d004 beq.n 0x38\n" - " 2e: 263f movs r6, #63 ; 0x3f\n" - " 30: 4037 ands r7, r6\n" - " 32: 2f18 cmp r7, #24\n" - " 34: d000 beq.n 0x38\n" - " 36: e07f b.n 0x138" + " 2: 43fe mvns r6, r7\n" + " 4: 0736 lsls r6, r6, #28\n" + " 6: d012 beq.n 0x2e\n" + " 8: 1c3e adds r6, r7, #0\n" + " a: 2503 movs r5, #3\n" + " c: 402e ands r6, r5\n" + " e: 2e02 cmp r6, #2\n" + " 10: d000 beq.n 0x14\n" + " 12: e08c b.n 0x12e\n" + " 14: 2603 movs r6, #3\n" + " 16: 43b7 bics r7, r6\n" + " 18: 683f ldr r7, [r7, #0]\n" + " 1a: 1c3e adds r6, r7, #0\n" + " 1c: 253f movs r5, #63 ; 0x3f\n" + " 1e: 402e ands r6, r5\n" + " 20: 2e08 cmp r6, #8\n" + " 22: d004 beq.n 0x2e\n" + " 24: 263f movs r6, #63 ; 0x3f\n" + " 26: 4037 ands r7, r6\n" + " 28: 2f18 cmp r7, #24\n" + " 2a: d000 beq.n 0x2e\n" + " 2c: e07f b.n 0x12e" >>, ?assertEqual(dump_to_bin(Dump), Stream). From 96517065b406e5c61b5d0492f6fe9439951a03e1 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 10:47:16 +0200 Subject: [PATCH 17/97] armv6m: test comment after optimized is_number Signed-off-by: Paul Guyot --- tests/libs/jit/jit_armv6m_tests.erl | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index a2ede1bbae..7e25724321 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1096,6 +1096,7 @@ cond_jump_to_label(Cond, Label, MMod, MSt0) -> MMod:jump_to_label(BSt0, Label) end). +%% Keep the unoptimized version to test the and case. is_number_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), Label = 1, From f27fc7ed1992299fb4065d74b6a83f4e5caad333 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 20:54:43 +0200 Subject: [PATCH 18/97] armv6m: fix get_list test Signed-off-by: Paul Guyot --- tests/libs/jit/jit_armv6m_tests.erl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 7e25724321..00596f8118 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1019,24 +1019,25 @@ call_bif_with_large_literal_integer_test_DISABLED() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -get_list_test_DISABLED() -> +get_list_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, Reg, -4), + State2 = ?BACKEND:and_(State1, Reg, ?TERM_PRIMARY_CLEAR_MASK), State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), State5 = ?BACKEND:free_native_registers(State4, [Reg]), ?BACKEND:assert_all_native_free(State5), Stream = ?BACKEND:stream(State5), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" - " 8: f9401408 ldr x8, [x0, #40]\n" - " c: f94004e9 ldr x9, [x7, #8]\n" - " 10: f9000509 str x9, [x8, #8]\n" - " 14: f9401408 ldr x8, [x0, #40]\n" - " 18: f94000e9 ldr x9, [x7]\n" - " 1c: f9000109 str x9, [x8]" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 2603 movs r6, #3\n" + " 4: 43b7 bics r7, r6\n" + " 6: 6946 ldr r6, [r0, #20]\n" + " 8: 687d ldr r5, [r7, #4]\n" + " a: 6075 str r5, [r6, #4]\n" + " c: 6946 ldr r6, [r0, #20]\n" + " e: 683d ldr r5, [r7, #0]\n" + " 10: 6035 str r5, [r6, #0]" >>, ?assertEqual(dump_to_bin(Dump), Stream). From 95d47592f4e6cc77352acfe2c7cca5494a5057df Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 20:58:55 +0200 Subject: [PATCH 19/97] armv6m: fix if_else_block_test Signed-off-by: Paul Guyot --- tests/libs/jit/jit_armv6m_tests.erl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 00596f8118..90d64403bf 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -880,7 +880,7 @@ bitwise_and_optimization_test_() -> end) ]. -if_else_block_test_DISABLED() -> +if_else_block_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg1} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), {State2, Reg2} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), @@ -897,13 +897,13 @@ if_else_block_test_DISABLED() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f9401c08 ldr x8, [x0, #56]\n" - " 8: f100ecff cmp x7, #0x3b\n" - " c: 54000061 b.ne 0x18 // b.any\n" - " 10: 91000908 add x8, x8, #0x2\n" - " 14: 14000002 b 0x1c\n" - " 18: 91001108 add x8, x8, #0x4" + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f3b cmp r7, #59 ; 0x3b\n" + " 6: d101 bne.n 0xc\n" + " 8: 3602 adds r6, #2\n" + " a: e000 b.n 0xe\n" + " c: 3604 adds r6, #4" >>, ?assertEqual(dump_to_bin(Dump), Stream). From 74e60aca4be8d0e834ae60b1727b01333bbe4b65 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 24 Aug 2025 21:24:54 +0200 Subject: [PATCH 20/97] armv6m: fix mul Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 31 +++++++++++----------- tests/libs/jit/jit_armv6m_tests.erl | 40 ++++++++++++++--------------- 2 files changed, 36 insertions(+), 35 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index ad9a329b81..f94d930e3d 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1849,38 +1849,38 @@ mul(State, _Reg, 1) -> mul(State, Reg, 2) -> shift_left(State, Reg, 1); mul(#state{available_regs = [Temp | _]} = State, Reg, 3) -> - I1 = jit_armv6m_asm:lsl(Temp, Reg, 1), - I2 = jit_armv6m_asm:add(Reg, Temp, Reg), + I1 = jit_armv6m_asm:lsls(Temp, Reg, 1), + I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State, Reg, 4) -> shift_left(State, Reg, 2); mul(#state{available_regs = [Temp | _]} = State, Reg, 5) -> - I1 = jit_armv6m_asm:lsl(Temp, Reg, 2), - I2 = jit_armv6m_asm:add(Reg, Temp, Reg), + I1 = jit_armv6m_asm:lsls(Temp, Reg, 2), + I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State0, Reg, 6) -> State1 = mul(State0, Reg, 3), mul(State1, Reg, 2); mul(#state{available_regs = [Temp | _]} = State, Reg, 7) -> - I1 = jit_armv6m_asm:lsl(Temp, Reg, 3), - I2 = jit_armv6m_asm:sub(Reg, Temp, Reg), + I1 = jit_armv6m_asm:lsls(Temp, Reg, 3), + I2 = jit_armv6m_asm:subs(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State, Reg, 8) -> shift_left(State, Reg, 3); mul(#state{available_regs = [Temp | _]} = State, Reg, 9) -> - I1 = jit_armv6m_asm:lsl(Temp, Reg, 3), - I2 = jit_armv6m_asm:add(Reg, Temp, Reg), + I1 = jit_armv6m_asm:lsls(Temp, Reg, 3), + I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State0, Reg, 10) -> State1 = mul(State0, Reg, 5), mul(State1, Reg, 2); mul(#state{available_regs = [Temp | _]} = State, Reg, 15) -> - I1 = jit_armv6m_asm:lsl(Temp, Reg, 4), - I2 = jit_armv6m_asm:sub(Reg, Temp, Reg), + I1 = jit_armv6m_asm:lsls(Temp, Reg, 4), + I2 = jit_armv6m_asm:subs(Reg, Temp, Reg), Stream1 = (State#state.stream_module):append(State#state.stream, <>), State#state{stream = Stream1}; mul(State, Reg, 16) -> @@ -1890,15 +1890,16 @@ mul(State, Reg, 32) -> mul(State, Reg, 64) -> shift_left(State, Reg, 6); mul( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val ) -> % multiply by decomposing by power of 2 - I1 = jit_armv6m_asm:mov(Temp, Val), - I2 = jit_armv6m_asm:mul(Reg, Reg, Temp), - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}. + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_armv6m_asm:muls(Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}. -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). decrement_reductions_and_maybe_schedule_next( diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 90d64403bf..e20e41eda6 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1850,7 +1850,7 @@ mul_test0(State0, Reg, Imm, Dump) -> Stream = ?BACKEND:stream(State1), ?assertEqual(dump_to_bin(Dump), Stream). -mul_test_DISABLED_() -> +mul_test_() -> {setup, fun() -> ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) @@ -1859,61 +1859,61 @@ mul_test_DISABLED_() -> [ ?_test(begin mul_test0(State0, r2, 2, << - "0: d37ff842 lsl x2, x2, #1" + " 0: 0052 lsls r2, r2, #1" >>) end), ?_test(begin mul_test0(State0, r2, 3, << - " 0: d37ff847 lsl x7, x2, #1\n" - " 4: 8b0200e2 add x2, x7, x2" + " 0: 0057 lsls r7, r2, #1\n" + " 2: 18ba adds r2, r7, r2" >>) end), ?_test(begin mul_test0(State0, r2, 4, << - "0: d37ef442 lsl x2, x2, #2" + " 0: 0092 lsls r2, r2, #2" >>) end), ?_test(begin mul_test0(State0, r2, 5, << - " 0: d37ef447 lsl x7, x2, #2\n" - " 4: 8b0200e2 add x2, x7, x2" + " 0: 0097 lsls r7, r2, #2\n" + " 2: 18ba adds r2, r7, r2" >>) end), ?_test(begin mul_test0(State0, r2, 6, << - " 0: d37ff847 lsl x7, x2, #1\n" - " 4: 8b0200e2 add x2, x7, x2\n" - " 8: d37ff842 lsl x2, x2, #1" + " 0: 0057 lsls r7, r2, #1\n" + " 2: 18ba adds r2, r7, r2\n" + " 4: 0052 lsls r2, r2, #1" >>) end), ?_test(begin mul_test0(State0, r2, 7, << - " 0: d37df047 lsl x7, x2, #3\n" - " 4: cb0200e2 sub x2, x7, x2" + " 0: 00d7 lsls r7, r2, #3\n" + " 2: 1aba subs r2, r7, r2" >>) end), ?_test(begin mul_test0(State0, r2, 8, << - "0: d37df042 lsl x2, x2, #3" + " 0: 00d2 lsls r2, r2, #3" >>) end), ?_test(begin mul_test0(State0, r2, 9, << - " 0: d37df047 lsl x7, x2, #3\n" - " 4: 8b0200e2 add x2, x7, x2" + " 0: 00d7 lsls r7, r2, #3\n" + " 2: 18ba adds r2, r7, r2" >>) end), ?_test(begin mul_test0(State0, r2, 10, << - " 0: d37ef447 lsl x7, x2, #2\n" - " 4: 8b0200e2 add x2, x7, x2\n" - " 8: d37ff842 lsl x2, x2, #1" + " 0: 0097 lsls r7, r2, #2\n" + " 2: 18ba adds r2, r7, r2\n" + " 4: 0052 lsls r2, r2, #1" >>) end), ?_test(begin mul_test0(State0, r2, 11, << - " 0: d2800167 mov x7, #0xb // #11\n" - " 4: 9b077c42 mul x2, x2, x7" + " 0: 270b movs r7, #11\n" + " 2: 437a muls r2, r7" >>) end) ] From 47d4c4fcdf465975329b5df85dfa4b8dbb2f112c Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 25 Aug 2025 23:38:32 +0200 Subject: [PATCH 21/97] armv6m: function call with stack parameters, WIP Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 583 ++++++++++++++++++------ libs/jit/src/jit_armv6m_asm.erl | 369 +++++---------- tests/libs/jit/jit_armv6m_asm_tests.erl | 67 +++ tests/libs/jit/jit_armv6m_tests.erl | 304 +++++++++--- 4 files changed, 865 insertions(+), 458 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index f94d930e3d..0ed7884ff5 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -160,7 +160,6 @@ % ctx->e is 0x28 % ctx->x is 0x30 -define(CTX_REG, r0). --define(JITSTATE_REG, r1). -define(NATIVE_INTERFACE_REG, r2). -define(Y_REGS, {?CTX_REG, 16#14}). -define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). @@ -168,9 +167,11 @@ -define(FP_REGS, {?CTX_REG, 16#60}). -define(BS, {?CTX_REG, 16#64}). -define(BS_OFFSET, {?CTX_REG, 16#68}). --define(JITSTATE_MODULE, {?JITSTATE_REG, 0}). --define(JITSTATE_CONTINUATION, {?JITSTATE_REG, 16#4}). --define(JITSTATE_REDUCTIONCOUNT, {?JITSTATE_REG, 16#8}). +% JITSTATE is on stack, accessed via stack offset +% These macros now expect a register that contains the jit_state pointer +-define(JITSTATE_MODULE(Reg), {Reg, 0}). +-define(JITSTATE_CONTINUATION(Reg), {Reg, 16#4}). +-define(JITSTATE_REDUCTIONCOUNT(Reg), {Reg, 16#8}). -define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}). -define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}). @@ -182,6 +183,10 @@ %% Intra-procedure call scratch register -define(IP_REG, r12). +%% Stack offset for function prolog: push {r1,r4,r5,r6,r7,lr} +%% r1 (JITSTATE_REG) is at SP+0 after push +-define(STACK_OFFSET_JITSTATE, 0). + -define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). -define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). -define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). @@ -198,6 +203,7 @@ %% Reorder to match AArch64 test expectations (r7 first) -define(AVAILABLE_REGS, [r7, r6, r5, r4, r3, r1, r12]). -define(PARAMETER_REGS, [r0, r1, r2, r3]). +-define(SCRATCH_REGS, [r7, r6, r5, r4, r3, r2, r1, r0, r12]). %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. @@ -387,6 +393,26 @@ update_branches( Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), update_branches(State#state{stream = Stream1, branches = BranchesT}, Labels). +%%----------------------------------------------------------------------------- +%% @doc Generate code to load a primitive function pointer into a register +%% @param Primitive index to the primitive to call +%% @param TargetReg register to load the function pointer into +%% @return Binary instruction sequence +%%----------------------------------------------------------------------------- +-spec load_primitive_ptr(non_neg_integer(), armv6m_register()) -> binary(). +load_primitive_ptr(Primitive, TargetReg) -> + case Primitive of + 0 -> + jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, 0}); + N when N * 4 =< 124 -> + jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, N * 4}); + N -> + % For large offsets, load offset into TargetReg then use register addressing + I1 = jit_armv6m_asm:movs(TargetReg, N * 4), + I2 = jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, TargetReg}), + <> + end. + %%----------------------------------------------------------------------------- %% @doc Emit a call (call with return) to a primitive with arguments. This %% function converts arguments and pass them following the backend ABI @@ -401,21 +427,22 @@ update_branches( call_primitive( #state{ stream_module = StreamModule, - stream = Stream0 + stream = Stream0, + available_regs = [TempReg | RestRegs], + used_regs = UsedRegs } = State, Primitive, Args ) -> - PrepCall = - case Primitive of - 0 -> - jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, 0}); - N -> - jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, N * 8}) - end, + % Use a low register for LDR since ARM Thumb LDR only works with low registers + PrepCall = load_primitive_ptr(Primitive, TempReg), Stream1 = StreamModule:append(Stream0, PrepCall), - StateCall = State#state{stream = Stream1}, - call_func_ptr(StateCall, {free, ?IP_REG}, Args). + StateCall = State#state{ + stream = Stream1, + available_regs = RestRegs, + used_regs = [TempReg | UsedRegs] + }, + call_func_ptr(StateCall, {free, TempReg}, Args). %%----------------------------------------------------------------------------- %% @doc Emit a jump (call without return) to a primitive with arguments. This @@ -443,24 +470,147 @@ call_primitive_last( ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, [Temp | AvailableRegs1] = ScratchRegs, UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, - PrepCall = - case Primitive of - 0 -> - jit_armv6m_asm:ldr(Temp, {?NATIVE_INTERFACE_REG, 0}); - N -> - jit_armv6m_asm:ldr(Temp, {?NATIVE_INTERFACE_REG, N * 8}) - end, + PrepCall = load_primitive_ptr(Primitive, Temp), Stream1 = StreamModule:append(Stream0, PrepCall), - State1 = set_args( - State0#state{ - stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs - }, - Args - ), - #state{stream = Stream2} = State1, - Call = jit_armv6m_asm:br(Temp), - Stream3 = StreamModule:append(Stream2, Call), - State1#state{stream = Stream3, available_regs = ?AVAILABLE_REGS, used_regs = []}. + % Assert that jit_state is the second argument for tail_call_with_jit_state + [FirstArg, jit_state | ArgsT] = Args, + ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT], + + % Handle arguments differently for tail calls with 5+ arguments + case length(Args) of + NumArgs when NumArgs >= 5 -> + % For tail calls with 5+ args, set first 4 args in registers without stack allocation + State1 = set_args_registers_only( + State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + lists:sublist(ArgsForTailCall, 4) + ), + % 5th argument needs to be moved to r5 + FifthArg = lists:nth(5, ArgsForTailCall), + State2 = + case FifthArg of + % Already in r5 + {free, r5} -> + State1; + % Already in r5 + r5 -> + State1; + % Handle {free, Reg} - extract the register and move to r5 + {free, Reg} -> + move_to_native_register(State1, Reg, r5); + _ -> + % Move 5th argument to r5 + move_to_native_register(State1, FifthArg, r5) + end, + % Move function pointer to r1 if it's not already in r1 + #state{stream = Stream2} = State2, + {FinalFuncPtrReg, Stream3} = + case Temp of + % Already in r1, no move needed + r1 -> + {r1, Stream2}; + _ -> + % Move from Temp register to r1 + MoveToR1 = jit_armv6m_asm:mov(r1, Temp), + {r1, StreamModule:append(Stream2, MoveToR1)} + end, + State3 = tail_call_with_jit_state_stack( + State2#state{stream = Stream3}, FinalFuncPtrReg, NumArgs + ); + _ -> + % For 4 or fewer args, use standard argument setup + State1 = set_args( + State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + ArgsForTailCall + ), + State3 = tail_call_with_jit_state_registers_only(State1, Temp) + end, + State3#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. + +%%----------------------------------------------------------------------------- +%% @doc Tail call to address in register, restoring prolog registers including +%% jit_state in r1. Only use when target function expects jit_state as second parameter. +%% Function prolog saves: push {r1,r4,r5,r6,r7,lr} +%% @end +%% @param State current backend state +%% @param Reg register containing the target address +%% @return Updated backend state +%%----------------------------------------------------------------------------- +tail_call_with_jit_state_registers_only( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + Reg +) -> + % Standard tail call for 4 or fewer arguments + % First restore LR from stack (so target function can return properly) + % Choose temp register to avoid conflict with Reg + TempReg = + case Reg of + r7 -> r6; + _ -> r7 + end, + % Load saved LR to temp + RestoreLRToTemp = jit_armv6m_asm:ldr(TempReg, {sp, 20}), + % Store function pointer (pipeline friendly) + OverwriteLR = jit_armv6m_asm:str(Reg, {sp, 20}), + % Move saved LR to LR register + RestoreLR = jit_armv6m_asm:mov(lr, TempReg), + % Pop prolog registers: {r1,r4,r5,r6,r7,lr} where lr is now target address + % This restores jit_state in r1 and branches to target via pc + PopCode = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]), + + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +tail_call_with_jit_state_stack( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + FuncPtrReg, + NumArgs +) when NumArgs >= 5 -> + % Tail call with 5 or 6 arguments - need to handle 5th (and 6th) stack parameters + % 5th argument is in r5, 6th argument (if present) is in r6, function pointer in FuncPtrReg + % Restore lr first (using r7 as temp since r6 might contain 6th arg), then r7, then r6 + + % Load lr value to r7 (temp) + LoadLRtoR7 = jit_armv6m_asm:ldr(r7, {sp, 20}), + % Move to lr + MoveLR = jit_armv6m_asm:mov(lr, r7), + % Restore r7 from stack + RestoreR7 = jit_armv6m_asm:ldr(r7, {sp, 16}), + % Store 5th arg where r7 was + Store5thArg = jit_armv6m_asm:str(r5, {sp, 16}), + % Store function ptr where lr was + StoreFuncPtr = jit_armv6m_asm:str(FuncPtrReg, {sp, 20}), + + % Handle 6th argument if present (NumArgs == 6) + {Store6thArg, RestoreR6, PopAndJump} = + case NumArgs of + 5 -> + % For 5 args: restore r6 from stack, pop r1,r4,r5,pc + RestoreR6_5 = jit_armv6m_asm:ldr(r6, {sp, 12}), + PopAndJump_5 = jit_armv6m_asm:pop([r1, r4, r5, pc]), + {<<>>, RestoreR6_5, PopAndJump_5}; + 6 -> + % For 6 args: store r6 (6th arg) where r6 was saved, pop r1,r4,r5,r6,pc + Store6thArg_6 = jit_armv6m_asm:str(r6, {sp, 12}), + PopAndJump_6 = jit_armv6m_asm:pop([r1, r4, r5, r6, pc]), + {Store6thArg_6, <<>>, PopAndJump_6} + end, + + Code = + <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. %%----------------------------------------------------------------------------- %% @doc Emit a return of a value if it's not equal to ctx. @@ -809,7 +959,7 @@ if_block_cond( {TestCode0, ne}; no_optimization -> % General case: use mov+tst - TestCode0 = jit_armv6m_asm:mov(Temp, Val), + TestCode0 = jit_armv6m_asm:movs(Temp, Val), TestCode1 = jit_armv6m_asm:tst(Reg, Temp), {<>, eq} end, @@ -997,8 +1147,8 @@ call_func_ptr( [FuncPtrTuple | Args] ), UsedRegs1 = UsedRegs0 -- FreeRegs, - SavedRegs = [?LR_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], - {SavedRegsOdd, Stream1} = push_registers(SavedRegs, StreamModule, Stream0), + SavedRegs = [?CTX_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + {_SavedRegsOdd, Stream1} = push_registers(SavedRegs, StreamModule, Stream0), % Set up arguments following AArch64 calling convention State1 = set_args(State0#state{stream = Stream1}, Args), @@ -1015,13 +1165,13 @@ call_func_ptr( 0 -> jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, 0}); N -> - jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, N * 8}) + jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, N * 4}) end, {?IP_REG, StreamModule:append(Stream2, PrepCall)} end, - % Call the function pointer (using BLR for call with return) - Call = jit_armv6m_asm:blr(FuncPtrReg), + % Call the function pointer (using BLX for call with return) + Call = jit_armv6m_asm:blx(FuncPtrReg), Stream4 = StreamModule:append(Stream3, Call), % If r0 is in used regs, save it to another temporary register @@ -1036,7 +1186,7 @@ call_func_ptr( {Stream4, r0} end, - Stream6 = pop_registers(SavedRegsOdd, lists:reverse(SavedRegs), StreamModule, Stream5), + Stream6 = pop_registers(lists:reverse(SavedRegs), StreamModule, Stream5), AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), @@ -1050,33 +1200,91 @@ call_func_ptr( ResultReg }. -push_registers([RegA, RegB | Tail], StreamModule, Stream0) -> - Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:stp(RegA, RegB, {sp, -16}, '!')), - push_registers(Tail, StreamModule, Stream1); +push_registers(SavedRegs, StreamModule, Stream0) when length(SavedRegs) > 0 -> + IsOdd = (length(SavedRegs) rem 2) =:= 1, + Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:push(SavedRegs)), + {IsOdd, Stream1}; push_registers([], _StreamModule, Stream0) -> - {false, Stream0}; -push_registers([RegA], StreamModule, Stream0) -> - Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:str(RegA, {sp, -16}, '!')), - {true, Stream1}. - -pop_registers(true, [Reg | Tail], StreamModule, Stream0) -> - % Odd number of registers, pop the last one first - Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:ldr(Reg, {sp}, 16)), - pop_registers(false, Tail, StreamModule, Stream1); -pop_registers(false, [], _StreamModule, Stream0) -> - Stream0; -pop_registers(false, [RegB, RegA | Tail], StreamModule, Stream0) -> - Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:ldp(RegA, RegB, {sp}, 16)), - pop_registers(false, Tail, StreamModule, Stream1). + {false, Stream0}. + +pop_registers(SavedRegs, StreamModule, Stream0) when length(SavedRegs) > 0 -> + Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:pop(SavedRegs)), + Stream1; +pop_registers([], _StreamModule, Stream0) -> + Stream0. -spec set_args(state(), [arg()]) -> state(). -set_args( +% Handle 5 parameters: handle 5th on stack first, then first 4 in registers r0-r3 +set_args(State, [Arg1, Arg2, Arg3, Arg4, Arg5]) -> + % Handle 5th argument on stack first (with alignment) - this may free registers + State1 = set_args_push_stack(State, Arg5, undefined), + % Then set up first 4 arguments in registers using existing logic + set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4]); +% Handle 6 parameters: handle 5th and 6th on stack first, then first 4 in registers r0-r3 +set_args(State, [Arg1, Arg2, Arg3, Arg4, Arg5, Arg6]) -> + % Handle 5th and 6th arguments on stack first (no alignment needed) - this may free registers + State1 = set_args_push_stack(State, Arg5, Arg6), + % Then set up first 4 arguments in registers using existing logic + set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4]); +% Handle up to 4 parameters: all in registers r0-r3 +set_args(State, Args) when length(Args) =< 4 -> + set_args_registers_only(State, Args). + +%% @doc Handle 5th and optionally 6th arguments on stack. +%% For 5 args: push 5th arg at sp+0 with 4-byte padding at sp+4 for 8-byte alignment +%% For 6 args: push 5th arg at sp+0, 6th arg at sp+4 (2×4 bytes = 8-byte aligned, no padding) +set_args_push_stack( + #state{stream_module = StreamModule, stream = Stream0} = State0, Arg5, Arg6 +) -> + % Decrement stack pointer by 8 bytes once + I1 = jit_armv6m_asm:sub(sp, sp, 8), + Stream1 = StreamModule:append(Stream0, I1), + + % Handle Arg6 if present (goes at sp+4) + State1 = + case Arg6 of + undefined -> + % 5 arguments: no 6th arg to handle + State0#state{stream = Stream1}; + {free, Reg6} -> + % 6 arguments: Arg6 is already in register, store directly and free + I2 = jit_armv6m_asm:str(Reg6, {sp, 4}), + StreamB = StreamModule:append(Stream1, I2), + free_native_register(State0#state{stream = StreamB}, Reg6); + _ -> + % 6 arguments: store Arg6 at sp+4 + {StateA, Reg6} = move_to_native_register(State0#state{stream = Stream1}, Arg6), + StreamA = StateA#state.stream, + I2 = jit_armv6m_asm:str(Reg6, {sp, 4}), + StreamB = StreamModule:append(StreamA, I2), + free_native_register(StateA#state{stream = StreamB}, Reg6) + end, + + % Handle Arg5 (always present, always goes at sp+0) + State2 = + case Arg5 of + {free, Reg5} -> + % Arg5 is already in register, store directly and free + I3 = jit_armv6m_asm:str(Reg5, {sp, 0}), + Stream3 = StreamModule:append(State1#state.stream, I3), + free_native_register(State1#state{stream = Stream3}, Reg5); + _ -> + % Move Arg5 to register, store, and free + {StateTemp, Reg5} = move_to_native_register(State1, Arg5), + StreamTemp = StateTemp#state.stream, + I3 = jit_armv6m_asm:str(Reg5, {sp, 0}), + Stream3 = StreamModule:append(StreamTemp, I3), + free_native_register(StateTemp#state{stream = Stream3}, Reg5) + end, + State2. + +set_args_registers_only( #state{stream = Stream0, stream_module = StreamModule, used_regs = UsedRegs} = State0, Args ) -> ParamRegs = parameter_regs(Args), ArgsRegs = args_regs(Args), AvailableScratchGP = - [rdi, rsi, rdx, rcx, r8, r9, r10, r11] -- ParamRegs -- ArgsRegs -- UsedRegs, + ?SCRATCH_REGS -- ParamRegs -- ArgsRegs -- UsedRegs, Offset = StreamModule:offset(Stream0), Args1 = [ case Arg of @@ -1085,8 +1293,8 @@ set_args( end || Arg <- Args ], - SetArgsCode = set_args0(Args1, ArgsRegs, ParamRegs, AvailableScratchGP, []), - Stream1 = StreamModule:append(Stream0, SetArgsCode), + State1 = set_args0(State0, Args1, ArgsRegs, ParamRegs, AvailableScratchGP), + Stream1 = State1#state.stream, NewUsedRegs = lists:foldl( fun ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed); @@ -1108,7 +1316,8 @@ parameter_regs(Args) -> parameter_regs0([], _, Acc) -> lists:reverse(Acc); parameter_regs0([Special | T], [GPReg | GPRegsT], Acc) when - Special =:= ctx orelse Special =:= jit_state orelse Special =:= offset + Special =:= ctx orelse Special =:= jit_state orelse Special =:= jit_state_tail_call orelse + Special =:= offset -> parameter_regs0(T, GPRegsT, [GPReg | Acc]); parameter_regs0([{free, Free} | T], GPRegs, Acc) -> @@ -1124,7 +1333,10 @@ parameter_regs0([{y_reg, _} | T], [GPReg | GPRegsT], Acc) -> parameter_regs0([{fp_reg, _} | T], [GPRegA, GPRegB | GPRegsT], Acc) -> parameter_regs0(T, GPRegsT, [GPRegB, GPRegA | Acc]); parameter_regs0([Int | T], [GPReg | GPRegsT], Acc) when is_integer(Int) -> - parameter_regs0(T, GPRegsT, [GPReg | Acc]). + parameter_regs0(T, GPRegsT, [GPReg | Acc]); +% Handle stack parameters when we run out of registers +parameter_regs0([_Arg | T], [], Acc) -> + parameter_regs0(T, [], [stack | Acc]). replace_reg(Args, Reg1, Reg2) -> replace_reg0(Args, Reg1, Reg2, []). @@ -1136,66 +1348,99 @@ replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> replace_reg0([Other | T], Reg, Replacement, Acc) -> replace_reg0(T, Reg, Replacement, [Other | Acc]). -set_args0([], [], [], _AvailGP, Acc) -> - list_to_binary(lists:reverse(Acc)); -set_args0([{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc) -> - set_args0([FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, Acc); -set_args0([ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, Acc) -> - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, Acc); +set_args0(State, [], [], [], _AvailGP) -> + State; +set_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP) -> + set_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP); +set_args0(State, [ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP) -> + set_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP); set_args0( + #state{stream_module = StreamModule, stream = Stream0} = State, [jit_state | ArgsT], - [?JITSTATE_REG | ArgsRegs], - [?JITSTATE_REG | ParamRegs], - AvailGP, - Acc + [jit_state | ArgsRegs], + [ParamReg | ParamRegs], + AvailGP ) -> - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, Acc); + false = lists:member(ParamReg, ArgsRegs), + % jit_state is saved on stack, load from stack offset + I = jit_armv6m_asm:ldr(ParamReg, {sp, ?STACK_OFFSET_JITSTATE}), + Stream1 = StreamModule:append(Stream0, I), + set_args0(State#state{stream = Stream1}, ArgsT, ArgsRegs, ParamRegs, AvailGP); set_args0( - [jit_state | ArgsT], [?JITSTATE_REG | ArgsRegs], [ParamReg | ParamRegs], AvailGP, Acc + State, + [jit_state_tail_call | ArgsT], + [jit_state | ArgsRegs], + [ParamReg | ParamRegs], + AvailGP ) -> false = lists:member(ParamReg, ArgsRegs), - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [ - jit_armv6m_asm:mov(ParamReg, ?JITSTATE_REG) | Acc - ]); + % For tail calls, jit_state will be restored by pop - skip generating load instruction + set_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP); +% Handle stack parameters - load argument into temp register and push to stack +set_args0( + #state{stream_module = StreamModule} = State, + [Arg | ArgsT], + [stack | ArgsRegs], + [stack | ParamRegs], + [TempReg | _] = AvailGP +) -> + % Generate code to set up argument in temp register + State1 = set_args1(State, Arg, TempReg), + % Decrement stack pointer by 4 bytes and store argument + DecSP = jit_armv6m_asm:sub(sp, sp, 4), + StoreInstr = jit_armv6m_asm:str(TempReg, {sp, 0}), + Stream1 = StreamModule:append(State1#state.stream, <>), + set_args0(State1#state{stream = Stream1}, ArgsT, ArgsRegs, ParamRegs, AvailGP); % ctx is special as we need it to access x_reg/y_reg/fp_reg -set_args0([Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, Acc) -> +set_args0(State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP) -> false = lists:member(?CTX_REG, ArgsRegs), - J = set_args1(Arg, ?CTX_REG), - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); + State1 = set_args1(State, Arg, ?CTX_REG), + set_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP); set_args0( + #state{stream_module = StreamModule} = State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [ParamReg | ParamRegs], - [Avail | AvailGPT] = AvailGP, - Acc + [Avail | AvailGPT] = AvailGP ) -> - J = set_args1(Arg, ParamReg), + State1 = set_args1(State, Arg, ParamReg), case lists:member(ParamReg, ArgsRegs) of false -> - set_args0(ArgsT, ArgsRegs, ParamRegs, AvailGP, [J | Acc]); + set_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP); true -> I = jit_armv6m_asm:mov(Avail, ParamReg), + Stream1 = StreamModule:append(State1#state.stream, I), NewArgsT = replace_reg(ArgsT, ParamReg, Avail), - set_args0(NewArgsT, ArgsRegs, ParamRegs, AvailGPT, [J, I | Acc]) + set_args0(State1#state{stream = Stream1}, NewArgsT, ArgsRegs, ParamRegs, AvailGPT) end. -set_args1(Reg, Reg) -> - []; -set_args1({x_reg, extra}, Reg) -> - jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)); -set_args1({x_reg, X}, Reg) -> - jit_armv6m_asm:ldr(Reg, ?X_REG(X)); -set_args1({ptr, Source}, Reg) -> - jit_armv6m_asm:ldr(Reg, {Source, 0}); -set_args1({y_reg, X}, Reg) -> - [ - jit_armv6m_asm:ldr(Reg, ?Y_REGS), - jit_armv6m_asm:ldr(Reg, {Reg, X * 8}) - ]; -set_args1(ArgReg, Reg) when ?IS_GPR(ArgReg) -> - jit_armv6m_asm:mov(Reg, ArgReg); -set_args1(Arg, Reg) when is_integer(Arg) -> - jit_armv6m_asm:mov(Reg, Arg). +set_args1(State, Reg, Reg) -> + State; +set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, Reg) -> + I = jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, Reg) -> + I = jit_armv6m_asm:ldr(Reg, ?X_REG(X)), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Source}, Reg) -> + I = jit_armv6m_asm:ldr(Reg, {Source, 0}), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, {y_reg, X}, Reg) -> + I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(Reg, {Reg, X * 8}), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; +set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg) when + ?IS_GPR(ArgReg) +-> + I = jit_armv6m_asm:mov(Reg, ArgReg), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_args1(State, Arg, Reg) when is_integer(Arg) -> + mov_immediate(State, Reg, Arg). %%----------------------------------------------------------------------------- %% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg) @@ -1531,18 +1776,15 @@ move_to_native_register( {State#state{stream = Stream1}, Reg}; move_to_native_register( #state{ - stream_module = StreamModule, - stream = Stream0, available_regs = [Reg | AvailT], used_regs = Used - } = State, + } = State0, Imm ) when is_integer(Imm) -> - I1 = jit_armv6m_asm:mov(Reg, Imm), - Stream1 = StreamModule:append(Stream0, I1), - {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; + State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT}, + {move_to_native_register(State1, Imm, Reg), Reg}; move_to_native_register( #state{ stream_module = StreamModule, @@ -1593,10 +1835,12 @@ move_to_native_register( -spec move_to_native_register(state(), value(), armv6m_register()) -> state(). move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst -) when is_atom(RegSrc) orelse is_integer(RegSrc) -> +) when is_atom(RegSrc) -> I = jit_armv6m_asm:mov(RegDst, RegSrc), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; +move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) -> + mov_immediate(State, RegDst, ValSrc); move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst ) when ?IS_GPR(Reg) -> @@ -1688,7 +1932,7 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], + available_regs = [Temp, TempJitState | _], branches = Branches } = State, Label @@ -1696,8 +1940,10 @@ set_continuation_to_label( Offset = StreamModule:offset(Stream0), I1 = jit_armv6m_asm:adr(Temp, 0), Reloc = {Label, Offset, {adr, Temp}}, - I2 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION), - Code = <>, + % Load jit_state pointer from stack, then store continuation + I2a = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), + I2b = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1, branches = [Reloc | Branches]}. @@ -1705,7 +1951,7 @@ set_continuation_to_offset( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _], + available_regs = [Temp, TempJitState | _], branches = Branches } = State ) -> @@ -1713,8 +1959,10 @@ set_continuation_to_offset( Offset = StreamModule:offset(Stream0), I1 = jit_armv6m_asm:adr(Temp, 0), Reloc = {OffsetRef, Offset, {adr, Temp}}, - I2 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION), - Code = <>, + % Load jit_state pointer from stack, then store continuation + I2a = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), + I2b = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}. @@ -1728,13 +1976,15 @@ get_module_index( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Reg | AvailableT], + available_regs = [Reg, TempJitState | AvailableT], used_regs = UsedRegs0 } = State ) -> - I1 = jit_armv6m_asm:ldr(Reg, ?JITSTATE_MODULE), - I2 = jit_armv6m_asm:ldr_w(Reg, ?MODULE_INDEX(Reg)), - Code = <>, + % Load jit_state pointer from stack, then load module + I1a = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), + I1b = jit_armv6m_asm:ldr(Reg, ?JITSTATE_MODULE(TempJitState)), + I2 = jit_armv6m_asm:ldr(Reg, ?MODULE_INDEX(Reg)), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), { State#state{stream = Stream1, available_regs = AvailableT, used_regs = [Reg | UsedRegs0]}, @@ -1795,9 +2045,16 @@ add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) - mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when Val >= 0 andalso Val =< 255 -> - I = jit_armv6m_asm:mov(Reg, Val), + I = jit_armv6m_asm:movs(Reg, Val), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; +mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when + Val >= -256 andalso Val < 0 +-> + I1 = jit_armv6m_asm:movs(Reg, bnot (Val)), + I2 = jit_armv6m_asm:negs(Reg, Reg), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> %% Use a literal pool with a branch instruction (branch-over pattern) %% Calculate where literal will be placed (must be word-aligned) @@ -1902,36 +2159,82 @@ mul( State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}. -spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). +%% +%% Analysis of AArch64 pattern and ARM Thumb mapping: +%% +%% AArch64 layout (from call_ext_only_test): +%% 0x0-0x8: Decrement reductions, store back +%% 0xc: b.ne 0x20 ; Branch if reductions != 0 to continuation +%% 0x10-0x1c: adr/str/ldr/br sequence for scheduling next process +%% 0x20: [CONTINUATION POINT] - Actual function starts here +%% +%% ARM Thumb equivalent should be: +%% 0x0-0x6: Decrement reductions, store back +%% 0x8: bne continuation_after_prolog ; Branch OVER the prolog if reductions != 0 +%% 0xa-0x?: adr/str/ldr/blx sequence for scheduling +%% continuation: push {r1,r4-r7,lr} ; PROLOG (only executed when scheduled) +%% continuation_after_prolog: [actual function body] +%% +%% Key insight: When reductions != 0, we branch PAST the prolog directly to the function. +%% When reductions == 0, we schedule next process, and when we resume, we execute the prolog +%% then continue to the function body. +%% decrement_reductions_and_maybe_schedule_next( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0 + #state{ + stream_module = StreamModule, stream = Stream0, available_regs = [Temp, TempJitState | _] + } = State0 ) -> + % Load jit_state pointer from stack + I0 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), % Load reduction count - I1 = jit_armv6m_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT), + I1 = jit_armv6m_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)), % Decrement reduction count I2 = jit_armv6m_asm:subs(Temp, Temp, 1), % Store back the decremented value - I3 = jit_armv6m_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT), - Stream1 = StreamModule:append(Stream0, <>), + I3 = jit_armv6m_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)), + Stream1 = StreamModule:append(Stream0, <>), BNEOffset = StreamModule:offset(Stream1), % Branch if reduction count is not zero I4 = jit_armv6m_asm:bcc(ne, 0), % Set continuation to the next instruction ADROffset = BNEOffset + byte_size(I4), - I5 = jit_armv6m_asm:adr(Temp, 0), - I6 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION), + I5 = jit_armv6m_asm:adr(Temp, 4), + I6 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), % Append the instructions to the stream Stream2 = StreamModule:append(Stream1, <>), State1 = State0#state{stream = Stream2}, State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]), - % Rewrite the branch and adr instructions + % Add the prolog at the continuation point (where scheduled execution resumes) #state{stream = Stream3} = State2, - NewOffset = StreamModule:offset(Stream3), - NewI4 = jit_armv6m_asm:bcc(ne, NewOffset - BNEOffset), - NewI5 = jit_armv6m_asm:adr(Temp, NewOffset - ADROffset), - Stream4 = StreamModule:replace( - Stream3, BNEOffset, <> + CurrentOffset = StreamModule:offset(Stream3), + % Ensure continuation point is 4-byte aligned by adding NOP if necessary + {AlignedContinuationOffset, Stream3_5} = + case CurrentOffset rem 4 of + % Already 4-byte aligned + 0 -> + {CurrentOffset, Stream3}; + 2 -> + % Add NOP to achieve 4-byte alignment + NOPPadded = StreamModule:append(Stream3, jit_armv6m_asm:nop()), + {StreamModule:offset(NOPPadded), NOPPadded}; + _ -> + error({unexpected_alignment, CurrentOffset}) + end, + Prolog = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]), + Stream4 = StreamModule:append(Stream3_5, Prolog), + % Calculate offsets for rewriting + ContinuationAfterPrologOffset = StreamModule:offset(Stream4), + % Rewrite the branch to skip over the prolog (branch to continuation_after_prolog) + NewI4 = jit_armv6m_asm:bcc(ne, ContinuationAfterPrologOffset - BNEOffset), + % Rewrite the adr to point to the aligned continuation point (prolog location) + % The ADR instruction uses PC aligned down to 4-byte boundary + ADRAlignedOffset = ADROffset band (bnot 3), + ADRImmediate = AlignedContinuationOffset - ADRAlignedOffset, + NewI5 = jit_armv6m_asm:adr(Temp, ADRImmediate), + Stream5 = StreamModule:replace( + Stream4, BNEOffset, <> ), - merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs). + merge_used_regs(State2#state{stream = Stream5}, State1#state.used_regs). -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). call_or_schedule_next(State0, Label) -> @@ -1944,17 +2247,19 @@ call_only_or_schedule_next( stream_module = StreamModule, stream = Stream0, branches = Branches, - available_regs = [Temp | _] + available_regs = [Temp, TempJitState | _] } = State0, Label ) -> + % Load jit_state pointer from stack + I0 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), % Load reduction count - I1 = jit_armv6m_asm:ldr_w(Temp, ?JITSTATE_REDUCTIONCOUNT), + I1 = jit_armv6m_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)), % Decrement reduction count I2 = jit_armv6m_asm:subs(Temp, Temp, 1), % Store back the decremented value - I3 = jit_armv6m_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT), - Stream1 = StreamModule:append(Stream0, <>), + I3 = jit_armv6m_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)), + Stream1 = StreamModule:append(Stream0, <>), BNEOffset = StreamModule:offset(Stream1), % Branch to label if reduction count is not zero I4 = jit_armv6m_asm:bcc(ne, 0), @@ -2004,7 +2309,7 @@ rewrite_cp_offset( _RewriteSize ) -> NewOffset = StreamModule:offset(Stream0) - CodeOffset, - NewMoveInstr = jit_armv6m_asm:mov(?IP_REG, NewOffset bsl 2), + NewMoveInstr = jit_armv6m_asm:movs(?IP_REG, NewOffset bsl 2), ?ASSERT(byte_size(NewMoveInstr) =< _RewriteSize), Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr), State0#state{stream = Stream1}. @@ -2069,7 +2374,9 @@ args_regs(Args) -> ({free, Imm}) when is_integer(Imm) -> imm; (offset) -> imm; (ctx) -> ?CTX_REG; - (jit_state) -> ?JITSTATE_REG; + (jit_state) -> jit_state; + (jit_state_tail_call) -> jit_state; + (stack) -> stack; (Reg) when is_atom(Reg) -> Reg; (Imm) when is_integer(Imm) -> imm; ({ptr, Reg}) -> Reg; diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl index 21834c9c8c..bba65f4e52 100644 --- a/libs/jit/src/jit_armv6m_asm.erl +++ b/libs/jit/src/jit_armv6m_asm.erl @@ -21,6 +21,8 @@ -export([ adds/2, adds/3, + sub/2, + sub/3, subs/2, subs/3, muls/2, @@ -31,6 +33,8 @@ cmp/2, ands/2, bics/2, + negs/2, + rsbs/3, orrs/2, ldr/2, lsls/2, @@ -40,10 +44,9 @@ mov/2, movs/2, mvns/2, + nop/0, str/2, tst/2, - stp/4, - ldp/4, adr/2, push/1, pop/1 @@ -140,29 +143,36 @@ cond_to_num(al) -> 14; % Never cond_to_num(nv) -> 15. +-define(IS_LOW_REGISTER(Reg), + (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse + Reg =:= r5 orelse Reg =:= r6 orelse Reg =:= r7) +). + %% Emit an ADDS instruction (Thumb encoding) %% ADDS Rd, #imm - adds immediate value to register and sets flags (2-operand form) -spec adds(arm_gpr_register(), integer()) -> binary(). -adds(Rd, Imm) when is_atom(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> +adds(Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> adds(Rd, Rd, Imm); -adds(Rd, Imm) when is_atom(Rd), is_integer(Imm) -> +adds(Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm) -> error({unencodable_immediate, Imm}). %% ADDS Rd, Rn, #imm - adds immediate value to register and sets flags (3-operand form) -spec adds(arm_gpr_register(), arm_gpr_register(), integer()) -> binary(). -adds(Rd, Rd, Imm) when is_atom(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> +adds(Rd, Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> %% Thumb ADDS (immediate, 8-bit) encoding: 00110dddiiiiiiii (Rd = Rn) RdNum = reg_to_num(Rd), <<(16#3000 bor ((RdNum band 7) bsl 8) bor (Imm band 255)):16/little>>; -adds(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 7 -> +adds(Rd, Rn, Imm) when + ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(Imm), Imm >= 0, Imm =< 7 +-> %% Thumb ADDS (immediate, 3-bit) encoding: 0001110iiinnnddd RdNum = reg_to_num(Rd), RnNum = reg_to_num(Rn), <<(16#1C00 bor ((Imm band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little>>; -adds(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) -> +adds(Rd, Rn, Imm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(Imm) -> error({unencodable_immediate, Imm}); -adds(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> +adds(Rd, Rn, Rm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm) -> %% Thumb ADDS (register) encoding: 0001100mmmnnnddd RdNum = reg_to_num(Rd), RnNum = reg_to_num(Rn), @@ -207,16 +217,12 @@ bx(Reg) when is_atom(Reg) -> -spec ldr(arm_gpr_register(), {arm_gpr_register(), integer()}) -> binary(). %% LDR Rt, [Rn, #imm5*4] - 16-bit immediate offset (0-124, multiple of 4) ldr(Rt, {Rn, Imm}) when - is_atom(Rt), - is_atom(Rn), + ?IS_LOW_REGISTER(Rt), + ?IS_LOW_REGISTER(Rn), is_integer(Imm), Imm >= 0, Imm =< 124, - (Imm rem 4) =:= 0, - Rt =/= sp, - Rt =/= pc, - Rn =/= sp, - Rn =/= pc + (Imm rem 4) =:= 0 -> RtNum = reg_to_num(Rt), RnNum = reg_to_num(Rn), @@ -225,7 +231,7 @@ ldr(Rt, {Rn, Imm}) when <<(16#6800 bor (Imm5 bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>; %% LDR Rt, [SP, #imm8*4] - SP-relative load (0-1020, multiple of 4) ldr(Rt, {sp, Imm}) when - is_atom(Rt), + ?IS_LOW_REGISTER(Rt), is_integer(Imm), Imm >= 0, Imm =< 1020, @@ -237,7 +243,7 @@ ldr(Rt, {sp, Imm}) when <<(16#9800 bor (RtNum bsl 8) bor Imm8):16/little>>; %% LDR Rt, [PC, #imm8*4] - PC-relative load (0-1020, multiple of 4) ldr(Rt, {pc, Imm}) when - is_atom(Rt), + ?IS_LOW_REGISTER(Rt), is_integer(Imm), Imm >= 0, Imm =< 1020, @@ -249,15 +255,9 @@ ldr(Rt, {pc, Imm}) when <<(16#4800 bor (RtNum bsl 8) bor Imm8):16/little>>; %% LDR Rt, [Rn, Rm] - register offset ldr(Rt, {Rn, Rm}) when - is_atom(Rt), - is_atom(Rn), - is_atom(Rm), - Rt =/= sp, - Rt =/= pc, - Rn =/= sp, - Rn =/= pc, - Rm =/= sp, - Rm =/= pc + ?IS_LOW_REGISTER(Rt), + ?IS_LOW_REGISTER(Rn), + ?IS_LOW_REGISTER(Rm) -> RtNum = reg_to_num(Rt), RnNum = reg_to_num(Rn), @@ -269,7 +269,7 @@ ldr(Rt, {Rn, Rm}) when -spec movs(arm_gpr_register(), integer() | arm_gpr_register()) -> binary(). %% MOVS immediate - 8-bit immediates only (0-255) movs(Rd, Imm) when - is_atom(Rd), + ?IS_LOW_REGISTER(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 @@ -279,81 +279,46 @@ movs(Rd, Imm) when <<(16#2000 bor (RdNum bsl 8) bor Imm):16/little>>; %% MOVS register - low registers only (both must be r0-r7) movs(Rd, Rm) when - is_atom(Rd), is_atom(Rm) + ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm) -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), - case RdNum =< 7 andalso RmNum =< 7 of - true -> - %% Thumb MOVS register: 0000000000mmmdddd - <<(16#0000 bor (RmNum bsl 3) bor RdNum):16/little>>; - false -> - error({movs_requires_low_registers, {Rd, Rm}}) - end. + <<(16#0000 bor (RmNum bsl 3) bor RdNum):16/little>>. %% MVNS bitwise NOT -spec mvns(arm_gpr_register(), arm_gpr_register()) -> binary(). mvns(Rd, Rm) when - is_atom(Rd), is_atom(Rm) + ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm) -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), - case RdNum =< 7 andalso RmNum =< 7 of - true -> - %% Thumb MOVS register: 0000000000mmmdddd - <<(16#43D0 bor (RmNum bsl 3) bor RdNum):16/little>>; - false -> - error({mvns_requires_low_registers, {Rd, Rm}}) - end. + %% Thumb MOVS register: 0000000000mmmdddd + <<(16#43D0 bor (RmNum bsl 3) bor RdNum):16/little>>. %% ARMv6-M Thumb MOV instruction - handle both immediate and register moves --spec mov(arm_gpr_register(), arm_gpr_register() | integer()) -> binary(). -%% MOV immediate (using MOVS for low registers with immediate 0-255) -mov(Rd, Imm) when is_atom(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> - RdNum = reg_to_num(Rd), - case RdNum =< 7 of - true -> - %% Use MOVS for low registers with immediate - movs(Rd, Imm); - false -> - %% For high registers, need to use a different approach - %% ARMv6-M doesn't support immediate moves to high registers directly - error({unsupported_immediate_to_high_register, Rd, Imm}) - end; -%% MOV register - handle both high and low register cases +-spec mov(arm_gpr_register(), arm_gpr_register() | arm_gpr_register()) -> binary(). mov(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), - case RdNum >= 8 orelse RmNum >= 8 of - true -> - %% Thumb MOV high register: 01000110DMmmmdddd - D = - if - RdNum >= 8 -> 1; - true -> 0 - end, - M = - if - RmNum >= 8 -> 1; - true -> 0 - end, - RdLow = RdNum band 7, - RmLow = RmNum band 7, - <<(16#4600 bor (D bsl 7) bor (M bsl 6) bor (RmLow bsl 3) bor RdLow):16/little>>; - false -> - %% For low registers, use ADDS Rd, Rm, #0 (ARMv6-M standard practice) - adds(Rd, Rm, 0) - end. + D = + if + RdNum >= 8 -> 1; + true -> 0 + end, + M = + if + RmNum >= 8 -> 1; + true -> 0 + end, + RdLow = RdNum band 7, + RmLow = RmNum band 7, + <<(16#4600 bor (D bsl 7) bor (M bsl 6) bor (RmLow bsl 3) bor RdLow):16/little>>. %% ARMv6-M Thumb STR immediate offset (0-124, multiple of 4) str(Rt, {Rn, Imm}) when - is_atom(Rt), - is_atom(Rn), + ?IS_LOW_REGISTER(Rt), + ?IS_LOW_REGISTER(Rn), is_integer(Imm), - Rt =/= sp, - Rt =/= pc, - Rn =/= sp, - Rn =/= pc, Imm >= 0, Imm =< 124, (Imm rem 4) =:= 0 @@ -365,10 +330,8 @@ str(Rt, {Rn, Imm}) when <<(16#6000 bor (Imm5 bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>; %% SP-relative STR (0-1020, multiple of 4) str(Rt, {sp, Imm}) when - is_atom(Rt), + ?IS_LOW_REGISTER(Rt), is_integer(Imm), - Rt =/= sp, - Rt =/= pc, Imm >= 0, Imm =< 1020, (Imm rem 4) =:= 0 @@ -379,15 +342,9 @@ str(Rt, {sp, Imm}) when <<(16#9000 bor (RtNum bsl 8) bor Imm8):16/little>>; %% STR Rt, [Rn, Rm] - register offset str(Rt, {Rn, Rm}) when - is_atom(Rt), - is_atom(Rn), - is_atom(Rm), - Rt =/= sp, - Rt =/= pc, - Rn =/= sp, - Rn =/= pc, - Rm =/= sp, - Rm =/= pc + ?IS_LOW_REGISTER(Rt), + ?IS_LOW_REGISTER(Rn), + ?IS_LOW_REGISTER(Rm) -> RtNum = reg_to_num(Rt), RnNum = reg_to_num(Rn), @@ -395,71 +352,6 @@ str(Rt, {Rn, Rm}) when %% Thumb STR register: 0101000mmmnnntttt <<(16#5000 bor (RmNum bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>. -%% Emit a store pair (STP) instruction for 64-bit registers -%% stp(Rn, Rm, {Base}, Imm) -> binary() -%% stp(Rn, Rm, {Base, Imm}, '!') -> binary() (store-update) --spec stp( - arm_gpr_register(), - arm_gpr_register(), - {arm_gpr_register()} | {arm_gpr_register(), integer()}, - integer() | '!' -) -> binary(). -stp(Rn, Rm, {Base}, Imm) when - is_atom(Rn), - is_atom(Rm), - is_atom(Base), - is_integer(Imm), - Imm >= -512, - Imm =< 504, - (Imm rem 8) =:= 0 --> - RnNum = reg_to_num(Rn), - RmNum = reg_to_num(Rm), - BaseNum = reg_to_num(Base), - %% STP encoding: 1010100010|imm7|base|rm|rn - %% 0xa9bf0000 | ((Imm div 8) band 0x7f) << 15 | Base << 5 | Rm << 10 | Rn - << - (16#A8800000 bor ((Imm div 8) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor RnNum):32/little - >>; -stp(Rn, Rm, {Base, Imm}, '!') when - is_atom(Rn), - is_atom(Rm), - is_atom(Base), - is_integer(Imm), - Imm >= -512, - Imm =< 504, - (Imm rem 8) =:= 0 --> - RnNum = reg_to_num(Rn), - RmNum = reg_to_num(Rm), - BaseNum = reg_to_num(Base), - << - (16#A9800000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor - RnNum):32/little - >>. - -%% Emit a load pair (LDP) instruction for 64-bit registers -%% ldp(Rn, Rm, {Base}, Imm) -> binary() --spec ldp(arm_gpr_register(), arm_gpr_register(), {arm_gpr_register()}, integer()) -> - binary(). -ldp(Rn, Rm, {Base}, Imm) when - is_atom(Rn), - is_atom(Rm), - is_atom(Base), - is_integer(Imm), - Imm >= -512, - Imm =< 504, - (Imm rem 8) =:= 0 --> - RnNum = reg_to_num(Rn), - RmNum = reg_to_num(Rm), - BaseNum = reg_to_num(Base), - %% LDP encoding: 1010100011|imm7|base|rm|rn - << - (16#A8C00000 bor (((Imm div 8) band 16#7F) bsl 15) bor (BaseNum bsl 5) bor (RmNum bsl 10) bor - RnNum):32/little - >>. - %% Emit a conditional branch instruction -spec bcc(cc(), integer()) -> binary(). %% Special case: 'al' (always) condition uses unconditional branch for efficiency @@ -483,52 +375,32 @@ bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) -> -spec cmp(arm_gpr_register(), arm_gpr_register() | integer()) -> binary(). %% CMP register-register form (low registers only) cmp(Rn, Rm) when - is_atom(Rn), - is_atom(Rm), - Rn =/= sp, - Rn =/= pc, - Rm =/= sp, - Rm =/= pc + ?IS_LOW_REGISTER(Rn), + ?IS_LOW_REGISTER(Rm) -> RnNum = reg_to_num(Rn), RmNum = reg_to_num(Rm), - case RnNum =< 7 andalso RmNum =< 7 of - true -> - %% Thumb CMP register: 0100001010mmmnnn - <<(16#4280 bor (RmNum bsl 3) bor RnNum):16/little>>; - false -> - error({cmp_requires_low_registers, {Rn, Rm}}) - end; + %% Thumb CMP register: 0100001010mmmnnn + <<(16#4280 bor (RmNum bsl 3) bor RnNum):16/little>>; %% CMP register-immediate form (8-bit immediate 0-255) cmp(Rn, Imm) when - is_atom(Rn), + ?IS_LOW_REGISTER(Rn), is_integer(Imm), - Rn =/= sp, - Rn =/= pc, Imm >= 0, Imm =< 255 -> RnNum = reg_to_num(Rn), - case RnNum =< 7 of - true -> - %% Thumb CMP immediate: 00101nnniiiiiiiii - <<(16#2800 bor (RnNum bsl 8) bor Imm):16/little>>; - false -> - error({cmp_immediate_requires_low_register, Rn}) - end; -cmp(Rn, Imm) when is_atom(Rn), is_integer(Imm) -> + %% Thumb CMP immediate: 00101nnniiiiiiiii + <<(16#2800 bor (RnNum bsl 8) bor Imm):16/little>>; +cmp(Rn, Imm) when ?IS_LOW_REGISTER(Rn), is_integer(Imm) -> error({unencodable_immediate, Imm}). %% Emit an AND instruction (bitwise AND) %% ARMv6-M Thumb ANDS instruction (register only - no immediate support) -spec ands(arm_gpr_register(), arm_gpr_register()) -> binary(). ands(Rd, Rm) when - is_atom(Rd), - is_atom(Rm), - Rd =/= sp, - Rd =/= pc, - Rm =/= sp, - Rm =/= pc + ?IS_LOW_REGISTER(Rd), + ?IS_LOW_REGISTER(Rm) -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), @@ -538,27 +410,34 @@ ands(Rd, Rm) when %% Emit an BICS instruction (bitwise AND with complement) -spec bics(arm_gpr_register(), arm_gpr_register()) -> binary(). bics(Rd, Rm) when - is_atom(Rd), - is_atom(Rm), - Rd =/= sp, - Rd =/= pc, - Rm =/= sp, - Rm =/= pc + ?IS_LOW_REGISTER(Rd), + ?IS_LOW_REGISTER(Rm) -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), %% Thumb ANDS (2-operand): 0100000000mmmddd <<(16#4380 bor (RmNum bsl 3) bor RdNum):16/little>>. +%% Emit an NEGS instruction (bitwise NAND) +-spec negs(arm_gpr_register(), arm_gpr_register()) -> binary(). +negs(Rd, Rm) -> + rsbs(Rd, Rm, 0). + +-spec rsbs(arm_gpr_register(), arm_gpr_register(), 0) -> binary(). +rsbs(Rd, Rn, 0) when + ?IS_LOW_REGISTER(Rd), + ?IS_LOW_REGISTER(Rn) +-> + RdNum = reg_to_num(Rd), + RnNum = reg_to_num(Rn), + %% Thumb ANDS (2-operand): 0100000000mmmddd + <<(16#4240 bor (RnNum bsl 3) bor RdNum):16/little>>. + %% ARMv6-M Thumb ORRS instruction (register only - sets flags) -spec orrs(arm_gpr_register(), arm_gpr_register()) -> binary(). orrs(Rd, Rm) when - is_atom(Rd), - is_atom(Rm), - Rd =/= sp, - Rd =/= pc, - Rm =/= sp, - Rm =/= pc + ?IS_LOW_REGISTER(Rd), + ?IS_LOW_REGISTER(Rm) -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), @@ -569,15 +448,11 @@ orrs(Rd, Rm) when -spec lsls(arm_gpr_register(), arm_gpr_register(), integer()) -> binary(). %% LSLS Rd, Rm, #imm5 - immediate shift (1-31) lsls(Rd, Rm, Imm) when - is_atom(Rd), - is_atom(Rm), + ?IS_LOW_REGISTER(Rd), + ?IS_LOW_REGISTER(Rm), is_integer(Imm), Imm >= 1, - Imm =< 31, - Rd =/= sp, - Rd =/= pc, - Rm =/= sp, - Rm =/= pc + Imm =< 31 -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), @@ -587,12 +462,8 @@ lsls(Rd, Rm, Imm) when -spec lsls(arm_gpr_register(), arm_gpr_register()) -> binary(). %% LSLS Rdn, Rm - register shift (Rdn = Rdn << Rm) lsls(Rdn, Rm) when - is_atom(Rdn), - is_atom(Rm), - Rdn =/= sp, - Rdn =/= pc, - Rm =/= sp, - Rm =/= pc + ?IS_LOW_REGISTER(Rdn), + ?IS_LOW_REGISTER(Rm) -> RdnNum = reg_to_num(Rdn), RmNum = reg_to_num(Rm), @@ -603,15 +474,11 @@ lsls(Rdn, Rm) when -spec lsrs(arm_gpr_register(), arm_gpr_register(), integer()) -> binary(). %% LSRS Rd, Rm, #imm5 - immediate shift (1-32) lsrs(Rd, Rm, Imm) when - is_atom(Rd), - is_atom(Rm), + ?IS_LOW_REGISTER(Rd), + ?IS_LOW_REGISTER(Rm), is_integer(Imm), Imm >= 1, - Imm =< 32, - Rd =/= sp, - Rd =/= pc, - Rm =/= sp, - Rm =/= pc + Imm =< 32 -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), @@ -626,12 +493,8 @@ lsrs(Rd, Rm, Imm) when -spec lsrs(arm_gpr_register(), arm_gpr_register()) -> binary(). %% LSRS Rdn, Rm - register shift (Rdn = Rdn >> Rm) lsrs(Rdn, Rm) when - is_atom(Rdn), - is_atom(Rm), - Rdn =/= sp, - Rdn =/= pc, - Rm =/= sp, - Rm =/= pc + ?IS_LOW_REGISTER(Rdn), + ?IS_LOW_REGISTER(Rm) -> RdnNum = reg_to_num(Rdn), RmNum = reg_to_num(Rm), @@ -641,39 +504,35 @@ lsrs(Rdn, Rm) when %% ARMv6-M Thumb TST instruction (register only) -spec tst(arm_gpr_register(), arm_gpr_register()) -> binary(). %% TST Rn, Rm - test bits (performs Rn & Rm, updates flags, low registers only) -tst(Rn, Rm) when is_atom(Rn), is_atom(Rm) -> +tst(Rn, Rm) when ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm) -> RnNum = reg_to_num(Rn), RmNum = reg_to_num(Rm), - case RnNum =< 7 andalso RmNum =< 7 of - true -> - %% Thumb TST register: 0100001000mmmnnn - <<(16#4200 bor (RmNum bsl 3) bor RnNum):16/little>>; - false -> - error({tst_requires_low_registers, {Rn, Rm}}) - end. + <<(16#4200 bor (RmNum bsl 3) bor RnNum):16/little>>. %% Emit a SUBS instruction (Thumb encoding) %% SUBS Rd, #imm - subtracts immediate value from register and sets flags (2-operand form) -spec subs(arm_gpr_register(), integer()) -> binary(). -subs(Rd, Imm) when is_atom(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> +subs(Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> subs(Rd, Rd, Imm); -subs(Rd, Imm) when is_atom(Rd), is_integer(Imm) -> +subs(Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm) -> error({unencodable_immediate, Imm}). %% SUBS Rd, Rn, #imm - subtracts immediate value from register and sets flags (3-operand form) -spec subs(arm_gpr_register(), arm_gpr_register(), integer()) -> binary(). -subs(Rd, Rd, Imm) when is_atom(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> +subs(Rd, Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> %% Thumb SUBS (immediate, 8-bit) encoding: 00111dddiiiiiiii (Rd = Rn) RdNum = reg_to_num(Rd), <<(16#3800 bor ((RdNum band 7) bsl 8) bor (Imm band 255)):16/little>>; -subs(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm), Imm >= 0, Imm =< 7 -> +subs(Rd, Rn, Imm) when + ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(Imm), Imm >= 0, Imm =< 7 +-> %% Thumb SUBS (immediate, 3-bit) encoding: 0001111iiinnnddd RdNum = reg_to_num(Rd), RnNum = reg_to_num(Rn), <<(16#1E00 bor ((Imm band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little>>; -subs(Rd, Rn, Imm) when is_atom(Rd), is_atom(Rn), is_integer(Imm) -> +subs(Rd, Rn, Imm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(Imm) -> error({unencodable_immediate, Imm}); -subs(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> +subs(Rd, Rn, Rm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm) -> %% Thumb SUBS (register) encoding: 0001101mmmnnnddd RdNum = reg_to_num(Rd), RnNum = reg_to_num(Rn), @@ -682,13 +541,27 @@ subs(Rd, Rn, Rm) when is_atom(Rd), is_atom(Rn), is_atom(Rm) -> (16#1A00 bor ((RmNum band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little >>. +%% SUB SP, #imm - subtracts immediate value from stack pointer (2-operand form) +-spec sub(sp, integer()) -> binary(). +sub(sp, Imm) when is_integer(Imm), Imm >= 0, Imm =< 508, (Imm rem 4) =:= 0 -> + %% Thumb SUB SP, SP, #imm7*4 encoding: 10110000 1iiiiiii + Imm7 = Imm div 4, + <<(16#B080 bor (Imm7 band 127)):16/little>>; +sub(sp, Imm) when is_integer(Imm) -> + error({unencodable_immediate, Imm}). + +%% SUB SP, SP, #imm - subtracts immediate value from stack pointer (3-operand form) +-spec sub(sp, sp, integer()) -> binary(). +sub(sp, sp, Imm) -> + sub(sp, Imm). + %% ARMv6-M Thumb address calculation (ADR) instruction %% ADR is implemented as ADD Rd, PC, #imm8*4 in Thumb %% In Thumb, PC = current_instruction_address + 4, so adr(Rd, N) means: %% Rd = (current_pc + 4) + immediate = current_pc + (N - 4) + 4 = current_pc + N -spec adr(arm_gpr_register(), integer()) -> binary(). adr(Rd, Offset) when - is_atom(Rd), + ?IS_LOW_REGISTER(Rd), is_integer(Offset), Offset >= 4, Offset =< 1024, @@ -705,7 +578,7 @@ adr(Rd, Offset) when %% Emit a MULS instruction (Thumb encoding) %% MULS Rd, Rm - multiply Rd by Rm, store result in Rd (sets flags) -spec muls(arm_gpr_register(), arm_gpr_register()) -> binary(). -muls(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> +muls(Rd, Rm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm) -> %% Thumb MULS encoding: 0100001101mmmrrr (Rd is both source and destination) RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), @@ -729,6 +602,12 @@ pop(RegList) when is_list(RegList) -> %% Thumb POP encoding: 1011110Plllllll where P=PC bit, lllllll=low register mask <<(16#BC00 bor (PCBit bsl 8) bor LowRegMask):16/little>>. +%% ARMv6-M Thumb NOP instruction +%% NOP - no operation (encoded as mov r8, r8) +-spec nop() -> binary(). +nop() -> + <<16#46c0:16/little>>. + %% Generic helper function to process register lists for PUSH/POP process_reglist(RegList, SpecialReg) -> RegBits = lists:foldl( @@ -738,7 +617,7 @@ process_reglist(RegList, SpecialReg) -> 0, RegList ), - LowRegsBits = RegBits band 2#1111111, + LowRegsBits = RegBits band 2#11111111, SpecialRegBit = RegBits band (1 bsl reg_to_num(SpecialReg)), if RegBits =/= LowRegsBits + SpecialRegBit -> diff --git a/tests/libs/jit/jit_armv6m_asm_tests.erl b/tests/libs/jit/jit_armv6m_asm_tests.erl index c8e27b6327..e30d303319 100644 --- a/tests/libs/jit/jit_armv6m_asm_tests.erl +++ b/tests/libs/jit/jit_armv6m_asm_tests.erl @@ -68,6 +68,25 @@ subs_test_() -> ) ]. +sub_test_() -> + [ + ?_assertEqual( + asm(<<16#B082:16/little>>, "sub sp, #8"), jit_armv6m_asm:sub(sp, 8) + ), + ?_assertEqual( + asm(<<16#B082:16/little>>, "sub sp, sp, #8"), jit_armv6m_asm:sub(sp, sp, 8) + ), + ?_assertEqual( + asm(<<16#B080:16/little>>, "sub sp, #0"), jit_armv6m_asm:sub(sp, 0) + ), + ?_assertEqual( + asm(<<16#B084:16/little>>, "sub sp, #16"), jit_armv6m_asm:sub(sp, 16) + ), + ?_assertEqual( + asm(<<16#B0FF:16/little>>, "sub sp, #508"), jit_armv6m_asm:sub(sp, 508) + ) + ]. + muls_test_() -> [ ?_assertEqual( @@ -206,6 +225,18 @@ mov_test_() -> ?_assertEqual( asm(<<16#46c8:16/little>>, "mov r8, r9"), jit_armv6m_asm:mov(r8, r9) + ), + ?_assertEqual( + asm(<<16#46c0:16/little>>, "mov r8, r8"), + jit_armv6m_asm:mov(r8, r8) + ), + ?_assertEqual( + asm(<<16#4619:16/little>>, "mov r1, r3"), + jit_armv6m_asm:mov(r1, r3) + ), + ?_assertEqual( + asm(<<16#46c0:16/little>>, "nop"), + jit_armv6m_asm:nop() ) ]. @@ -296,6 +327,42 @@ orrs_test_() -> ) ]. +bics_test_() -> + [ + ?_assertEqual( + asm(<<16#4391:16/little>>, "bics r1, r2"), + jit_armv6m_asm:bics(r1, r2) + ), + ?_assertEqual( + asm(<<16#43a3:16/little>>, "bics r3, r4"), + jit_armv6m_asm:bics(r3, r4) + ) + ]. + +negs_test_() -> + [ + ?_assertEqual( + asm(<<16#4251:16/little>>, "negs r1, r2"), + jit_armv6m_asm:negs(r1, r2) + ), + ?_assertEqual( + asm(<<16#4263:16/little>>, "negs r3, r4"), + jit_armv6m_asm:negs(r3, r4) + ) + ]. + +rsbs_test_() -> + [ + ?_assertEqual( + asm(<<16#4251:16/little>>, "rsbs r1, r2, 0"), + jit_armv6m_asm:rsbs(r1, r2, 0) + ), + ?_assertEqual( + asm(<<16#4263:16/little>>, "rsbs r3, r4, 0"), + jit_armv6m_asm:rsbs(r3, r4, 0) + ) + ]. + lsls_test_() -> [ %% ARMv6-M Thumb LSLS immediate shift (1-31) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index e20e41eda6..6382b0f0da 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -34,57 +34,108 @@ % disassembly obtained with: % arm-elf-objdump -b binary -D dump.bin -M arm -call_primitive_0_test_DISABLED() -> +call_primitive_0_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, ResultReg} = ?BACKEND:call_primitive(State0, 0, [ctx, jit_state]), ?assertEqual(r7, ResultReg), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9400050 ldr x16, [x2]\n" - " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " c: d63f0200 blr x16\n" - " 10: aa0003e7 mov x7, x0\n" - " 14: a8c10be1 ldp x1, x2, [sp], #16\n" - " 18: a8c103fe ldp x30, x0, [sp], #16\n" + " 0: 6817 ldr r7, [r2, #0]\n" + " 2: b405 push {r0, r2}\n" + " 4: 9900 ldr r1, [sp, #0]\n" + " 6: 47b8 blx r7\n" + " 8: 4607 mov r7, r0\n" + " a: bc05 pop {r0, r2}" >>, ?assertEqual(dump_to_bin(Dump), Stream). -call_primitive_1_test_DISABLED() -> +call_primitive_1_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, ResultReg} = ?BACKEND:call_primitive(State0, 1, [ctx, jit_state]), ?assertEqual(r7, ResultReg), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9400450 ldr x16, [x2, #8]\n" - " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " c: d63f0200 blr x16\n" - " 10: aa0003e7 mov x7, x0\n" - " 14: a8c10be1 ldp x1, x2, [sp], #16\n" - " 18: a8c103fe ldp x30, x0, [sp], #16\n" + " 0: 6857 ldr r7, [r2, #4]\n" + " 2: b405 push {r0, r2}\n" + " 4: 9900 ldr r1, [sp, #0]\n" + " 6: 47b8 blx r7\n" + " 8: 4607 mov r7, r0\n" + " a: bc05 pop {r0, r2}" >>, ?assertEqual(dump_to_bin(Dump), Stream). -call_primitive_2_args_test_DISABLED() -> +call_primitive_2_args_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, ResultReg} = ?BACKEND:call_primitive(State0, 2, [ctx, 42, 43, 44]), ?assertEqual(r7, ResultReg), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9400850 ldr x16, [x2, #16]\n" - " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " c: d2800541 mov x1, #0x2a // #42\n" - " 10: d2800562 mov x2, #0x2b // #43\n" - " 14: d2800583 mov x3, #0x2c // #44\n" - " 18: d63f0200 blr x16\n" - " 1c: aa0003e7 mov x7, x0\n" - " 20: a8c10be1 ldp x1, x2, [sp], #16\n" - " 24: a8c103fe ldp x30, x0, [sp], #16" + " 0: 6897 ldr r7, [r2, #8]\n" + " 2: b405 push {r0, r2}\n" + " 4: 212a movs r1, #42 ; 0x2a\n" + " 6: 222b movs r2, #43 ; 0x2b\n" + " 8: 232c movs r3, #44 ; 0x2c\n" + " a: 47b8 blx r7\n" + " c: 4607 mov r7, r0\n" + " e: bc05 pop {r0, r2}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 6957 ldr r7, [r2, #20]\n" + " 2: 2210 movs r2, #16\n" + " 4: 2320 movs r3, #32\n" + " 6: 2502 movs r5, #2\n" + " 8: 4639 mov r1, r7\n" + " a: 9f05 ldr r7, [sp, #20]\n" + " c: 46be mov lr, r7\n" + " e: 9f04 ldr r7, [sp, #16]\n" + " 10: 9504 str r5, [sp, #16]\n" + " 12: 9105 str r1, [sp, #20]\n" + " 14: 9e03 ldr r6, [sp, #12]\n" + " 16: bd32 pop {r1, r4, r5, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_6_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get bin_ptr from x_reg 0 (similar to get_list_test pattern) + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK), + % Get another register for the last parameter to test {free, Reg} handling + {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}), + % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments + {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [ + ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg} + ]), + Stream = ?BACKEND:stream(State4), + Dump = + << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 2603 movs r6, #3\n" + " 4: 43b7 bics r7, r6\n" + " 6: 69c6 ldr r6, [r0, #28]\n" + " 8: 25b8 movs r5, #184 ; 0xb8\n" + " a: 5955 ldr r5, [r2, r5]\n" + " c: b405 push {r0, r2}\n" + " e: b082 sub sp, #8\n" + " 10: 9601 str r6, [sp, #4]\n" + " 12: 2608 movs r6, #8\n" + " 14: 9600 str r6, [sp, #0]\n" + " 16: 9900 ldr r1, [sp, #0]\n" + " 18: 463a mov r2, r7\n" + " 1a: 2340 movs r3, #64 ; 0x40\n" + " 1c: 47a8 blx r5\n" + " 1e: 4605 mov r5, r0\n" + " 20: bc05 pop {r0, r2}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -146,59 +197,152 @@ call_primitive_extended_regs_test_DISABLED() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -call_ext_only_test_DISABLED() -> +call_ext_only_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), - State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, -1]), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: b9401027 ldr w7, [x1, #16]\n" - " 4: f10004e7 subs x7, x7, #0x1\n" - " 8: b9001027 str w7, [x1, #16]\n" - " c: 540000a1 b.ne 0x20 // b.any\n" - " 10: 10000087 adr x7, 0x20\n" - " 14: f9000427 str x7, [x1, #8]\n" - " 18: f9400847 ldr x7, [x2, #16]\n" - " 1c: d61f00e0 br x7\n" - " 20: f9401047 ldr x7, [x2, #32]\n" - " 24: d2800042 mov x2, #0x2 // #2\n" - " 28: d2800043 mov x3, #0x2 // #2\n" - " 2c: 92800004 mov x4, #0xffffffffffffffff // #-1\n" - " 30: d61f00e0 br x7" + " 0: 9e00 ldr r6, [sp, #0]\n" + " 2: 68b7 ldr r7, [r6, #8]\n" + " 4: 3f01 subs r7, #1\n" + " 6: 60b7 str r7, [r6, #8]\n" + " 8: d107 bne.n 0x1a\n" + " a: a703 add r7, pc, #12 ; (adr r7, 0x18)\n" + " c: 6077 str r7, [r6, #4]\n" + " e: 6897 ldr r7, [r2, #8]\n" + " 10: 9e05 ldr r6, [sp, #20]\n" + " 12: 9705 str r7, [sp, #20]\n" + " 14: 46b6 mov lr, r6\n" + " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 18: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1a: 6917 ldr r7, [r2, #16]\n" + " 1c: 221c movs r2, #28\n" + " 1e: 2302 movs r3, #2\n" + " 20: 2502 movs r5, #2\n" + " 22: 4639 mov r1, r7\n" + " 24: 9f05 ldr r7, [sp, #20]\n" + " 26: 46be mov lr, r7\n" + " 28: 9f04 ldr r7, [sp, #16]\n" + " 2a: 9504 str r5, [sp, #16]\n" + " 2c: 9105 str r1, [sp, #20]\n" + " 2e: 9603 str r6, [sp, #12]\n" + " 30: bd72 pop {r1, r4, r5, r6, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_ext_only_unaligned_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + %% First do a 2-byte instruction to create unaligned start + State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}), + State2 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State1), + State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 6019 str r1, [r3, #0]\n" + " 2: 9e00 ldr r6, [sp, #0]\n" + " 4: 68b7 ldr r7, [r6, #8]\n" + " 6: 3f01 subs r7, #1\n" + " 8: 60b7 str r7, [r6, #8]\n" + " a: d108 bne.n 0x1e\n" + " c: a703 add r7, pc, #12 ; (adr r7, 0x1c)\n" + " e: 6077 str r7, [r6, #4]\n" + " 10: 6897 ldr r7, [r2, #8]\n" + " 12: 9e05 ldr r6, [sp, #20]\n" + " 14: 9705 str r7, [sp, #20]\n" + " 16: 46b6 mov lr, r6\n" + " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 1a: 46c0 nop ; (mov r8, r8)\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 6917 ldr r7, [r2, #16]\n" + " 20: 2220 movs r2, #32\n" + " 22: 2302 movs r3, #2\n" + " 24: 2502 movs r5, #2\n" + " 26: 4639 mov r1, r7\n" + " 28: 9f05 ldr r7, [sp, #20]\n" + " 2a: 46be mov lr, r7\n" + " 2c: 9f04 ldr r7, [sp, #16]\n" + " 2e: 9504 str r5, [sp, #16]\n" + " 30: 9105 str r1, [sp, #20]\n" + " 32: 9603 str r6, [sp, #12]\n" + " 34: bd72 pop {r1, r4, r5, r6, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_last_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA} + ]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 6cd6 ldr r6, [r2, #76] ; 0x4c\n" + " 4: 2204 movs r2, #4\n" + " 6: 4b01 ldr r3, [pc, #4] ; (0xc)\n" + " 8: e002 b.n 0x10\n" + " a: 0000 movs r0, r0\n" + " c: 080b lsrs r3, r1, #32\n" + " e: 0000 movs r0, r0\n" + " 10: 463d mov r5, r7\n" + " 12: 4631 mov r1, r6\n" + " 14: 9f05 ldr r7, [sp, #20]\n" + " 16: 46be mov lr, r7\n" + " 18: 9f04 ldr r7, [sp, #16]\n" + " 1a: 9504 str r5, [sp, #16]\n" + " 1c: 9105 str r1, [sp, #20]\n" + " 1e: 9e03 ldr r6, [sp, #12]\n" + " 20: bd32 pop {r1, r4, r5, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). -call_ext_last_test_DISABLED() -> +call_ext_last_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), - State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, 2, 2, 10]), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: b9401027 ldr w7, [x1, #16]\n" - " 4: f10004e7 subs x7, x7, #0x1\n" - " 8: b9001027 str w7, [x1, #16]\n" - " c: 540000a1 b.ne 0x20 // b.any\n" - " 10: 10000087 adr x7, 0x20\n" - " 14: f9000427 str x7, [x1, #8]\n" - " 18: f9400847 ldr x7, [x2, #16]\n" - " 1c: d61f00e0 br x7\n" - " 20: f9401047 ldr x7, [x2, #32]\n" - " 24: d2800042 mov x2, #0x2 // #2\n" - " 28: d2800043 mov x3, #0x2 // #2\n" - " 2c: d2800144 mov x4, #0xa // #10\n" - " 30: d61f00e0 br x7" + " 0: 9e00 ldr r6, [sp, #0]\n" + " 2: 68b7 ldr r7, [r6, #8]\n" + " 4: 3f01 subs r7, #1\n" + " 6: 60b7 str r7, [r6, #8]\n" + " 8: d107 bne.n 0x1a\n" + " a: a703 add r7, pc, #12 ; (adr r7, 0x18)\n" + " c: 6077 str r7, [r6, #4]\n" + " e: 6897 ldr r7, [r2, #8]\n" + " 10: 9e05 ldr r6, [sp, #20]\n" + " 12: 9705 str r7, [sp, #20]\n" + " 14: 46b6 mov lr, r6\n" + " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 18: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1a: 6917 ldr r7, [r2, #16]\n" + " 1c: 221c movs r2, #28\n" + " 1e: 2302 movs r3, #2\n" + " 20: 2502 movs r5, #2\n" + " 22: 4639 mov r1, r7\n" + " 24: 9f05 ldr r7, [sp, #20]\n" + " 26: 46be mov lr, r7\n" + " 28: 9f04 ldr r7, [sp, #16]\n" + " 2a: 9504 str r5, [sp, #16]\n" + " 2c: 9105 str r1, [sp, #20]\n" + " 2e: 9603 str r6, [sp, #12]\n" + " 30: bd72 pop {r1, r4, r5, r6, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). -call_primitive_last_test_DISABLED() -> +call_primitive_last_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:call_primitive_last(State0, 0, [ctx, jit_state, 42]), Stream = ?BACKEND:stream(State1), Dump = << - " 0: f9400047 ldr x7, [x2]\n" - " 4: d2800542 mov x2, #0x2a // #42\n" - " 8: d61f00e0 br x7" + " 0: 6817 ldr r7, [r2, #0]\n" + " 2: 222a movs r2, #42 ; 0x2a\n" + " 4: 9e05 ldr r6, [sp, #20]\n" + " 6: 9705 str r7, [sp, #20]\n" + " 8: 46b6 mov lr, r6\n" + " a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -242,7 +386,7 @@ return_if_not_equal_to_ctx_test_DISABLED_() -> ), ?assertEqual(r7, ResultReg), {State2, OtherReg} = ?BACKEND:copy_to_native_register(State1, ResultReg), - ?assertEqual(r8, OtherReg), + ?assertEqual(r6, OtherReg), State3 = ?BACKEND:return_if_not_equal_to_ctx(State2, {free, OtherReg}), Stream = ?BACKEND:stream(State3), Dump = @@ -751,7 +895,7 @@ if_block_test_() -> Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 1c3d adds r5, r7, #0\n" + " 4: 463d mov r5, r7\n" " 6: 243f movs r4, #63 ; 0x3f\n" " 8: 4025 ands r5, r4\n" " a: 2d08 cmp r5, #8\n" @@ -1071,11 +1215,11 @@ is_integer_test() -> State4 = ?BACKEND:update_branches(State3, Labels), Stream = ?BACKEND:stream(State4), Dump = << - "0: 6987 ldr r7, [r0, #24]\n" + " 0: 6987 ldr r7, [r0, #24]\n" " 2: 43fe mvns r6, r7\n" " 4: 0736 lsls r6, r6, #28\n" " 6: d00d beq.n 0x24\n" - " 8: 1c3e adds r6, r7, #0\n" + " 8: 463e mov r6, r7\n" " a: 2503 movs r5, #3\n" " c: 402e ands r6, r5\n" " e: 2e02 cmp r6, #2\n" @@ -1132,7 +1276,7 @@ is_number_test() -> " 2: 43fe mvns r6, r7\n" " 4: 0736 lsls r6, r6, #28\n" " 6: d012 beq.n 0x2e\n" - " 8: 1c3e adds r6, r7, #0\n" + " 8: 463e mov r6, r7\n" " a: 2503 movs r5, #3\n" " c: 402e ands r6, r5\n" " e: 2e02 cmp r6, #2\n" @@ -1141,7 +1285,7 @@ is_number_test() -> " 14: 2603 movs r6, #3\n" " 16: 43b7 bics r7, r6\n" " 18: 683f ldr r7, [r7, #0]\n" - " 1a: 1c3e adds r6, r7, #0\n" + " 1a: 463e mov r6, r7\n" " 1c: 253f movs r5, #63 ; 0x3f\n" " 1e: 402e ands r6, r5\n" " 20: 2e08 cmp r6, #8\n" @@ -1512,10 +1656,9 @@ move_to_vm_register_test_() -> %% Test: Negative immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, -1, {x_reg, 0}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: ffff ffff ; instruction: 0xffffffff\n" - " 8: 6187 str r7, [r0, #24]" + " 0: 2700 movs r7, #0\n" + " 2: 427f negs r7, r7\n" + " 4: 6187 str r7, [r0, #24]" >>) end) ] @@ -1653,7 +1796,7 @@ move_to_array_element_test_() -> Stream = ?BACKEND:stream(State1), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 1c26 adds r6, r4, #0\n" + " 2: 4626 mov r6, r4\n" " 4: 00b6 lsls r6, r6, #2\n" " 6: 519f str r7, [r3, r6]" >>, @@ -1665,7 +1808,7 @@ move_to_array_element_test_() -> Stream = ?BACKEND:stream(State1), Dump = << " 0: 683f ldr r7, [r7, #0]\n" - " 2: 1c26 adds r6, r4, #0\n" + " 2: 4626 mov r6, r4\n" " 4: 00b6 lsls r6, r6, #2\n" " 6: 519f str r7, [r3, r6]" >>, @@ -1678,7 +1821,7 @@ move_to_array_element_test_() -> Dump = << " 0: 6947 ldr r7, [r0, #20]\n" " 2: 68bf ldr r7, [r7, #8]\n" - " 4: 1c26 adds r6, r4, #0\n" + " 4: 4626 mov r6, r4\n" " 6: 00b6 lsls r6, r6, #2\n" " 8: 519f str r7, [r3, r6]" >>, @@ -1744,6 +1887,17 @@ move_to_native_register_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream) end), + %% move_to_native_register/2: negative value + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, -42), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r7, Reg), + Dump = << + " 0: 2729 movs r7, #41 ; 0x29\n" + " 2: 427f negs r7, r7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), %% move_to_native_register/2: {ptr, reg} ?_test(begin {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, r6}), @@ -1800,7 +1954,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, r7, r5), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 1c3d adds r5, r7, #0" + " 0: 463d mov r5, r7" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), From dc2e79bcf2a20c4307ab0443959fd0c2d00e2076 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 26 Aug 2025 22:59:40 +0200 Subject: [PATCH 22/97] armv6m: fix and enable all disabled tests Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 152 +++++++--- tests/libs/jit/jit_armv6m_tests.erl | 417 ++++++++++++++++------------ 2 files changed, 344 insertions(+), 225 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 0ed7884ff5..1bc9244d18 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -201,7 +201,7 @@ %% - r12: intra-procedure call scratch %% - r13 (SP), r14 (LR), r15 (PC): special purpose %% Reorder to match AArch64 test expectations (r7 first) --define(AVAILABLE_REGS, [r7, r6, r5, r4, r3, r1, r12]). +-define(AVAILABLE_REGS, [r7, r6, r5, r4, r3, r1]). -define(PARAMETER_REGS, [r0, r1, r2, r3]). -define(SCRATCH_REGS, [r7, r6, r5, r4, r3, r2, r1, r0, r12]). @@ -359,9 +359,13 @@ jump_table0( LabelsCount ) -> Offset = StreamModule:offset(Stream0), + % Create 4-byte jump table entry: prolog (push) + unconditional branch + PushInstr = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]), BranchInstr = jit_armv6m_asm:b(0), - Reloc = {N, Offset, b}, - Stream1 = StreamModule:append(Stream0, BranchInstr), + % Branch is after push + Reloc = {N, Offset + byte_size(PushInstr), b}, + JumpEntry = <>, + Stream1 = StreamModule:append(Stream0, JumpEntry), jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). %%----------------------------------------------------------------------------- @@ -638,8 +642,8 @@ return_if_not_equal_to_ctx( % Move to r0 (return register) _ -> jit_armv6m_asm:mov(r0, Reg) end, - I4 = jit_armv6m_asm:ret(), - I2 = jit_armv6m_asm:bcc(eq, 4 + byte_size(I3) + byte_size(I4)), + I4 = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]), + I2 = jit_armv6m_asm:bcc(eq, 2 + byte_size(I3) + byte_size(I4)), Stream1 = StreamModule:append(Stream0, <>), {AvailableRegs1, UsedRegs1} = free_reg( AvailableRegs0, UsedRegs0, Reg @@ -1932,20 +1936,59 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp, TempJitState | _], - branches = Branches + available_regs = [Temp, TempJitState | _] } = State, Label ) -> Offset = StreamModule:offset(Stream0), - I1 = jit_armv6m_asm:adr(Temp, 0), - Reloc = {Label, Offset, {adr, Temp}}, - % Load jit_state pointer from stack, then store continuation - I2a = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), - I2b = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), - Code = <>, + % Load jit_state pointer from stack + I1 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), + + % We'll place the aligned block right after: I1 + adr + str + skip_branch + padding + + % adr instruction size + I2Size = 2, + % str instruction size + I3Size = 2, + % skip branch instruction size + I4Size = 2, + + % Position where aligned block would start (before padding) + AlignedBlockOffsetBase = Offset + byte_size(I1) + I2Size + I3Size + I4Size, + PaddingNeeded = (4 - (AlignedBlockOffsetBase rem 4)) rem 4, + AlignedBlockOffsetAligned = AlignedBlockOffsetBase + PaddingNeeded, + + % adr instruction will be at: Offset + byte_size(I1) + AdrInstructionOffset = Offset + byte_size(I1), + % For adr, PC is aligned down to 4-byte boundary (no +4 needed) + AdrPC = AdrInstructionOffset band (bnot 3), + % Calculate the correct adr offset to point to the branch instruction + AdrOffset = AlignedBlockOffsetAligned - AdrPC, + I2 = jit_armv6m_asm:adr(Temp, AdrOffset), + I3 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), + + % Skip over aligned block + + % aligned block size + padding + SkipOffset = 4 + PaddingNeeded, + I4 = jit_armv6m_asm:b(SkipOffset), + + % Padding if needed + Padding = + case PaddingNeeded of + 0 -> <<>>; + 2 -> <<0:16>> + end, + + % Aligned block: branch to jump table entry (we know the address directly) + JumpTableEntryOffset = Label * 4, + AlignedBlockBranchOffset = AlignedBlockOffsetAligned, + BranchOffset = JumpTableEntryOffset - AlignedBlockBranchOffset, + I5 = jit_armv6m_asm:b(BranchOffset), + + Code = <>, Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1, branches = [Reloc | Branches]}. + State#state{stream = Stream1}. set_continuation_to_offset( #state{ @@ -2158,7 +2201,6 @@ mul( Stream2 = StreamModule:append(Stream1, I), State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}. --spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). %% %% Analysis of AArch64 pattern and ARM Thumb mapping: %% @@ -2179,6 +2221,7 @@ mul( %% When reductions == 0, we schedule next process, and when we resume, we execute the prolog %% then continue to the function body. %% +-spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). decrement_reductions_and_maybe_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, available_regs = [Temp, TempJitState | _] @@ -2238,9 +2281,9 @@ decrement_reductions_and_maybe_schedule_next( -spec call_or_schedule_next(state(), non_neg_integer()) -> state(). call_or_schedule_next(State0, Label) -> - {State1, RewriteOffset, RewriteSize} = set_cp(State0), + {State1, RewriteOffset, TempReg} = set_cp(State0), State2 = call_only_or_schedule_next(State1, Label), - rewrite_cp_offset(State2, RewriteOffset, RewriteSize). + rewrite_cp_offset(State2, RewriteOffset, TempReg). call_only_or_schedule_next( #state{ @@ -2270,49 +2313,74 @@ call_only_or_schedule_next( call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). call_primitive_with_cp(State0, Primitive, Args) -> - {State1, RewriteOffset, RewriteSize} = set_cp(State0), + {State1, RewriteOffset, TempReg} = set_cp(State0), State2 = call_primitive_last(State1, Primitive, Args), - rewrite_cp_offset(State2, RewriteOffset, RewriteSize). + rewrite_cp_offset(State2, RewriteOffset, TempReg). --spec set_cp(state()) -> {state(), non_neg_integer(), 4 | 8}. +-spec set_cp(state()) -> {state(), non_neg_integer(), armv6m_register()}. set_cp(State0) -> % get module index (dynamically) - {#state{stream_module = StreamModule, stream = Stream0} = State1, Reg} = get_module_index( + { + #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State1, + Reg + } = get_module_index( State0 ), + % Get a temporary register from available registers + [TempReg | _] = AvailRegs, + Offset = StreamModule:offset(Stream0), % build cp with module_index << 24 - I1 = jit_armv6m_asm:lsl(Reg, Reg, 24), - if - Offset >= 16250 -> - I2 = jit_armv6m_asm:nop(), - I3 = jit_armv6m_asm:nop(), - RewriteSize = 8; - true -> - I2 = jit_armv6m_asm:nop(), - I3 = <<>>, - RewriteSize = 4 - end, + I1 = jit_armv6m_asm:lsls(Reg, Reg, 24), + % Emit a single nop as placeholder for offset load instruction + I2 = jit_armv6m_asm:nop(), MOVOffset = Offset + byte_size(I1), - I4 = jit_armv6m_asm:orr(Reg, Reg, ?IP_REG), - I5 = jit_armv6m_asm:str(Reg, ?CP), - Code = <>, + % OR the module index with the offset (loaded in temp register) + I3 = jit_armv6m_asm:orrs(Reg, TempReg), + I4 = jit_armv6m_asm:str(Reg, ?CP), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State2 = State1#state{stream = Stream1}, State3 = free_native_register(State2, Reg), - {State3, MOVOffset, RewriteSize}. + {State3, MOVOffset, TempReg}. --spec rewrite_cp_offset(state(), non_neg_integer(), 4 | 8) -> state(). +-spec rewrite_cp_offset(state(), non_neg_integer(), armv6m_register()) -> state(). rewrite_cp_offset( #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0, RewriteOffset, - _RewriteSize + TempReg ) -> NewOffset = StreamModule:offset(Stream0) - CodeOffset, - NewMoveInstr = jit_armv6m_asm:movs(?IP_REG, NewOffset bsl 2), - ?ASSERT(byte_size(NewMoveInstr) =< _RewriteSize), - Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr), - State0#state{stream = Stream1}. + OffsetImm = NewOffset bsl 2, + + % Check if offset fits in movs immediate (0-255) + {NewMoveInstr, Stream1} = + if + OffsetImm =< 255 -> + {jit_armv6m_asm:movs(TempReg, OffsetImm), Stream0}; + true -> + % Need to emit literal pool with proper alignment + CurrentOffset = StreamModule:offset(Stream0), + % Ensure 4-byte alignment for literal pool + AlignedOffset = (CurrentOffset + 3) band (bnot 3), + PaddingSize = AlignedOffset - CurrentOffset, + Padding = binary:copy(<<0>>, PaddingSize), + + % Emit the 32-bit literal + Literal = <>, + StreamWithLiteral = StreamModule:append( + StreamModule:append(Stream0, Padding), Literal + ), + + % Compute PC-relative offset for ldr instruction + % PC is (RewriteOffset + 4) aligned to 4-byte boundary, literal is at AlignedOffset + PCValue = (RewriteOffset + 4 + 3) band (bnot 3), + PCRelOffset = AlignedOffset - PCValue, + LdrInstr = jit_armv6m_asm:ldr(TempReg, {pc, PCRelOffset}), + {LdrInstr, StreamWithLiteral} + end, + Stream2 = StreamModule:replace(Stream1, RewriteOffset, NewMoveInstr), + State0#state{stream = Stream2}. set_bs( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 6382b0f0da..a39cdc8df7 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -139,7 +139,7 @@ call_primitive_6_args_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -call_primitive_extended_regs_test_DISABLED() -> +call_primitive_extended_regs_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]), @@ -151,50 +151,34 @@ call_primitive_extended_regs_test_DISABLED() -> State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]), ?BACKEND:assert_all_native_free(State6), Stream = ?BACKEND:stream(State6), - Dump = - << - "\n" - " 0: f9404850 ldr x16, [x2, #144]\n" - " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " c: d2800261 mov x1, #0x13 // #19\n" - " 10: d63f0200 blr x16\n" - " 14: aa0003e7 mov x7, x0\n" - " 18: a8c10be1 ldp x1, x2, [sp], #16\n" - " 1c: a8c103fe ldp x30, x0, [sp], #16\n" - " 20: f9404850 ldr x16, [x2, #144]\n" - " 24: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 28: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " 2c: f81f0fe7 str x7, [sp, #-16]!\n" - " 30: d2800281 mov x1, #0x14 // #20\n" - " 34: d63f0200 blr x16\n" - " 38: aa0003e8 mov x8, x0\n" - " 3c: f84107e7 ldr x7, [sp], #16\n" - " 40: a8c10be1 ldp x1, x2, [sp], #16\n" - " 44: a8c103fe ldp x30, x0, [sp], #16\n" - " 48: f9404850 ldr x16, [x2, #144]\n" - " 4c: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 50: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " 54: a9bf1fe8 stp x8, x7, [sp, #-16]!\n" - " 58: d2800261 mov x1, #0x13 // #19\n" - " 5c: d63f0200 blr x16\n" - " 60: aa0003e9 mov x9, x0\n" - " 64: a8c11fe8 ldp x8, x7, [sp], #16\n" - " 68: a8c10be1 ldp x1, x2, [sp], #16\n" - " 6c: a8c103fe ldp x30, x0, [sp], #16\n" - " 70: f9403450 ldr x16, [x2, #104]\n" - " 74: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 78: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " 7c: f81f0fe9 str x9, [sp, #-16]!\n" - " 80: f94000e1 ldr x1, [x7]\n" - " 84: f9400102 ldr x2, [x8]\n" - " 88: d63f0200 blr x16\n" - " 8c: aa0003e7 mov x7, x0\n" - " 90: f84107e9 ldr x9, [sp], #16\n" - " 94: a8c10be1 ldp x1, x2, [sp], #16\n" - " 98: a8c103fe ldp x30, x0, [sp], #16\n" - " 9c: f9000127 str x7, [x9]\n" - >>, + Dump = << + " 0: 6c97 ldr r7, [r2, #72] ; 0x48\n" + " 2: b405 push {r0, r2}\n" + " 4: 2113 movs r1, #19\n" + " 6: 47b8 blx r7\n" + " 8: 4607 mov r7, r0\n" + " a: bc05 pop {r0, r2}\n" + " c: 6c96 ldr r6, [r2, #72] ; 0x48\n" + " e: b485 push {r0, r2, r7}\n" + " 10: 2114 movs r1, #20\n" + " 12: 47b0 blx r6\n" + " 14: 4606 mov r6, r0\n" + " 16: bc85 pop {r0, r2, r7}\n" + " 18: 6c95 ldr r5, [r2, #72] ; 0x48\n" + " 1a: b4c5 push {r0, r2, r6, r7}\n" + " 1c: 2113 movs r1, #19\n" + " 1e: 47a8 blx r5\n" + " 20: 4605 mov r5, r0\n" + " 22: bcc5 pop {r0, r2, r6, r7}\n" + " 24: 6b54 ldr r4, [r2, #52] ; 0x34\n" + " 26: b425 push {r0, r2, r5}\n" + " 28: 6839 ldr r1, [r7, #0]\n" + " 2a: 6832 ldr r2, [r6, #0]\n" + " 2c: 47a0 blx r4\n" + " 2e: 4604 mov r4, r0\n" + " 30: bc25 pop {r0, r2, r5}\n" + " 32: 602c str r4, [r5, #0]" + >>, ?assertEqual(dump_to_bin(Dump), Stream). call_ext_only_test() -> @@ -346,7 +330,7 @@ call_primitive_last_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -return_if_not_equal_to_ctx_test_DISABLED_() -> +return_if_not_equal_to_ctx_test_() -> {setup, fun() -> ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) @@ -364,17 +348,16 @@ return_if_not_equal_to_ctx_test_DISABLED_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: f9405450 ldr x16, [x2, #168]\n" - " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " c: d63f0200 blr x16\n" - " 10: aa0003e7 mov x7, x0\n" - " 14: a8c10be1 ldp x1, x2, [sp], #16\n" - " 18: a8c103fe ldp x30, x0, [sp], #16\n" - " 1c: eb0000ff cmp x7, x0\n" - " 20: 54000060 b.eq 0x2c // b.none\n" - " 24: aa0703e0 mov x0, x7\n" - " 28: d65f03c0 ret" + " 0: 6d57 ldr r7, [r2, #84] ; 0x54\n" + " 2: b405 push {r0, r2}\n" + " 4: 9900 ldr r1, [sp, #0]\n" + " 6: 47b8 blx r7\n" + " 8: 4607 mov r7, r0\n" + " a: bc05 pop {r0, r2}\n" + " c: 4287 cmp r7, r0\n" + " e: d001 beq.n 0x14\n" + " 10: 4638 mov r0, r7\n" + " 12: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -391,18 +374,17 @@ return_if_not_equal_to_ctx_test_DISABLED_() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: f9405450 ldr x16, [x2, #168]\n" - " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " c: d63f0200 blr x16\n" - " 10: aa0003e7 mov x7, x0\n" - " 14: a8c10be1 ldp x1, x2, [sp], #16\n" - " 18: a8c103fe ldp x30, x0, [sp], #16\n" - " 1c: aa0703e8 mov x8, x7\n" - " 20: eb00011f cmp x8, x0\n" - " 24: 54000060 b.eq 0x30 // b.none\n" - " 28: aa0803e0 mov x0, x8\n" - " 2c: d65f03c0 ret" + " 0: 6d57 ldr r7, [r2, #84] ; 0x54\n" + " 2: b405 push {r0, r2}\n" + " 4: 9900 ldr r1, [sp, #0]\n" + " 6: 47b8 blx r7\n" + " 8: 4607 mov r7, r0\n" + " a: bc05 pop {r0, r2}\n" + " c: 463e mov r6, r7\n" + " e: 4286 cmp r6, r0\n" + " 10: d001 beq.n 0x16\n" + " 12: 4630 mov r0, r6\n" + " 14: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -1075,7 +1057,7 @@ shift_left_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -call_only_or_schedule_next_and_label_relocation_test_DISABLED() -> +call_only_or_schedule_next_and_label_relocation_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:jump_table(State0, 2), Offset1 = ?BACKEND:offset(State1), @@ -1089,28 +1071,45 @@ call_only_or_schedule_next_and_label_relocation_test_DISABLED() -> Stream = ?BACKEND:stream(State5), Dump = << - " 0: 1400000d b 0x34\n" - " 4: 14000002 b 0xc\n" - " 8: 14000009 b 0x2c\n" - " c: b9401027 ldr w7, [x1, #16]\n" - " 10: f10004e7 subs x7, x7, #0x1\n" - " 14: b9001027 str w7, [x1, #16]\n" - " 18: 540000a1 b.ne 0x2c // b.any\n" - " 1c: 10000087 adr x7, 0x2c\n" - " 20: f9000427 str x7, [x1, #8]\n" - " 24: f9400847 ldr x7, [x2, #16]\n" - " 28: d61f00e0 br x7\n" - " 2c: f9400047 ldr x7, [x2]\n" - " 30: d61f00e0 br x7\n" - " 34: f9400447 ldr x7, [x2, #8]\n" - " 38: d61f00e0 br x7" + " 0: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 2: e018 b.n 0x36\n" + " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 6: e001 b.n 0xc\n" + " 8: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " a: e00f b.n 0x2c\n" + " c: 9e00 ldr r6, [sp, #0]\n" + " e: 68b7 ldr r7, [r6, #8]\n" + " 10: 3f01 subs r7, #1\n" + " 12: 60b7 str r7, [r6, #8]\n" + " 14: d10a bne.n 0x2c\n" + " 16: 9e00 ldr r6, [sp, #0]\n" + " 18: a701 add r7, pc, #4 ; (adr r7, 0x20)\n" + " 1a: 6077 str r7, [r6, #4]\n" + " 1c: e001 b.n 0x22\n" + " 1e: 0000 movs r0, r0\n" + " 20: e7f2 b.n 0x8\n" + " 22: 6897 ldr r7, [r2, #8]\n" + " 24: 9e05 ldr r6, [sp, #20]\n" + " 26: 9705 str r7, [sp, #20]\n" + " 28: 46b6 mov lr, r6\n" + " 2a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 2c: 6817 ldr r7, [r2, #0]\n" + " 2e: 9e05 ldr r6, [sp, #20]\n" + " 30: 9705 str r7, [sp, #20]\n" + " 32: 46b6 mov lr, r6\n" + " 34: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 36: 6857 ldr r7, [r2, #4]\n" + " 38: 9e05 ldr r6, [sp, #20]\n" + " 3a: 9705 str r7, [sp, #20]\n" + " 3c: 46b6 mov lr, r6\n" + " 3e: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). -call_bif_with_large_literal_integer_test_DISABLED() -> +call_bif_with_large_literal_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), - {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 9208452466117618637]), + {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 998238357]), {State3, ResultReg} = ?BACKEND:call_func_ptr(State2, {free, FuncPtr}, [ ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg} ]), @@ -1123,43 +1122,40 @@ call_bif_with_large_literal_integer_test_DISABLED() -> Stream = ?BACKEND:stream(State6), Dump = << - " 0: f9402050 ldr x16, [x2, #64]\n" - " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " c: aa0103e0 mov x0, x1\n" - " 10: d2800041 mov x1, #0x2 // #2\n" - " 14: d63f0200 blr x16\n" - " 18: aa0003e7 mov x7, x0\n" - " 1c: a8c10be1 ldp x1, x2, [sp], #16\n" - " 20: a8c103fe ldp x30, x0, [sp], #16\n" - " 24: f9403c50 ldr x16, [x2, #120]\n" - " 28: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 2c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " 30: f81f0fe7 str x7, [sp, #-16]!\n" - " 34: d29579a1 mov x1, #0xabcd // #43981\n" - " 38: f2b7c041 movk x1, #0xbe02, lsl #16\n" - " 3c: f2dfd741 movk x1, #0xfeba, lsl #32\n" - " 40: f2eff941 movk x1, #0x7fca, lsl #48\n" - " 44: d63f0200 blr x16\n" - " 48: aa0003e8 mov x8, x0\n" - " 4c: f84107e7 ldr x7, [sp], #16\n" - " 50: a8c10be1 ldp x1, x2, [sp], #16\n" - " 54: a8c103fe ldp x30, x0, [sp], #16\n" - " 58: a9bf03fe stp x30, x0, [sp, #-16]!\n" - " 5c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" - " 60: d2800001 mov x1, #0x0 // #0\n" - " 64: d2800022 mov x2, #0x1 // #1\n" - " 68: f9401803 ldr x3, [x0, #48]\n" - " 6c: aa0803e4 mov x4, x8\n" - " 70: d63f00e0 blr x7\n" - " 74: aa0003e7 mov x7, x0\n" - " 78: a8c10be1 ldp x1, x2, [sp], #16\n" - " 7c: a8c103fe ldp x30, x0, [sp], #16\n" - " 80: b5000087 cbnz x7, 0x90\n" - " 84: f9401847 ldr x7, [x2, #48]\n" - " 88: d2801102 mov x2, #0x88 // #136\n" - " 8c: d61f00e0 br x7\n" - " 90: f9001807 str x7, [x0, #48]" + " 0: 6a17 ldr r7, [r2, #32]\n" + " 2: b405 push {r0, r2}\n" + " 4: 9800 ldr r0, [sp, #0]\n" + " 6: 2102 movs r1, #2\n" + " 8: 47b8 blx r7\n" + " a: 4607 mov r7, r0\n" + " c: bc05 pop {r0, r2}\n" + " e: 6bd6 ldr r6, [r2, #60] ; 0x3c\n" + " 10: b485 push {r0, r2, r7}\n" + " 12: 4901 ldr r1, [pc, #4] ; (0x18)\n" + " 14: e002 b.n 0x1c\n" + " 16: 0000 movs r0, r0\n" + " 18: e895 3b7f ldmia.w r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n" + " 1c: 47b0 blx r6\n" + " 1e: 4606 mov r6, r0\n" + " 20: bc85 pop {r0, r2, r7}\n" + " 22: b405 push {r0, r2}\n" + " 24: b082 sub sp, #8\n" + " 26: 9600 str r6, [sp, #0]\n" + " 28: 2100 movs r1, #0\n" + " 2a: 2201 movs r2, #1\n" + " 2c: 6983 ldr r3, [r0, #24]\n" + " 2e: 47b8 blx r7\n" + " 30: 4607 mov r7, r0\n" + " 32: bc05 pop {r0, r2}\n" + " 34: 2f00 cmp r7, #0\n" + " 36: d105 bne.n 0x44\n" + " 38: 6997 ldr r7, [r2, #24]\n" + " 3a: 223a movs r2, #58 ; 0x3a\n" + " 3c: 9e05 ldr r6, [sp, #20]\n" + " 3e: 9705 str r7, [sp, #20]\n" + " 40: 46b6 mov lr, r6\n" + " 42: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 44: 6187 str r7, [r0, #24]" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1323,36 +1319,53 @@ is_boolean_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -call_ext_test_DISABLED() -> +call_ext_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]), ?BACKEND:assert_all_native_free(State2), Stream = ?BACKEND:stream(State2), Dump = << - " 0: b9401027 ldr w7, [x1, #16]\n" - " 4: f10004e7 subs x7, x7, #0x1\n" - " 8: b9001027 str w7, [x1, #16]\n" - " c: 540000a1 b.ne 0x20 // b.any\n" - " 10: 10000087 adr x7, 0x20\n" - " 14: f9000427 str x7, [x1, #8]\n" - " 18: f9400847 ldr x7, [x2, #16]\n" - " 1c: d61f00e0 br x7\n" - " 20: f9400027 ldr x7, [x1]\n" - " 24: b94000e7 ldr w7, [x7]\n" - " 28: d3689ce7 lsl x7, x7, #24\n" - " 2c: d2802610 mov x16, #0x130 // #304\n" - " 30: aa1000e7 orr x7, x7, x16\n" - " 34: f9005c07 str x7, [x0, #184]\n" - " 38: f9401047 ldr x7, [x2, #32]\n" - " 3c: d2800042 mov x2, #0x2 // #2\n" - " 40: d28000a3 mov x3, #0x5 // #5\n" - " 44: 92800004 mov x4, #0xffffffffffffffff // #-1\n" - " 48: d61f00e0 br x7" + " 0: 9e00 ldr r6, [sp, #0]\n" + " 2: 68b7 ldr r7, [r6, #8]\n" + " 4: 3f01 subs r7, #1\n" + " 6: 60b7 str r7, [r6, #8]\n" + " 8: d107 bne.n 0x1a\n" + " a: a703 add r7, pc, #12 ; (adr r7, 0x18)\n" + " c: 6077 str r7, [r6, #4]\n" + " e: 6897 ldr r7, [r2, #8]\n" + " 10: 9e05 ldr r6, [sp, #20]\n" + " 12: 9705 str r7, [sp, #20]\n" + " 14: 46b6 mov lr, r6\n" + " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 18: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1a: 9e00 ldr r6, [sp, #0]\n" + " 1c: 6837 ldr r7, [r6, #0]\n" + " 1e: 683f ldr r7, [r7, #0]\n" + " 20: 063f lsls r7, r7, #24\n" + " 22: 4d07 ldr r5, [pc, #28] ; (0x40)\n" + " 24: 432f orrs r7, r5\n" + " 26: 65c7 str r7, [r0, #92] ; 0x5c\n" + " 28: 6917 ldr r7, [r2, #16]\n" + " 2a: 2202 movs r2, #2\n" + " 2c: 2305 movs r3, #5\n" + " 2e: 2500 movs r5, #0\n" + " 30: 426d negs r5, r5\n" + " 32: 4639 mov r1, r7\n" + " 34: 9f05 ldr r7, [sp, #20]\n" + " 36: 46be mov lr, r7\n" + " 38: 9f04 ldr r7, [sp, #16]\n" + " 3a: 9504 str r5, [sp, #16]\n" + " 3c: 9105 str r1, [sp, #20]\n" + " 3e: 9e03 ldr r6, [sp, #12]\n" + " 40: bd32 pop {r1, r4, r5, pc}\n" + " 42: 0000 movs r0, r0\n" + " 44: 0108 lsls r0, r1, #4\n" + " 46: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). -call_fun_test_DISABLED() -> +call_fun_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), FuncReg = {x_reg, 0}, @@ -1382,44 +1395,82 @@ call_fun_test_DISABLED() -> ?BACKEND:assert_all_native_free(State9), Stream = ?BACKEND:stream(State9), Dump = << - " 0: b9401027 ldr w7, [x1, #16]\n" - " 4: f10004e7 subs x7, x7, #0x1\n" - " 8: b9001027 str w7, [x1, #16]\n" - " c: 540000a1 b.ne 0x20 // b.any\n" - " 10: 10000087 adr x7, 0x20\n" - " 14: f9000427 str x7, [x1, #8]\n" - " 18: f9400847 ldr x7, [x2, #16]\n" - " 1c: d61f00e0 br x7\n" - " 20: f9401807 ldr x7, [x0, #48]\n" - " 24: aa0703e8 mov x8, x7\n" - " 28: 92400509 and x9, x8, #0x3\n" - " 2c: f100093f cmp x9, #0x2\n" - " 30: 540000c0 b.eq 0x48 // b.none\n" - " 34: f9404c47 ldr x7, [x2, #152]\n" - " 38: d2800702 mov x2, #0x38 // #56\n" - " 3c: d2804163 mov x3, #0x20b // #523\n" - " 40: aa0803e4 mov x4, x8\n" - " 44: d61f00e0 br x7\n" - " 48: 927ef508 and x8, x8, #0xfffffffffffffffc\n" - " 4c: f9400108 ldr x8, [x8]\n" - " 50: 92401509 and x9, x8, #0x3f\n" - " 54: f100513f cmp x9, #0x14\n" - " 58: 540000c0 b.eq 0x70 // b.none\n" - " 5c: f9404c47 ldr x7, [x2, #152]\n" - " 60: d2800c02 mov x2, #0x60 // #96\n" - " 64: d2804163 mov x3, #0x20b // #523\n" - " 68: aa0803e4 mov x4, x8\n" - " 6c: d61f00e0 br x7\n" - " 70: f9400028 ldr x8, [x1]\n" - " 74: b9400108 ldr w8, [x8]\n" - " 78: d3689d08 lsl x8, x8, #24\n" - " 7c: d2804c10 mov x16, #0x260 // #608\n" - " 80: aa100108 orr x8, x8, x16\n" - " 84: f9005c08 str x8, [x0, #184]\n" - " 88: f9408048 ldr x8, [x2, #256]\n" - " 8c: aa0703e2 mov x2, x7\n" - " 90: d2800003 mov x3, #0x0 // #0\n" - " 94: d61f0100 br x8" + " 0: 9e00 ldr r6, [sp, #0]\n" + " 2: 68b7 ldr r7, [r6, #8]\n" + " 4: 3f01 subs r7, #1\n" + " 6: 60b7 str r7, [r6, #8]\n" + " 8: d107 bne.n 0x1a\n" + " a: a703 add r7, pc, #12 ; (adr r7, 0x18)\n" + " c: 6077 str r7, [r6, #4]\n" + " e: 6897 ldr r7, [r2, #8]\n" + " 10: 9e05 ldr r6, [sp, #20]\n" + " 12: 9705 str r7, [sp, #20]\n" + " 14: 46b6 mov lr, r6\n" + " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 18: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1a: 6987 ldr r7, [r0, #24]\n" + " 1c: 463e mov r6, r7\n" + " 1e: 4635 mov r5, r6\n" + " 20: 2403 movs r4, #3\n" + " 22: 4025 ands r5, r4\n" + " 24: 2d02 cmp r5, #2\n" + " 26: d00e beq.n 0x46\n" + " 28: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" + " 2a: 222a movs r2, #42 ; 0x2a\n" + " 2c: 4b00 ldr r3, [pc, #0] ; (0x30)\n" + " 2e: e001 b.n 0x34\n" + " 30: 020b lsls r3, r1, #8\n" + " 32: 0000 movs r0, r0\n" + " 34: 4635 mov r5, r6\n" + " 36: 4639 mov r1, r7\n" + " 38: 9f05 ldr r7, [sp, #20]\n" + " 3a: 46be mov lr, r7\n" + " 3c: 9f04 ldr r7, [sp, #16]\n" + " 3e: 9504 str r5, [sp, #16]\n" + " 40: 9105 str r1, [sp, #20]\n" + " 42: 9e03 ldr r6, [sp, #12]\n" + " 44: bd32 pop {r1, r4, r5, pc}\n" + " 46: 2503 movs r5, #3\n" + " 48: 43ae bics r6, r5\n" + " 4a: 6836 ldr r6, [r6, #0]\n" + " 4c: 4635 mov r5, r6\n" + " 4e: 243f movs r4, #63 ; 0x3f\n" + " 50: 4025 ands r5, r4\n" + " 52: 2d14 cmp r5, #20\n" + " 54: d00f beq.n 0x76\n" + " 56: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" + " 58: 2258 movs r2, #88 ; 0x58\n" + " 5a: 4b01 ldr r3, [pc, #4] ; (0x60)\n" + " 5c: e002 b.n 0x64\n" + " 5e: 0000 movs r0, r0\n" + " 60: 020b lsls r3, r1, #8\n" + " 62: 0000 movs r0, r0\n" + " 64: 4635 mov r5, r6\n" + " 66: 4639 mov r1, r7\n" + " 68: 9f05 ldr r7, [sp, #20]\n" + " 6a: 46be mov lr, r7\n" + " 6c: 9f04 ldr r7, [sp, #16]\n" + " 6e: 9504 str r5, [sp, #16]\n" + " 70: 9105 str r1, [sp, #20]\n" + " 72: 9e03 ldr r6, [sp, #12]\n" + " 74: bd32 pop {r1, r4, r5, pc}\n" + " 76: 9d00 ldr r5, [sp, #0]\n" + " 78: 682e ldr r6, [r5, #0]\n" + " 7a: 6836 ldr r6, [r6, #0]\n" + " 7c: 0636 lsls r6, r6, #24\n" + " 7e: 4c04 ldr r4, [pc, #16] ; (0x90)\n" + " 80: 4326 orrs r6, r4\n" + " 82: 65c6 str r6, [r0, #92] ; 0x5c\n" + " 84: 2680 movs r6, #128 ; 0x80\n" + " 86: 5996 ldr r6, [r2, r6]\n" + " 88: 463a mov r2, r7\n" + " 8a: 2300 movs r3, #0\n" + " 8c: 9f05 ldr r7, [sp, #20]\n" + " 8e: 9605 str r6, [sp, #20]\n" + " 90: 46be mov lr, r7\n" + " 92: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 94: 0250 lsls r0, r2, #9\n" + " 96: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). From c0a998d318b0f80d81a643eb41667f17797069a4 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 27 Aug 2025 00:15:33 +0200 Subject: [PATCH 23/97] armv6m: increase coverage & fix issues Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 74 +++++++---------------------- libs/jit/src/jit_precompile.erl | 1 + tests/libs/jit/jit_armv6m_tests.erl | 71 +++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 56 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 1bc9244d18..d1bf1dc0db 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -180,8 +180,6 @@ %% Link register -define(LR_REG, r14). -%% Intra-procedure call scratch register --define(IP_REG, r12). %% Stack offset for function prolog: push {r1,r4,r5,r6,r7,lr} %% r1 (JITSTATE_REG) is at SP+0 after push @@ -391,7 +389,6 @@ update_branches( NewInstr = case Type of {bcc, CC} -> jit_armv6m_asm:bcc(CC, Rel); - {adr, Reg} -> jit_armv6m_asm:adr(Reg, Rel); b -> jit_armv6m_asm:b(Rel) end, Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), @@ -410,11 +407,17 @@ load_primitive_ptr(Primitive, TargetReg) -> jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, 0}); N when N * 4 =< 124 -> jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, N * 4}); - N -> - % For large offsets, load offset into TargetReg then use register addressing + N when N * 4 < 256 -> + % Can encode N * 4 directly in movs instruction (8-bit immediate limit) I1 = jit_armv6m_asm:movs(TargetReg, N * 4), I2 = jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, TargetReg}), - <> + <>; + N -> + % For very large primitive numbers, load N and shift left by 2 (multiply by 4) + I1 = jit_armv6m_asm:movs(TargetReg, N), + I2 = jit_armv6m_asm:lsls(TargetReg, TargetReg, 2), + I3 = jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, TargetReg}), + <> end. %%----------------------------------------------------------------------------- @@ -672,21 +675,6 @@ jump_to_label( Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1, branches = [Reloc | AccBranches]}. -%% @private --spec rewrite_branch_instruction( - jit_armv6m_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, integer() -) -> binary(). -rewrite_branch_instruction({cbnz, Reg}, Offset) -> - jit_armv6m_asm:cbnz(Reg, Offset); -rewrite_branch_instruction({cbnz_w, Reg}, Offset) -> - jit_armv6m_asm:cbnz_w(Reg, Offset); -rewrite_branch_instruction({tbz, Reg, Bit}, Offset) -> - jit_armv6m_asm:tbz(Reg, Bit, Offset); -rewrite_branch_instruction({tbnz, Reg, Bit}, Offset) -> - jit_armv6m_asm:tbnz(Reg, Bit, Offset); -rewrite_branch_instruction(CC, Offset) when is_atom(CC) -> - jit_armv6m_asm:bcc(CC, Offset). - %%----------------------------------------------------------------------------- %% @doc Emit an if block, i.e. emit a test of a condition and conditionnally %% execute a block. @@ -736,7 +724,7 @@ if_block( OffsetAfter = StreamModule:offset(Stream2), %% Patch the conditional branch instruction to jump to the end of the block BranchOffset = OffsetAfter - (Offset + BranchInstrOffset), - NewBranchInstr = rewrite_branch_instruction(CC, BranchOffset), + NewBranchInstr = jit_armv6m_asm:bcc(CC, BranchOffset), Stream3 = StreamModule:replace(Stream2, Offset + BranchInstrOffset, NewBranchInstr), merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs). @@ -770,7 +758,7 @@ if_else_block( OffsetAfter = StreamModule:offset(Stream3), %% Patch the conditional branch to jump to the else block ElseBranchOffset = OffsetAfter - (Offset + BranchInstrOffset), - NewBranchInstr = rewrite_branch_instruction(CC, ElseBranchOffset), + NewBranchInstr = jit_armv6m_asm:bcc(CC, ElseBranchOffset), Stream4 = StreamModule:replace(Stream3, Offset + BranchInstrOffset, NewBranchInstr), %% Build the else block StateElse = State2#state{ @@ -1129,7 +1117,7 @@ shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, %% @param Args arguments to pass to the function %% @return Updated backend state and return register %%----------------------------------------------------------------------------- --spec call_func_ptr(state(), {free, armv6m_register()} | {primitive, non_neg_integer()}, [arg()]) -> +-spec call_func_ptr(state(), {free, armv6m_register()}, [arg()]) -> {state(), armv6m_register()}. call_func_ptr( #state{ @@ -1138,17 +1126,16 @@ call_func_ptr( available_regs = AvailableRegs0, used_regs = UsedRegs0 } = State0, - FuncPtrTuple, + {free, FuncPtrReg}, Args ) -> FreeRegs = lists:flatmap( fun - ({free, ?IP_REG}) -> []; ({free, {ptr, Reg}}) -> [Reg]; ({free, Reg}) when is_atom(Reg) -> [Reg]; (_) -> [] end, - [FuncPtrTuple | Args] + [{free, FuncPtrReg} | Args] ), UsedRegs1 = UsedRegs0 -- FreeRegs, SavedRegs = [?CTX_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], @@ -1158,25 +1145,9 @@ call_func_ptr( State1 = set_args(State0#state{stream = Stream1}, Args), #state{stream = Stream2} = State1, - {FuncPtrReg, Stream3} = - case FuncPtrTuple of - {free, Reg} -> - {Reg, Stream2}; - {primitive, Primitive} -> - % We use r16 for the address. - PrepCall = - case Primitive of - 0 -> - jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, 0}); - N -> - jit_armv6m_asm:ldr(?IP_REG, {?NATIVE_INTERFACE_REG, N * 4}) - end, - {?IP_REG, StreamModule:append(Stream2, PrepCall)} - end, - % Call the function pointer (using BLX for call with return) Call = jit_armv6m_asm:blx(FuncPtrReg), - Stream4 = StreamModule:append(Stream3, Call), + Stream4 = StreamModule:append(Stream2, Call), % If r0 is in used regs, save it to another temporary register FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), @@ -1974,11 +1945,7 @@ set_continuation_to_label( I4 = jit_armv6m_asm:b(SkipOffset), % Padding if needed - Padding = - case PaddingNeeded of - 0 -> <<>>; - 2 -> <<0:16>> - end, + Padding = <<0:(PaddingNeeded * 8)>>, % Aligned block: branch to jump table entry (we know the address directly) JumpTableEntryOffset = Label * 4, @@ -2130,12 +2097,7 @@ mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Re BranchOffset = TargetPosition - BranchPosition, I2 = jit_armv6m_asm:b(BranchOffset), %% Generate padding if needed (just zeros) - Padding = - case PaddingNeeded of - 0 -> <<>>; - % 2 bytes of padding - 2 -> <<0:16>> - end, + Padding = <<0:(PaddingNeeded * 8)>>, Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}. @@ -2364,7 +2326,7 @@ rewrite_cp_offset( % Ensure 4-byte alignment for literal pool AlignedOffset = (CurrentOffset + 3) band (bnot 3), PaddingSize = AlignedOffset - CurrentOffset, - Padding = binary:copy(<<0>>, PaddingSize), + Padding = <<0:(PaddingSize * 8)>>, % Emit the 32-bit literal Literal = <>, diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index 151e470c54..dfcb19dcc4 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -70,6 +70,7 @@ compile(Target, Dir, Path) -> case Target of "x86_64" -> ?JIT_ARCH_X86_64; "aarch64" -> ?JIT_ARCH_AARCH64; + "armv6m" -> ?JIT_ARCH_ARMV6M; _ -> error({unsupported_target, Target}) end, diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index a39cdc8df7..7fc1cf0c84 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -887,6 +887,25 @@ if_block_test_() -> ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '<', RegB}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 42b7 cmp r7, r6\n" + " 6: da00 bge.n 0xa\n" + " 8: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), ?_test(begin State1 = ?BACKEND:if_block( State0, @@ -1800,6 +1819,15 @@ move_array_element_test_() -> " 6: 59df ldr r7, [r3, r7]\n" " 8: 67f7 str r7, [r6, #124] ; 0x7c" >>) + end), + %% move_array_element with integer index and x_reg destination + ?_test(begin + {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 68be ldr r6, [r7, #8]\n" + " 4: 62c6 str r6, [r0, #44] ; 0x2c" + >>) end) ] end}. @@ -2124,6 +2152,32 @@ mul_test_() -> ] end}. +%% Test set_args1 with y_reg pattern +set_args1_y_reg_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Call primitive with y_reg argument to trigger {y_reg, X} pattern in set_args1 + % This mirrors: {MSt2, Value} = MMod:call_primitive(MSt1, ?PRIM_BITSTRING_GET_UTF8, [{free, Src}]) + % but with {y_reg, 5} instead of {free, Src} + {State1, _ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_BITSTRING_GET_UTF8, [ + {y_reg, 5} + ]), + + Stream = ?BACKEND:stream(State1), + % Expected disassembly for loading from y_reg and calling primitive + Dump = << + " 0: 2743 movs r7, #67 ; 0x43\n" + " 2: 00bf lsls r7, r7, #2\n" + " 4: 59d7 ldr r7, [r2, r7]\n" + " 6: b405 push {r0, r2}\n" + " 8: 6940 ldr r0, [r0, #20]\n" + " a: 6a80 ldr r0, [r0, #40] ; 0x28\n" + " c: 47b8 blx r7\n" + " e: 4607 mov r7, r0\n" + " 10: bc05 pop {r0, r2}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). @@ -2179,3 +2233,20 @@ dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) -> dump_to_bin0(Tail, instr, Acc); dump_to_bin0(<<>>, _, Acc) -> list_to_binary(lists:reverse(Acc)). + +%% Test set_continuation_to_offset function +set_continuation_to_offset_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Test set_continuation_to_offset - should generate ADR, LDR, and STR instructions + {State1, _OffsetRef} = ?BACKEND:set_continuation_to_offset(State0), + + Stream = ?BACKEND:stream(State1), + + % Expected: adr temp to 0, ldr jitstate from stack, str temp to continuation + Dump = << + " 0: a700 add r7, pc, #0 ; (adr r7, 0 )\n" + " 2: 9901 ldr r1, [sp, #4]\n" + " 4: 6187 str r7, [r0, #24]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). From 0f69724b22249887ec467ab6a54c84a7fdb65ab6 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 27 Aug 2025 00:39:00 +0200 Subject: [PATCH 24/97] armv6m: add wait_timeout_test, fix continuation to offset Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 22 ++++++- tests/libs/jit/jit_armv6m_tests.erl | 89 +++++++++++++++++++++++------ 2 files changed, 91 insertions(+), 20 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index d1bf1dc0db..6c94b64938 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -389,6 +389,8 @@ update_branches( NewInstr = case Type of {bcc, CC} -> jit_armv6m_asm:bcc(CC, Rel); + {adr, Reg} when Rel rem 4 =:= 0 -> jit_armv6m_asm:adr(Reg, Rel); + {adr, Reg} when Rel rem 4 =:= 2 -> jit_armv6m_asm:adr(Reg, Rel + 2); b -> jit_armv6m_asm:b(Rel) end, Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), @@ -1967,7 +1969,7 @@ set_continuation_to_offset( ) -> OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), - I1 = jit_armv6m_asm:adr(Temp, 0), + I1 = jit_armv6m_asm:adr(Temp, 4), Reloc = {OffsetRef, Offset, {adr, Temp}}, % Load jit_state pointer from stack, then store continuation I2a = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), @@ -1979,8 +1981,22 @@ set_continuation_to_offset( %% @doc Implement a continuation entry point. %% TODO: push r4-r7 and lr -spec continuation_entry_point(#state{}) -> #state{}. -continuation_entry_point(State) -> - State. +continuation_entry_point( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State +) -> + % Align if required. + Offset = StreamModule:offset(Stream0), + Stream1 = + case Offset rem 4 of + 0 -> Stream0; + 2 -> StreamModule:append(Stream0, <<0:16>>) + end, + Prolog = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]), + Stream2 = StreamModule:append(Stream1, Prolog), + State#state{stream = Stream2}. get_module_index( #state{ diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 7fc1cf0c84..3bdca56bb1 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1338,6 +1338,78 @@ is_boolean_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test OP_WAIT_TIMEOUT pattern that uses set_continuation_to_offset and continuation_entry_point +wait_timeout_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + Label = 42, + {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0), + {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000), + State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [ + ctx, jit_state, {free, TimeoutReg}, Label + ]), + Offset0 = ?BACKEND:offset(State3), + State4 = ?BACKEND:continuation_entry_point(State3), + {State5, ResultReg0} = ?BACKEND:call_primitive(State4, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ]), + State6 = ?BACKEND:return_if_not_equal_to_ctx(State5, {free, ResultReg0}), + % ?WAITING_TIMEOUT_EXPIRED + {State7, ResultReg1} = ?BACKEND:call_primitive(State6, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]), + State8 = ?BACKEND:if_block(State7, {{free, ResultReg1}, '==', 0}, fun(BlockSt) -> + ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [ + ctx, jit_state, Label + ]) + end), + State9 = ?BACKEND:update_branches(State8, [{OffsetRef0, Offset0}]), + + Stream = ?BACKEND:stream(State9), + Dump = << + " 0: a707 add r7, pc, #28 ; (adr r7, 0x20)\n" + " 2: 9e00 ldr r6, [sp, #0]\n" + " 4: 6077 str r7, [r6, #4]\n" + " 6: 4f01 ldr r7, [pc, #4] ; (0xc)\n" + " 8: e002 b.n 0x10\n" + " a: 0000 movs r0, r0\n" + " c: 1388 asrs r0, r1, #14\n" + " e: 0000 movs r0, r0\n" + " 10: 6f96 ldr r6, [r2, #120] ; 0x78\n" + " 12: 463a mov r2, r7\n" + " 14: 232a movs r3, #42 ; 0x2a\n" + " 16: 9f05 ldr r7, [sp, #20]\n" + " 18: 9605 str r6, [sp, #20]\n" + " 1a: 46be mov lr, r7\n" + " 1c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 1e: 0000 movs r0, r0\n" + " 20: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 22: 6d57 ldr r7, [r2, #84] ; 0x54\n" + " 24: b405 push {r0, r2}\n" + " 26: 9900 ldr r1, [sp, #0]\n" + " 28: 47b8 blx r7\n" + " 2a: 4607 mov r7, r0\n" + " 2c: bc05 pop {r0, r2}\n" + " 2e: 4287 cmp r7, r0\n" + " 30: d001 beq.n 0x36\n" + " 32: 4638 mov r0, r7\n" + " 34: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 36: 2784 movs r7, #132 ; 0x84\n" + " 38: 59d7 ldr r7, [r2, r7]\n" + " 3a: b405 push {r0, r2}\n" + " 3c: 2102 movs r1, #2\n" + " 3e: 47b8 blx r7\n" + " 40: 4607 mov r7, r0\n" + " 42: bc05 pop {r0, r2}\n" + " 44: 2f00 cmp r7, #0\n" + " 46: d105 bne.n 0x54\n" + " 48: 6fd7 ldr r7, [r2, #124] ; 0x7c\n" + " 4a: 222a movs r2, #42 ; 0x2a\n" + " 4c: 9e05 ldr r6, [sp, #20]\n" + " 4e: 9705 str r7, [sp, #20]\n" + " 50: 46b6 mov lr, r6\n" + " 52: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -2233,20 +2305,3 @@ dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) -> dump_to_bin0(Tail, instr, Acc); dump_to_bin0(<<>>, _, Acc) -> list_to_binary(lists:reverse(Acc)). - -%% Test set_continuation_to_offset function -set_continuation_to_offset_test() -> - State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), - - % Test set_continuation_to_offset - should generate ADR, LDR, and STR instructions - {State1, _OffsetRef} = ?BACKEND:set_continuation_to_offset(State0), - - Stream = ?BACKEND:stream(State1), - - % Expected: adr temp to 0, ldr jitstate from stack, str temp to continuation - Dump = << - " 0: a700 add r7, pc, #0 ; (adr r7, 0 )\n" - " 2: 9901 ldr r1, [sp, #4]\n" - " 4: 6187 str r7, [r0, #24]" - >>, - ?assertEqual(dump_to_bin(Dump), Stream). From 263be8527bc6e9c37b34b1b9c38849575bedf616 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 27 Aug 2025 07:54:14 +0200 Subject: [PATCH 25/97] armv6m: fix OP_GC_BIF2 and return_labels_and_lines Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 43 ++++++++--- tests/libs/jit/jit_armv6m_tests.erl | 110 ++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 9 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 6c94b64938..ef294afdfd 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1223,14 +1223,22 @@ set_args_push_stack( undefined -> % 5 arguments: no 6th arg to handle State0#state{stream = Stream1}; - {free, Reg6} -> - % 6 arguments: Arg6 is already in register, store directly and free + {free, Reg6} when is_atom(Reg6) -> + % 6 arguments: Arg6 is already in native register, store directly and free I2 = jit_armv6m_asm:str(Reg6, {sp, 4}), StreamB = StreamModule:append(Stream1, I2), free_native_register(State0#state{stream = StreamB}, Reg6); _ -> % 6 arguments: store Arg6 at sp+4 - {StateA, Reg6} = move_to_native_register(State0#state{stream = Stream1}, Arg6), + % Handle {free, NonNativeReg} by unwrapping + ActualArg6 = + case Arg6 of + {free, InnerArg6} -> InnerArg6; + Other6 -> Other6 + end, + {StateA, Reg6} = move_to_native_register( + State0#state{stream = Stream1}, ActualArg6 + ), StreamA = StateA#state.stream, I2 = jit_armv6m_asm:str(Reg6, {sp, 4}), StreamB = StreamModule:append(StreamA, I2), @@ -1240,14 +1248,20 @@ set_args_push_stack( % Handle Arg5 (always present, always goes at sp+0) State2 = case Arg5 of - {free, Reg5} -> - % Arg5 is already in register, store directly and free + {free, Reg5} when is_atom(Reg5) -> + % Arg5 is already in native register, store directly and free I3 = jit_armv6m_asm:str(Reg5, {sp, 0}), Stream3 = StreamModule:append(State1#state.stream, I3), free_native_register(State1#state{stream = Stream3}, Reg5); _ -> % Move Arg5 to register, store, and free - {StateTemp, Reg5} = move_to_native_register(State1, Arg5), + % Handle {free, NonNativeReg} by unwrapping + ActualArg5 = + case Arg5 of + {free, InnerArg5} -> InnerArg5; + Other5 -> Other5 + end, + {StateTemp, Reg5} = move_to_native_register(State1, ActualArg5), StreamTemp = StateTemp#state.stream, I3 = jit_armv6m_asm:str(Reg5, {sp, 0}), Stream3 = StreamModule:append(StreamTemp, I3), @@ -2388,13 +2402,24 @@ return_labels_and_lines( SortedLabels, SortedLines ) -> - I1 = jit_armv6m_asm:adr(r0, 8), - I2 = jit_armv6m_asm:ret(), + % Check if current offset is 4-byte aligned + CurrentOffset = StreamModule:offset(Stream0), + + {I1, Padding} = + case CurrentOffset rem 4 of + 0 -> + % Aligned - use offset 4 + {jit_armv6m_asm:adr(r0, 4), <<>>}; + _ -> + % Unaligned - use offset 8 with 2-byte padding + {jit_armv6m_asm:adr(r0, 8), <<0:16>>} + end, + I2 = jit_armv6m_asm:bx(lr), LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, LinesTable = <<<> || {Line, Offset} <- SortedLines>>, Stream1 = StreamModule:append( Stream0, - <> ), State#state{stream = Stream1}. diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 3bdca56bb1..64cf1e707e 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1410,6 +1410,116 @@ wait_timeout_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test return_labels_and_lines/3 function +return_labels_and_lines_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Test return_labels_and_lines with some sample labels and lines + + % {Label, Offset} pairs + SortedLabels = [{1, 16}, {2, 32}], + % {Line, Offset} pairs + SortedLines = [{10, 16}, {20, 32}], + + State1 = ?BACKEND:return_labels_and_lines(State0, SortedLabels, SortedLines), + Stream = ?BACKEND:stream(State1), + + % Should have generated adr + bx lr + labels table + lines table + % adr = 4 bytes, bx = 2 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes + % Total minimum: 30 bytes + ?assert(byte_size(Stream) >= 30), + + % Expected: adr r0, + bx lr + labels table + lines table + % The data tables start at offset 4, so adr should be adr r0, 4 not adr r0, 8 + Dump = << + " 0: a000 add r0, pc, #0 ; (adr r0, 0x4)\n" + " 2: 4770 bx lr\n" + " 4: 0200 lsls r0, r0, #8\n" + " 6: 0100 lsls r0, r0, #4\n" + " 8: 0000 movs r0, r0\n" + " a: 1000 asrs r0, r0, #32\n" + " c: 0200 lsls r0, r0, #8\n" + " e: 0000 movs r0, r0\n" + " 10: 2000 movs r0, #0\n" + " 12: 0200 lsls r0, r0, #8\n" + " 14: 0a00 lsrs r0, r0, #8\n" + " 16: 0000 movs r0, r0\n" + " 18: 1000 asrs r0, r0, #32\n" + " 1a: 1400 asrs r0, r0, #16\n" + " 1c: 0000 movs r0, r0\n" + " 1e: 2000 movs r0, #0" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test return_labels_and_lines/3 with unaligned offset - should fail +return_labels_and_lines_unaligned_test() -> + % Create a new state with a 2-byte instruction already in the stream + % to simulate starting at an odd offset (offset 2 instead of 0) + PaddingInstruction = jit_armv6m_asm:bx(lr), + TempState = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + TempStream = jit_stream_binary:append(?BACKEND:stream(TempState), PaddingInstruction), + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, TempStream), + + % Test return_labels_and_lines with some sample labels and lines + SortedLabels = [{1, 16}, {2, 32}], + SortedLines = [{10, 16}, {20, 32}], + + State1 = ?BACKEND:return_labels_and_lines(State0, SortedLabels, SortedLines), + Stream = ?BACKEND:stream(State1), + + Dump = << + " 0: 4770 bx lr\n" + "2: a001 add r0, pc, #4 ; (adr r0, 0x8)\n" + "4: 4770 bx lr\n" + "6: 0000 movs r0, r0\n" + "8: 0200 lsls r0, r0, #8\n" + "a: 0100 lsls r0, r0, #4\n" + "c: 0000 movs r0, r0\n" + "e: 1000 asrs r0, r0, #32\n" + "10: 0200 lsls r0, r0, #8\n" + "12: 0000 movs r0, r0\n" + "14: 2000 movs r0, #0\n" + "16: 0200 lsls r0, r0, #8\n" + "18: 0a00 lsrs r0, r0, #8\n" + "1a: 0000 movs r0, r0\n" + "1c: 1000 asrs r0, r0, #32\n" + "1e: 1400 asrs r0, r0, #16\n" + "20: 0000 movs r0, r0\n" + "22: 2000 movs r0, #0" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test call_primitive with {free, {x_reg, X}} that causes the jit_precompile bug +gc_bif2_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]), + {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [ + ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}} + ]), + + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 6a17 ldr r7, [r2, #32]\n" + " 2: b405 push {r0, r2}\n" + " 4: 9800 ldr r0, [sp, #0]\n" + " 6: 212a movs r1, #42 ; 0x2a\n" + " 8: 47b8 blx r7\n" + " a: 4607 mov r7, r0\n" + " c: bc05 pop {r0, r2}\n" + " e: b405 push {r0, r2}\n" + " 10: b082 sub sp, #8\n" + " 12: 6986 ldr r6, [r0, #24]\n" + " 14: 9600 str r6, [sp, #0]\n" + " 16: 2100 movs r1, #0\n" + " 18: 2203 movs r2, #3\n" + " 1a: 6943 ldr r3, [r0, #20]\n" + " 1c: 681b ldr r3, [r3, #0]\n" + " 1e: 47b8 blx r7\n" + " 20: 4607 mov r7, r0\n" + " 22: bc05 pop {r0, r2}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), From f2249de0fb1930f8808053f0ab8f327062117fdb Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 27 Aug 2025 23:21:11 +0200 Subject: [PATCH 26/97] armv6m: fix mov_immediate with negative value and branch from set_continuation_to_label Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 101 ++++++------ tests/libs/jit/jit_armv6m_tests.erl | 247 +++++++++++++++++++++++++--- 2 files changed, 269 insertions(+), 79 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index ef294afdfd..0059ad20a6 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -388,7 +388,6 @@ update_branches( Rel = LabelOffset - Offset, NewInstr = case Type of - {bcc, CC} -> jit_armv6m_asm:bcc(CC, Rel); {adr, Reg} when Rel rem 4 =:= 0 -> jit_armv6m_asm:adr(Reg, Rel); {adr, Reg} when Rel rem 4 =:= 2 -> jit_armv6m_asm:adr(Reg, Rel + 2); b -> jit_armv6m_asm:b(Rel) @@ -1923,55 +1922,47 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp, TempJitState | _] + available_regs = [Temp1, Temp2 | _] } = State, Label ) -> Offset = StreamModule:offset(Stream0), - % Load jit_state pointer from stack - I1 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), - - % We'll place the aligned block right after: I1 + adr + str + skip_branch + padding - - % adr instruction size - I2Size = 2, - % str instruction size - I3Size = 2, - % skip branch instruction size - I4Size = 2, - - % Position where aligned block would start (before padding) - AlignedBlockOffsetBase = Offset + byte_size(I1) + I2Size + I3Size + I4Size, - PaddingNeeded = (4 - (AlignedBlockOffsetBase rem 4)) rem 4, - AlignedBlockOffsetAligned = AlignedBlockOffsetBase + PaddingNeeded, - - % adr instruction will be at: Offset + byte_size(I1) - AdrInstructionOffset = Offset + byte_size(I1), - % For adr, PC is aligned down to 4-byte boundary (no +4 needed) - AdrPC = AdrInstructionOffset band (bnot 3), - % Calculate the correct adr offset to point to the branch instruction - AdrOffset = AlignedBlockOffsetAligned - AdrPC, - I2 = jit_armv6m_asm:adr(Temp, AdrOffset), - I3 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), - - % Skip over aligned block - - % aligned block size + padding - SkipOffset = 4 + PaddingNeeded, - I4 = jit_armv6m_asm:b(SkipOffset), - - % Padding if needed - Padding = <<0:(PaddingNeeded * 8)>>, - - % Aligned block: branch to jump table entry (we know the address directly) + % Calculate jump table entry offset JumpTableEntryOffset = Label * 4, - AlignedBlockBranchOffset = AlignedBlockOffsetAligned, - BranchOffset = JumpTableEntryOffset - AlignedBlockBranchOffset, - I5 = jit_armv6m_asm:b(BranchOffset), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}. + % Assume mov_immediate will be at most 10 bytes + MaxMovImmediateSize = 10, + EstimatedAdrOffset = Offset + MaxMovImmediateSize, + % +4 for adr base, +4 for minimum adr offset + EstimatedAdrPC = (EstimatedAdrOffset band (bnot 3)) + 4 + 4, + RelativeOffset = JumpTableEntryOffset - EstimatedAdrPC, + + % Generate mov_immediate with the relative offset + State1 = mov_immediate(State, Temp2, RelativeOffset), + Stream1 = State1#state.stream, + ActualMovImmediateSize = StreamModule:offset(Stream1) - Offset, + + % Calculate where adr instruction will actually be + ActualAdrOffset = Offset + ActualMovImmediateSize, + ActualAdrPC = (ActualAdrOffset band (bnot 3)) + 4, + + % Calculate the correct adr offset: ActualAdrPC + (AdrOffset - 4) + RelativeOffset = JumpTableEntryOffset + % So: AdrOffset = JumpTableEntryOffset - ActualAdrPC - RelativeOffset + 4 + AdrOffset = JumpTableEntryOffset - ActualAdrPC - RelativeOffset + 4, + % Ensure adr offset is multiple of 4 and within range + AdrOffset = ((AdrOffset + 3) div 4) * 4, + + % Get PC address using adr + I1 = jit_armv6m_asm:adr(Temp1, AdrOffset), + + % Add PC + offset, load jit_state, and store continuation + I2 = jit_armv6m_asm:adds(Temp2, Temp2, Temp1), + I3 = jit_armv6m_asm:ldr(Temp1, {sp, ?STACK_OFFSET_JITSTATE}), + I4 = jit_armv6m_asm:str(Temp2, ?JITSTATE_CONTINUATION(Temp1)), + + Code = <>, + Stream2 = StreamModule:append(Stream1, Code), + State1#state{stream = Stream2}. set_continuation_to_offset( #state{ @@ -2089,9 +2080,9 @@ mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Re Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when - Val >= -256 andalso Val < 0 + Val >= -255 andalso Val < 0 -> - I1 = jit_armv6m_asm:movs(Reg, bnot (Val)), + I1 = jit_armv6m_asm:movs(Reg, -Val), I2 = jit_armv6m_asm:negs(Reg, Reg), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; @@ -2295,12 +2286,18 @@ call_only_or_schedule_next( % Store back the decremented value I3 = jit_armv6m_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)), Stream1 = StreamModule:append(Stream0, <>), - BNEOffset = StreamModule:offset(Stream1), - % Branch to label if reduction count is not zero - I4 = jit_armv6m_asm:bcc(ne, 0), - Reloc1 = {Label, BNEOffset, {bcc, ne}}, - Stream2 = StreamModule:append(Stream1, I4), - State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]}, + % Use trampoline technique: branch if zero (eq) to skip over the long branch + % If not zero, we want to continue execution at Label + % If zero, we want to fall through to scheduling code + + % Skip over the unconditional branch (2 bytes) + I4 = jit_armv6m_asm:bcc(eq, 4), + % Unconditional branch to label (will be patched later) + I5 = jit_armv6m_asm:b(0), + LongBranchOffset = StreamModule:offset(Stream1) + byte_size(I4), + LongBranchReloc = {Label, LongBranchOffset, b}, + Stream2 = StreamModule:append(Stream1, <>), + State1 = State0#state{stream = Stream2, branches = [LongBranchReloc | Branches]}, State2 = set_continuation_to_label(State1, Label), call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 64cf1e707e..7f2d934b26 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1091,40 +1091,210 @@ call_only_or_schedule_next_and_label_relocation_test() -> Dump = << " 0: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 2: e018 b.n 0x36\n" + " 2: e019 b.n 0x38\n" " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 6: e001 b.n 0xc\n" " 8: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " a: e00f b.n 0x2c\n" + " a: e010 b.n 0x2e\n" " c: 9e00 ldr r6, [sp, #0]\n" " e: 68b7 ldr r7, [r6, #8]\n" " 10: 3f01 subs r7, #1\n" " 12: 60b7 str r7, [r6, #8]\n" - " 14: d10a bne.n 0x2c\n" - " 16: 9e00 ldr r6, [sp, #0]\n" - " 18: a701 add r7, pc, #4 ; (adr r7, 0x20)\n" - " 1a: 6077 str r7, [r6, #4]\n" - " 1c: e001 b.n 0x22\n" - " 1e: 0000 movs r0, r0\n" - " 20: e7f2 b.n 0x8\n" - " 22: 6897 ldr r7, [r2, #8]\n" - " 24: 9e05 ldr r6, [sp, #20]\n" - " 26: 9705 str r7, [sp, #20]\n" - " 28: 46b6 mov lr, r6\n" - " 2a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 2c: 6817 ldr r7, [r2, #0]\n" - " 2e: 9e05 ldr r6, [sp, #20]\n" - " 30: 9705 str r7, [sp, #20]\n" - " 32: 46b6 mov lr, r6\n" - " 34: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 36: 6857 ldr r7, [r2, #4]\n" - " 38: 9e05 ldr r6, [sp, #20]\n" - " 3a: 9705 str r7, [sp, #20]\n" - " 3c: 46b6 mov lr, r6\n" - " 3e: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 14: d000 beq.n 0x18\n" + " 16: e00a b.n 0x2e\n" + " 18: 2620 movs r6, #32\n" + " 1a: 4276 negs r6, r6\n" + " 1c: a702 add r7, pc, #8 ; (adr r7, 0x28)\n" + " 1e: 19f6 adds r6, r6, r7\n" + " 20: 9f00 ldr r7, [sp, #0]\n" + " 22: 607e str r6, [r7, #4]\n" + " 24: 6897 ldr r7, [r2, #8]\n" + " 26: 9e05 ldr r6, [sp, #20]\n" + " 28: 9705 str r7, [sp, #20]\n" + " 2a: 46b6 mov lr, r6\n" + " 2c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 2e: 6817 ldr r7, [r2, #0]\n" + " 30: 9e05 ldr r6, [sp, #20]\n" + " 32: 9705 str r7, [sp, #20]\n" + " 34: 46b6 mov lr, r6\n" + " 36: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 38: 6857 ldr r7, [r2, #4]\n" + " 3a: 9e05 ldr r6, [sp, #20]\n" + " 3c: 9705 str r7, [sp, #20]\n" + " 3e: 46b6 mov lr, r6\n" + " 40: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test with different alignment (unaligned start) +call_only_or_schedule_next_and_label_relocation_unaligned_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + %% First do a 2-byte instruction to create unaligned start + State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}), + State2 = ?BACKEND:jump_table(State1, 2), + Offset1 = ?BACKEND:offset(State2), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + Offset2 = ?BACKEND:offset(State3), + State4 = ?BACKEND:call_primitive_last(State3, 0, [ctx, jit_state]), + % OP_INT_CALL_END + Offset0 = ?BACKEND:offset(State4), + State5 = ?BACKEND:call_primitive_last(State4, 1, [ctx, jit_state]), + State6 = ?BACKEND:update_branches(State5, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: 6019 str r1, [r3, #0]\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: e019 b.n 0x3a\n" + " 6: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 8: e001 b.n 0xe\n" + " a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " c: e010 b.n 0x30\n" + " e: 9e00 ldr r6, [sp, #0]\n" + " 10: 68b7 ldr r7, [r6, #8]\n" + " 12: 3f01 subs r7, #1\n" + " 14: 60b7 str r7, [r6, #8]\n" + " 16: d000 beq.n 0x1a\n" + " 18: e00a b.n 0x30\n" + " 1a: 2624 movs r6, #36 ; 0x24\n" + " 1c: 4276 negs r6, r6\n" + " 1e: a703 add r7, pc, #12 ; (adr r7, 0x2c)\n" + " 20: 19f6 adds r6, r6, r7\n" + " 22: 9f00 ldr r7, [sp, #0]\n" + " 24: 607e str r6, [r7, #4]\n" + " 26: 6897 ldr r7, [r2, #8]\n" + " 28: 9e05 ldr r6, [sp, #20]\n" + " 2a: 9705 str r7, [sp, #20]\n" + " 2c: 46b6 mov lr, r6\n" + " 2e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 30: 6817 ldr r7, [r2, #0]\n" + " 32: 9e05 ldr r6, [sp, #20]\n" + " 34: 9705 str r7, [sp, #20]\n" + " 36: 46b6 mov lr, r6\n" + " 38: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 3a: 6857 ldr r7, [r2, #4]\n" + " 3c: 9e05 ldr r6, [sp, #20]\n" + " 3e: 9705 str r7, [sp, #20]\n" + " 40: 46b6 mov lr, r6\n" + " 42: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test with large gap (256+ bytes) to force mov_immediate path +call_only_or_schedule_next_and_label_relocation_large_gap_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + % Add large padding by emitting many move_to_native_register operations + % This creates a large gap between the jump table and the actual function bodies + % Each operation emits ~2 bytes, so 128 operations = ~256 bytes + StatePadded = lists:foldl( + fun(_, S) -> + ?BACKEND:move_to_native_register(S, {x_reg, 2}, r3) + end, + State1, + lists:seq(1, 128) + ), + Offset1 = ?BACKEND:offset(StatePadded), + State2 = ?BACKEND:call_only_or_schedule_next(StatePadded, 2), + Offset2 = ?BACKEND:offset(State2), + State3 = ?BACKEND:call_primitive_last(State2, 0, [ctx, jit_state]), + % OP_INT_CALL_END + Offset0 = ?BACKEND:offset(State3), + State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), + State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), + Stream = ?BACKEND:stream(State5), + % Extract the final section starting at 0x10c to verify the literal pool pattern + Dump = << + " 10c: 9e00 ldr r6, [sp, #0]\n" + " 10e: 68b7 ldr r7, [r6, #8]\n" + " 110: 3f01 subs r7, #1\n" + " 112: 60b7 str r7, [r6, #8]\n" + " 114: d000 beq.n 0x118\n" + " 116: e00c b.n 0x132\n" + " 118: 4e00 ldr r6, [pc, #0] ; (0x11c)\n" + " 11a: e001 b.n 0x120\n" + " 11c: fee0 ffff mcr2 15, 7, pc, cr0, cr15, {7} ; \n" + " 120: a701 add r7, pc, #4 ; (adr r7, 0x128)\n" + " 122: 19f6 adds r6, r6, r7\n" + " 124: 9f00 ldr r7, [sp, #0]\n" + " 126: 607e str r6, [r7, #4]\n" + " 128: 6897 ldr r7, [r2, #8]\n" + " 12a: 9e05 ldr r6, [sp, #20]\n" + " 12c: 9705 str r7, [sp, #20]\n" + " 12e: 46b6 mov lr, r6\n" + " 130: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 132: 6817 ldr r7, [r2, #0]\n" + " 134: 9e05 ldr r6, [sp, #20]\n" + " 136: 9705 str r7, [sp, #20]\n" + " 138: 46b6 mov lr, r6\n" + " 13a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 13c: 6857 ldr r7, [r2, #4]\n" + " 13e: 9e05 ldr r6, [sp, #20]\n" + " 140: 9705 str r7, [sp, #20]\n" + " 142: 46b6 mov lr, r6\n" + " 144: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + {_, RelevantBinary} = split_binary(Stream, 16#10c), + ?assertEqual(dump_to_bin(Dump), RelevantBinary). + +%% Test with large gap (256+ bytes) and different alignment to force literal pool path +call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + % Add large padding by emitting many move_to_native_register operations + % This creates a large gap between the jump table and the rest of the code + % Use 127 operations (instead of 128) to create different alignment + StatePadded = lists:foldl( + fun(_, S) -> + ?BACKEND:move_to_native_register(S, {x_reg, 2}, r3) + end, + State1, + lists:seq(1, 127) + ), + Offset1 = ?BACKEND:offset(StatePadded), + State2 = ?BACKEND:call_only_or_schedule_next(StatePadded, 2), + Offset2 = ?BACKEND:offset(State2), + State3 = ?BACKEND:call_primitive_last(State2, 0, [ctx, jit_state]), + % OP_INT_CALL_END + Offset0 = ?BACKEND:offset(State3), + State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), + State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), + Stream = ?BACKEND:stream(State5), + % Extract the final section starting at 0x10a to verify the literal pool pattern with different alignment + Dump = << + " 10a: 9e00 ldr r6, [sp, #0]\n" + " 10c: 68b7 ldr r7, [r6, #8]\n" + " 10e: 3f01 subs r7, #1\n" + " 110: 60b7 str r7, [r6, #8]\n" + " 112: d000 beq.n 0x116\n" + " 114: e00d b.n 0x132\n" + " 116: 4e01 ldr r6, [pc, #4] ; (0x11c)\n" + " 118: e002 b.n 0x120\n" + " 11a: 0000 movs r0, r0\n" + " 11c: fee0 ffff mcr2 15, 7, pc, cr0, cr15, {7} ; \n" + " 120: a701 add r7, pc, #4 ; (adr r7, 0x128)\n" + " 122: 19f6 adds r6, r6, r7\n" + " 124: 9f00 ldr r7, [sp, #0]\n" + " 126: 607e str r6, [r7, #4]\n" + " 128: 6897 ldr r7, [r2, #8]\n" + " 12a: 9e05 ldr r6, [sp, #20]\n" + " 12c: 9705 str r7, [sp, #20]\n" + " 12e: 46b6 mov lr, r6\n" + " 130: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 132: 6817 ldr r7, [r2, #0]\n" + " 134: 9e05 ldr r6, [sp, #20]\n" + " 136: 9705 str r7, [sp, #20]\n" + " 138: 46b6 mov lr, r6\n" + " 13a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 13c: 6857 ldr r7, [r2, #4]\n" + " 13e: 9e05 ldr r6, [sp, #20]\n" + " 140: 9705 str r7, [sp, #20]\n" + " 142: 46b6 mov lr, r6\n" + " 144: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + {_, RelevantBinary} = split_binary(Stream, 16#10a), + ?assertEqual(dump_to_bin(Dump), RelevantBinary). + call_bif_with_large_literal_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), @@ -1550,7 +1720,7 @@ call_ext_test() -> " 28: 6917 ldr r7, [r2, #16]\n" " 2a: 2202 movs r2, #2\n" " 2c: 2305 movs r3, #5\n" - " 2e: 2500 movs r5, #0\n" + " 2e: 2501 movs r5, #1\n" " 30: 426d negs r5, r5\n" " 32: 4639 mov r1, r7\n" " 34: 9f05 ldr r7, [sp, #20]\n" @@ -1908,7 +2078,7 @@ move_to_vm_register_test_() -> %% Test: Negative immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, -1, {x_reg, 0}, << - " 0: 2700 movs r7, #0\n" + " 0: 2701 movs r7, #1\n" " 2: 427f negs r7, r7\n" " 4: 6187 str r7, [r0, #24]" >>) @@ -2154,11 +2324,34 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(r7, Reg), Dump = << - " 0: 2729 movs r7, #41 ; 0x29\n" + " 0: 272a movs r7, #42 ; 0x2a\n" + " 2: 427f negs r7, r7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: -255 (boundary case) + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, -255), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r7, Reg), + Dump = << + " 0: 27ff movs r7, #255 ; 0xff\n" " 2: 427f negs r7, r7" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), + %% move_to_native_register/2: -256 (boundary case, should use literal pool) + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256), + Stream = ?BACKEND:stream(State1), + ?assertEqual(r7, Reg), + Dump = << + " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" + " 2: e001 b.n 0x8\n" + " 4: ff00 ffff vmaxnm.f32 , q8, " + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), %% move_to_native_register/2: {ptr, reg} ?_test(begin {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, r6}), From 00f1fb2aed2b7b4ccf5eae82f8388a22b9c70726 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 28 Aug 2025 08:26:44 +0200 Subject: [PATCH 27/97] armv6m: fix jump table to it works with labels beyond b range Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 101 ++++++++- libs/jit/src/jit_armv6m_asm.erl | 15 ++ tests/libs/jit/jit_armv6m_asm_tests.erl | 21 ++ tests/libs/jit/jit_armv6m_tests.erl | 266 +++++++++++++----------- 4 files changed, 269 insertions(+), 134 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 0059ad20a6..f0d455939d 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -340,6 +340,23 @@ assert_all_native_free(#state{ %% updated afterwards with update_branches/2. Emit branches for labels from %% 0 (special entry for lines and labels information) to LabelsCount included %% (special entry for OP_INT_CALL_END). +%% +%% On this platform, the jump table is composed of +%% ``` +%% ldr r3, offset_to_label_0 +%% b common +%% ldr r3, offset_to_label_1 +%% b common +%% ... +%% offset_to_label_0: dword (32 bits with offset) +%% offset_to_label_1: dword (32 bits with offset) +%% ... +%% common: +%% push {r1, r4, r5, r6, r7, lr} +%% add pc, pc, r3 +%% ``` +%% so each entry can be anywhere (we're not limited by b's range) +%% %% @end %% @param State current backend state %% @param LabelsCount number of labels in the module. @@ -350,21 +367,75 @@ jump_table(State, LabelsCount) -> jump_table0(State, 0, LabelsCount). jump_table0(State, N, LabelsCount) when N > LabelsCount -> - State; + % After all jump table entries, emit the common handler and offset data + emit_jump_table_common_and_data(State, LabelsCount); jump_table0( #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, N, LabelsCount ) -> Offset = StreamModule:offset(Stream0), - % Create 4-byte jump table entry: prolog (push) + unconditional branch - PushInstr = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]), - BranchInstr = jit_armv6m_asm:b(0), - % Branch is after push - Reloc = {N, Offset + byte_size(PushInstr), b}, - JumpEntry = <>, + % Calculate offsets at emit time: + % Layout: [entries] [common_handler] [data] + + % 4 bytes per entry + EntriesSize = (LabelsCount + 1) * 4, + % push (2 bytes) + add pc, pc, r3 (2 bytes) + CommonHandlerSize = 4, + + % Offset to common handler from current branch instruction (branch is at entry+2) + CommonHandlerOffset = EntriesSize - (N * 4) - 2, + + % Offset to data from current ldr instruction + + % PC when ldr executes + CurrentPC = Offset + 4, + DataOffset = Offset + EntriesSize + CommonHandlerSize + (N * 4) - CurrentPC, + + % Create jump table entry with calculated offsets + LdrInstr = jit_armv6m_asm:ldr(r3, {pc, DataOffset}), + % branch offset in bytes + BranchInstr = jit_armv6m_asm:b(CommonHandlerOffset), + JumpEntry = <>, Stream1 = StreamModule:append(Stream0, JumpEntry), - jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). + + % No relocations needed since we calculated everything at emit time + jump_table0(State#state{stream = Stream1, branches = Branches}, N + 1, LabelsCount). + +%%----------------------------------------------------------------------------- +%% @doc Emit the common handler and offset data for the jump table. +%% @end +%%----------------------------------------------------------------------------- +emit_jump_table_common_and_data( + #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + LabelsCount +) -> + % Emit common handler: push {r1, r4, r5, r6, r7, lr} + add pc, pc, r3 + CommonHandlerOffset = StreamModule:offset(Stream0), + PushInstr = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]), + AddInstrOffset = CommonHandlerOffset + byte_size(PushInstr), + % indirect jump using loaded offset + AddInstr = jit_armv6m_asm:add(pc, r3), + CommonHandler = <>, + Stream1 = StreamModule:append(Stream0, CommonHandler), + + % Emit offset data (32-bit offsets for each label, will be updated by update_branches/2) + {Stream2, NewBranches} = lists:foldl( + fun(N, {StreamAcc, BranchesAcc}) -> + Offset = StreamModule:offset(StreamAcc), + % Each data entry is a 32-bit offset that will be patched by update_branches/2 + + % placeholder, will be updated + DataEntry = <<0:32/little>>, + StreamNext = StreamModule:append(StreamAcc, DataEntry), + % Add relocation for this data entry, including the add instruction offset + DataReloc = {N, Offset, {jump_table_data, AddInstrOffset}}, + {StreamNext, [DataReloc | BranchesAcc]} + end, + {Stream1, Branches}, + lists:seq(0, LabelsCount) + ), + State#state{stream = Stream2, branches = NewBranches}. %%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. @@ -390,7 +461,15 @@ update_branches( case Type of {adr, Reg} when Rel rem 4 =:= 0 -> jit_armv6m_asm:adr(Reg, Rel); {adr, Reg} when Rel rem 4 =:= 2 -> jit_armv6m_asm:adr(Reg, Rel + 2); - b -> jit_armv6m_asm:b(Rel) + b -> + jit_armv6m_asm:b(Rel); + {jump_table_data, AddInstrOffset} -> + % Calculate offset from 'add pc, pc, r3' instruction + 4 to target label + + % PC when add instruction executes + AddPC = AddInstrOffset + 4, + RelativeOffset = LabelOffset - AddPC, + <> end, Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), update_branches(State#state{stream = Stream1, branches = BranchesT}, Labels). @@ -1964,6 +2043,10 @@ set_continuation_to_label( Stream2 = StreamModule:append(Stream1, Code), State1#state{stream = Stream2}. +%% @doc Set the contination to a given offset +%% Return a reference so the offset will be updated with update_branches +%% This is only used with OP_WAIT_TIMEOUT and the offset is after the current +%% code and not too far, so on Thumb we can use adr instruction. set_continuation_to_offset( #state{ stream_module = StreamModule, diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl index bba65f4e52..757805a583 100644 --- a/libs/jit/src/jit_armv6m_asm.erl +++ b/libs/jit/src/jit_armv6m_asm.erl @@ -19,6 +19,7 @@ -module(jit_armv6m_asm). -export([ + add/2, adds/2, adds/3, sub/2, @@ -148,6 +149,20 @@ cond_to_num(nv) -> 15. Reg =:= r5 orelse Reg =:= r6 orelse Reg =:= r7) ). +%% Emit an ADD instruction (Thumb encoding, high register form) +%% ADD Rd, Rm - adds register value to register (supports high registers including PC) +%% Encoding: 01000100 DN RmNum[3:0] RdLow3[2:0] +-spec add(arm_gpr_register(), arm_gpr_register()) -> binary(). +add(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + % Extract bit 3 of Rd + DN = (RdNum bsr 3) band 1, + RdLow3 = RdNum band 7, + % Build 16-bit instruction: 01000100 DN RmNum[3:0] RdLow3[2:0] + Instr = (2#01000100 bsl 8) bor (DN bsl 7) bor (RmNum bsl 3) bor RdLow3, + <>. + %% Emit an ADDS instruction (Thumb encoding) %% ADDS Rd, #imm - adds immediate value to register and sets flags (2-operand form) -spec adds(arm_gpr_register(), integer()) -> binary(). diff --git a/tests/libs/jit/jit_armv6m_asm_tests.erl b/tests/libs/jit/jit_armv6m_asm_tests.erl index e30d303319..8005844f10 100644 --- a/tests/libs/jit/jit_armv6m_asm_tests.erl +++ b/tests/libs/jit/jit_armv6m_asm_tests.erl @@ -49,6 +49,27 @@ adds_test_() -> ) ]. +add_test_() -> + [ + %% ARMv6-M Thumb ADD instructions (register, high registers supported) + %% ADD Rd, Rm - adds register value to register (supports PC) + ?_assertEqual( + asm(<<16#449f:16/little>>, "add pc, r3"), jit_armv6m_asm:add(pc, r3) + ), + ?_assertEqual( + asm(<<16#4440:16/little>>, "add r0, r8"), jit_armv6m_asm:add(r0, r8) + ), + ?_assertEqual( + asm(<<16#4488:16/little>>, "add r8, r1"), jit_armv6m_asm:add(r8, r1) + ), + ?_assertEqual( + asm(<<16#44c9:16/little>>, "add r9, r9"), jit_armv6m_asm:add(r9, r9) + ), + ?_assertEqual( + asm(<<16#4419:16/little>>, "add r1, r3"), jit_armv6m_asm:add(r1, r3) + ) + ]. + subs_test_() -> [ ?_assertEqual( diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 7f2d934b26..db03a38d7f 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1090,39 +1090,47 @@ call_only_or_schedule_next_and_label_relocation_test() -> Stream = ?BACKEND:stream(State5), Dump = << - " 0: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 2: e019 b.n 0x38\n" - " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 0: 4b03 ldr r3, [pc, #12]\n" + " 2: e003 b.n 0xc\n" + " 4: 4b04 ldr r3, [pc, #16]\n" " 6: e001 b.n 0xc\n" - " 8: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " a: e010 b.n 0x2e\n" - " c: 9e00 ldr r6, [sp, #0]\n" - " e: 68b7 ldr r7, [r6, #8]\n" - " 10: 3f01 subs r7, #1\n" - " 12: 60b7 str r7, [r6, #8]\n" - " 14: d000 beq.n 0x18\n" - " 16: e00a b.n 0x2e\n" - " 18: 2620 movs r6, #32\n" - " 1a: 4276 negs r6, r6\n" - " 1c: a702 add r7, pc, #8 ; (adr r7, 0x28)\n" - " 1e: 19f6 adds r6, r6, r7\n" - " 20: 9f00 ldr r7, [sp, #0]\n" - " 22: 607e str r6, [r7, #4]\n" - " 24: 6897 ldr r7, [r2, #8]\n" - " 26: 9e05 ldr r6, [sp, #20]\n" - " 28: 9705 str r7, [sp, #20]\n" - " 2a: 46b6 mov lr, r6\n" - " 2c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 2e: 6817 ldr r7, [r2, #0]\n" - " 30: 9e05 ldr r6, [sp, #20]\n" - " 32: 9705 str r7, [sp, #20]\n" - " 34: 46b6 mov lr, r6\n" - " 36: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 38: 6857 ldr r7, [r2, #4]\n" - " 3a: 9e05 ldr r6, [sp, #20]\n" - " 3c: 9705 str r7, [sp, #20]\n" - " 3e: 46b6 mov lr, r6\n" - " 40: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 8: 4b05 ldr r3, [pc, #20]\n" + " a: e7ff b.n 0xc\n" + " c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " e: 449f add pc, r3\n" + " 10: 0036 movs r6, r6\n" + " 12: 0000 movs r0, r0\n" + " 14: 000a movs r2, r1\n" + " 16: 0000 movs r0, r0\n" + " 18: 002c movs r4, r5\n" + " 1a: 0000 movs r0, r0\n" + " 1c: 9e00 ldr r6, [sp, #0]\n" + " 1e: 68b7 ldr r7, [r6, #8]\n" + " 20: 3f01 subs r7, #1\n" + " 22: 60b7 str r7, [r6, #8]\n" + " 24: d000 beq.n 0x28\n" + " 26: e00a b.n 0x3e\n" + " 28: 2630 movs r6, #48\n" + " 2a: 4276 negs r6, r6\n" + " 2c: a702 add r7, pc, #8\n" + " 2e: 19f6 adds r6, r6, r7\n" + " 30: 9f00 ldr r7, [sp, #0]\n" + " 32: 607e str r6, [r7, #4]\n" + " 34: 6897 ldr r7, [r2, #8]\n" + " 36: 9e05 ldr r6, [sp, #20]\n" + " 38: 9705 str r7, [sp, #20]\n" + " 3a: 46b6 mov lr, r6\n" + " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 3e: 6817 ldr r7, [r2, #0]\n" + " 40: 9e05 ldr r6, [sp, #20]\n" + " 42: 9705 str r7, [sp, #20]\n" + " 44: 46b6 mov lr, r6\n" + " 46: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 48: 6857 ldr r7, [r2, #4]\n" + " 4a: 9e05 ldr r6, [sp, #20]\n" + " 4c: 9705 str r7, [sp, #20]\n" + " 4e: 46b6 mov lr, r6\n" + " 50: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1144,39 +1152,47 @@ call_only_or_schedule_next_and_label_relocation_unaligned_test() -> Dump = << " 0: 6019 str r1, [r3, #0]\n" - " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 4: e019 b.n 0x3a\n" - " 6: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 2: 4b03 ldr r3, [pc, #12]\n" + " 4: e003 b.n 0xe\n" + " 6: 4b04 ldr r3, [pc, #16]\n" " 8: e001 b.n 0xe\n" - " a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " c: e010 b.n 0x30\n" - " e: 9e00 ldr r6, [sp, #0]\n" - " 10: 68b7 ldr r7, [r6, #8]\n" - " 12: 3f01 subs r7, #1\n" - " 14: 60b7 str r7, [r6, #8]\n" - " 16: d000 beq.n 0x1a\n" - " 18: e00a b.n 0x30\n" - " 1a: 2624 movs r6, #36 ; 0x24\n" - " 1c: 4276 negs r6, r6\n" - " 1e: a703 add r7, pc, #12 ; (adr r7, 0x2c)\n" - " 20: 19f6 adds r6, r6, r7\n" - " 22: 9f00 ldr r7, [sp, #0]\n" - " 24: 607e str r6, [r7, #4]\n" - " 26: 6897 ldr r7, [r2, #8]\n" - " 28: 9e05 ldr r6, [sp, #20]\n" - " 2a: 9705 str r7, [sp, #20]\n" - " 2c: 46b6 mov lr, r6\n" - " 2e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 30: 6817 ldr r7, [r2, #0]\n" - " 32: 9e05 ldr r6, [sp, #20]\n" - " 34: 9705 str r7, [sp, #20]\n" - " 36: 46b6 mov lr, r6\n" - " 38: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 3a: 6857 ldr r7, [r2, #4]\n" - " 3c: 9e05 ldr r6, [sp, #20]\n" - " 3e: 9705 str r7, [sp, #20]\n" - " 40: 46b6 mov lr, r6\n" - " 42: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " a: 4b05 ldr r3, [pc, #20]\n" + " c: e7ff b.n 0xe\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 0036 movs r6, r6\n" + " 14: 0000 movs r0, r0\n" + " 16: 000a movs r2, r1\n" + " 18: 0000 movs r0, r0\n" + " 1a: 002c movs r4, r5\n" + " 1c: 0000 movs r0, r0\n" + " 1e: 9e00 ldr r6, [sp, #0]\n" + " 20: 68b7 ldr r7, [r6, #8]\n" + " 22: 3f01 subs r7, #1\n" + " 24: 60b7 str r7, [r6, #8]\n" + " 26: d000 beq.n 0x2a\n" + " 28: e00a b.n 0x40\n" + " 2a: 2634 movs r6, #52\n" + " 2c: 4276 negs r6, r6\n" + " 2e: a703 add r7, pc, #12\n" + " 30: 19f6 adds r6, r6, r7\n" + " 32: 9f00 ldr r7, [sp, #0]\n" + " 34: 607e str r6, [r7, #4]\n" + " 36: 6897 ldr r7, [r2, #8]\n" + " 38: 9e05 ldr r6, [sp, #20]\n" + " 3a: 9705 str r7, [sp, #20]\n" + " 3c: 46b6 mov lr, r6\n" + " 3e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 40: 6817 ldr r7, [r2, #0]\n" + " 42: 9e05 ldr r6, [sp, #20]\n" + " 44: 9705 str r7, [sp, #20]\n" + " 46: 46b6 mov lr, r6\n" + " 48: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 4a: 6857 ldr r7, [r2, #4]\n" + " 4c: 9e05 ldr r6, [sp, #20]\n" + " 4e: 9705 str r7, [sp, #20]\n" + " 50: 46b6 mov lr, r6\n" + " 52: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1203,38 +1219,38 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), Stream = ?BACKEND:stream(State5), - % Extract the final section starting at 0x10c to verify the literal pool pattern + % Extract the final section starting at 0x11c to verify the literal pool pattern Dump = << - " 10c: 9e00 ldr r6, [sp, #0]\n" - " 10e: 68b7 ldr r7, [r6, #8]\n" - " 110: 3f01 subs r7, #1\n" - " 112: 60b7 str r7, [r6, #8]\n" - " 114: d000 beq.n 0x118\n" - " 116: e00c b.n 0x132\n" - " 118: 4e00 ldr r6, [pc, #0] ; (0x11c)\n" - " 11a: e001 b.n 0x120\n" - " 11c: fee0 ffff mcr2 15, 7, pc, cr0, cr15, {7} ; \n" - " 120: a701 add r7, pc, #4 ; (adr r7, 0x128)\n" - " 122: 19f6 adds r6, r6, r7\n" - " 124: 9f00 ldr r7, [sp, #0]\n" - " 126: 607e str r6, [r7, #4]\n" - " 128: 6897 ldr r7, [r2, #8]\n" - " 12a: 9e05 ldr r6, [sp, #20]\n" - " 12c: 9705 str r7, [sp, #20]\n" - " 12e: 46b6 mov lr, r6\n" - " 130: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 132: 6817 ldr r7, [r2, #0]\n" - " 134: 9e05 ldr r6, [sp, #20]\n" - " 136: 9705 str r7, [sp, #20]\n" - " 138: 46b6 mov lr, r6\n" - " 13a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 13c: 6857 ldr r7, [r2, #4]\n" - " 13e: 9e05 ldr r6, [sp, #20]\n" - " 140: 9705 str r7, [sp, #20]\n" - " 142: 46b6 mov lr, r6\n" - " 144: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 11c: 9e00 ldr r6, [sp, #0]\n" + " 11e: 68b7 ldr r7, [r6, #8]\n" + " 120: 3f01 subs r7, #1\n" + " 122: 60b7 str r7, [r6, #8]\n" + " 124: d000 beq.n 0x128\n" + " 126: e00c b.n 0x142\n" + " 128: 4e00 ldr r6, [pc, #0] ; (0x12c)\n" + " 12a: e001 b.n 0x130\n" + " 12c: fed0 ffff mrc2 15, 6, pc, cr0, cr15, {7}\n" + " 130: a701 add r7, pc, #4 ; (adr r7, 0x138)\n" + " 132: 19f6 adds r6, r6, r7\n" + " 134: 9f00 ldr r7, [sp, #0]\n" + " 136: 607e str r6, [r7, #4]\n" + " 138: 6897 ldr r7, [r2, #8]\n" + " 13a: 9e05 ldr r6, [sp, #20]\n" + " 13c: 9705 str r7, [sp, #20]\n" + " 13e: 46b6 mov lr, r6\n" + " 140: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 142: 6817 ldr r7, [r2, #0]\n" + " 144: 9e05 ldr r6, [sp, #20]\n" + " 146: 9705 str r7, [sp, #20]\n" + " 148: 46b6 mov lr, r6\n" + " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 14c: 6857 ldr r7, [r2, #4]\n" + " 14e: 9e05 ldr r6, [sp, #20]\n" + " 150: 9705 str r7, [sp, #20]\n" + " 152: 46b6 mov lr, r6\n" + " 154: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, - {_, RelevantBinary} = split_binary(Stream, 16#10c), + {_, RelevantBinary} = split_binary(Stream, 16#11c), ?assertEqual(dump_to_bin(Dump), RelevantBinary). %% Test with large gap (256+ bytes) and different alignment to force literal pool path @@ -1260,39 +1276,39 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), Stream = ?BACKEND:stream(State5), - % Extract the final section starting at 0x10a to verify the literal pool pattern with different alignment + % Extract the final section starting at 0x11a to verify the literal pool pattern with different alignment Dump = << - " 10a: 9e00 ldr r6, [sp, #0]\n" - " 10c: 68b7 ldr r7, [r6, #8]\n" - " 10e: 3f01 subs r7, #1\n" - " 110: 60b7 str r7, [r6, #8]\n" - " 112: d000 beq.n 0x116\n" - " 114: e00d b.n 0x132\n" - " 116: 4e01 ldr r6, [pc, #4] ; (0x11c)\n" - " 118: e002 b.n 0x120\n" - " 11a: 0000 movs r0, r0\n" - " 11c: fee0 ffff mcr2 15, 7, pc, cr0, cr15, {7} ; \n" - " 120: a701 add r7, pc, #4 ; (adr r7, 0x128)\n" - " 122: 19f6 adds r6, r6, r7\n" - " 124: 9f00 ldr r7, [sp, #0]\n" - " 126: 607e str r6, [r7, #4]\n" - " 128: 6897 ldr r7, [r2, #8]\n" - " 12a: 9e05 ldr r6, [sp, #20]\n" - " 12c: 9705 str r7, [sp, #20]\n" - " 12e: 46b6 mov lr, r6\n" - " 130: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 132: 6817 ldr r7, [r2, #0]\n" - " 134: 9e05 ldr r6, [sp, #20]\n" - " 136: 9705 str r7, [sp, #20]\n" - " 138: 46b6 mov lr, r6\n" - " 13a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 13c: 6857 ldr r7, [r2, #4]\n" - " 13e: 9e05 ldr r6, [sp, #20]\n" - " 140: 9705 str r7, [sp, #20]\n" - " 142: 46b6 mov lr, r6\n" - " 144: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 11a: 9e00 ldr r6, [sp, #0]\n" + " 11c: 68b7 ldr r7, [r6, #8]\n" + " 11e: 3f01 subs r7, #1\n" + " 120: 60b7 str r7, [r6, #8]\n" + " 122: d000 beq.n 0x126\n" + " 124: e00d b.n 0x142\n" + " 126: 4e01 ldr r6, [pc, #4] ; (0x12c)\n" + " 128: e002 b.n 0x130\n" + " 12a: 0000 movs r0, r0\n" + " 12c: fed0 ffff mrc2 15, 6, pc, cr0, cr15, {7}\n" + " 130: a701 add r7, pc, #4 ; (adr r7, 0x138)\n" + " 132: 19f6 adds r6, r6, r7\n" + " 134: 9f00 ldr r7, [sp, #0]\n" + " 136: 607e str r6, [r7, #4]\n" + " 138: 6897 ldr r7, [r2, #8]\n" + " 13a: 9e05 ldr r6, [sp, #20]\n" + " 13c: 9705 str r7, [sp, #20]\n" + " 13e: 46b6 mov lr, r6\n" + " 140: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 142: 6817 ldr r7, [r2, #0]\n" + " 144: 9e05 ldr r6, [sp, #20]\n" + " 146: 9705 str r7, [sp, #20]\n" + " 148: 46b6 mov lr, r6\n" + " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 14c: 6857 ldr r7, [r2, #4]\n" + " 14e: 9e05 ldr r6, [sp, #20]\n" + " 150: 9705 str r7, [sp, #20]\n" + " 152: 46b6 mov lr, r6\n" + " 154: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, - {_, RelevantBinary} = split_binary(Stream, 16#10a), + {_, RelevantBinary} = split_binary(Stream, 16#11a), ?assertEqual(dump_to_bin(Dump), RelevantBinary). call_bif_with_large_literal_integer_test() -> From b26f49e03a3c13f248155ebc4b2868205bcde514 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 28 Aug 2025 21:51:50 +0200 Subject: [PATCH 28/97] armv6m: fix debugger function Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 2 +- libs/jit/src/jit_armv6m_asm.erl | 8 ++++++++ tests/libs/jit/jit_armv6m_asm_tests.erl | 19 +++++++++++++++++++ tests/libs/jit/jit_armv6m_tests.erl | 10 ++++++++++ 4 files changed, 38 insertions(+), 1 deletion(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index f0d455939d..d01798c8dc 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -270,7 +270,7 @@ offset(#state{stream_module = StreamModule, stream = Stream}) -> %%----------------------------------------------------------------------------- -spec debugger(state()) -> state(). debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> - Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:brk(0)), + Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:bkpt(0)), State#state{stream = Stream1}. %%----------------------------------------------------------------------------- diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl index 757805a583..b8d349dbb0 100644 --- a/libs/jit/src/jit_armv6m_asm.erl +++ b/libs/jit/src/jit_armv6m_asm.erl @@ -29,6 +29,7 @@ muls/2, b/1, bcc/2, + bkpt/1, blx/1, bx/1, cmp/2, @@ -228,6 +229,13 @@ bx(Reg) when is_atom(Reg) -> %% This branches to register without setting LR <<(16#4700 bor (RegNum bsl 3)):16/little>>. +%% Emit a BKPT (breakpoint) instruction +-spec bkpt(byte()) -> binary(). +bkpt(Imm) when is_integer(Imm), Imm >= 0, Imm =< 16#FF -> + %% ARM Thumb BKPT encoding: 1011 1110 iiii iiii + %% where iiii iiii is the 8-bit immediate value + <<(16#BE00 bor (Imm band 16#FF)):16/little>>. + %% Emit a load register (LDR) instruction -spec ldr(arm_gpr_register(), {arm_gpr_register(), integer()}) -> binary(). %% LDR Rt, [Rn, #imm5*4] - 16-bit immediate offset (0-124, multiple of 4) diff --git a/tests/libs/jit/jit_armv6m_asm_tests.erl b/tests/libs/jit/jit_armv6m_asm_tests.erl index 8005844f10..a46e370f36 100644 --- a/tests/libs/jit/jit_armv6m_asm_tests.erl +++ b/tests/libs/jit/jit_armv6m_asm_tests.erl @@ -578,6 +578,25 @@ pop_test_() -> ) ]. +bkpt_test_() -> + [ + %% BKPT #0 + ?_assertEqual( + asm(<<16#be00:16/little>>, "bkpt #0"), + jit_armv6m_asm:bkpt(0) + ), + %% BKPT #1 + ?_assertEqual( + asm(<<16#be01:16/little>>, "bkpt #1"), + jit_armv6m_asm:bkpt(1) + ), + %% BKPT #255 + ?_assertEqual( + asm(<<16#beff:16/little>>, "bkpt #255"), + jit_armv6m_asm:bkpt(255) + ) + ]. + asm(Bin, Str) -> case erlang:system_info(machine) of "ATOM" -> diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index db03a38d7f..6cc0bb1653 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -2569,6 +2569,16 @@ set_args1_y_reg_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test debugger function +debugger_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:debugger(State0), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: be00 bkpt 0x0000" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). From 15e101904065afa76709ea589a33f421c5adda00 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 10:43:17 +0200 Subject: [PATCH 29/97] armv6m: move labels to backend states Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 59 ++++++++++---- tests/libs/jit/jit_armv6m_tests.erl | 121 ++++++++++++++-------------- 2 files changed, 107 insertions(+), 73 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index d01798c8dc..011dce8c1b 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -31,7 +31,7 @@ free_native_registers/2, assert_all_native_free/1, jump_table/2, - update_branches/2, + update_branches/1, call_primitive/3, call_primitive_last/3, call_primitive_with_cp/3, @@ -65,7 +65,9 @@ call_or_schedule_next/2, call_only_or_schedule_next/2, call_func_ptr/3, - return_labels_and_lines/3 + return_labels_and_lines/2, + add_label/2, + add_label/3 ]). -include_lib("jit.hrl"). @@ -133,7 +135,8 @@ offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], available_regs :: [armv6m_register()], - used_regs :: [armv6m_register()] + used_regs :: [armv6m_register()], + labels :: [{integer() | reference(), integer()}] }). -type state() :: #state{}. @@ -238,7 +241,8 @@ new(_Variant, StreamModule, Stream) -> branches = [], offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, - used_regs = [] + used_regs = [], + labels = [] }. %%----------------------------------------------------------------------------- @@ -441,19 +445,18 @@ emit_jump_table_common_and_data( %% @doc Rewrite stream to update all branches for labels. %% @end %% @param State current backend state -%% @param Labels list of tuples with label, offset and size of the branch in bits %% @return Updated backend state %%----------------------------------------------------------------------------- --spec update_branches(state(), [{non_neg_integer(), non_neg_integer()}]) -> state(). -update_branches(#state{branches = []} = State, _Labels) -> +-spec update_branches(state()) -> state(). +update_branches(#state{branches = []} = State) -> State; update_branches( #state{ stream_module = StreamModule, stream = Stream0, - branches = [{Label, Offset, Type} | BranchesT] - } = State, - Labels + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State ) -> {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), Rel = LabelOffset - Offset, @@ -472,7 +475,7 @@ update_branches( <> end, Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), - update_branches(State#state{stream = Stream1, branches = BranchesT}, Labels). + update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- %% @doc Generate code to load a primitive function pointer into a register @@ -2466,7 +2469,6 @@ set_bs( %%----------------------------------------------------------------------------- %% @param State current state -%% @param SortedLabels labels information, sorted by offset %% @param SortedLines line information, sorted by offset %% @doc Build labels and line tables and encode a function that returns it. %% In this case, the function returns the effective address of what immediately @@ -2477,11 +2479,16 @@ set_bs( return_labels_and_lines( #state{ stream_module = StreamModule, - stream = Stream0 + stream = Stream0, + labels = Labels } = State, - SortedLabels, SortedLines ) -> + SortedLabels = lists:keysort(2, [ + {Label, LabelOffset} + || {Label, LabelOffset} <- Labels, is_integer(Label) + ]), + % Check if current offset is 4-byte aligned CurrentOffset = StreamModule:offset(Stream0), @@ -2540,3 +2547,27 @@ args_regs(Args) -> end, Args ). + +%%----------------------------------------------------------------------------- +%% @doc Add a label at the current offset +%% @end +%% @param State current backend state +%% @param Label the label number or reference +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec add_label(state(), integer() | reference()) -> state(). +add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) -> + Offset = StreamModule:offset(Stream), + add_label(State, Label, Offset). + +%%----------------------------------------------------------------------------- +%% @doc Add a label at a specific offset +%% @end +%% @param State current backend state +%% @param Label the label number or reference +%% @param Offset the explicit offset for this label +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec add_label(state(), integer() | reference(), integer()) -> state(). +add_label(#state{labels = Labels} = State, Label, Offset) -> + State#state{labels = [{Label, Offset} | Labels]}. diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 6cc0bb1653..487c40d2e6 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1079,15 +1079,15 @@ shift_left_test() -> call_only_or_schedule_next_and_label_relocation_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:jump_table(State0, 2), - Offset1 = ?BACKEND:offset(State1), - State2 = ?BACKEND:call_only_or_schedule_next(State1, 2), - Offset2 = ?BACKEND:offset(State2), - State3 = ?BACKEND:call_primitive_last(State2, 0, [ctx, jit_state]), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + State4 = ?BACKEND:add_label(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), % OP_INT_CALL_END - Offset0 = ?BACKEND:offset(State3), - State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), - State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), - Stream = ?BACKEND:stream(State5), + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), Dump = << " 0: 4b03 ldr r3, [pc, #12]\n" @@ -1140,15 +1140,15 @@ call_only_or_schedule_next_and_label_relocation_unaligned_test() -> %% First do a 2-byte instruction to create unaligned start State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}), State2 = ?BACKEND:jump_table(State1, 2), - Offset1 = ?BACKEND:offset(State2), - State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), - Offset2 = ?BACKEND:offset(State3), - State4 = ?BACKEND:call_primitive_last(State3, 0, [ctx, jit_state]), + State3 = ?BACKEND:add_label(State2, 1), + State4 = ?BACKEND:call_only_or_schedule_next(State3, 2), + State5 = ?BACKEND:add_label(State4, 2), + State6 = ?BACKEND:call_primitive_last(State5, 0, [ctx, jit_state]), % OP_INT_CALL_END - Offset0 = ?BACKEND:offset(State4), - State5 = ?BACKEND:call_primitive_last(State4, 1, [ctx, jit_state]), - State6 = ?BACKEND:update_branches(State5, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), - Stream = ?BACKEND:stream(State6), + State7 = ?BACKEND:add_label(State6, 0), + State8 = ?BACKEND:call_primitive_last(State7, 1, [ctx, jit_state]), + State9 = ?BACKEND:update_branches(State8), + Stream = ?BACKEND:stream(State9), Dump = << " 0: 6019 str r1, [r3, #0]\n" @@ -1210,15 +1210,15 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> State1, lists:seq(1, 128) ), - Offset1 = ?BACKEND:offset(StatePadded), - State2 = ?BACKEND:call_only_or_schedule_next(StatePadded, 2), - Offset2 = ?BACKEND:offset(State2), - State3 = ?BACKEND:call_primitive_last(State2, 0, [ctx, jit_state]), + State2 = ?BACKEND:add_label(StatePadded, 1), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + State4 = ?BACKEND:add_label(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), % OP_INT_CALL_END - Offset0 = ?BACKEND:offset(State3), - State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), - State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), - Stream = ?BACKEND:stream(State5), + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), % Extract the final section starting at 0x11c to verify the literal pool pattern Dump = << " 11c: 9e00 ldr r6, [sp, #0]\n" @@ -1267,15 +1267,15 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> State1, lists:seq(1, 127) ), - Offset1 = ?BACKEND:offset(StatePadded), - State2 = ?BACKEND:call_only_or_schedule_next(StatePadded, 2), - Offset2 = ?BACKEND:offset(State2), - State3 = ?BACKEND:call_primitive_last(State2, 0, [ctx, jit_state]), + State2 = ?BACKEND:add_label(StatePadded, 1), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + State4 = ?BACKEND:add_label(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), % OP_INT_CALL_END - Offset0 = ?BACKEND:offset(State3), - State4 = ?BACKEND:call_primitive_last(State3, 1, [ctx, jit_state]), - State5 = ?BACKEND:update_branches(State4, [{0, Offset0}, {1, Offset1}, {2, Offset2}]), - Stream = ?BACKEND:stream(State5), + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), % Extract the final section starting at 0x11a to verify the literal pool pattern with different alignment Dump = << " 11a: 9e00 ldr r6, [sp, #0]\n" @@ -1412,9 +1412,9 @@ is_integer_test() -> State3 = ?BACKEND:free_native_registers(State2, [Reg]), ?BACKEND:assert_all_native_free(State3), Offset = ?BACKEND:offset(State3), - Labels = [{Label, Offset + 16#100}], - State4 = ?BACKEND:update_branches(State3, Labels), - Stream = ?BACKEND:stream(State4), + State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 43fe mvns r6, r7\n" @@ -1469,9 +1469,9 @@ is_number_test() -> State3 = ?BACKEND:free_native_registers(State2, [Reg]), ?BACKEND:assert_all_native_free(State3), Offset = ?BACKEND:offset(State3), - Labels = [{Label, Offset + 16#100}], - State4 = ?BACKEND:update_branches(State3, Labels), - Stream = ?BACKEND:stream(State4), + State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 43fe mvns r6, r7\n" @@ -1509,11 +1509,11 @@ is_boolean_test() -> end) end), State3 = ?BACKEND:free_native_registers(State2, [Reg]), - Offset = ?BACKEND:offset(State3), - Labels = [{Label, Offset + 16#100}], ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:update_branches(State3, Labels), - Stream = ?BACKEND:stream(State4), + Offset = ?BACKEND:offset(State3), + State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 2f4b cmp r7, #75 ; 0x4b\n" @@ -1534,22 +1534,22 @@ wait_timeout_test() -> State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [ ctx, jit_state, {free, TimeoutReg}, Label ]), - Offset0 = ?BACKEND:offset(State3), - State4 = ?BACKEND:continuation_entry_point(State3), - {State5, ResultReg0} = ?BACKEND:call_primitive(State4, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + State4 = ?BACKEND:add_label(State3, OffsetRef0), + State5 = ?BACKEND:continuation_entry_point(State4), + {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ ctx, jit_state ]), - State6 = ?BACKEND:return_if_not_equal_to_ctx(State5, {free, ResultReg0}), + State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}), % ?WAITING_TIMEOUT_EXPIRED - {State7, ResultReg1} = ?BACKEND:call_primitive(State6, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]), - State8 = ?BACKEND:if_block(State7, {{free, ResultReg1}, '==', 0}, fun(BlockSt) -> + {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]), + State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) -> ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [ ctx, jit_state, Label ]) end), - State9 = ?BACKEND:update_branches(State8, [{OffsetRef0, Offset0}]), + State10 = ?BACKEND:update_branches(State9), - Stream = ?BACKEND:stream(State9), + Stream = ?BACKEND:stream(State10), Dump = << " 0: a707 add r7, pc, #28 ; (adr r7, 0x20)\n" " 2: 9e00 ldr r6, [sp, #0]\n" @@ -1596,19 +1596,19 @@ wait_timeout_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -%% Test return_labels_and_lines/3 function +%% Test return_labels_and_lines/2 function return_labels_and_lines_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), % Test return_labels_and_lines with some sample labels and lines + State1 = ?BACKEND:add_label(State0, 2, 32), + State2 = ?BACKEND:add_label(State1, 1, 16), - % {Label, Offset} pairs - SortedLabels = [{1, 16}, {2, 32}], % {Line, Offset} pairs SortedLines = [{10, 16}, {20, 32}], - State1 = ?BACKEND:return_labels_and_lines(State0, SortedLabels, SortedLines), - Stream = ?BACKEND:stream(State1), + State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), + Stream = ?BACKEND:stream(State3), % Should have generated adr + bx lr + labels table + lines table % adr = 4 bytes, bx = 2 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes @@ -1637,7 +1637,7 @@ return_labels_and_lines_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -%% Test return_labels_and_lines/3 with unaligned offset - should fail +%% Test return_labels_and_lines/2 with unaligned offset - should fail return_labels_and_lines_unaligned_test() -> % Create a new state with a 2-byte instruction already in the stream % to simulate starting at an odd offset (offset 2 instead of 0) @@ -1647,11 +1647,14 @@ return_labels_and_lines_unaligned_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, TempStream), % Test return_labels_and_lines with some sample labels and lines - SortedLabels = [{1, 16}, {2, 32}], + State1 = ?BACKEND:add_label(State0, 2, 32), + State2 = ?BACKEND:add_label(State1, 1, 16), + + % {Line, Offset} pairs SortedLines = [{10, 16}, {20, 32}], - State1 = ?BACKEND:return_labels_and_lines(State0, SortedLabels, SortedLines), - Stream = ?BACKEND:stream(State1), + State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), + Stream = ?BACKEND:stream(State3), Dump = << " 0: 4770 bx lr\n" From 46185d1da4f143dee03ac5d7e7ed56828a29b626 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 10:48:59 +0200 Subject: [PATCH 30/97] armv6m: optimize jump_to_label with known labels Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 011dce8c1b..a4eb3dc49c 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -749,14 +749,25 @@ return_if_not_equal_to_ctx( %% @return Updated backend state %%----------------------------------------------------------------------------- jump_to_label( - #state{stream_module = StreamModule, stream = Stream0, branches = AccBranches} = State, Label + #state{stream_module = StreamModule, stream = Stream0, branches = AccBranches, labels = Labels} = + State, + Label ) -> Offset = StreamModule:offset(Stream0), - % Placeholder offset, will be patched - I1 = jit_armv6m_asm:b(0), - Reloc = {Label, Offset, b}, - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1, branches = [Reloc | AccBranches]}. + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct branch without relocation + Rel = LabelOffset - Offset, + I1 = jit_armv6m_asm:b(Rel), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + I1 = jit_armv6m_asm:b(0), + Reloc = {Label, Offset, b}, + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1, branches = [Reloc | AccBranches]} + end. %%----------------------------------------------------------------------------- %% @doc Emit an if block, i.e. emit a test of a condition and conditionnally From c4e30232b15f3c5509b27ed1432a3d5b09626ab3 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 30 Aug 2025 06:47:11 +0200 Subject: [PATCH 31/97] armv6m: implement far branches Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 246 ++++++++++++--- tests/libs/jit/jit_armv6m_tests.erl | 449 +++++++++++++++++++--------- 2 files changed, 521 insertions(+), 174 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index a4eb3dc49c..20df565f71 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -464,11 +464,58 @@ update_branches( case Type of {adr, Reg} when Rel rem 4 =:= 0 -> jit_armv6m_asm:adr(Reg, Rel); {adr, Reg} when Rel rem 4 =:= 2 -> jit_armv6m_asm:adr(Reg, Rel + 2); - b -> - jit_armv6m_asm:b(Rel); + {far_branch, Size, TempReg} -> + % Check if branch can now be optimized to near branch + if + Rel >= -2044 andalso Rel =< 2050 andalso (Rel rem 2) =:= 0 -> + % Optimize to near branch: b + nops to fill original size + DirectBranch = jit_armv6m_asm:b(Rel), + % Fill remaining bytes with NOPs + NopCount = (Size - 2) div 2, + Nops = <<<<(jit_armv6m_asm:nop())/binary>> || _ <- lists:seq(1, NopCount)>>, + <>; + true -> + % Keep far branch sequence, calculate correct ldr immediate and update literal + + % Calculate where the literal should be placed (same logic as generation) + LdrOffset = Offset, + % ldr + add + bx = 6 bytes + AfterInstructionsOffset = Offset + 6, + AlignedLiteralOffset = ((AfterInstructionsOffset + 3) band (bnot 3)), + + % Calculate correct PC-relative offset for ldr instruction + + % PC aligned down + PCAtLdrExecution = (LdrOffset + 4) band (bnot 3), + LdrImmediate = AlignedLiteralOffset - PCAtLdrExecution, + + % Calculate the relative offset for the literal value + % This is the offset from the add instruction's PC to the target + % The add instruction is at Offset + 2, so PC = Offset + 2 + 4 = Offset + 6 + AddPCOffset = Offset + 6, + RelativeOffset = LabelOffset - AddPCOffset, + + if + Size =:= 12 -> + % 12-byte sequence with alignment + I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + I2 = jit_armv6m_asm:add(TempReg, pc), + I3 = jit_armv6m_asm:bx(TempReg), + I4 = jit_armv6m_asm:nop(), + I5 = <>, + <>; + % Size =:= 10 + true -> + % 10-byte sequence without alignment + I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + I2 = jit_armv6m_asm:add(TempReg, pc), + I3 = jit_armv6m_asm:bx(TempReg), + I4 = <>, + <> + end + end; {jump_table_data, AddInstrOffset} -> % Calculate offset from 'add pc, pc, r3' instruction + 4 to target label - % PC when add instruction executes AddPC = AddInstrOffset + 4, RelativeOffset = LabelOffset - AddPC, @@ -749,25 +796,110 @@ return_if_not_equal_to_ctx( %% @return Updated backend state %%----------------------------------------------------------------------------- jump_to_label( - #state{stream_module = StreamModule, stream = Stream0, branches = AccBranches, labels = Labels} = - State, - Label + #state{stream_module = StreamModule, stream = Stream0, labels = Labels} = State0, Label ) -> + LabelLookupResult = lists:keyfind(Label, 1, Labels), Offset = StreamModule:offset(Stream0), - case lists:keyfind(Label, 1, Labels) of - {Label, LabelOffset} -> - % Label is already known, emit direct branch without relocation - Rel = LabelOffset - Offset, - I1 = jit_armv6m_asm:b(Rel), - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}; - false -> - % Label not yet known, emit placeholder and add relocation - I1 = jit_armv6m_asm:b(0), - Reloc = {Label, Offset, b}, - Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1, branches = [Reloc | AccBranches]} - end. + {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), + Stream1 = StreamModule:append(Stream0, CodeBlock), + State1#state{stream = Stream1}. + +branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) when + LabelOffset - Offset =< 2050, LabelOffset - Offset >= -2044 +-> + % Near branch: use direct B instruction + Rel = LabelOffset - Offset, + CodeBlock = jit_armv6m_asm:b(Rel), + {State, CodeBlock}; +branch_to_label_code( + #state{available_regs = [TempReg | _]} = State0, Offset, Label, {Label, LabelOffset} +) -> + % Far branch: use register-based sequence, need temporary register + % Calculate alignment for literal pool + LdrOffset = Offset, + % ldr + add + bx = 6 bytes + AfterInstructionsOffset = Offset + 6, + % Round up to 4-byte boundary + AlignedLiteralOffset = ((AfterInstructionsOffset + 3) band (bnot 3)), + PaddingSize = AlignedLiteralOffset - AfterInstructionsOffset, + + % Calculate PC-relative offset for ldr instruction + % For ldr rd, [pc, #imm]: effective address = (PC+4 aligned to 4) + imm + + % PC aligned down + PCAtLdrExecution = (LdrOffset + 4) band (bnot 3), + LdrImmediate = AlignedLiteralOffset - PCAtLdrExecution, + + % Calculate the literal value: target - PC_at_add_instruction + % The add instruction is at Offset + 2, so PC = Offset + 2 + 4 = Offset + 6 + AddPCValue = Offset + 6, + LiteralValue = LabelOffset - AddPCValue, + + if + PaddingSize > 0 -> + % Need alignment padding + I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + I2 = jit_armv6m_asm:add(TempReg, pc), + I3 = jit_armv6m_asm:bx(TempReg), + % Padding + I4 = jit_armv6m_asm:nop(), + I5 = <>, + CodeBlock = <>; + true -> + % No alignment padding needed + I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + I2 = jit_armv6m_asm:add(TempReg, pc), + I3 = jit_armv6m_asm:bx(TempReg), + I4 = <>, + CodeBlock = <> + end, + {State0, CodeBlock}; +branch_to_label_code( + #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false +) -> + % Calculate alignment for literal pool + LdrOffset = Offset, + % ldr + add + bx = 6 bytes + AfterInstructionsOffset = Offset + 6, + % Round up to 4-byte boundary + AlignedLiteralOffset = ((AfterInstructionsOffset + 3) band (bnot 3)), + PaddingSize = AlignedLiteralOffset - AfterInstructionsOffset, + + % Calculate PC-relative offset for ldr instruction + % For ldr rd, [pc, #imm]: effective address = (PC+4 aligned to 4) + imm + + % PC aligned down + PCAtLdrExecution = (LdrOffset + 4) band (bnot 3), + LdrImmediate = AlignedLiteralOffset - PCAtLdrExecution, + + {CodeBlock, SequenceSize} = + if + PaddingSize > 0 -> + % Need alignment padding + I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + I2 = jit_armv6m_asm:add(TempReg, pc), + I3 = jit_armv6m_asm:bx(TempReg), + I4 = jit_armv6m_asm:nop(), + % Placeholder offset + I5 = <<0:32/little>>, + Seq = <>, + {Seq, byte_size(Seq)}; + true -> + % No alignment padding needed + I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + I2 = jit_armv6m_asm:add(TempReg, pc), + I3 = jit_armv6m_asm:bx(TempReg), + % Placeholder offset + I4 = <<0:32/little>>, + Seq = <>, + {Seq, byte_size(Seq)} + end, + % Add relocation entry + Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}}, + State1 = State0#state{branches = [Reloc | Branches]}, + {State1, CodeBlock}; +branch_to_label_code(#state{available_regs = []}, _Offset, _Label, _LabelLookup) -> + error(no_available_registers). %%----------------------------------------------------------------------------- %% @doc Emit an if block, i.e. emit a test of a condition and conditionnally @@ -2369,7 +2501,6 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - branches = Branches, available_regs = [Temp, TempJitState | _] } = State0, Label @@ -2387,16 +2518,53 @@ call_only_or_schedule_next( % If not zero, we want to continue execution at Label % If zero, we want to fall through to scheduling code - % Skip over the unconditional branch (2 bytes) - I4 = jit_armv6m_asm:bcc(eq, 4), - % Unconditional branch to label (will be patched later) - I5 = jit_armv6m_asm:b(0), - LongBranchOffset = StreamModule:offset(Stream1) + byte_size(I4), - LongBranchReloc = {Label, LongBranchOffset, b}, - Stream2 = StreamModule:append(Stream1, <>), - State1 = State0#state{stream = Stream2, branches = [LongBranchReloc | Branches]}, - State2 = set_continuation_to_label(State1, Label), - call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). + % Look up label once to avoid duplicate lookup in helper + LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels), + + State4 = + case LabelLookupResult of + {Label, LabelOffset} -> + % Label is known, check if we can optimize the conditional branch + BccOffset = StreamModule:offset(Stream1), + % After bcc instruction + BranchOffset = BccOffset + 2, + Rel = LabelOffset - BranchOffset, + + if + Rel >= -252 andalso Rel =< 258 andalso (Rel rem 2) =:= 0 -> + % Near branch: use direct conditional branch + + % Branch if NOT zero (ne) + I4 = jit_armv6m_asm:bcc(ne, Rel), + Stream2 = StreamModule:append(Stream1, I4), + State0#state{stream = Stream2}; + true -> + % Far branch: use trampoline with helper + % Get the code block size for the far branch sequence that will follow + FarSeqOffset = BccOffset + 2, + {State1, FarCodeBlock} = branch_to_label_code( + State0, FarSeqOffset, Label, LabelLookupResult + ), + FarSeqSize = byte_size(FarCodeBlock), + % Skip over the far branch sequence if zero (eq) + I4 = jit_armv6m_asm:bcc(eq, FarSeqSize + 2), + Stream2 = StreamModule:append(Stream1, I4), + Stream3 = StreamModule:append(Stream2, FarCodeBlock), + State1#state{stream = Stream3} + end; + false -> + % Label not known, get the far branch size for the skip + BccOffset = StreamModule:offset(Stream1), + FarSeqOffset = BccOffset + 2, + {State1, FarCodeBlock} = branch_to_label_code(State0, FarSeqOffset, Label, false), + FarSeqSize = byte_size(FarCodeBlock), + I4 = jit_armv6m_asm:bcc(eq, FarSeqSize + 2), + Stream2 = StreamModule:append(Stream1, I4), + Stream3 = StreamModule:append(Stream2, FarCodeBlock), + State1#state{stream = Stream3} + end, + State5 = set_continuation_to_label(State4, Label), + call_primitive_last(State5, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). call_primitive_with_cp(State0, Primitive, Args) -> {State1, RewriteOffset, TempReg} = set_cp(State0), @@ -2560,16 +2728,24 @@ args_regs(Args) -> ). %%----------------------------------------------------------------------------- -%% @doc Add a label at the current offset +%% @doc Add a label at the current offset. Eventually align it with a nop. %% @end %% @param State current backend state %% @param Label the label number or reference %% @return Updated backend state %%----------------------------------------------------------------------------- -spec add_label(state(), integer() | reference()) -> state(). -add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) -> - Offset = StreamModule:offset(Stream), - add_label(State, Label, Offset). +add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label) -> + Offset0 = StreamModule:offset(Stream0), + {State1, Offset1} = + if + Offset0 rem 4 =:= 0 -> + {State0, Offset0}; + true -> + Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:nop()), + {State0#state{stream = Stream1}, Offset0 + 2} + end, + add_label(State1, Label, Offset1). %%----------------------------------------------------------------------------- %% @doc Add a label at a specific offset diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 487c40d2e6..8608823c4d 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1090,47 +1090,53 @@ call_only_or_schedule_next_and_label_relocation_test() -> Stream = ?BACKEND:stream(State8), Dump = << - " 0: 4b03 ldr r3, [pc, #12]\n" + " 0: 4b03 ldr r3, [pc, #12] ; (0x10)\n" " 2: e003 b.n 0xc\n" - " 4: 4b04 ldr r3, [pc, #16]\n" + " 4: 4b04 ldr r3, [pc, #16] ; (0x18)\n" " 6: e001 b.n 0xc\n" - " 8: 4b05 ldr r3, [pc, #20]\n" + " 8: 4b05 ldr r3, [pc, #20] ; (0x20)\n" " a: e7ff b.n 0xc\n" " c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " e: 449f add pc, r3\n" - " 10: 0036 movs r6, r6\n" + " 10: 0042 lsls r2, r0, #1\n" " 12: 0000 movs r0, r0\n" " 14: 000a movs r2, r1\n" " 16: 0000 movs r0, r0\n" - " 18: 002c movs r4, r5\n" + " 18: 0036 movs r6, r6\n" " 1a: 0000 movs r0, r0\n" " 1c: 9e00 ldr r6, [sp, #0]\n" " 1e: 68b7 ldr r7, [r6, #8]\n" " 20: 3f01 subs r7, #1\n" " 22: 60b7 str r7, [r6, #8]\n" - " 24: d000 beq.n 0x28\n" - " 26: e00a b.n 0x3e\n" - " 28: 2630 movs r6, #48\n" - " 2a: 4276 negs r6, r6\n" - " 2c: a702 add r7, pc, #8\n" - " 2e: 19f6 adds r6, r6, r7\n" - " 30: 9f00 ldr r7, [sp, #0]\n" - " 32: 607e str r6, [r7, #4]\n" - " 34: 6897 ldr r7, [r2, #8]\n" - " 36: 9e05 ldr r6, [sp, #20]\n" - " 38: 9705 str r7, [sp, #20]\n" - " 3a: 46b6 mov lr, r6\n" - " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 3e: 6817 ldr r7, [r2, #0]\n" - " 40: 9e05 ldr r6, [sp, #20]\n" - " 42: 9705 str r7, [sp, #20]\n" - " 44: 46b6 mov lr, r6\n" - " 46: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 48: 6857 ldr r7, [r2, #4]\n" + " 24: d004 beq.n 0x30\n" + " 26: e00f b.n 0x48\n" + " 28: 46c0 nop ; (mov r8, r8)\n" + " 2a: 46c0 nop ; (mov r8, r8)\n" + " 2c: 46c0 nop ; (mov r8, r8)\n" + " 2e: 46c0 nop ; (mov r8, r8)\n" + " 30: 2638 movs r6, #56 ; 0x38\n" + " 32: 4276 negs r6, r6\n" + " 34: a702 add r7, pc, #8 ; (adr r7, 0x40)\n" + " 36: 19f6 adds r6, r6, r7\n" + " 38: 9f00 ldr r7, [sp, #0]\n" + " 3a: 607e str r6, [r7, #4]\n" + " 3c: 6897 ldr r7, [r2, #8]\n" + " 3e: 9e05 ldr r6, [sp, #20]\n" + " 40: 9705 str r7, [sp, #20]\n" + " 42: 46b6 mov lr, r6\n" + " 44: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 46: 46c0 nop ; (mov r8, r8)\n" + " 48: 6817 ldr r7, [r2, #0]\n" " 4a: 9e05 ldr r6, [sp, #20]\n" " 4c: 9705 str r7, [sp, #20]\n" " 4e: 46b6 mov lr, r6\n" - " 50: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 50: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 52: 46c0 nop ; (mov r8, r8)\n" + " 54: 6857 ldr r7, [r2, #4]\n" + " 56: 9e05 ldr r6, [sp, #20]\n" + " 58: 9705 str r7, [sp, #20]\n" + " 5a: 46b6 mov lr, r6\n" + " 5c: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1152,47 +1158,54 @@ call_only_or_schedule_next_and_label_relocation_unaligned_test() -> Dump = << " 0: 6019 str r1, [r3, #0]\n" - " 2: 4b03 ldr r3, [pc, #12]\n" + " 2: 4b03 ldr r3, [pc, #12] ; (0x10)\n" " 4: e003 b.n 0xe\n" - " 6: 4b04 ldr r3, [pc, #16]\n" + " 6: 4b04 ldr r3, [pc, #16] ; (0x18)\n" " 8: e001 b.n 0xe\n" - " a: 4b05 ldr r3, [pc, #20]\n" + " a: 4b05 ldr r3, [pc, #20] ; (0x20)\n" " c: e7ff b.n 0xe\n" " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 10: 449f add pc, r3\n" - " 12: 0036 movs r6, r6\n" + " 12: 0044 lsls r4, r0, #1\n" " 14: 0000 movs r0, r0\n" - " 16: 000a movs r2, r1\n" + " 16: 000c movs r4, r1\n" " 18: 0000 movs r0, r0\n" - " 1a: 002c movs r4, r5\n" + " 1a: 0038 movs r0, r7\n" " 1c: 0000 movs r0, r0\n" - " 1e: 9e00 ldr r6, [sp, #0]\n" - " 20: 68b7 ldr r7, [r6, #8]\n" - " 22: 3f01 subs r7, #1\n" - " 24: 60b7 str r7, [r6, #8]\n" - " 26: d000 beq.n 0x2a\n" - " 28: e00a b.n 0x40\n" - " 2a: 2634 movs r6, #52\n" - " 2c: 4276 negs r6, r6\n" - " 2e: a703 add r7, pc, #12\n" - " 30: 19f6 adds r6, r6, r7\n" - " 32: 9f00 ldr r7, [sp, #0]\n" - " 34: 607e str r6, [r7, #4]\n" - " 36: 6897 ldr r7, [r2, #8]\n" - " 38: 9e05 ldr r6, [sp, #20]\n" - " 3a: 9705 str r7, [sp, #20]\n" - " 3c: 46b6 mov lr, r6\n" - " 3e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 40: 6817 ldr r7, [r2, #0]\n" + " 1e: 46c0 nop ; (mov r8, r8)\n" + " 20: 9e00 ldr r6, [sp, #0]\n" + " 22: 68b7 ldr r7, [r6, #8]\n" + " 24: 3f01 subs r7, #1\n" + " 26: 60b7 str r7, [r6, #8]\n" + " 28: d004 beq.n 0x34\n" + " 2a: e00f b.n 0x4c\n" + " 2c: 46c0 nop ; (mov r8, r8)\n" + " 2e: 46c0 nop ; (mov r8, r8)\n" + " 30: 46c0 nop ; (mov r8, r8)\n" + " 32: 46c0 nop ; (mov r8, r8)\n" + " 34: 263c movs r6, #60 ; 0x3c\n" + " 36: 4276 negs r6, r6\n" + " 38: a702 add r7, pc, #8 ; (adr r7, 0x44)\n" + " 3a: 19f6 adds r6, r6, r7\n" + " 3c: 9f00 ldr r7, [sp, #0]\n" + " 3e: 607e str r6, [r7, #4]\n" + " 40: 6897 ldr r7, [r2, #8]\n" " 42: 9e05 ldr r6, [sp, #20]\n" " 44: 9705 str r7, [sp, #20]\n" " 46: 46b6 mov lr, r6\n" " 48: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 4a: 6857 ldr r7, [r2, #4]\n" - " 4c: 9e05 ldr r6, [sp, #20]\n" - " 4e: 9705 str r7, [sp, #20]\n" - " 50: 46b6 mov lr, r6\n" - " 52: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 4a: 46c0 nop ; (mov r8, r8)\n" + " 4c: 6817 ldr r7, [r2, #0]\n" + " 4e: 9e05 ldr r6, [sp, #20]\n" + " 50: 9705 str r7, [sp, #20]\n" + " 52: 46b6 mov lr, r6\n" + " 54: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 56: 46c0 nop ; (mov r8, r8)\n" + " 58: 6857 ldr r7, [r2, #4]\n" + " 5a: 9e05 ldr r6, [sp, #20]\n" + " 5c: 9705 str r7, [sp, #20]\n" + " 5e: 46b6 mov lr, r6\n" + " 60: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1221,34 +1234,40 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> Stream = ?BACKEND:stream(State8), % Extract the final section starting at 0x11c to verify the literal pool pattern Dump = << - " 11c: 9e00 ldr r6, [sp, #0]\n" + "11c: 9e00 ldr r6, [sp, #0]\n" " 11e: 68b7 ldr r7, [r6, #8]\n" " 120: 3f01 subs r7, #1\n" " 122: 60b7 str r7, [r6, #8]\n" - " 124: d000 beq.n 0x128\n" - " 126: e00c b.n 0x142\n" - " 128: 4e00 ldr r6, [pc, #0] ; (0x12c)\n" - " 12a: e001 b.n 0x130\n" - " 12c: fed0 ffff mrc2 15, 6, pc, cr0, cr15, {7}\n" - " 130: a701 add r7, pc, #4 ; (adr r7, 0x138)\n" - " 132: 19f6 adds r6, r6, r7\n" - " 134: 9f00 ldr r7, [sp, #0]\n" - " 136: 607e str r6, [r7, #4]\n" - " 138: 6897 ldr r7, [r2, #8]\n" - " 13a: 9e05 ldr r6, [sp, #20]\n" - " 13c: 9705 str r7, [sp, #20]\n" - " 13e: 46b6 mov lr, r6\n" - " 140: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 142: 6817 ldr r7, [r2, #0]\n" - " 144: 9e05 ldr r6, [sp, #20]\n" - " 146: 9705 str r7, [sp, #20]\n" - " 148: 46b6 mov lr, r6\n" - " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 14c: 6857 ldr r7, [r2, #4]\n" + " 124: d004 beq.n 0x130\n" + " 126: e011 b.n 0x14c\n" + " 128: 46c0 nop ; (mov r8, r8)\n" + " 12a: 46c0 nop ; (mov r8, r8)\n" + " 12c: 46c0 nop ; (mov r8, r8)\n" + " 12e: 46c0 nop ; (mov r8, r8)\n" + " 130: 4e00 ldr r6, [pc, #0] ; (0x134)\n" + " 132: e001 b.n 0x138\n" + " 134: fec8 ffff mcr2 15, 6, pc, cr8, cr15, {7} ; \n" + " 138: a701 add r7, pc, #4 ; (adr r7, 0x140)\n" + " 13a: 19f6 adds r6, r6, r7\n" + " 13c: 9f00 ldr r7, [sp, #0]\n" + " 13e: 607e str r6, [r7, #4]\n" + " 140: 6897 ldr r7, [r2, #8]\n" + " 142: 9e05 ldr r6, [sp, #20]\n" + " 144: 9705 str r7, [sp, #20]\n" + " 146: 46b6 mov lr, r6\n" + " 148: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 14a: 46c0 nop ; (mov r8, r8)\n" + " 14c: 6817 ldr r7, [r2, #0]\n" " 14e: 9e05 ldr r6, [sp, #20]\n" " 150: 9705 str r7, [sp, #20]\n" " 152: 46b6 mov lr, r6\n" - " 154: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 154: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 156: 46c0 nop ; (mov r8, r8)\n" + " 158: 6857 ldr r7, [r2, #4]\n" + " 15a: 9e05 ldr r6, [sp, #20]\n" + " 15c: 9705 str r7, [sp, #20]\n" + " 15e: 46b6 mov lr, r6\n" + " 160: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, {_, RelevantBinary} = split_binary(Stream, 16#11c), ?assertEqual(dump_to_bin(Dump), RelevantBinary). @@ -1278,35 +1297,41 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> Stream = ?BACKEND:stream(State8), % Extract the final section starting at 0x11a to verify the literal pool pattern with different alignment Dump = << - " 11a: 9e00 ldr r6, [sp, #0]\n" - " 11c: 68b7 ldr r7, [r6, #8]\n" - " 11e: 3f01 subs r7, #1\n" - " 120: 60b7 str r7, [r6, #8]\n" - " 122: d000 beq.n 0x126\n" - " 124: e00d b.n 0x142\n" - " 126: 4e01 ldr r6, [pc, #4] ; (0x12c)\n" - " 128: e002 b.n 0x130\n" - " 12a: 0000 movs r0, r0\n" - " 12c: fed0 ffff mrc2 15, 6, pc, cr0, cr15, {7}\n" - " 130: a701 add r7, pc, #4 ; (adr r7, 0x138)\n" - " 132: 19f6 adds r6, r6, r7\n" - " 134: 9f00 ldr r7, [sp, #0]\n" - " 136: 607e str r6, [r7, #4]\n" - " 138: 6897 ldr r7, [r2, #8]\n" - " 13a: 9e05 ldr r6, [sp, #20]\n" - " 13c: 9705 str r7, [sp, #20]\n" - " 13e: 46b6 mov lr, r6\n" - " 140: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 142: 6817 ldr r7, [r2, #0]\n" - " 144: 9e05 ldr r6, [sp, #20]\n" - " 146: 9705 str r7, [sp, #20]\n" - " 148: 46b6 mov lr, r6\n" - " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 14c: 6857 ldr r7, [r2, #4]\n" + " 11a: 46c0 nop ; (mov r8, r8)\n" + " 11c: 9e00 ldr r6, [sp, #0]\n" + " 11e: 68b7 ldr r7, [r6, #8]\n" + " 120: 3f01 subs r7, #1\n" + " 122: 60b7 str r7, [r6, #8]\n" + " 124: d004 beq.n 0x130\n" + " 126: e011 b.n 0x14c\n" + " 128: 46c0 nop ; (mov r8, r8)\n" + " 12a: 46c0 nop ; (mov r8, r8)\n" + " 12c: 46c0 nop ; (mov r8, r8)\n" + " 12e: 46c0 nop ; (mov r8, r8)\n" + " 130: 4e00 ldr r6, [pc, #0] ; (0x134)\n" + " 132: e001 b.n 0x138\n" + " 134: fec8 ffff mcr2 15, 6, pc, cr8, cr15, {7} ; \n" + " 138: a701 add r7, pc, #4 ; (adr r7, 0x140)\n" + " 13a: 19f6 adds r6, r6, r7\n" + " 13c: 9f00 ldr r7, [sp, #0]\n" + " 13e: 607e str r6, [r7, #4]\n" + " 140: 6897 ldr r7, [r2, #8]\n" + " 142: 9e05 ldr r6, [sp, #20]\n" + " 144: 9705 str r7, [sp, #20]\n" + " 146: 46b6 mov lr, r6\n" + " 148: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 14a: 46c0 nop ; (mov r8, r8)\n" + " 14c: 6817 ldr r7, [r2, #0]\n" " 14e: 9e05 ldr r6, [sp, #20]\n" " 150: 9705 str r7, [sp, #20]\n" " 152: 46b6 mov lr, r6\n" - " 154: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 154: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 156: 46c0 nop ; (mov r8, r8)\n" + " 158: 6857 ldr r7, [r2, #4]\n" + " 15a: 9e05 ldr r6, [sp, #20]\n" + " 15c: 9705 str r7, [sp, #20]\n" + " 15e: 46b6 mov lr, r6\n" + " 160: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, {_, RelevantBinary} = split_binary(Stream, 16#11a), ?assertEqual(dump_to_bin(Dump), RelevantBinary). @@ -1411,29 +1436,36 @@ is_integer_test() -> ), State3 = ?BACKEND:free_native_registers(State2, [Reg]), ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State4 = ?BACKEND:add_label(State3, Label, 16#100), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 43fe mvns r6, r7\n" " 4: 0736 lsls r6, r6, #28\n" - " 6: d00d beq.n 0x24\n" + " 6: d015 beq.n 0x34\n" " 8: 463e mov r6, r7\n" " a: 2503 movs r5, #3\n" " c: 402e ands r6, r5\n" " e: 2e02 cmp r6, #2\n" - " 10: d000 beq.n 0x14\n" - " 12: e087 b.n 0x124\n" - " 14: 2603 movs r6, #3\n" - " 16: 43b7 bics r7, r6\n" - " 18: 683f ldr r7, [r7, #0]\n" - " 1a: 263f movs r6, #63 ; 0x3f\n" - " 1c: 4037 ands r7, r6\n" - " 1e: 2f08 cmp r7, #8\n" - " 20: d000 beq.n 0x24\n" - " 22: e07f b.n 0x124" + " 10: d004 beq.n 0x1c\n" + " 12: e075 b.n 0x100\n" + " 14: 46c0 nop ; (mov r8, r8)\n" + " 16: 46c0 nop ; (mov r8, r8)\n" + " 18: 46c0 nop ; (mov r8, r8)\n" + " 1a: 46c0 nop ; (mov r8, r8)\n" + " 1c: 2603 movs r6, #3\n" + " 1e: 43b7 bics r7, r6\n" + " 20: 683f ldr r7, [r7, #0]\n" + " 22: 263f movs r6, #63 ; 0x3f\n" + " 24: 4037 ands r7, r6\n" + " 26: 2f08 cmp r7, #8\n" + " 28: d004 beq.n 0x34\n" + " 2a: e069 b.n 0x100\n" + " 2c: 46c0 nop ; (mov r8, r8)\n" + " 2e: 46c0 nop ; (mov r8, r8)\n" + " 30: 46c0 nop ; (mov r8, r8)\n" + " 32: 46c0 nop ; (mov r8, r8)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1468,34 +1500,42 @@ is_number_test() -> ), State3 = ?BACKEND:free_native_registers(State2, [Reg]), ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State4 = ?BACKEND:add_label(State3, Label, 16#100), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 43fe mvns r6, r7\n" " 4: 0736 lsls r6, r6, #28\n" - " 6: d012 beq.n 0x2e\n" + " 6: d01b beq.n 0x40\n" " 8: 463e mov r6, r7\n" " a: 2503 movs r5, #3\n" " c: 402e ands r6, r5\n" " e: 2e02 cmp r6, #2\n" - " 10: d000 beq.n 0x14\n" - " 12: e08c b.n 0x12e\n" - " 14: 2603 movs r6, #3\n" - " 16: 43b7 bics r7, r6\n" - " 18: 683f ldr r7, [r7, #0]\n" - " 1a: 463e mov r6, r7\n" - " 1c: 253f movs r5, #63 ; 0x3f\n" - " 1e: 402e ands r6, r5\n" - " 20: 2e08 cmp r6, #8\n" - " 22: d004 beq.n 0x2e\n" - " 24: 263f movs r6, #63 ; 0x3f\n" - " 26: 4037 ands r7, r6\n" - " 28: 2f18 cmp r7, #24\n" - " 2a: d000 beq.n 0x2e\n" - " 2c: e07f b.n 0x12e" + " 10: d004 beq.n 0x1c\n" + " 12: e075 b.n 0x100\n" + " 14: 46c0 nop ; (mov r8, r8)\n" + " 16: 46c0 nop ; (mov r8, r8)\n" + " 18: 46c0 nop ; (mov r8, r8)\n" + " 1a: 46c0 nop ; (mov r8, r8)\n" + " 1c: 2603 movs r6, #3\n" + " 1e: 43b7 bics r7, r6\n" + " 20: 683f ldr r7, [r7, #0]\n" + " 22: 463e mov r6, r7\n" + " 24: 253f movs r5, #63 ; 0x3f\n" + " 26: 402e ands r6, r5\n" + " 28: 2e08 cmp r6, #8\n" + " 2a: d009 beq.n 0x40\n" + " 2c: 263f movs r6, #63 ; 0x3f\n" + " 2e: 4037 ands r7, r6\n" + " 30: 2f18 cmp r7, #24\n" + " 32: d005 beq.n 0x40\n" + " 34: e064 b.n 0x100\n" + " 36: 46c0 nop ; (mov r8, r8)\n" + " 38: 46c0 nop ; (mov r8, r8)\n" + " 3a: 46c0 nop ; (mov r8, r8)\n" + " 3c: 46c0 nop ; (mov r8, r8)\n" + " 3e: 46c0 nop ; (mov r8, r8)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1510,17 +1550,148 @@ is_boolean_test() -> end), State3 = ?BACKEND:free_native_registers(State2, [Reg]), ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), + State4 = ?BACKEND:add_label(State3, Label, 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 2f4b cmp r7, #75 ; 0x4b\n" + " 4: d006 beq.n 0x14\n" + " 6: 2f0b cmp r7, #11\n" + " 8: d004 beq.n 0x14\n" + " a: e079 b.n 0x100\n" + " c: 46c0 nop ; (mov r8, r8)\n" + " e: 46c0 nop ; (mov r8, r8)\n" + " 10: 46c0 nop ; (mov r8, r8)\n" + " 12: 46c0 nop ; (mov r8, r8)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_boolean_far_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + State4 = ?BACKEND:add_label(State3, Label, 16#1000), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 2f4b cmp r7, #75 ; 0x4b\n" - " 4: d002 beq.n 0xc\n" + " 4: d006 beq.n 0x14\n" " 6: 2f0b cmp r7, #11\n" - " 8: d000 beq.n 0xc\n" - " a: e07f b.n 0x10c" + " 8: d004 beq.n 0x14\n" + " a: 4e01 ldr r6, [pc, #4] ; (0x10)\n" + " c: 447e add r6, pc\n" + " e: 4730 bx r6\n" + " 10: 0ff0 lsrs r0, r6, #31\n" + " 12: 0000 movs r0, r0" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_boolean_far_unaligned_test() -> + % Create a new state with a 2-byte instruction already in the stream + % to simulate starting at an odd offset (offset 2 instead of 0) + PaddingInstruction = jit_armv6m_asm:bx(lr), + TempState = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + TempStream = jit_stream_binary:append(?BACKEND:stream(TempState), PaddingInstruction), + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, TempStream), + + Label = 1, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + State4 = ?BACKEND:add_label(State3, Label, 16#1000), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: 4770 bx lr\n" + " 2: 6987 ldr r7, [r0, #24]\n" + " 4: 2f4b cmp r7, #75 ; 0x4b\n" + " 6: d007 beq.n 0x18\n" + " 8: 2f0b cmp r7, #11\n" + " a: d005 beq.n 0x18\n" + " c: 4e01 ldr r6, [pc, #4] ; (0x14)\n" + " e: 447e add r6, pc\n" + " 10: 4730 bx r6\n" + " 12: 46c0 nop ; (mov r8, r8)\n" + " 14: 0fee lsrs r6, r5, #31\n" + " 16: 0000 movs r0, r0" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_boolean_far_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + State1 = ?BACKEND:add_label(State0, Label, 16#1000), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 2f4b cmp r7, #75 ; 0x4b\n" + " 4: d006 beq.n 0x14\n" + " 6: 2f0b cmp r7, #11\n" + " 8: d004 beq.n 0x14\n" + " a: 4e01 ldr r6, [pc, #4] ; (0x10)\n" + " c: 447e add r6, pc\n" + " e: 4730 bx r6\n" + " 10: 0ff0 lsrs r0, r6, #31\n" + " 12: 0000 movs r0, r0" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_boolean_far_known_unaligned_test() -> + % Create a new state with a 2-byte instruction already in the stream + % to simulate starting at an odd offset (offset 2 instead of 0) + PaddingInstruction = jit_armv6m_asm:bx(lr), + TempState = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + TempStream = jit_stream_binary:append(?BACKEND:stream(TempState), PaddingInstruction), + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, TempStream), + + Label = 1, + State1 = ?BACKEND:add_label(State0, Label, 16#1000), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: 4770 bx lr\n" + " 2: 6987 ldr r7, [r0, #24]\n" + " 4: 2f4b cmp r7, #75 ; 0x4b\n" + " 6: d007 beq.n 0x18\n" + " 8: 2f0b cmp r7, #11\n" + " a: d005 beq.n 0x18\n" + " c: 4e01 ldr r6, [pc, #4] ; (0x14)\n" + " e: 447e add r6, pc\n" + " 10: 4730 bx r6\n" + " 12: 46c0 nop ; (mov r8, r8)\n" + " 14: 0fee lsrs r6, r5, #31\n" + " 16: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1566,7 +1737,7 @@ wait_timeout_test() -> " 18: 9605 str r6, [sp, #20]\n" " 1a: 46be mov lr, r7\n" " 1c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 1e: 0000 movs r0, r0\n" + " 1e: 46c0 nop ; (mov r8, r8)\n" " 20: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 22: 6d57 ldr r7, [r2, #84] ; 0x54\n" " 24: b405 push {r0, r2}\n" From d34b8a05027616fec0d019372327f85641947d4c Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 30 Aug 2025 06:51:42 +0200 Subject: [PATCH 32/97] armv6m: remove unused move_to_native_register/3 with fpu commented test move_to_native_register/3 is never called with fp_reg Signed-off-by: Paul Guyot --- tests/libs/jit/jit_armv6m_tests.erl | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 8608823c4d..9a96954b1d 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -2630,16 +2630,6 @@ move_to_native_register_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream) end) - %% move_to_native_register/3: {fp_reg, N} - DISABLED for ARMv6-M (no FPU) - %% ?_test(begin - %% State1 = ?BACKEND:move_to_native_register(State0, {fp_reg, 3}, v0), - %% Stream = ?BACKEND:stream(State1), - %% Dump = << - %% " 0: f9406007 ldr x7, [x0, #192]\n" - %% " 4: fd400ce0 ldr d0, [x7, #24]" - %% >>, - %% ?assertEqual(dump_to_bin(Dump), Stream) - %% end) ] end}. From e820c6a7d8b1c394eefb62fdbf4eb39714abc93d Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 30 Aug 2025 09:06:19 +0200 Subject: [PATCH 33/97] armv6m: implement term_to_float Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 19 ++++++++++++------- tests/libs/jit/jit_armv6m_tests.erl | 28 +++++++++++++++++----------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 20df565f71..b6de0fe081 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1736,17 +1736,22 @@ move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; +% term_to_float move_to_vm_register( - #state{stream_module = StreamModule, available_regs = [Temp | _], stream = Stream0} = State, - {fp, RegA, RegB}, + #state{stream_module = StreamModule, available_regs = [Temp1, Temp2 | _], stream = Stream0} = + State0, + {free, {ptr, Reg, 1}}, {fp_reg, F} ) -> - I1 = jit_armv6m_asm:ldr(Temp, ?FP_REGS), - I2 = jit_armv6m_asm:str(RegA, {Temp, F * 8}), - I2 = jit_armv6m_asm:str(RegB, {Temp, F * 8 + 4}), - Code = <>, + I1 = jit_armv6m_asm:ldr(Temp1, ?FP_REGS), + I2 = jit_armv6m_asm:ldr(Temp2, {Reg, 8}), + I3 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8}), + I4 = jit_armv6m_asm:ldr(Temp2, {Reg, 12}), + I5 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8 + 4}), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}. + State1 = free_native_register(State0, Reg), + State1#state{stream = Stream1}. %%----------------------------------------------------------------------------- %% @doc Emit a move of an array element (reg[x]) to a vm or a native register. diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 9a96954b1d..b246ccc2c0 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -2573,17 +2573,6 @@ move_to_native_register_test_() -> >>, ?assertEqual(dump_to_bin(Dump), Stream) end), - %% move_to_native_register/2: {fp_reg, N} - DISABLED for ARMv6-M (no FPU) - %% ?_test(begin - %% {State1, Reg} = ?BACKEND:move_to_native_register(State0, {fp_reg, 3}), - %% Stream = ?BACKEND:stream(State1), - %% ?assertEqual(v0, Reg), - %% Dump = << - %% " 0: f9406007 ldr x7, [x0, #192]\n" - %% " 4: fd400ce0 ldr d0, [x7, #24]" - %% >>, - %% ?assertEqual(dump_to_bin(Dump), Stream) - %% end), %% move_to_native_register/3: imm to reg ?_test(begin State1 = ?BACKEND:move_to_native_register(State0, 42, r6), @@ -2629,6 +2618,23 @@ move_to_native_register_test_() -> " 2: 6889 ldr r1, [r1, #8]" >>, ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% Test: ptr with offset to fp_reg (term_to_float) + ?_test(begin + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:move_to_vm_register( + State1, {free, {ptr, RegA, 1}}, {fp_reg, 3} + ), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 6e06 ldr r6, [r0, #96] ; 0x60\n" + " 4: 68bd ldr r5, [r7, #8]\n" + " 6: 61b5 str r5, [r6, #24]\n" + " 8: 68fd ldr r5, [r7, #12]\n" + " a: 61f5 str r5, [r6, #28]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) end) ] end}. From 74c1052d6cb4cc49143174396a5ff1442e84f042 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 30 Aug 2025 09:32:26 +0200 Subject: [PATCH 34/97] armv6m: fix and test add & sub Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 42 ++++++++++-------- tests/libs/jit/jit_armv6m_tests.erl | 68 +++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 18 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index b6de0fe081..816fd40440 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1093,7 +1093,7 @@ if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) -> if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', Val} -) when is_integer(Val) -> +) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 -> Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -2292,20 +2292,18 @@ or_( Stream2 = StreamModule:append(Stream1, I), State1#state{available_regs = [Temp | AT], stream = Stream2}. -add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) -> - try jit_armv6m_asm:adds(Reg, Val) of - I -> - Stream1 = StreamModule:append(Stream0, I), - State0#state{stream = Stream1} - catch - error:{unencodable_immediate, Val} -> - [Temp | AT] = State0#state.available_regs, - State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), - Stream1 = State1#state.stream, - I = jit_armv6m_asm:adds(Reg, Temp), - Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2} - end. +add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when + (Val >= 0 andalso Val =< 255) orelse is_atom(Val) +-> + I = jit_armv6m_asm:adds(Reg, Reg, Val), + Stream1 = StreamModule:append(Stream0, I), + State0#state{stream = Stream1}; +add(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) -> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_armv6m_asm:adds(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{available_regs = [Temp | AT], stream = Stream2}. mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when Val >= 0 andalso Val =< 255 @@ -2356,10 +2354,18 @@ mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Re Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}. -sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - I1 = jit_armv6m_asm:sub(Reg, Reg, Val), +sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when + (Val >= 0 andalso Val =< 255) orelse is_atom(Val) +-> + I1 = jit_armv6m_asm:subs(Reg, Reg, Val), Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}. + State#state{stream = Stream1}; +sub(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) -> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_armv6m_asm:subs(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{available_regs = [Temp | AT], stream = Stream2}. mul(State, _Reg, 1) -> State; diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index b246ccc2c0..47a364c4ca 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -2639,6 +2639,74 @@ move_to_native_register_test_() -> ] end}. +add_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:add(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +add_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + add_test0(State0, r2, 2, << + " 0: 3202 adds r2, #2" + >>) + end), + ?_test(begin + add_test0(State0, r2, 256, << + " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" + " 2: e001 b.n 0x8\n" + " 4: 0100 lsls r0, r0, #4\n" + " 6: 0000 movs r0, r0\n" + " 8: 19d2 adds r2, r2, r7" + >>) + end), + ?_test(begin + add_test0(State0, r2, r3, << + " 0: 18d2 adds r2, r2, r3" + >>) + end) + ] + end}. + +sub_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:sub(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +sub_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + sub_test0(State0, r2, 2, << + " 0: 3a02 subs r2, #2" + >>) + end), + ?_test(begin + sub_test0(State0, r2, 256, << + " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" + " 2: e001 b.n 0x8\n" + " 4: 0100 lsls r0, r0, #4\n" + " 6: 0000 movs r0, r0\n" + " 8: 1bd2 subs r2, r2, r7" + >>) + end), + ?_test(begin + sub_test0(State0, r2, r3, << + " 0: 1ad2 subs r2, r2, r3" + >>) + end) + ] + end}. + mul_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:mul(State0, Reg, Imm), Stream = ?BACKEND:stream(State1), From 37ec447ff9a4bb74dbe4c93349af9a1bf1063844 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 30 Aug 2025 09:32:40 +0200 Subject: [PATCH 35/97] armv6m: fix == with large immediate Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 23 +++++++++++++++++++++++ tests/libs/jit/jit_armv6m_tests.erl | 21 +++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 816fd40440..e6f8a740ea 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1109,6 +1109,29 @@ if_block_cond( State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, {State2, ne, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + {RegOrTuple, '==', Val} +) when is_integer(Val) -> + Offset0 = StreamModule:offset(Stream0), + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + State1 = mov_immediate(State0, Temp, Val), + Stream1 = State1#state.stream, + Offset1 = StreamModule:offset(Stream1), + I1 = jit_armv6m_asm:cmp(Reg, Temp), + I2 = jit_armv6m_asm:bcc(ne, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream2 = StreamModule:append(Stream1, Code), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream2}, + {State3, ne, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 47a364c4ca..92601b2e1b 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -501,6 +501,27 @@ if_block_test_() -> ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', -1}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2501 movs r5, #1\n" + " 6: 426d negs r5, r5\n" + " 8: 42af cmp r7, r5\n" + " a: d100 bne.n 0xe\n" + " c: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), ?_test(begin State1 = ?BACKEND:if_block( State0, From 97bc9b8fb2cdb1fbd880f3ab3901b84db85479c1 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 30 Aug 2025 10:26:06 +0200 Subject: [PATCH 36/97] armv6m: fix stack alignment with calls Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 29 ++++++++++----- tests/libs/jit/jit_armv6m_tests.erl | 57 ++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index e6f8a740ea..9f0927c7b9 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1387,8 +1387,23 @@ call_func_ptr( [{free, FuncPtrReg} | Args] ), UsedRegs1 = UsedRegs0 -- FreeRegs, - SavedRegs = [?CTX_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], - {_SavedRegsOdd, Stream1} = push_registers(SavedRegs, StreamModule, Stream0), + SavedRegsBase = [?CTX_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + + % Calculate available registers for potential padding + FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), + AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, + + % Add padding register if odd number to maintain 8-byte stack alignment per ARM AAPCS + {SavedRegs, AvailableRegsAfterPadding} = + case (length(SavedRegsBase) rem 2) =:= 1 of + true when AvailableRegs1 /= [] -> + [PaddingReg | RestAvailable] = AvailableRegs1, + {SavedRegsBase ++ [PaddingReg], RestAvailable}; + _ -> + {SavedRegsBase, AvailableRegs1} + end, + + Stream1 = push_registers(SavedRegs, StreamModule, Stream0), % Set up arguments following AArch64 calling convention State1 = set_args(State0#state{stream = Stream1}, Args), @@ -1399,12 +1414,10 @@ call_func_ptr( Stream4 = StreamModule:append(Stream2, Call), % If r0 is in used regs, save it to another temporary register - FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), - AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, {Stream5, ResultReg} = case lists:member(r0, SavedRegs) of true -> - [Temp | _] = AvailableRegs1, + [Temp | _] = AvailableRegsAfterPadding, {StreamModule:append(Stream4, jit_armv6m_asm:mov(Temp, r0)), Temp}; false -> {Stream4, r0} @@ -1425,11 +1438,9 @@ call_func_ptr( }. push_registers(SavedRegs, StreamModule, Stream0) when length(SavedRegs) > 0 -> - IsOdd = (length(SavedRegs) rem 2) =:= 1, - Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:push(SavedRegs)), - {IsOdd, Stream1}; + StreamModule:append(Stream0, jit_armv6m_asm:push(SavedRegs)); push_registers([], _StreamModule, Stream0) -> - {false, Stream0}. + Stream0. pop_registers(SavedRegs, StreamModule, Stream0) when length(SavedRegs) > 0 -> Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:pop(SavedRegs)), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 92601b2e1b..e6bdf5ce3d 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -159,25 +159,25 @@ call_primitive_extended_regs_test() -> " 8: 4607 mov r7, r0\n" " a: bc05 pop {r0, r2}\n" " c: 6c96 ldr r6, [r2, #72] ; 0x48\n" - " e: b485 push {r0, r2, r7}\n" + " e: b4c5 push {r0, r2, r6, r7}\n" " 10: 2114 movs r1, #20\n" " 12: 47b0 blx r6\n" - " 14: 4606 mov r6, r0\n" - " 16: bc85 pop {r0, r2, r7}\n" - " 18: 6c95 ldr r5, [r2, #72] ; 0x48\n" - " 1a: b4c5 push {r0, r2, r6, r7}\n" + " 14: 4605 mov r5, r0\n" + " 16: bcc5 pop {r0, r2, r6, r7}\n" + " 18: 6c96 ldr r6, [r2, #72] ; 0x48\n" + " 1a: b4a5 push {r0, r2, r5, r7}\n" " 1c: 2113 movs r1, #19\n" - " 1e: 47a8 blx r5\n" - " 20: 4605 mov r5, r0\n" - " 22: bcc5 pop {r0, r2, r6, r7}\n" + " 1e: 47b0 blx r6\n" + " 20: 4606 mov r6, r0\n" + " 22: bca5 pop {r0, r2, r5, r7}\n" " 24: 6b54 ldr r4, [r2, #52] ; 0x34\n" - " 26: b425 push {r0, r2, r5}\n" + " 26: b455 push {r0, r2, r4, r6}\n" " 28: 6839 ldr r1, [r7, #0]\n" - " 2a: 6832 ldr r2, [r6, #0]\n" + " 2a: 682a ldr r2, [r5, #0]\n" " 2c: 47a0 blx r4\n" - " 2e: 4604 mov r4, r0\n" - " 30: bc25 pop {r0, r2, r5}\n" - " 32: 602c str r4, [r5, #0]" + " 2e: 4607 mov r7, r0\n" + " 30: bc55 pop {r0, r2, r4, r6}\n" + " 32: 6037 str r7, [r6, #0]" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1381,17 +1381,17 @@ call_bif_with_large_literal_integer_test() -> " a: 4607 mov r7, r0\n" " c: bc05 pop {r0, r2}\n" " e: 6bd6 ldr r6, [r2, #60] ; 0x3c\n" - " 10: b485 push {r0, r2, r7}\n" + " 10: b4c5 push {r0, r2, r6, r7}\n" " 12: 4901 ldr r1, [pc, #4] ; (0x18)\n" " 14: e002 b.n 0x1c\n" " 16: 0000 movs r0, r0\n" " 18: e895 3b7f ldmia.w r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n" " 1c: 47b0 blx r6\n" - " 1e: 4606 mov r6, r0\n" - " 20: bc85 pop {r0, r2, r7}\n" + " 1e: 4605 mov r5, r0\n" + " 20: bcc5 pop {r0, r2, r6, r7}\n" " 22: b405 push {r0, r2}\n" " 24: b082 sub sp, #8\n" - " 26: 9600 str r6, [sp, #0]\n" + " 26: 9500 str r5, [sp, #0]\n" " 28: 2100 movs r1, #0\n" " 2a: 2201 movs r2, #1\n" " 2c: 6983 ldr r3, [r0, #24]\n" @@ -2845,6 +2845,29 @@ dump_to_bin(Dump) -> ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) ). +%% Test for stack alignment issue in call_func_ptr +%% When we have an odd number of saved registers, the stack becomes misaligned +%% before the function call, violating ARM AAPCS which requires 8-byte alignment +call_func_ptr_stack_alignment_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, r4} = ?BACKEND:call_func_ptr(State3, {free, r3}, [42]), + Stream = ?BACKEND:stream(State4), + Dump = + << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 6a05 ldr r5, [r0, #32]\n" + " 6: b4ed push {r0, r2, r3, r5, r6, r7}\n" + " 8: 202a movs r0, #42 ; 0x2a\n" + " a: 4798 blx r3\n" + " c: 4604 mov r4, r0\n" + " e: bced pop {r0, r2, r3, r5, r6, r7}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> dump_to_bin0(Tail, hex, Acc); dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> From 3db13ee2291dc1f678029b0ea68808301acbf89c Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 30 Aug 2025 20:29:30 +0200 Subject: [PATCH 37/97] armv6m: fix issues with call_func_ptr and handle reg exhaustion Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 129 +++++++++++++++--------- libs/jit/src/jit_armv6m_asm.erl | 3 +- tests/libs/jit/jit_armv6m_tests.erl | 146 ++++++++++++++++++++++++++-- 3 files changed, 225 insertions(+), 53 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 9f0927c7b9..b6918b9e49 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -621,7 +621,8 @@ call_primitive_last( State0#state{ stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs }, - lists:sublist(ArgsForTailCall, 4) + lists:sublist(ArgsForTailCall, 4), + 0 ), % 5th argument needs to be moved to r5 FifthArg = lists:nth(5, ArgsForTailCall), @@ -661,7 +662,8 @@ call_primitive_last( State0#state{ stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs }, - ArgsForTailCall + ArgsForTailCall, + 0 ), State3 = tail_call_with_jit_state_registers_only(State1, Temp) end, @@ -1394,33 +1396,59 @@ call_func_ptr( AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, % Add padding register if odd number to maintain 8-byte stack alignment per ARM AAPCS - {SavedRegs, AvailableRegsAfterPadding} = + SavedRegs = case (length(SavedRegsBase) rem 2) =:= 1 of true when AvailableRegs1 /= [] -> - [PaddingReg | RestAvailable] = AvailableRegs1, - {SavedRegsBase ++ [PaddingReg], RestAvailable}; + [PaddingReg | _] = AvailableRegs1, + SavedRegsBase ++ [PaddingReg]; _ -> - {SavedRegsBase, AvailableRegs1} + SavedRegsBase end, Stream1 = push_registers(SavedRegs, StreamModule, Stream0), - % Set up arguments following AArch64 calling convention - State1 = set_args(State0#state{stream = Stream1}, Args), + % Set up arguments following ARM AAPCS calling convention + % Since we pushed registers to stack, those saved registers can now be used as temporaries + + ArgsRegs = lists:flatmap( + fun + ({free, {ptr, Reg}}) -> [Reg]; + ({free, Reg}) when is_atom(Reg) -> [Reg]; + (Reg) when is_atom(Reg) -> [Reg]; + (_) -> [] + end, + Args + ), + SavedRegsForTemps = SavedRegs -- [?CTX_REG, ?NATIVE_INTERFACE_REG, FuncPtrReg] -- ArgsRegs, + State1 = set_args( + State0#state{stream = Stream1, available_regs = SavedRegsForTemps ++ AvailableRegs0}, + Args, + length(SavedRegs) * 4 + ), #state{stream = Stream2} = State1, % Call the function pointer (using BLX for call with return) Call = jit_armv6m_asm:blx(FuncPtrReg), Stream4 = StreamModule:append(Stream2, Call), - % If r0 is in used regs, save it to another temporary register - {Stream5, ResultReg} = - case lists:member(r0, SavedRegs) of - true -> - [Temp | _] = AvailableRegsAfterPadding, - {StreamModule:append(Stream4, jit_armv6m_asm:mov(Temp, r0)), Temp}; - false -> - {Stream4, r0} + % For result, we need a free register (including FuncPtrReg) but ideally + % not the one used for padding. If none are available (all 8 registers + % were pushed to the stack), we write the result to the stack position + % of FuncPtrReg + Stream5 = + case length(SavedRegs) of + 8 -> + % We use FuncPtrReg then as we know it's available. + % Calculate stack offset: register number * 4 bytes + ResultReg = FuncPtrReg, + StackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4, + StoreResult = jit_armv6m_asm:str(r0, {sp, StackOffset}), + StreamModule:append(Stream4, StoreResult); + _ -> + % Use any free that is not in SavedRegs + [ResultReg | _] = AvailableRegs1 -- SavedRegs, + MoveResult = jit_armv6m_asm:mov(ResultReg, r0), + StreamModule:append(Stream4, MoveResult) end, Stream6 = pop_registers(lists:reverse(SavedRegs), StreamModule, Stream5), @@ -1448,22 +1476,22 @@ pop_registers(SavedRegs, StreamModule, Stream0) when length(SavedRegs) > 0 -> pop_registers([], _StreamModule, Stream0) -> Stream0. --spec set_args(state(), [arg()]) -> state(). +-spec set_args(state(), [arg()], non_neg_integer()) -> state(). % Handle 5 parameters: handle 5th on stack first, then first 4 in registers r0-r3 -set_args(State, [Arg1, Arg2, Arg3, Arg4, Arg5]) -> +set_args(State, [Arg1, Arg2, Arg3, Arg4, Arg5], StackOffset) -> % Handle 5th argument on stack first (with alignment) - this may free registers State1 = set_args_push_stack(State, Arg5, undefined), % Then set up first 4 arguments in registers using existing logic - set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4]); + set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4], StackOffset); % Handle 6 parameters: handle 5th and 6th on stack first, then first 4 in registers r0-r3 -set_args(State, [Arg1, Arg2, Arg3, Arg4, Arg5, Arg6]) -> +set_args(State, [Arg1, Arg2, Arg3, Arg4, Arg5, Arg6], StackOffset) -> % Handle 5th and 6th arguments on stack first (no alignment needed) - this may free registers State1 = set_args_push_stack(State, Arg5, Arg6), % Then set up first 4 arguments in registers using existing logic - set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4]); + set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4], StackOffset); % Handle up to 4 parameters: all in registers r0-r3 -set_args(State, Args) when length(Args) =< 4 -> - set_args_registers_only(State, Args). +set_args(State, Args, StackOffset) when length(Args) =< 4 -> + set_args_registers_only(State, Args, StackOffset). %% @doc Handle 5th and optionally 6th arguments on stack. %% For 5 args: push 5th arg at sp+0 with 4-byte padding at sp+4 for 8-byte alignment @@ -1528,7 +1556,9 @@ set_args_push_stack( State2. set_args_registers_only( - #state{stream = Stream0, stream_module = StreamModule, used_regs = UsedRegs} = State0, Args + #state{stream = Stream0, stream_module = StreamModule, used_regs = UsedRegs} = State0, + Args, + StackOffset ) -> ParamRegs = parameter_regs(Args), ArgsRegs = args_regs(Args), @@ -1542,7 +1572,7 @@ set_args_registers_only( end || Arg <- Args ], - State1 = set_args0(State0, Args1, ArgsRegs, ParamRegs, AvailableScratchGP), + State1 = set_args0(State0, Args1, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset), Stream1 = State1#state.stream, NewUsedRegs = lists:foldl( fun @@ -1597,41 +1627,49 @@ replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> replace_reg0([Other | T], Reg, Replacement, Acc) -> replace_reg0(T, Reg, Replacement, [Other | Acc]). -set_args0(State, [], [], [], _AvailGP) -> +set_args0(State, [], [], [], _AvailGP, _StackOffset) -> State; -set_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP) -> - set_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP); -set_args0(State, [ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP) -> - set_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP); +set_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset) -> + set_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset); +set_args0( + State, [ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset +) -> + set_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); set_args0( #state{stream_module = StreamModule, stream = Stream0} = State, [jit_state | ArgsT], [jit_state | ArgsRegs], [ParamReg | ParamRegs], - AvailGP + AvailGP, + StackOffset ) -> - false = lists:member(ParamReg, ArgsRegs), - % jit_state is saved on stack, load from stack offset - I = jit_armv6m_asm:ldr(ParamReg, {sp, ?STACK_OFFSET_JITSTATE}), + % jit_state is loaded from a fixed stack location, so we don't need to check + % for register conflicts like other arguments - it can overwrite any existing + % register content since it comes from stack + % After stack space allocation for parameters, jit_state is at higher offset + JitStateOffset = ?STACK_OFFSET_JITSTATE + StackOffset, + I = jit_armv6m_asm:ldr(ParamReg, {sp, JitStateOffset}), Stream1 = StreamModule:append(Stream0, I), - set_args0(State#state{stream = Stream1}, ArgsT, ArgsRegs, ParamRegs, AvailGP); + set_args0(State#state{stream = Stream1}, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); set_args0( State, [jit_state_tail_call | ArgsT], [jit_state | ArgsRegs], [ParamReg | ParamRegs], - AvailGP + AvailGP, + StackOffset ) -> false = lists:member(ParamReg, ArgsRegs), % For tail calls, jit_state will be restored by pop - skip generating load instruction - set_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP); + set_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); % Handle stack parameters - load argument into temp register and push to stack set_args0( #state{stream_module = StreamModule} = State, [Arg | ArgsT], [stack | ArgsRegs], [stack | ParamRegs], - [TempReg | _] = AvailGP + [TempReg | _] = AvailGP, + StackOffset ) -> % Generate code to set up argument in temp register State1 = set_args1(State, Arg, TempReg), @@ -1639,28 +1677,31 @@ set_args0( DecSP = jit_armv6m_asm:sub(sp, sp, 4), StoreInstr = jit_armv6m_asm:str(TempReg, {sp, 0}), Stream1 = StreamModule:append(State1#state.stream, <>), - set_args0(State1#state{stream = Stream1}, ArgsT, ArgsRegs, ParamRegs, AvailGP); + set_args0(State1#state{stream = Stream1}, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); % ctx is special as we need it to access x_reg/y_reg/fp_reg -set_args0(State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP) -> +set_args0(State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset) -> false = lists:member(?CTX_REG, ArgsRegs), State1 = set_args1(State, Arg, ?CTX_REG), - set_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP); + set_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); set_args0( #state{stream_module = StreamModule} = State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [ParamReg | ParamRegs], - [Avail | AvailGPT] = AvailGP + [Avail | AvailGPT] = AvailGP, + StackOffset ) -> State1 = set_args1(State, Arg, ParamReg), case lists:member(ParamReg, ArgsRegs) of false -> - set_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP); + set_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); true -> I = jit_armv6m_asm:mov(Avail, ParamReg), Stream1 = StreamModule:append(State1#state.stream, I), NewArgsT = replace_reg(ArgsT, ParamReg, Avail), - set_args0(State1#state{stream = Stream1}, NewArgsT, ArgsRegs, ParamRegs, AvailGPT) + set_args0( + State1#state{stream = Stream1}, NewArgsT, ArgsRegs, ParamRegs, AvailGPT, StackOffset + ) end. set_args1(State, Reg, Reg) -> diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl index b8d349dbb0..0ef5c741c7 100644 --- a/libs/jit/src/jit_armv6m_asm.erl +++ b/libs/jit/src/jit_armv6m_asm.erl @@ -51,7 +51,8 @@ tst/2, adr/2, push/1, - pop/1 + pop/1, + reg_to_num/1 ]). -export_type([ diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index e6bdf5ce3d..7fc50ddd53 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -43,7 +43,7 @@ call_primitive_0_test() -> << " 0: 6817 ldr r7, [r2, #0]\n" " 2: b405 push {r0, r2}\n" - " 4: 9900 ldr r1, [sp, #0]\n" + " 4: 9902 ldr r1, [sp, #8]\n" " 6: 47b8 blx r7\n" " 8: 4607 mov r7, r0\n" " a: bc05 pop {r0, r2}" @@ -59,7 +59,7 @@ call_primitive_1_test() -> << " 0: 6857 ldr r7, [r2, #4]\n" " 2: b405 push {r0, r2}\n" - " 4: 9900 ldr r1, [sp, #0]\n" + " 4: 9902 ldr r1, [sp, #8]\n" " 6: 47b8 blx r7\n" " 8: 4607 mov r7, r0\n" " a: bc05 pop {r0, r2}" @@ -130,7 +130,7 @@ call_primitive_6_args_test() -> " 10: 9601 str r6, [sp, #4]\n" " 12: 2608 movs r6, #8\n" " 14: 9600 str r6, [sp, #0]\n" - " 16: 9900 ldr r1, [sp, #0]\n" + " 16: 9902 ldr r1, [sp, #8]\n" " 18: 463a mov r2, r7\n" " 1a: 2340 movs r3, #64 ; 0x40\n" " 1c: 47a8 blx r5\n" @@ -350,7 +350,7 @@ return_if_not_equal_to_ctx_test_() -> << " 0: 6d57 ldr r7, [r2, #84] ; 0x54\n" " 2: b405 push {r0, r2}\n" - " 4: 9900 ldr r1, [sp, #0]\n" + " 4: 9902 ldr r1, [sp, #8]\n" " 6: 47b8 blx r7\n" " 8: 4607 mov r7, r0\n" " a: bc05 pop {r0, r2}\n" @@ -376,7 +376,7 @@ return_if_not_equal_to_ctx_test_() -> << " 0: 6d57 ldr r7, [r2, #84] ; 0x54\n" " 2: b405 push {r0, r2}\n" - " 4: 9900 ldr r1, [sp, #0]\n" + " 4: 9902 ldr r1, [sp, #8]\n" " 6: 47b8 blx r7\n" " 8: 4607 mov r7, r0\n" " a: bc05 pop {r0, r2}\n" @@ -1375,7 +1375,7 @@ call_bif_with_large_literal_integer_test() -> << " 0: 6a17 ldr r7, [r2, #32]\n" " 2: b405 push {r0, r2}\n" - " 4: 9800 ldr r0, [sp, #0]\n" + " 4: 9802 ldr r0, [sp, #8]\n" " 6: 2102 movs r1, #2\n" " 8: 47b8 blx r7\n" " a: 4607 mov r7, r0\n" @@ -1762,7 +1762,7 @@ wait_timeout_test() -> " 20: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 22: 6d57 ldr r7, [r2, #84] ; 0x54\n" " 24: b405 push {r0, r2}\n" - " 26: 9900 ldr r1, [sp, #0]\n" + " 26: 9902 ldr r1, [sp, #8]\n" " 28: 47b8 blx r7\n" " 2a: 4607 mov r7, r0\n" " 2c: bc05 pop {r0, r2}\n" @@ -1882,7 +1882,7 @@ gc_bif2_test() -> Dump = << " 0: 6a17 ldr r7, [r2, #32]\n" " 2: b405 push {r0, r2}\n" - " 4: 9800 ldr r0, [sp, #0]\n" + " 4: 9802 ldr r0, [sp, #8]\n" " 6: 212a movs r1, #42 ; 0x2a\n" " 8: 47b8 blx r7\n" " a: 4607 mov r7, r0\n" @@ -2868,6 +2868,136 @@ call_func_ptr_stack_alignment_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test for register exhaustion issue in call_func_ptr with 5+ arguments +%% When all registers are used and we call a function with 5+ args, +%% set_args needs temporary registers but none are available +call_func_ptr_register_exhaustion_test_() -> + {setup, + fun() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Allocate all available registers to simulate register pressure + {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, r4} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {State6, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + State6 + end, + fun(State6) -> + [ + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, r6}, + [ctx, jit_state, {free, r3}, 3, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 6a05 ldr r5, [r0, #32]\n" + " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" + " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" + " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n" + " e: b082 sub sp, #8\n" + " 10: 2101 movs r1, #1\n" + " 12: 9100 str r1, [sp, #0]\n" + " 14: 9906 ldr r1, [sp, #24]\n" + " 16: 461a mov r2, r3\n" + " 18: 2303 movs r3, #3\n" + " 1a: 47b0 blx r6\n" + " 1c: 4606 mov r6, r0\n" + " 1e: bcb7 pop {r0, r1, r2, r4, r5, r7}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, r6}, + [ctx, jit_state, {free, r3}, 1, r1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 6a05 ldr r5, [r0, #32]\n" + " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" + " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" + " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n" + " e: b082 sub sp, #8\n" + " 10: 9100 str r1, [sp, #0]\n" + " 12: 9906 ldr r1, [sp, #24]\n" + " 14: 461a mov r2, r3\n" + " 16: 2301 movs r3, #1\n" + " 18: 47b0 blx r6\n" + " 1a: 4606 mov r6, r0\n" + " 1c: bcb7 pop {r0, r1, r2, r4, r5, r7}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, r6}, + [ctx, jit_state, {free, r3}, r1, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 6a05 ldr r5, [r0, #32]\n" + " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" + " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" + " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n" + " e: b082 sub sp, #8\n" + " 10: 2101 movs r1, #1\n" + " 12: 9100 str r1, [sp, #0]\n" + " 14: 9906 ldr r1, [sp, #24]\n" + " 16: 461a mov r2, r3\n" + " 18: 460b mov r3, r1\n" + " 1a: 47b0 blx r6\n" + " 1c: 4606 mov r6, r0\n" + " 1e: bcb7 pop {r0, r1, r2, r4, r5, r7}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(r6, ResultReg) + end), + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, r1}, + [r6, r3] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 6a05 ldr r5, [r0, #32]\n" + " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" + " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" + " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " c: b4ff push {r0, r1, r2, r3, r4, r5, r6, r7}\n" + " e: 4630 mov r0, r6\n" + " 10: 4619 mov r1, r3\n" + " 12: 4788 blx r1\n" + " 14: 9001 str r0, [sp, #4]\n" + " 16: bcff pop {r0, r1, r2, r3, r4, r5, r6, r7}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> dump_to_bin0(Tail, hex, Acc); dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> From 674d5397aeb28722c965d052dc073778f6e73378 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 31 Aug 2025 06:24:11 +0200 Subject: [PATCH 38/97] armv6m: fix issues with and_ reg exhaustion Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 46 +++++++++++++++++++++++-- tests/libs/jit/jit_armv6m_tests.erl | 52 +++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 3 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index b6918b9e49..42a34b19ef 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -181,8 +181,8 @@ % aarch64 ABI specific %% ARMv6-M register mappings -%% Link register --define(LR_REG, r14). +%% IP can be used as an additional scratch register +-define(IP_REG, r12). %% Stack offset for function prolog: push {r1,r4,r5,r6,r7,lr} %% r1 (JITSTATE_REG) is at SP+0 after push @@ -2354,7 +2354,47 @@ and_( Stream1 = State1#state.stream, I = jit_armv6m_asm:ands(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}. + State1#state{available_regs = [Temp | AT], stream = Stream2}; +and_( + #state{stream_module = StreamModule, available_regs = []} = State0, + Reg, + Val +) when Val < 0 andalso Val >= -256 -> + % No available registers, use r0 as temp and save it to r12 + Stream0 = State0#state.stream, + % Save r0 to r12 + Save = jit_armv6m_asm:mov(?IP_REG, r0), + Stream1 = StreamModule:append(Stream0, Save), + % Load immediate value into r0 + State1 = mov_immediate(State0#state{stream = Stream1}, r0, bnot (Val)), + Stream2 = State1#state.stream, + % Perform BICS operation + I = jit_armv6m_asm:bics(Reg, r0), + Stream3 = StreamModule:append(Stream2, I), + % Restore r0 from r12 + Restore = jit_armv6m_asm:mov(r0, ?IP_REG), + Stream4 = StreamModule:append(Stream3, Restore), + State0#state{stream = Stream4}; +and_( + #state{stream_module = StreamModule, available_regs = []} = State0, + Reg, + Val +) -> + % No available registers, use r0 as temp and save it to r12 + Stream0 = State0#state.stream, + % Save r0 to r12 + Save = jit_armv6m_asm:mov(?IP_REG, r0), + Stream1 = StreamModule:append(Stream0, Save), + % Load immediate value into r0 + State1 = mov_immediate(State0#state{stream = Stream1}, r0, Val), + Stream2 = State1#state.stream, + % Perform ANDS operation + I = jit_armv6m_asm:ands(Reg, r0), + Stream3 = StreamModule:append(Stream2, I), + % Restore r0 from r12 + Restore = jit_armv6m_asm:mov(r0, ?IP_REG), + Stream4 = StreamModule:append(Stream3, Restore), + State0#state{stream = Stream4}. or_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 7fc50ddd53..79d8c112cf 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -2838,6 +2838,58 @@ debugger_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +and_register_exhaustion_negative_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate all available registers to simulate register exhaustion + {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, r4} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {StateNoRegs, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + % Test negative immediate (-4) which should use BICS with r0 as temp + StateResult = ?BACKEND:and_(StateNoRegs, r7, -4), + Stream = ?BACKEND:stream(StateResult), + ExpectedDump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 6a05 ldr r5, [r0, #32]\n" + " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" + " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" + " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " c: 4684 mov ip, r0\n" + " e: 2003 movs r0, #3\n" + " 10: 4387 bics r7, r0\n" + " 12: 4660 mov r0, ip" + >>, + ?assertEqual(dump_to_bin(ExpectedDump), Stream). + +and_register_exhaustion_positive_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate all available registers to simulate register exhaustion + {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, r4} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {StateNoRegs, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + % Test positive immediate (0x3F) which should use ANDS with r0 as temp + StateResult = ?BACKEND:and_(StateNoRegs, r7, 16#3F), + Stream = ?BACKEND:stream(StateResult), + ExpectedDump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 6a05 ldr r5, [r0, #32]\n" + " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" + " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" + " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " c: 4684 mov ip, r0\n" + " e: 203f movs r0, #63 ; 0x3f\n" + " 10: 4007 ands r7, r0\n" + " 12: 4660 mov r0, ip" + >>, + ?assertEqual(dump_to_bin(ExpectedDump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). From 3014b96c62a146d57212a9bf980b28950314d51e Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 31 Aug 2025 06:38:02 +0200 Subject: [PATCH 39/97] armv6m: fix cmp with large immediates Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 43 ++++++++++++++++++- tests/libs/jit/jit_armv6m_tests.erl | 65 +++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+), 2 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 42a34b19ef..3d7cabc683 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1020,7 +1020,7 @@ if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, { if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', Val} -) when is_atom(Reg), is_integer(Val) -> +) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 -> I1 = jit_armv6m_asm:cmp(Reg, Val), % ge = greater than or equal I2 = jit_armv6m_asm:bcc(ge, 0), @@ -1031,6 +1031,22 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, Code), State1 = State0#state{stream = Stream1}, {State1, ge, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0, + {Reg, '<', Val} +) when is_atom(Reg), is_integer(Val) -> + State1 = mov_immediate(State0, Temp, Val), + Stream0 = State1#state.stream, + I1 = jit_armv6m_asm:cmp(Reg, Temp), + % ge = greater than or equal + I2 = jit_armv6m_asm:bcc(ge, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream1 = StreamModule:append(Stream0, Code), + State2 = State1#state{stream = Stream1}, + {State2, ge, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', RegB} @@ -1074,7 +1090,7 @@ if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) -> if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '!=', Val} -) when is_integer(Val) orelse ?IS_GPR(Val) -> +) when (is_integer(Val) andalso Val >= 0 andalso Val =< 255) orelse ?IS_GPR(Val) -> Reg = case RegOrTuple of {free, Reg0} -> Reg0; @@ -1134,6 +1150,29 @@ if_block_cond( State2 = if_block_free_reg(RegOrTuple, State1), State3 = State2#state{stream = Stream2}, {State3, ne, Offset1 - Offset0 + byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + {RegOrTuple, '!=', Val} +) when is_integer(Val) -> + Offset0 = StreamModule:offset(Stream0), + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + State1 = mov_immediate(State0, Temp, Val), + Stream1 = State1#state.stream, + Offset1 = StreamModule:offset(Stream1), + I1 = jit_armv6m_asm:cmp(Reg, Temp), + I2 = jit_armv6m_asm:bcc(eq, 0), + Code = << + I1/binary, + I2/binary + >>, + Stream2 = StreamModule:append(Stream1, Code), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream2}, + {State3, eq, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 79d8c112cf..2ab167a166 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -463,6 +463,48 @@ if_block_test_() -> ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 2f2a cmp r7, #42 ; 0x2a\n" + " 6: da00 bge.n 0xa\n" + " 8: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 1024}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 4d00 ldr r5, [pc, #0] ; (0x8)\n" + " 6: da04 bge.n 0x12\n" + " 8: 0400 lsls r0, r0, #16\n" + " a: 0000 movs r0, r0\n" + " c: 42af cmp r7, r5\n" + " e: dafe bge.n 0xe\n" + " 10: 3602 adds r6, #2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), ?_test(begin State1 = ?BACKEND:if_block( State0, @@ -617,6 +659,29 @@ if_block_test_() -> ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) end), + ?_test(begin + % Test large immediate (1995) that requires temporary register + State1 = ?BACKEND:if_block( + State0, + {RegA, '!=', 1995}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 1) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 4d00 ldr r5, [pc, #0] ; (0x8)\n" + " 6: e001 b.n 0xc\n" + " 8: 07cb lsls r3, r1, #31\n" + " a: 0000 movs r0, r0\n" + " c: 42af cmp r7, r5\n" + " e: d000 beq.n 0x12\n" + " 10: 3601 adds r6, #1" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), ?_test(begin State1 = ?BACKEND:if_block( State0, From 5aab9746b7b10328e9a04babcfecea784aff8359 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 31 Aug 2025 08:53:07 +0200 Subject: [PATCH 40/97] armv6m: fix jump table to handle many labels Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 99 ++------ src/libAtomVM/jit.h | 2 +- tests/libs/jit/jit_armv6m_tests.erl | 346 +++++++++++++++------------- 3 files changed, 205 insertions(+), 242 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 3d7cabc683..8f0e3dc205 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -178,6 +178,8 @@ -define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}). -define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}). +-define(JUMP_TABLE_ENTRY_SIZE, 12). + % aarch64 ABI specific %% ARMv6-M register mappings @@ -345,21 +347,14 @@ assert_all_native_free(#state{ %% 0 (special entry for lines and labels information) to LabelsCount included %% (special entry for OP_INT_CALL_END). %% -%% On this platform, the jump table is composed of +%% On this platform, each jump table entry is 12 bytes. %% ``` -%% ldr r3, offset_to_label_0 -%% b common -%% ldr r3, offset_to_label_1 -%% b common -%% ... -%% offset_to_label_0: dword (32 bits with offset) -%% offset_to_label_1: dword (32 bits with offset) -%% ... -%% common: -%% push {r1, r4, r5, r6, r7, lr} -%% add pc, pc, r3 +%% ldr r3, pc+8 +%% push {r1, r4, r5, r6, r7, lr} +%% add pc, pc, r3 +%% nop() +%% offset_to_label0 %% ``` -%% so each entry can be anywhere (we're not limited by b's range) %% %% @end %% @param State current backend state @@ -371,75 +366,29 @@ jump_table(State, LabelsCount) -> jump_table0(State, 0, LabelsCount). jump_table0(State, N, LabelsCount) when N > LabelsCount -> - % After all jump table entries, emit the common handler and offset data - emit_jump_table_common_and_data(State, LabelsCount); + State; jump_table0( #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, N, LabelsCount ) -> - Offset = StreamModule:offset(Stream0), - % Calculate offsets at emit time: - % Layout: [entries] [common_handler] [data] - - % 4 bytes per entry - EntriesSize = (LabelsCount + 1) * 4, - % push (2 bytes) + add pc, pc, r3 (2 bytes) - CommonHandlerSize = 4, - - % Offset to common handler from current branch instruction (branch is at entry+2) - CommonHandlerOffset = EntriesSize - (N * 4) - 2, - - % Offset to data from current ldr instruction - - % PC when ldr executes - CurrentPC = Offset + 4, - DataOffset = Offset + EntriesSize + CommonHandlerSize + (N * 4) - CurrentPC, - - % Create jump table entry with calculated offsets - LdrInstr = jit_armv6m_asm:ldr(r3, {pc, DataOffset}), - % branch offset in bytes - BranchInstr = jit_armv6m_asm:b(CommonHandlerOffset), - JumpEntry = <>, + % Create jump table entry with calculated offsets - all at emit time + % LDR r3, {pc, 8} - load data from 8 bytes after PC (constant offset) + I1 = jit_armv6m_asm:ldr(r3, {pc, 8}), + I2 = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]), + I3 = jit_armv6m_asm:add(pc, r3), + I4 = jit_armv6m_asm:nop(), + + JumpEntry = <>, Stream1 = StreamModule:append(Stream0, JumpEntry), - % No relocations needed since we calculated everything at emit time - jump_table0(State#state{stream = Stream1, branches = Branches}, N + 1, LabelsCount). + % Add relocation for the data entry so update_branches/2 can patch the jump target + DataOffset = StreamModule:offset(Stream1) - 4, + % No add instruction offset needed + DataReloc = {N, DataOffset, {jump_table_data, 0}}, + UpdatedState = State#state{stream = Stream1, branches = [DataReloc | Branches]}, -%%----------------------------------------------------------------------------- -%% @doc Emit the common handler and offset data for the jump table. -%% @end -%%----------------------------------------------------------------------------- -emit_jump_table_common_and_data( - #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, - LabelsCount -) -> - % Emit common handler: push {r1, r4, r5, r6, r7, lr} + add pc, pc, r3 - CommonHandlerOffset = StreamModule:offset(Stream0), - PushInstr = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]), - AddInstrOffset = CommonHandlerOffset + byte_size(PushInstr), - % indirect jump using loaded offset - AddInstr = jit_armv6m_asm:add(pc, r3), - CommonHandler = <>, - Stream1 = StreamModule:append(Stream0, CommonHandler), - - % Emit offset data (32-bit offsets for each label, will be updated by update_branches/2) - {Stream2, NewBranches} = lists:foldl( - fun(N, {StreamAcc, BranchesAcc}) -> - Offset = StreamModule:offset(StreamAcc), - % Each data entry is a 32-bit offset that will be patched by update_branches/2 - - % placeholder, will be updated - DataEntry = <<0:32/little>>, - StreamNext = StreamModule:append(StreamAcc, DataEntry), - % Add relocation for this data entry, including the add instruction offset - DataReloc = {N, Offset, {jump_table_data, AddInstrOffset}}, - {StreamNext, [DataReloc | BranchesAcc]} - end, - {Stream1, Branches}, - lists:seq(0, LabelsCount) - ), - State#state{stream = Stream2, branches = NewBranches}. + jump_table0(UpdatedState, N + 1, LabelsCount). %%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. @@ -2272,7 +2221,7 @@ set_continuation_to_label( ) -> Offset = StreamModule:offset(Stream0), % Calculate jump table entry offset - JumpTableEntryOffset = Label * 4, + JumpTableEntryOffset = Label * ?JUMP_TABLE_ENTRY_SIZE, % Assume mov_immediate will be at most 10 bytes MaxMovImmediateSize = 10, diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index eae3672ec5..973a253a74 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -190,7 +190,7 @@ enum TrapAndLoadResult #ifdef __arm__ #define JIT_ARCH_TARGET JIT_ARCH_ARMV6M -#define JIT_JUMPTABLE_ENTRY_SIZE 8 +#define JIT_JUMPTABLE_ENTRY_SIZE 12 #endif #ifndef JIT_ARCH_TARGET diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 2ab167a166..7bfd6957e9 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1176,53 +1176,57 @@ call_only_or_schedule_next_and_label_relocation_test() -> Stream = ?BACKEND:stream(State8), Dump = << - " 0: 4b03 ldr r3, [pc, #12] ; (0x10)\n" - " 2: e003 b.n 0xc\n" - " 4: 4b04 ldr r3, [pc, #16] ; (0x18)\n" - " 6: e001 b.n 0xc\n" - " 8: 4b05 ldr r3, [pc, #20] ; (0x20)\n" - " a: e7ff b.n 0xc\n" - " c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " e: 449f add pc, r3\n" - " 10: 0042 lsls r2, r0, #1\n" - " 12: 0000 movs r0, r0\n" - " 14: 000a movs r2, r1\n" + " 0: 4b02 ldr r3, [pc, #8] ; (0xc)\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop ; (mov r8, r8)\n" + " 8: 0058 lsls r0, r3, #1\n" + " a: 0000 movs r0, r0\n" + " c: 4b02 ldr r3, [pc, #8] ; (0x18)\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop ; (mov r8, r8)\n" + " 14: 0020 movs r0, r4\n" " 16: 0000 movs r0, r0\n" - " 18: 0036 movs r6, r6\n" - " 1a: 0000 movs r0, r0\n" - " 1c: 9e00 ldr r6, [sp, #0]\n" - " 1e: 68b7 ldr r7, [r6, #8]\n" - " 20: 3f01 subs r7, #1\n" - " 22: 60b7 str r7, [r6, #8]\n" - " 24: d004 beq.n 0x30\n" - " 26: e00f b.n 0x48\n" - " 28: 46c0 nop ; (mov r8, r8)\n" - " 2a: 46c0 nop ; (mov r8, r8)\n" - " 2c: 46c0 nop ; (mov r8, r8)\n" - " 2e: 46c0 nop ; (mov r8, r8)\n" - " 30: 2638 movs r6, #56 ; 0x38\n" - " 32: 4276 negs r6, r6\n" - " 34: a702 add r7, pc, #8 ; (adr r7, 0x40)\n" - " 36: 19f6 adds r6, r6, r7\n" - " 38: 9f00 ldr r7, [sp, #0]\n" - " 3a: 607e str r6, [r7, #4]\n" - " 3c: 6897 ldr r7, [r2, #8]\n" - " 3e: 9e05 ldr r6, [sp, #20]\n" - " 40: 9705 str r7, [sp, #20]\n" - " 42: 46b6 mov lr, r6\n" - " 44: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 46: 46c0 nop ; (mov r8, r8)\n" - " 48: 6817 ldr r7, [r2, #0]\n" - " 4a: 9e05 ldr r6, [sp, #20]\n" - " 4c: 9705 str r7, [sp, #20]\n" - " 4e: 46b6 mov lr, r6\n" - " 50: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 52: 46c0 nop ; (mov r8, r8)\n" - " 54: 6857 ldr r7, [r2, #4]\n" - " 56: 9e05 ldr r6, [sp, #20]\n" - " 58: 9705 str r7, [sp, #20]\n" - " 5a: 46b6 mov lr, r6\n" - " 5c: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 18: 4b02 ldr r3, [pc, #8] ; (0x24)\n" + " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1c: 449f add pc, r3\n" + " 1e: 46c0 nop ; (mov r8, r8)\n" + " 20: 004c lsls r4, r1, #1\n" + " 22: 0000 movs r0, r0\n" + " 24: 9e00 ldr r6, [sp, #0]\n" + " 26: 68b7 ldr r7, [r6, #8]\n" + " 28: 3f01 subs r7, #1\n" + " 2a: 60b7 str r7, [r6, #8]\n" + " 2c: d004 beq.n 0x38\n" + " 2e: e00f b.n 0x50\n" + " 30: 46c0 nop ; (mov r8, r8)\n" + " 32: 46c0 nop ; (mov r8, r8)\n" + " 34: 46c0 nop ; (mov r8, r8)\n" + " 36: 46c0 nop ; (mov r8, r8)\n" + " 38: 2630 movs r6, #48 ; 0x30\n" + " 3a: 4276 negs r6, r6\n" + " 3c: a702 add r7, pc, #8 ; (adr r7, 0x48)\n" + " 3e: 19f6 adds r6, r6, r7\n" + " 40: 9f00 ldr r7, [sp, #0]\n" + " 42: 607e str r6, [r7, #4]\n" + " 44: 6897 ldr r7, [r2, #8]\n" + " 46: 9e05 ldr r6, [sp, #20]\n" + " 48: 9705 str r7, [sp, #20]\n" + " 4a: 46b6 mov lr, r6\n" + " 4c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 4e: 46c0 nop ; (mov r8, r8)\n" + " 50: 6817 ldr r7, [r2, #0]\n" + " 52: 9e05 ldr r6, [sp, #20]\n" + " 54: 9705 str r7, [sp, #20]\n" + " 56: 46b6 mov lr, r6\n" + " 58: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 5a: 46c0 nop ; (mov r8, r8)\n" + " 5c: 6857 ldr r7, [r2, #4]\n" + " 5e: 9e05 ldr r6, [sp, #20]\n" + " 60: 9705 str r7, [sp, #20]\n" + " 62: 46b6 mov lr, r6\n" + " 64: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1244,54 +1248,58 @@ call_only_or_schedule_next_and_label_relocation_unaligned_test() -> Dump = << " 0: 6019 str r1, [r3, #0]\n" - " 2: 4b03 ldr r3, [pc, #12] ; (0x10)\n" - " 4: e003 b.n 0xe\n" - " 6: 4b04 ldr r3, [pc, #16] ; (0x18)\n" - " 8: e001 b.n 0xe\n" - " a: 4b05 ldr r3, [pc, #20] ; (0x20)\n" - " c: e7ff b.n 0xe\n" - " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 10: 449f add pc, r3\n" - " 12: 0044 lsls r4, r0, #1\n" - " 14: 0000 movs r0, r0\n" - " 16: 000c movs r4, r1\n" + " 2: 4b02 ldr r3, [pc, #8] ; (0xc)\n" + " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 6: 449f add pc, r3\n" + " 8: 46c0 nop ; (mov r8, r8)\n" + " a: 005c lsls r4, r3, #1\n" + " c: 0000 movs r0, r0\n" + " e: 4b02 ldr r3, [pc, #8] ; (0x18)\n" + " 10: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 12: 449f add pc, r3\n" + " 14: 46c0 nop ; (mov r8, r8)\n" + " 16: 0024 movs r4, r4\n" " 18: 0000 movs r0, r0\n" - " 1a: 0038 movs r0, r7\n" - " 1c: 0000 movs r0, r0\n" - " 1e: 46c0 nop ; (mov r8, r8)\n" - " 20: 9e00 ldr r6, [sp, #0]\n" - " 22: 68b7 ldr r7, [r6, #8]\n" - " 24: 3f01 subs r7, #1\n" - " 26: 60b7 str r7, [r6, #8]\n" - " 28: d004 beq.n 0x34\n" - " 2a: e00f b.n 0x4c\n" - " 2c: 46c0 nop ; (mov r8, r8)\n" - " 2e: 46c0 nop ; (mov r8, r8)\n" - " 30: 46c0 nop ; (mov r8, r8)\n" - " 32: 46c0 nop ; (mov r8, r8)\n" - " 34: 263c movs r6, #60 ; 0x3c\n" - " 36: 4276 negs r6, r6\n" - " 38: a702 add r7, pc, #8 ; (adr r7, 0x44)\n" - " 3a: 19f6 adds r6, r6, r7\n" - " 3c: 9f00 ldr r7, [sp, #0]\n" - " 3e: 607e str r6, [r7, #4]\n" - " 40: 6897 ldr r7, [r2, #8]\n" - " 42: 9e05 ldr r6, [sp, #20]\n" - " 44: 9705 str r7, [sp, #20]\n" - " 46: 46b6 mov lr, r6\n" - " 48: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 4a: 46c0 nop ; (mov r8, r8)\n" - " 4c: 6817 ldr r7, [r2, #0]\n" - " 4e: 9e05 ldr r6, [sp, #20]\n" - " 50: 9705 str r7, [sp, #20]\n" - " 52: 46b6 mov lr, r6\n" - " 54: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 56: 46c0 nop ; (mov r8, r8)\n" - " 58: 6857 ldr r7, [r2, #4]\n" - " 5a: 9e05 ldr r6, [sp, #20]\n" - " 5c: 9705 str r7, [sp, #20]\n" - " 5e: 46b6 mov lr, r6\n" - " 60: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 1a: 4b02 ldr r3, [pc, #8] ; (0x24)\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 449f add pc, r3\n" + " 20: 46c0 nop ; (mov r8, r8)\n" + " 22: 0050 lsls r0, r2, #1\n" + " 24: 0000 movs r0, r0\n" + " 26: 46c0 nop ; (mov r8, r8)\n" + " 28: 9e00 ldr r6, [sp, #0]\n" + " 2a: 68b7 ldr r7, [r6, #8]\n" + " 2c: 3f01 subs r7, #1\n" + " 2e: 60b7 str r7, [r6, #8]\n" + " 30: d004 beq.n 0x3c\n" + " 32: e00f b.n 0x54\n" + " 34: 46c0 nop ; (mov r8, r8)\n" + " 36: 46c0 nop ; (mov r8, r8)\n" + " 38: 46c0 nop ; (mov r8, r8)\n" + " 3a: 46c0 nop ; (mov r8, r8)\n" + " 3c: 2634 movs r6, #52 ; 0x34\n" + " 3e: 4276 negs r6, r6\n" + " 40: a702 add r7, pc, #8 ; (adr r7, 0x4c)\n" + " 42: 19f6 adds r6, r6, r7\n" + " 44: 9f00 ldr r7, [sp, #0]\n" + " 46: 607e str r6, [r7, #4]\n" + " 48: 6897 ldr r7, [r2, #8]\n" + " 4a: 9e05 ldr r6, [sp, #20]\n" + " 4c: 9705 str r7, [sp, #20]\n" + " 4e: 46b6 mov lr, r6\n" + " 50: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 52: 46c0 nop ; (mov r8, r8)\n" + " 54: 6817 ldr r7, [r2, #0]\n" + " 56: 9e05 ldr r6, [sp, #20]\n" + " 58: 9705 str r7, [sp, #20]\n" + " 5a: 46b6 mov lr, r6\n" + " 5c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 5e: 46c0 nop ; (mov r8, r8)\n" + " 60: 6857 ldr r7, [r2, #4]\n" + " 62: 9e05 ldr r6, [sp, #20]\n" + " 64: 9705 str r7, [sp, #20]\n" + " 66: 46b6 mov lr, r6\n" + " 68: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1318,44 +1326,44 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), State8 = ?BACKEND:update_branches(State7), Stream = ?BACKEND:stream(State8), - % Extract the final section starting at 0x11c to verify the literal pool pattern + % Extract the final section starting at 0x124 to verify the literal pool pattern Dump = << - "11c: 9e00 ldr r6, [sp, #0]\n" - " 11e: 68b7 ldr r7, [r6, #8]\n" - " 120: 3f01 subs r7, #1\n" - " 122: 60b7 str r7, [r6, #8]\n" - " 124: d004 beq.n 0x130\n" - " 126: e011 b.n 0x14c\n" - " 128: 46c0 nop ; (mov r8, r8)\n" - " 12a: 46c0 nop ; (mov r8, r8)\n" - " 12c: 46c0 nop ; (mov r8, r8)\n" - " 12e: 46c0 nop ; (mov r8, r8)\n" - " 130: 4e00 ldr r6, [pc, #0] ; (0x134)\n" - " 132: e001 b.n 0x138\n" - " 134: fec8 ffff mcr2 15, 6, pc, cr8, cr15, {7} ; \n" - " 138: a701 add r7, pc, #4 ; (adr r7, 0x140)\n" - " 13a: 19f6 adds r6, r6, r7\n" - " 13c: 9f00 ldr r7, [sp, #0]\n" - " 13e: 607e str r6, [r7, #4]\n" - " 140: 6897 ldr r7, [r2, #8]\n" - " 142: 9e05 ldr r6, [sp, #20]\n" - " 144: 9705 str r7, [sp, #20]\n" - " 146: 46b6 mov lr, r6\n" - " 148: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 14a: 46c0 nop ; (mov r8, r8)\n" - " 14c: 6817 ldr r7, [r2, #0]\n" - " 14e: 9e05 ldr r6, [sp, #20]\n" - " 150: 9705 str r7, [sp, #20]\n" - " 152: 46b6 mov lr, r6\n" - " 154: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 156: 46c0 nop ; (mov r8, r8)\n" - " 158: 6857 ldr r7, [r2, #4]\n" - " 15a: 9e05 ldr r6, [sp, #20]\n" - " 15c: 9705 str r7, [sp, #20]\n" - " 15e: 46b6 mov lr, r6\n" - " 160: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 124: 9e00 ldr r6, [sp, #0]\n" + " 126: 68b7 ldr r7, [r6, #8]\n" + " 128: 3f01 subs r7, #1\n" + " 12a: 60b7 str r7, [r6, #8]\n" + " 12c: d004 beq.n 0x138\n" + " 12e: e011 b.n 0x154\n" + " 130: 46c0 nop ; (mov r8, r8)\n" + " 132: 46c0 nop ; (mov r8, r8)\n" + " 134: 46c0 nop ; (mov r8, r8)\n" + " 136: 46c0 nop ; (mov r8, r8)\n" + " 138: 4e00 ldr r6, [pc, #0] ; (0x13c)\n" + " 13a: e001 b.n 0x140\n" + " 13c: fed0 ffff mrc2 15, 6, pc, cr0, cr15, {7}\n" + " 140: a701 add r7, pc, #4 ; (adr r7, 0x148)\n" + " 142: 19f6 adds r6, r6, r7\n" + " 144: 9f00 ldr r7, [sp, #0]\n" + " 146: 607e str r6, [r7, #4]\n" + " 148: 6897 ldr r7, [r2, #8]\n" + " 14a: 9e05 ldr r6, [sp, #20]\n" + " 14c: 9705 str r7, [sp, #20]\n" + " 14e: 46b6 mov lr, r6\n" + " 150: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 152: 46c0 nop ; (mov r8, r8)\n" + " 154: 6817 ldr r7, [r2, #0]\n" + " 156: 9e05 ldr r6, [sp, #20]\n" + " 158: 9705 str r7, [sp, #20]\n" + " 15a: 46b6 mov lr, r6\n" + " 15c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 15e: 46c0 nop ; (mov r8, r8)\n" + " 160: 6857 ldr r7, [r2, #4]\n" + " 162: 9e05 ldr r6, [sp, #20]\n" + " 164: 9705 str r7, [sp, #20]\n" + " 166: 46b6 mov lr, r6\n" + " 168: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, - {_, RelevantBinary} = split_binary(Stream, 16#11c), + {_, RelevantBinary} = split_binary(Stream, 16#124), ?assertEqual(dump_to_bin(Dump), RelevantBinary). %% Test with large gap (256+ bytes) and different alignment to force literal pool path @@ -1381,45 +1389,45 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), State8 = ?BACKEND:update_branches(State7), Stream = ?BACKEND:stream(State8), - % Extract the final section starting at 0x11a to verify the literal pool pattern with different alignment + % Extract the final section starting at 0x122 to verify the literal pool pattern with different alignment Dump = << - " 11a: 46c0 nop ; (mov r8, r8)\n" - " 11c: 9e00 ldr r6, [sp, #0]\n" - " 11e: 68b7 ldr r7, [r6, #8]\n" - " 120: 3f01 subs r7, #1\n" - " 122: 60b7 str r7, [r6, #8]\n" - " 124: d004 beq.n 0x130\n" - " 126: e011 b.n 0x14c\n" - " 128: 46c0 nop ; (mov r8, r8)\n" - " 12a: 46c0 nop ; (mov r8, r8)\n" - " 12c: 46c0 nop ; (mov r8, r8)\n" - " 12e: 46c0 nop ; (mov r8, r8)\n" - " 130: 4e00 ldr r6, [pc, #0] ; (0x134)\n" - " 132: e001 b.n 0x138\n" - " 134: fec8 ffff mcr2 15, 6, pc, cr8, cr15, {7} ; \n" - " 138: a701 add r7, pc, #4 ; (adr r7, 0x140)\n" - " 13a: 19f6 adds r6, r6, r7\n" - " 13c: 9f00 ldr r7, [sp, #0]\n" - " 13e: 607e str r6, [r7, #4]\n" - " 140: 6897 ldr r7, [r2, #8]\n" - " 142: 9e05 ldr r6, [sp, #20]\n" - " 144: 9705 str r7, [sp, #20]\n" - " 146: 46b6 mov lr, r6\n" - " 148: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 14a: 46c0 nop ; (mov r8, r8)\n" - " 14c: 6817 ldr r7, [r2, #0]\n" - " 14e: 9e05 ldr r6, [sp, #20]\n" - " 150: 9705 str r7, [sp, #20]\n" - " 152: 46b6 mov lr, r6\n" - " 154: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 156: 46c0 nop ; (mov r8, r8)\n" - " 158: 6857 ldr r7, [r2, #4]\n" - " 15a: 9e05 ldr r6, [sp, #20]\n" - " 15c: 9705 str r7, [sp, #20]\n" - " 15e: 46b6 mov lr, r6\n" - " 160: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 122: 46c0 nop ; (mov r8, r8)\n" + " 124: 9e00 ldr r6, [sp, #0]\n" + " 126: 68b7 ldr r7, [r6, #8]\n" + " 128: 3f01 subs r7, #1\n" + " 12a: 60b7 str r7, [r6, #8]\n" + " 12c: d004 beq.n 0x138\n" + " 12e: e011 b.n 0x154\n" + " 130: 46c0 nop ; (mov r8, r8)\n" + " 132: 46c0 nop ; (mov r8, r8)\n" + " 134: 46c0 nop ; (mov r8, r8)\n" + " 136: 46c0 nop ; (mov r8, r8)\n" + " 138: 4e00 ldr r6, [pc, #0] ; (0x13c)\n" + " 13a: e001 b.n 0x140\n" + " 13c: fed0 ffff mrc2 15, 6, pc, cr0, cr15, {7}\n" + " 140: a701 add r7, pc, #4 ; (adr r7, 0x148)\n" + " 142: 19f6 adds r6, r6, r7\n" + " 144: 9f00 ldr r7, [sp, #0]\n" + " 146: 607e str r6, [r7, #4]\n" + " 148: 6897 ldr r7, [r2, #8]\n" + " 14a: 9e05 ldr r6, [sp, #20]\n" + " 14c: 9705 str r7, [sp, #20]\n" + " 14e: 46b6 mov lr, r6\n" + " 150: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 152: 46c0 nop ; (mov r8, r8)\n" + " 154: 6817 ldr r7, [r2, #0]\n" + " 156: 9e05 ldr r6, [sp, #20]\n" + " 158: 9705 str r7, [sp, #20]\n" + " 15a: 46b6 mov lr, r6\n" + " 15c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 15e: 46c0 nop ; (mov r8, r8)\n" + " 160: 6857 ldr r7, [r2, #4]\n" + " 162: 9e05 ldr r6, [sp, #20]\n" + " 164: 9705 str r7, [sp, #20]\n" + " 166: 46b6 mov lr, r6\n" + " 168: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, - {_, RelevantBinary} = split_binary(Stream, 16#11a), + {_, RelevantBinary} = split_binary(Stream, 16#122), ?assertEqual(dump_to_bin(Dump), RelevantBinary). call_bif_with_large_literal_integer_test() -> @@ -2955,6 +2963,12 @@ and_register_exhaustion_positive_test() -> >>, ?assertEqual(dump_to_bin(ExpectedDump), Stream). +jump_table_large_labels_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 512), + Stream = ?BACKEND:stream(State1), + ?assertEqual((512 + 1) * 12, byte_size(Stream)). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). From 52d39d31a9e57db46734b0afd05dcab1fad8cd1a Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 1 Sep 2025 07:47:03 +0200 Subject: [PATCH 41/97] armv6m: fix jump table and stack calculations Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 25 ++++++++---- libs/jit/src/jit_armv6m_asm.erl | 19 ++++++++- tests/libs/jit/jit_armv6m_tests.erl | 60 ++++++++++++++++------------- 3 files changed, 68 insertions(+), 36 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 8f0e3dc205..a2b789a080 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -349,7 +349,7 @@ assert_all_native_free(#state{ %% %% On this platform, each jump table entry is 12 bytes. %% ``` -%% ldr r3, pc+8 +%% ldr r3, pc+4 %% push {r1, r4, r5, r6, r7, lr} %% add pc, pc, r3 %% nop() @@ -373,8 +373,7 @@ jump_table0( LabelsCount ) -> % Create jump table entry with calculated offsets - all at emit time - % LDR r3, {pc, 8} - load data from 8 bytes after PC (constant offset) - I1 = jit_armv6m_asm:ldr(r3, {pc, 8}), + I1 = jit_armv6m_asm:ldr(r3, {pc, 4}), I2 = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]), I3 = jit_armv6m_asm:add(pc, r3), I4 = jit_armv6m_asm:nop(), @@ -384,8 +383,10 @@ jump_table0( % Add relocation for the data entry so update_branches/2 can patch the jump target DataOffset = StreamModule:offset(Stream1) - 4, - % No add instruction offset needed - DataReloc = {N, DataOffset, {jump_table_data, 0}}, + % Calculate the offset of the add instruction (3rd instruction, at offset 4 from entry start) + EntryStartOffset = StreamModule:offset(Stream1) - 12, + AddInstrOffset = EntryStartOffset + 4, + DataReloc = {N, DataOffset, {jump_table_data, AddInstrOffset}}, UpdatedState = State#state{stream = Stream1, branches = [DataReloc | Branches]}, jump_table0(UpdatedState, N + 1, LabelsCount). @@ -1439,14 +1440,24 @@ call_func_ptr( StreamModule:append(Stream4, MoveResult) end, - Stream6 = pop_registers(lists:reverse(SavedRegs), StreamModule, Stream5), + % Deallocate stack space if we allocated it for 5+ arguments + Stream6 = + case length(Args) >= 5 of + true -> + DeallocateArgs = jit_armv6m_asm:add(sp, 8), + StreamModule:append(Stream5, DeallocateArgs); + false -> + Stream5 + end, + + Stream7 = pop_registers(lists:reverse(SavedRegs), StreamModule, Stream6), AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), UsedRegs2 = [ResultReg | UsedRegs1], { State1#state{ - stream = Stream6, + stream = Stream7, available_regs = AvailableRegs3, used_regs = UsedRegs2 }, diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl index 0ef5c741c7..7ad73f311e 100644 --- a/libs/jit/src/jit_armv6m_asm.erl +++ b/libs/jit/src/jit_armv6m_asm.erl @@ -20,6 +20,7 @@ -export([ add/2, + add/3, adds/2, adds/3, sub/2, @@ -154,7 +155,10 @@ cond_to_num(nv) -> 15. %% Emit an ADD instruction (Thumb encoding, high register form) %% ADD Rd, Rm - adds register value to register (supports high registers including PC) %% Encoding: 01000100 DN RmNum[3:0] RdLow3[2:0] --spec add(arm_gpr_register(), arm_gpr_register()) -> binary(). +%% ADD SP, #imm - adds immediate value to stack pointer +-spec add + (arm_gpr_register(), arm_gpr_register()) -> binary(); + (sp, integer()) -> binary(). add(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), @@ -163,7 +167,18 @@ add(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> RdLow3 = RdNum band 7, % Build 16-bit instruction: 01000100 DN RmNum[3:0] RdLow3[2:0] Instr = (2#01000100 bsl 8) bor (DN bsl 7) bor (RmNum bsl 3) bor RdLow3, - <>. + <>; +add(sp, Imm) when is_integer(Imm), Imm >= 0, Imm =< 508, (Imm rem 4) =:= 0 -> + %% Thumb ADD SP, SP, #imm7*4 encoding: 10110000 0iiiiiii + Imm7 = Imm div 4, + <<(16#B000 bor (Imm7 band 127)):16/little>>; +add(sp, Imm) when is_integer(Imm) -> + error({unencodable_immediate, Imm}). + +%% ADD SP, SP, #imm - adds immediate value to stack pointer (3-operand form) +-spec add(sp, sp, integer()) -> binary(). +add(sp, sp, Imm) -> + add(sp, Imm). %% Emit an ADDS instruction (Thumb encoding) %% ADDS Rd, #imm - adds immediate value to register and sets flags (2-operand form) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 7bfd6957e9..37d2993ed5 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -135,7 +135,8 @@ call_primitive_6_args_test() -> " 1a: 2340 movs r3, #64 ; 0x40\n" " 1c: 47a8 blx r5\n" " 1e: 4605 mov r5, r0\n" - " 20: bc05 pop {r0, r2}" + " 20: b002 add sp, #8\n" + " 22: bc05 pop {r0, r2}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1176,23 +1177,23 @@ call_only_or_schedule_next_and_label_relocation_test() -> Stream = ?BACKEND:stream(State8), Dump = << - " 0: 4b02 ldr r3, [pc, #8] ; (0xc)\n" + " 0: 4b01 ldr r3, [pc, #4] ; (0x8)\n" " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 4: 449f add pc, r3\n" " 6: 46c0 nop ; (mov r8, r8)\n" - " 8: 0058 lsls r0, r3, #1\n" + " 8: 0054 lsls r4, r2, #1\n" " a: 0000 movs r0, r0\n" - " c: 4b02 ldr r3, [pc, #8] ; (0x18)\n" + " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n" " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 10: 449f add pc, r3\n" " 12: 46c0 nop ; (mov r8, r8)\n" - " 14: 0020 movs r0, r4\n" + " 14: 0010 movs r0, r2\n" " 16: 0000 movs r0, r0\n" - " 18: 4b02 ldr r3, [pc, #8] ; (0x24)\n" + " 18: 4b01 ldr r3, [pc, #4] ; (0x20)\n" " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 1c: 449f add pc, r3\n" " 1e: 46c0 nop ; (mov r8, r8)\n" - " 20: 004c lsls r4, r1, #1\n" + " 20: 0030 movs r0, r6\n" " 22: 0000 movs r0, r0\n" " 24: 9e00 ldr r6, [sp, #0]\n" " 26: 68b7 ldr r7, [r6, #8]\n" @@ -1248,23 +1249,23 @@ call_only_or_schedule_next_and_label_relocation_unaligned_test() -> Dump = << " 0: 6019 str r1, [r3, #0]\n" - " 2: 4b02 ldr r3, [pc, #8] ; (0xc)\n" + " 2: 4b01 ldr r3, [pc, #4] ; (0xa)\n" " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 6: 449f add pc, r3\n" " 8: 46c0 nop ; (mov r8, r8)\n" - " a: 005c lsls r4, r3, #1\n" + " a: 0056 lsls r6, r2, #1\n" " c: 0000 movs r0, r0\n" - " e: 4b02 ldr r3, [pc, #8] ; (0x18)\n" + " e: 4b01 ldr r3, [pc, #4] ; (0x16)\n" " 10: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 12: 449f add pc, r3\n" " 14: 46c0 nop ; (mov r8, r8)\n" - " 16: 0024 movs r4, r4\n" + " 16: 0012 movs r2, r2\n" " 18: 0000 movs r0, r0\n" - " 1a: 4b02 ldr r3, [pc, #8] ; (0x24)\n" + " 1a: 4b01 ldr r3, [pc, #4] ; (0x22)\n" " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 1e: 449f add pc, r3\n" " 20: 46c0 nop ; (mov r8, r8)\n" - " 22: 0050 lsls r0, r2, #1\n" + " 22: 0032 movs r2, r6\n" " 24: 0000 movs r0, r0\n" " 26: 46c0 nop ; (mov r8, r8)\n" " 28: 9e00 ldr r6, [sp, #0]\n" @@ -1470,16 +1471,17 @@ call_bif_with_large_literal_integer_test() -> " 2c: 6983 ldr r3, [r0, #24]\n" " 2e: 47b8 blx r7\n" " 30: 4607 mov r7, r0\n" - " 32: bc05 pop {r0, r2}\n" - " 34: 2f00 cmp r7, #0\n" - " 36: d105 bne.n 0x44\n" - " 38: 6997 ldr r7, [r2, #24]\n" - " 3a: 223a movs r2, #58 ; 0x3a\n" - " 3c: 9e05 ldr r6, [sp, #20]\n" - " 3e: 9705 str r7, [sp, #20]\n" - " 40: 46b6 mov lr, r6\n" - " 42: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 44: 6187 str r7, [r0, #24]" + " 32: b002 add sp, #8\n" + " 34: bc05 pop {r0, r2}\n" + " 36: 2f00 cmp r7, #0\n" + " 38: d105 bne.n 0x46\n" + " 3a: 6997 ldr r7, [r2, #24]\n" + " 3c: 223c movs r2, #60 ; 0x3c\n" + " 3e: 9e05 ldr r6, [sp, #20]\n" + " 40: 9705 str r7, [sp, #20]\n" + " 42: 46b6 mov lr, r6\n" + " 44: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 46: 6187 str r7, [r0, #24]" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1970,7 +1972,8 @@ gc_bif2_test() -> " 1c: 681b ldr r3, [r3, #0]\n" " 1e: 47b8 blx r7\n" " 20: 4607 mov r7, r0\n" - " 22: bc05 pop {r0, r2}" + " 22: b002 add sp, #8\n" + " 24: bc05 pop {r0, r2}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -3042,7 +3045,8 @@ call_func_ptr_register_exhaustion_test_() -> " 18: 2303 movs r3, #3\n" " 1a: 47b0 blx r6\n" " 1c: 4606 mov r6, r0\n" - " 1e: bcb7 pop {r0, r1, r2, r4, r5, r7}" + " 1e: b002 add sp, #8\n" + " 20: bcb7 pop {r0, r1, r2, r4, r5, r7}" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -3069,7 +3073,8 @@ call_func_ptr_register_exhaustion_test_() -> " 16: 2301 movs r3, #1\n" " 18: 47b0 blx r6\n" " 1a: 4606 mov r6, r0\n" - " 1c: bcb7 pop {r0, r1, r2, r4, r5, r7}" + " 1c: b002 add sp, #8\n" + " 1e: bcb7 pop {r0, r1, r2, r4, r5, r7}" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -3097,7 +3102,8 @@ call_func_ptr_register_exhaustion_test_() -> " 18: 460b mov r3, r1\n" " 1a: 47b0 blx r6\n" " 1c: 4606 mov r6, r0\n" - " 1e: bcb7 pop {r0, r1, r2, r4, r5, r7}" + " 1e: b002 add sp, #8\n" + " 20: bcb7 pop {r0, r1, r2, r4, r5, r7}" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(r6, ResultReg) From 0481e5ad9d4c31e279efa9a3e683a8cf070e5eaa Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 1 Sep 2025 08:06:41 +0200 Subject: [PATCH 42/97] armv6m: set the thumb bit on function pointers Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 14 +-- .../generic_unix/lib/jit_stream_mmap.c | 6 ++ src/platforms/generic_unix/lib/sys.c | 5 + tests/libs/jit/jit_armv6m_tests.erl | 92 +++++++++---------- 4 files changed, 65 insertions(+), 52 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index a2b789a080..26b64308bd 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -2241,8 +2241,8 @@ set_continuation_to_label( EstimatedAdrPC = (EstimatedAdrOffset band (bnot 3)) + 4 + 4, RelativeOffset = JumpTableEntryOffset - EstimatedAdrPC, - % Generate mov_immediate with the relative offset - State1 = mov_immediate(State, Temp2, RelativeOffset), + % Generate mov_immediate with the relative offset + 1 (to set thumb bit) + State1 = mov_immediate(State, Temp2, RelativeOffset + 1), Stream1 = State1#state.stream, ActualMovImmediateSize = StreamModule:offset(Stream1) - Offset, @@ -2259,7 +2259,7 @@ set_continuation_to_label( % Get PC address using adr I1 = jit_armv6m_asm:adr(Temp1, AdrOffset), - % Add PC + offset, load jit_state, and store continuation + % Add PC + offset (with thumb bit set), load jit_state, and store continuation I2 = jit_armv6m_asm:adds(Temp2, Temp2, Temp1), I3 = jit_armv6m_asm:ldr(Temp1, {sp, ?STACK_OFFSET_JITSTATE}), I4 = jit_armv6m_asm:str(Temp2, ?JITSTATE_CONTINUATION(Temp1)), @@ -2284,10 +2284,12 @@ set_continuation_to_offset( Offset = StreamModule:offset(Stream0), I1 = jit_armv6m_asm:adr(Temp, 4), Reloc = {OffsetRef, Offset, {adr, Temp}}, + % Set thumb bit (LSB = 1) by adding 1 to the 4-byte aligned address + I2 = jit_armv6m_asm:adds(Temp, Temp, 1), % Load jit_state pointer from stack, then store continuation - I2a = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), - I2b = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), - Code = <>, + I3 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}), + I4 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}. diff --git a/src/platforms/generic_unix/lib/jit_stream_mmap.c b/src/platforms/generic_unix/lib/jit_stream_mmap.c index 376f7384d0..f246a9791d 100644 --- a/src/platforms/generic_unix/lib/jit_stream_mmap.c +++ b/src/platforms/generic_unix/lib/jit_stream_mmap.c @@ -244,7 +244,13 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) #elif defined(__GNUC__) __builtin___clear_cache(js_obj->stream_base, js_obj->stream_base + js_obj->stream_size); #endif +#if JIT_ARCH_TARGET == JIT_ARCH_ARMV6M + // Set thumb bit for armv6m + ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) js_obj->stream_base + 1; +#else ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) js_obj->stream_base; +#endif + js_obj->stream_base = NULL; return result; } diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c index 086b39de79..099164dd89 100644 --- a/src/platforms/generic_unix/lib/sys.c +++ b/src/platforms/generic_unix/lib/sys.c @@ -842,7 +842,12 @@ ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t si } __builtin___clear_cache((char *) native_code_mmap, (char *) (native_code_mmap + size)); #endif +#if JIT_ARCH_TARGET == JIT_ARCH_ARMV6M + // Set thumb bit for armv6m + return (ModuleNativeEntryPoint) (native_code_mmap + offset + 1); +#else return (ModuleNativeEntryPoint) (native_code_mmap + offset); +#endif #else UNUSED(size); return (ModuleNativeEntryPoint) (native_code + offset); diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 37d2993ed5..07222776eb 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1205,7 +1205,7 @@ call_only_or_schedule_next_and_label_relocation_test() -> " 32: 46c0 nop ; (mov r8, r8)\n" " 34: 46c0 nop ; (mov r8, r8)\n" " 36: 46c0 nop ; (mov r8, r8)\n" - " 38: 2630 movs r6, #48 ; 0x30\n" + " 38: 262f movs r6, #47 ; 0x2f\n" " 3a: 4276 negs r6, r6\n" " 3c: a702 add r7, pc, #8 ; (adr r7, 0x48)\n" " 3e: 19f6 adds r6, r6, r7\n" @@ -1249,19 +1249,19 @@ call_only_or_schedule_next_and_label_relocation_unaligned_test() -> Dump = << " 0: 6019 str r1, [r3, #0]\n" - " 2: 4b01 ldr r3, [pc, #4] ; (0xa)\n" + " 2: 4b01 ldr r3, [pc, #4] ; (0x8)\n" " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 6: 449f add pc, r3\n" " 8: 46c0 nop ; (mov r8, r8)\n" " a: 0056 lsls r6, r2, #1\n" " c: 0000 movs r0, r0\n" - " e: 4b01 ldr r3, [pc, #4] ; (0x16)\n" + " e: 4b01 ldr r3, [pc, #4] ; (0x14)\n" " 10: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 12: 449f add pc, r3\n" " 14: 46c0 nop ; (mov r8, r8)\n" " 16: 0012 movs r2, r2\n" " 18: 0000 movs r0, r0\n" - " 1a: 4b01 ldr r3, [pc, #4] ; (0x22)\n" + " 1a: 4b01 ldr r3, [pc, #4] ; (0x20)\n" " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 1e: 449f add pc, r3\n" " 20: 46c0 nop ; (mov r8, r8)\n" @@ -1278,7 +1278,7 @@ call_only_or_schedule_next_and_label_relocation_unaligned_test() -> " 36: 46c0 nop ; (mov r8, r8)\n" " 38: 46c0 nop ; (mov r8, r8)\n" " 3a: 46c0 nop ; (mov r8, r8)\n" - " 3c: 2634 movs r6, #52 ; 0x34\n" + " 3c: 2633 movs r6, #51 ; 0x33\n" " 3e: 4276 negs r6, r6\n" " 40: a702 add r7, pc, #8 ; (adr r7, 0x4c)\n" " 42: 19f6 adds r6, r6, r7\n" @@ -1341,7 +1341,7 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> " 136: 46c0 nop ; (mov r8, r8)\n" " 138: 4e00 ldr r6, [pc, #0] ; (0x13c)\n" " 13a: e001 b.n 0x140\n" - " 13c: fed0 ffff mrc2 15, 6, pc, cr0, cr15, {7}\n" + " 13c: fed1 ffff mrc2 15, 6, pc, cr1, cr15, {7}\n" " 140: a701 add r7, pc, #4 ; (adr r7, 0x148)\n" " 142: 19f6 adds r6, r6, r7\n" " 144: 9f00 ldr r7, [sp, #0]\n" @@ -1405,7 +1405,7 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> " 136: 46c0 nop ; (mov r8, r8)\n" " 138: 4e00 ldr r6, [pc, #0] ; (0x13c)\n" " 13a: e001 b.n 0x140\n" - " 13c: fed0 ffff mrc2 15, 6, pc, cr0, cr15, {7}\n" + " 13c: fed1 ffff mrc2 15, 6, pc, cr1, cr15, {7}\n" " 140: a701 add r7, pc, #4 ; (adr r7, 0x148)\n" " 142: 19f6 adds r6, r6, r7\n" " 144: 9f00 ldr r7, [sp, #0]\n" @@ -1818,48 +1818,48 @@ wait_timeout_test() -> Stream = ?BACKEND:stream(State10), Dump = << - " 0: a707 add r7, pc, #28 ; (adr r7, 0x20)\n" - " 2: 9e00 ldr r6, [sp, #0]\n" - " 4: 6077 str r7, [r6, #4]\n" - " 6: 4f01 ldr r7, [pc, #4] ; (0xc)\n" - " 8: e002 b.n 0x10\n" - " a: 0000 movs r0, r0\n" + " 0: a707 add r7, pc, #28 ; (adr r7, 0x22)\n" + " 2: 3701 adds r7, #1\n" + " 4: 9e00 ldr r6, [sp, #0]\n" + " 6: 6077 str r7, [r6, #4]\n" + " 8: 4f00 ldr r7, [pc, #0] ; (0xc)\n" + " a: e001 b.n 0x10\n" " c: 1388 asrs r0, r1, #14\n" " e: 0000 movs r0, r0\n" " 10: 6f96 ldr r6, [r2, #120] ; 0x78\n" - " 12: 463a mov r2, r7\n" - " 14: 232a movs r3, #42 ; 0x2a\n" - " 16: 9f05 ldr r7, [sp, #20]\n" - " 18: 9605 str r6, [sp, #20]\n" - " 1a: 46be mov lr, r7\n" - " 1c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 1e: 46c0 nop ; (mov r8, r8)\n" - " 20: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 22: 6d57 ldr r7, [r2, #84] ; 0x54\n" - " 24: b405 push {r0, r2}\n" - " 26: 9902 ldr r1, [sp, #8]\n" - " 28: 47b8 blx r7\n" - " 2a: 4607 mov r7, r0\n" - " 2c: bc05 pop {r0, r2}\n" - " 2e: 4287 cmp r7, r0\n" - " 30: d001 beq.n 0x36\n" - " 32: 4638 mov r0, r7\n" - " 34: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 36: 2784 movs r7, #132 ; 0x84\n" - " 38: 59d7 ldr r7, [r2, r7]\n" - " 3a: b405 push {r0, r2}\n" - " 3c: 2102 movs r1, #2\n" - " 3e: 47b8 blx r7\n" - " 40: 4607 mov r7, r0\n" - " 42: bc05 pop {r0, r2}\n" - " 44: 2f00 cmp r7, #0\n" - " 46: d105 bne.n 0x54\n" - " 48: 6fd7 ldr r7, [r2, #124] ; 0x7c\n" - " 4a: 222a movs r2, #42 ; 0x2a\n" - " 4c: 9e05 ldr r6, [sp, #20]\n" - " 4e: 9705 str r7, [sp, #20]\n" - " 50: 46b6 mov lr, r6\n" - " 52: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 14: 463a mov r2, r7\n" + " 16: 232a movs r3, #42 ; 0x2a\n" + " 18: 9f05 ldr r7, [sp, #20]\n" + " 1a: 9605 str r6, [sp, #20]\n" + " 1c: 46be mov lr, r7\n" + " 1e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 20: 46c0 nop ; (mov r8, r8)\n" + " 22: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 24: 6d57 ldr r7, [r2, #84] ; 0x54\n" + " 26: b405 push {r0, r2}\n" + " 28: 9902 ldr r1, [sp, #8]\n" + " 2a: 47b8 blx r7\n" + " 2c: 4607 mov r7, r0\n" + " 2e: bc05 pop {r0, r2}\n" + " 30: 4287 cmp r7, r0\n" + " 32: d001 beq.n 0x38\n" + " 34: 4638 mov r0, r7\n" + " 36: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 38: 2784 movs r7, #132 ; 0x84\n" + " 3a: 59d7 ldr r7, [r2, r7]\n" + " 3c: b405 push {r0, r2}\n" + " 3e: 2102 movs r1, #2\n" + " 40: 47b8 blx r7\n" + " 42: 4607 mov r7, r0\n" + " 44: bc05 pop {r0, r2}\n" + " 46: 2f00 cmp r7, #0\n" + " 48: d105 bne.n 0x56\n" + " 4a: 6fd7 ldr r7, [r2, #124] ; 0x7c\n" + " 4c: 222a movs r2, #42 ; 0x2a\n" + " 4e: 9e05 ldr r6, [sp, #20]\n" + " 50: 9705 str r7, [sp, #20]\n" + " 52: 46b6 mov lr, r6\n" + " 54: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). From ab38306c121a5b4c9d853a880ca9cce977835455 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 1 Sep 2025 21:39:21 +0200 Subject: [PATCH 43/97] armv6m: fix function pointer calculations, first tests pass Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 50 +-- tests/libs/jit/jit_armv6m_tests.erl | 484 +++++++++++++++++++--------- 2 files changed, 358 insertions(+), 176 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 26b64308bd..31ea0fb605 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1481,13 +1481,13 @@ set_args(State, [Arg1, Arg2, Arg3, Arg4, Arg5], StackOffset) -> % Handle 5th argument on stack first (with alignment) - this may free registers State1 = set_args_push_stack(State, Arg5, undefined), % Then set up first 4 arguments in registers using existing logic - set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4], StackOffset); + set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4], StackOffset + 8); % Handle 6 parameters: handle 5th and 6th on stack first, then first 4 in registers r0-r3 set_args(State, [Arg1, Arg2, Arg3, Arg4, Arg5, Arg6], StackOffset) -> % Handle 5th and 6th arguments on stack first (no alignment needed) - this may free registers State1 = set_args_push_stack(State, Arg5, Arg6), % Then set up first 4 arguments in registers using existing logic - set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4], StackOffset); + set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4], StackOffset + 8); % Handle up to 4 parameters: all in registers r0-r3 set_args(State, Args, StackOffset) when length(Args) =< 4 -> set_args_registers_only(State, Args, StackOffset). @@ -2581,9 +2581,10 @@ decrement_reductions_and_maybe_schedule_next( % Set continuation to the next instruction ADROffset = BNEOffset + byte_size(I4), I5 = jit_armv6m_asm:adr(Temp, 4), - I6 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), + I6 = jit_armv6m_asm:adds(Temp, Temp, 1), + I7 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), % Append the instructions to the stream - Stream2 = StreamModule:append(Stream1, <>), + Stream2 = StreamModule:append(Stream1, <>), State1 = State0#state{stream = Stream2}, State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]), % Add the prolog at the continuation point (where scheduled execution resumes) @@ -2731,37 +2732,46 @@ rewrite_cp_offset( RewriteOffset, TempReg ) -> - NewOffset = StreamModule:offset(Stream0) - CodeOffset, - OffsetImm = NewOffset bsl 2, + CurrentOffset = StreamModule:offset(Stream0), + AlignedOffset = (CurrentOffset + 3) band (bnot 3), + PaddingSize = AlignedOffset - CurrentOffset, + % Execution should resume at an aligned offset + + Delta0 = AlignedOffset - CodeOffset, + OffsetImm0 = Delta0 bsl 2, % Check if offset fits in movs immediate (0-255) {NewMoveInstr, Stream1} = if - OffsetImm =< 255 -> - {jit_armv6m_asm:movs(TempReg, OffsetImm), Stream0}; + OffsetImm0 =< 255 -> + PaddedStream = + if + PaddingSize > 0 -> + StreamModule:append(Stream0, <<0:16>>); + true -> + Stream0 + end, + {jit_armv6m_asm:movs(TempReg, OffsetImm0), PaddedStream}; true -> % Need to emit literal pool with proper alignment - CurrentOffset = StreamModule:offset(Stream0), - % Ensure 4-byte alignment for literal pool - AlignedOffset = (CurrentOffset + 3) band (bnot 3), - PaddingSize = AlignedOffset - CurrentOffset, - Padding = <<0:(PaddingSize * 8)>>, - - % Emit the 32-bit literal - Literal = <>, + Delta1 = Delta0 + 4, + OffsetImm1 = Delta1 bsl 2, + % Emit the 32-bit literal to point to position after + % the pool StreamWithLiteral = StreamModule:append( - StreamModule:append(Stream0, Padding), Literal + Stream0, <<0:(PaddingSize * 8), OffsetImm1:32/little>> ), % Compute PC-relative offset for ldr instruction - % PC is (RewriteOffset + 4) aligned to 4-byte boundary, literal is at AlignedOffset - PCValue = (RewriteOffset + 4 + 3) band (bnot 3), + PCValue = (RewriteOffset + 4) band (bnot 3), PCRelOffset = AlignedOffset - PCValue, LdrInstr = jit_armv6m_asm:ldr(TempReg, {pc, PCRelOffset}), {LdrInstr, StreamWithLiteral} end, Stream2 = StreamModule:replace(Stream1, RewriteOffset, NewMoveInstr), - State0#state{stream = Stream2}. + Prolog = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]), + Stream3 = StreamModule:append(Stream2, Prolog), + State0#state{stream = Stream3}. set_bs( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 07222776eb..fd863028b0 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -130,7 +130,7 @@ call_primitive_6_args_test() -> " 10: 9601 str r6, [sp, #4]\n" " 12: 2608 movs r6, #8\n" " 14: 9600 str r6, [sp, #0]\n" - " 16: 9902 ldr r1, [sp, #8]\n" + " 16: 9904 ldr r1, [sp, #16]\n" " 18: 463a mov r2, r7\n" " 1a: 2340 movs r3, #64 ; 0x40\n" " 1c: 47a8 blx r5\n" @@ -192,27 +192,29 @@ call_ext_only_test() -> " 2: 68b7 ldr r7, [r6, #8]\n" " 4: 3f01 subs r7, #1\n" " 6: 60b7 str r7, [r6, #8]\n" - " 8: d107 bne.n 0x1a\n" - " a: a703 add r7, pc, #12 ; (adr r7, 0x18)\n" - " c: 6077 str r7, [r6, #4]\n" - " e: 6897 ldr r7, [r2, #8]\n" - " 10: 9e05 ldr r6, [sp, #20]\n" - " 12: 9705 str r7, [sp, #20]\n" - " 14: 46b6 mov lr, r6\n" - " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 18: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 1a: 6917 ldr r7, [r2, #16]\n" - " 1c: 221c movs r2, #28\n" - " 1e: 2302 movs r3, #2\n" - " 20: 2502 movs r5, #2\n" - " 22: 4639 mov r1, r7\n" - " 24: 9f05 ldr r7, [sp, #20]\n" - " 26: 46be mov lr, r7\n" - " 28: 9f04 ldr r7, [sp, #16]\n" - " 2a: 9504 str r5, [sp, #16]\n" - " 2c: 9105 str r1, [sp, #20]\n" - " 2e: 9603 str r6, [sp, #12]\n" - " 30: bd72 pop {r1, r4, r5, r6, pc}" + " 8: d109 bne.n 0x1e\n" + " a: a704 add r7, pc, #16 ; (adr r7, 0x1c)\n" + " c: 3701 adds r7, #1\n" + " e: 6077 str r7, [r6, #4]\n" + " 10: 6897 ldr r7, [r2, #8]\n" + " 12: 9e05 ldr r6, [sp, #20]\n" + " 14: 9705 str r7, [sp, #20]\n" + " 16: 46b6 mov lr, r6\n" + " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 1a: 46c0 nop ; (mov r8, r8)\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 6917 ldr r7, [r2, #16]\n" + " 20: 2220 movs r2, #32\n" + " 22: 2302 movs r3, #2\n" + " 24: 2502 movs r5, #2\n" + " 26: 4639 mov r1, r7\n" + " 28: 9f05 ldr r7, [sp, #20]\n" + " 2a: 46be mov lr, r7\n" + " 2c: 9f04 ldr r7, [sp, #16]\n" + " 2e: 9504 str r5, [sp, #16]\n" + " 30: 9105 str r1, [sp, #20]\n" + " 32: 9603 str r6, [sp, #12]\n" + " 34: bd72 pop {r1, r4, r5, r6, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -231,13 +233,13 @@ call_ext_only_unaligned_test() -> " 8: 60b7 str r7, [r6, #8]\n" " a: d108 bne.n 0x1e\n" " c: a703 add r7, pc, #12 ; (adr r7, 0x1c)\n" - " e: 6077 str r7, [r6, #4]\n" - " 10: 6897 ldr r7, [r2, #8]\n" - " 12: 9e05 ldr r6, [sp, #20]\n" - " 14: 9705 str r7, [sp, #20]\n" - " 16: 46b6 mov lr, r6\n" - " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 1a: 46c0 nop ; (mov r8, r8)\n" + " e: 3701 adds r7, #1\n" + " 10: 6077 str r7, [r6, #4]\n" + " 12: 6897 ldr r7, [r2, #8]\n" + " 14: 9e05 ldr r6, [sp, #20]\n" + " 16: 9705 str r7, [sp, #20]\n" + " 18: 46b6 mov lr, r6\n" + " 1a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 1e: 6917 ldr r7, [r2, #16]\n" " 20: 2220 movs r2, #32\n" @@ -292,27 +294,29 @@ call_ext_last_test() -> " 2: 68b7 ldr r7, [r6, #8]\n" " 4: 3f01 subs r7, #1\n" " 6: 60b7 str r7, [r6, #8]\n" - " 8: d107 bne.n 0x1a\n" - " a: a703 add r7, pc, #12 ; (adr r7, 0x18)\n" - " c: 6077 str r7, [r6, #4]\n" - " e: 6897 ldr r7, [r2, #8]\n" - " 10: 9e05 ldr r6, [sp, #20]\n" - " 12: 9705 str r7, [sp, #20]\n" - " 14: 46b6 mov lr, r6\n" - " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 18: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 1a: 6917 ldr r7, [r2, #16]\n" - " 1c: 221c movs r2, #28\n" - " 1e: 2302 movs r3, #2\n" - " 20: 2502 movs r5, #2\n" - " 22: 4639 mov r1, r7\n" - " 24: 9f05 ldr r7, [sp, #20]\n" - " 26: 46be mov lr, r7\n" - " 28: 9f04 ldr r7, [sp, #16]\n" - " 2a: 9504 str r5, [sp, #16]\n" - " 2c: 9105 str r1, [sp, #20]\n" - " 2e: 9603 str r6, [sp, #12]\n" - " 30: bd72 pop {r1, r4, r5, r6, pc}" + " 8: d109 bne.n 0x1e\n" + " a: a704 add r7, pc, #16 ; (adr r7, 0x1c)\n" + " c: 3701 adds r7, #1\n" + " e: 6077 str r7, [r6, #4]\n" + " 10: 6897 ldr r7, [r2, #8]\n" + " 12: 9e05 ldr r6, [sp, #20]\n" + " 14: 9705 str r7, [sp, #20]\n" + " 16: 46b6 mov lr, r6\n" + " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 1a: 46c0 nop ; (mov r8, r8)\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 6917 ldr r7, [r2, #16]\n" + " 20: 2220 movs r2, #32\n" + " 22: 2302 movs r3, #2\n" + " 24: 2502 movs r5, #2\n" + " 26: 4639 mov r1, r7\n" + " 28: 9f05 ldr r7, [sp, #20]\n" + " 2a: 46be mov lr, r7\n" + " 2c: 9f04 ldr r7, [sp, #16]\n" + " 2e: 9504 str r5, [sp, #16]\n" + " 30: 9105 str r1, [sp, #20]\n" + " 32: 9603 str r6, [sp, #12]\n" + " 34: bd72 pop {r1, r4, r5, r6, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1988,38 +1992,41 @@ call_ext_test() -> " 2: 68b7 ldr r7, [r6, #8]\n" " 4: 3f01 subs r7, #1\n" " 6: 60b7 str r7, [r6, #8]\n" - " 8: d107 bne.n 0x1a\n" - " a: a703 add r7, pc, #12 ; (adr r7, 0x18)\n" - " c: 6077 str r7, [r6, #4]\n" - " e: 6897 ldr r7, [r2, #8]\n" - " 10: 9e05 ldr r6, [sp, #20]\n" - " 12: 9705 str r7, [sp, #20]\n" - " 14: 46b6 mov lr, r6\n" - " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 18: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 1a: 9e00 ldr r6, [sp, #0]\n" - " 1c: 6837 ldr r7, [r6, #0]\n" - " 1e: 683f ldr r7, [r7, #0]\n" - " 20: 063f lsls r7, r7, #24\n" - " 22: 4d07 ldr r5, [pc, #28] ; (0x40)\n" - " 24: 432f orrs r7, r5\n" - " 26: 65c7 str r7, [r0, #92] ; 0x5c\n" - " 28: 6917 ldr r7, [r2, #16]\n" - " 2a: 2202 movs r2, #2\n" - " 2c: 2305 movs r3, #5\n" - " 2e: 2501 movs r5, #1\n" - " 30: 426d negs r5, r5\n" - " 32: 4639 mov r1, r7\n" - " 34: 9f05 ldr r7, [sp, #20]\n" - " 36: 46be mov lr, r7\n" - " 38: 9f04 ldr r7, [sp, #16]\n" - " 3a: 9504 str r5, [sp, #16]\n" - " 3c: 9105 str r1, [sp, #20]\n" - " 3e: 9e03 ldr r6, [sp, #12]\n" - " 40: bd32 pop {r1, r4, r5, pc}\n" - " 42: 0000 movs r0, r0\n" - " 44: 0108 lsls r0, r1, #4\n" - " 46: 0000 movs r0, r0" + " 8: d109 bne.n 0x1e\n" + " a: a704 add r7, pc, #16 ; (adr r7, 0x1c)\n" + " c: 3701 adds r7, #1\n" + " e: 6077 str r7, [r6, #4]\n" + " 10: 6897 ldr r7, [r2, #8]\n" + " 12: 9e05 ldr r6, [sp, #20]\n" + " 14: 9705 str r7, [sp, #20]\n" + " 16: 46b6 mov lr, r6\n" + " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 1a: 46c0 nop ; (mov r8, r8)\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 9e00 ldr r6, [sp, #0]\n" + " 20: 6837 ldr r7, [r6, #0]\n" + " 22: 683f ldr r7, [r7, #0]\n" + " 24: 063f lsls r7, r7, #24\n" + " 26: 4d08 ldr r5, [pc, #32] ; (0x48)\n" + " 28: 432f orrs r7, r5\n" + " 2a: 65c7 str r7, [r0, #92] ; 0x5c\n" + " 2c: 6917 ldr r7, [r2, #16]\n" + " 2e: 2202 movs r2, #2\n" + " 30: 2305 movs r3, #5\n" + " 32: 2501 movs r5, #1\n" + " 34: 426d negs r5, r5\n" + " 36: 4639 mov r1, r7\n" + " 38: 9f05 ldr r7, [sp, #20]\n" + " 3a: 46be mov lr, r7\n" + " 3c: 9f04 ldr r7, [sp, #16]\n" + " 3e: 9504 str r5, [sp, #16]\n" + " 40: 9105 str r1, [sp, #20]\n" + " 42: 9e03 ldr r6, [sp, #12]\n" + " 44: bd32 pop {r1, r4, r5, pc}\n" + " 46: 0000 movs r0, r0\n" + " 48: 0130 lsls r0, r6, #4\n" + " 4a: 0000 movs r0, r0\n" + " 4c: b5f2 push {r1, r4, r5, r6, r7, lr}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2057,78 +2064,81 @@ call_fun_test() -> " 2: 68b7 ldr r7, [r6, #8]\n" " 4: 3f01 subs r7, #1\n" " 6: 60b7 str r7, [r6, #8]\n" - " 8: d107 bne.n 0x1a\n" - " a: a703 add r7, pc, #12 ; (adr r7, 0x18)\n" - " c: 6077 str r7, [r6, #4]\n" - " e: 6897 ldr r7, [r2, #8]\n" - " 10: 9e05 ldr r6, [sp, #20]\n" - " 12: 9705 str r7, [sp, #20]\n" - " 14: 46b6 mov lr, r6\n" - " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 18: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 1a: 6987 ldr r7, [r0, #24]\n" - " 1c: 463e mov r6, r7\n" - " 1e: 4635 mov r5, r6\n" - " 20: 2403 movs r4, #3\n" - " 22: 4025 ands r5, r4\n" - " 24: 2d02 cmp r5, #2\n" - " 26: d00e beq.n 0x46\n" - " 28: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" - " 2a: 222a movs r2, #42 ; 0x2a\n" - " 2c: 4b00 ldr r3, [pc, #0] ; (0x30)\n" - " 2e: e001 b.n 0x34\n" - " 30: 020b lsls r3, r1, #8\n" - " 32: 0000 movs r0, r0\n" - " 34: 4635 mov r5, r6\n" - " 36: 4639 mov r1, r7\n" - " 38: 9f05 ldr r7, [sp, #20]\n" - " 3a: 46be mov lr, r7\n" - " 3c: 9f04 ldr r7, [sp, #16]\n" - " 3e: 9504 str r5, [sp, #16]\n" - " 40: 9105 str r1, [sp, #20]\n" - " 42: 9e03 ldr r6, [sp, #12]\n" - " 44: bd32 pop {r1, r4, r5, pc}\n" - " 46: 2503 movs r5, #3\n" - " 48: 43ae bics r6, r5\n" - " 4a: 6836 ldr r6, [r6, #0]\n" - " 4c: 4635 mov r5, r6\n" - " 4e: 243f movs r4, #63 ; 0x3f\n" - " 50: 4025 ands r5, r4\n" - " 52: 2d14 cmp r5, #20\n" - " 54: d00f beq.n 0x76\n" - " 56: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" - " 58: 2258 movs r2, #88 ; 0x58\n" - " 5a: 4b01 ldr r3, [pc, #4] ; (0x60)\n" - " 5c: e002 b.n 0x64\n" - " 5e: 0000 movs r0, r0\n" - " 60: 020b lsls r3, r1, #8\n" + " 8: d109 bne.n 0x1e\n" + " a: a704 add r7, pc, #16 ; (adr r7, 0x1c)\n" + " c: 3701 adds r7, #1\n" + " e: 6077 str r7, [r6, #4]\n" + " 10: 6897 ldr r7, [r2, #8]\n" + " 12: 9e05 ldr r6, [sp, #20]\n" + " 14: 9705 str r7, [sp, #20]\n" + " 16: 46b6 mov lr, r6\n" + " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 1a: 46c0 nop ; (mov r8, r8)\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 6987 ldr r7, [r0, #24]\n" + " 20: 463e mov r6, r7\n" + " 22: 4635 mov r5, r6\n" + " 24: 2403 movs r4, #3\n" + " 26: 4025 ands r5, r4\n" + " 28: 2d02 cmp r5, #2\n" + " 2a: d00e beq.n 0x4a\n" + " 2c: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" + " 2e: 222e movs r2, #46 ; 0x2e\n" + " 30: 4b00 ldr r3, [pc, #0] ; (0x34)\n" + " 32: e001 b.n 0x38\n" + " 34: 020b lsls r3, r1, #8\n" + " 36: 0000 movs r0, r0\n" + " 38: 4635 mov r5, r6\n" + " 3a: 4639 mov r1, r7\n" + " 3c: 9f05 ldr r7, [sp, #20]\n" + " 3e: 46be mov lr, r7\n" + " 40: 9f04 ldr r7, [sp, #16]\n" + " 42: 9504 str r5, [sp, #16]\n" + " 44: 9105 str r1, [sp, #20]\n" + " 46: 9e03 ldr r6, [sp, #12]\n" + " 48: bd32 pop {r1, r4, r5, pc}\n" + " 4a: 2503 movs r5, #3\n" + " 4c: 43ae bics r6, r5\n" + " 4e: 6836 ldr r6, [r6, #0]\n" + " 50: 4635 mov r5, r6\n" + " 52: 243f movs r4, #63 ; 0x3f\n" + " 54: 4025 ands r5, r4\n" + " 56: 2d14 cmp r5, #20\n" + " 58: d00f beq.n 0x7a\n" + " 5a: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" + " 5c: 225c movs r2, #92 ; 0x5c\n" + " 5e: 4b01 ldr r3, [pc, #4] ; (0x64)\n" + " 60: e002 b.n 0x68\n" " 62: 0000 movs r0, r0\n" - " 64: 4635 mov r5, r6\n" - " 66: 4639 mov r1, r7\n" - " 68: 9f05 ldr r7, [sp, #20]\n" - " 6a: 46be mov lr, r7\n" - " 6c: 9f04 ldr r7, [sp, #16]\n" - " 6e: 9504 str r5, [sp, #16]\n" - " 70: 9105 str r1, [sp, #20]\n" - " 72: 9e03 ldr r6, [sp, #12]\n" - " 74: bd32 pop {r1, r4, r5, pc}\n" - " 76: 9d00 ldr r5, [sp, #0]\n" - " 78: 682e ldr r6, [r5, #0]\n" - " 7a: 6836 ldr r6, [r6, #0]\n" - " 7c: 0636 lsls r6, r6, #24\n" - " 7e: 4c04 ldr r4, [pc, #16] ; (0x90)\n" - " 80: 4326 orrs r6, r4\n" - " 82: 65c6 str r6, [r0, #92] ; 0x5c\n" - " 84: 2680 movs r6, #128 ; 0x80\n" - " 86: 5996 ldr r6, [r2, r6]\n" - " 88: 463a mov r2, r7\n" - " 8a: 2300 movs r3, #0\n" - " 8c: 9f05 ldr r7, [sp, #20]\n" - " 8e: 9605 str r6, [sp, #20]\n" - " 90: 46be mov lr, r7\n" - " 92: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 94: 0250 lsls r0, r2, #9\n" - " 96: 0000 movs r0, r0" + " 64: 020b lsls r3, r1, #8\n" + " 66: 0000 movs r0, r0\n" + " 68: 4635 mov r5, r6\n" + " 6a: 4639 mov r1, r7\n" + " 6c: 9f05 ldr r7, [sp, #20]\n" + " 6e: 46be mov lr, r7\n" + " 70: 9f04 ldr r7, [sp, #16]\n" + " 72: 9504 str r5, [sp, #16]\n" + " 74: 9105 str r1, [sp, #20]\n" + " 76: 9e03 ldr r6, [sp, #12]\n" + " 78: bd32 pop {r1, r4, r5, pc}\n" + " 7a: 9d00 ldr r5, [sp, #0]\n" + " 7c: 682e ldr r6, [r5, #0]\n" + " 7e: 6836 ldr r6, [r6, #0]\n" + " 80: 0636 lsls r6, r6, #24\n" + " 82: 4c05 ldr r4, [pc, #20] ; (0x98)\n" + " 84: 4326 orrs r6, r4\n" + " 86: 65c6 str r6, [r0, #92] ; 0x5c\n" + " 88: 2680 movs r6, #128 ; 0x80\n" + " 8a: 5996 ldr r6, [r2, r6]\n" + " 8c: 463a mov r2, r7\n" + " 8e: 2300 movs r3, #0\n" + " 90: 9f05 ldr r7, [sp, #20]\n" + " 92: 9605 str r6, [sp, #20]\n" + " 94: 46be mov lr, r7\n" + " 96: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 98: 0270 lsls r0, r6, #9\n" + " 9a: 0000 movs r0, r0\n" + " 9c: b5f2 push {r1, r4, r5, r6, r7, lr}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -3040,7 +3050,7 @@ call_func_ptr_register_exhaustion_test_() -> " e: b082 sub sp, #8\n" " 10: 2101 movs r1, #1\n" " 12: 9100 str r1, [sp, #0]\n" - " 14: 9906 ldr r1, [sp, #24]\n" + " 14: 9908 ldr r1, [sp, #32]\n" " 16: 461a mov r2, r3\n" " 18: 2303 movs r3, #3\n" " 1a: 47b0 blx r6\n" @@ -3068,7 +3078,7 @@ call_func_ptr_register_exhaustion_test_() -> " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n" " e: b082 sub sp, #8\n" " 10: 9100 str r1, [sp, #0]\n" - " 12: 9906 ldr r1, [sp, #24]\n" + " 12: 9908 ldr r1, [sp, #32]\n" " 14: 461a mov r2, r3\n" " 16: 2301 movs r3, #1\n" " 18: 47b0 blx r6\n" @@ -3097,7 +3107,7 @@ call_func_ptr_register_exhaustion_test_() -> " e: b082 sub sp, #8\n" " 10: 2101 movs r1, #1\n" " 12: 9100 str r1, [sp, #0]\n" - " 14: 9906 ldr r1, [sp, #24]\n" + " 14: 9908 ldr r1, [sp, #32]\n" " 16: 461a mov r2, r3\n" " 18: 460b mov r3, r1\n" " 1a: 47b0 blx r6\n" @@ -3135,6 +3145,168 @@ call_func_ptr_register_exhaustion_test_() -> ] end}. +%% Mimic part of add.beam +add_beam_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:move_to_vm_register(State2, 16#9f, {x_reg, 1}), + State4 = ?BACKEND:move_to_vm_register(State3, 16#8f, {x_reg, 0}), + State5 = ?BACKEND:call_only_or_schedule_next(State4, 2), + State6 = ?BACKEND:add_label(State5, 2), + {State7, ResultReg} = ?BACKEND:call_primitive(State6, ?PRIM_ALLOCATE, [ + ctx, jit_state, 1, 0, 1 + ]), + State8 = ?BACKEND:if_block(State7, {'(bool)', {free, ResultReg}, '==', false}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State9 = ?BACKEND:move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + State10 = ?BACKEND:call_or_schedule_next(State9, 3), + State11 = ?BACKEND:add_label(State10, 3), + State12 = ?BACKEND:call_primitive_last(State11, ?PRIM_RETURN, [ + ctx, jit_state + ]), + % OP_INT_CALL_END + State13 = ?BACKEND:add_label(State12, 0), + State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]), + State15 = ?BACKEND:update_branches(State14), + Stream = ?BACKEND:stream(State15), + Dump = + << + % jump table + " 0: 4b01 ldr r3, [pc, #4] ; (0x8)\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop ; (mov r8, r8)\n" + " 8: 00d8 lsls r0, r3, #3\n" + " a: 0000 movs r0, r0\n" + " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop ; (mov r8, r8)\n" + " 14: 001c movs r4, r3\n" + " 16: 0000 movs r0, r0\n" + " 18: 4b01 ldr r3, [pc, #4] ; (0x20)\n" + " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1c: 449f add pc, r3\n" + " 1e: 46c0 nop ; (mov r8, r8)\n" + " 20: 0044 lsls r4, r0, #1\n" + " 22: 0000 movs r0, r0\n" + " 24: 4b01 ldr r3, [pc, #4] ; (0x2c)\n" + " 26: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 28: 449f add pc, r3\n" + " 2a: 46c0 nop ; (mov r8, r8)\n" + " 2c: 00a8 lsls r0, r5, #2\n" + " 2e: 0000 movs r0, r0\n" + % label 1 + % {move,{integer,9},{x,1}}. + " 30: 279f movs r7, #159 ; 0x9f\n" + " 32: 61c7 str r7, [r0, #28]\n" + % {move,{integer,8},{x,0}} + " 34: 278f movs r7, #143 ; 0x8f\n" + " 36: 6187 str r7, [r0, #24]\n" + % {call_only,2,{f,2}}. + " 38: 9e00 ldr r6, [sp, #0]\n" + " 3a: 68b7 ldr r7, [r6, #8]\n" + " 3c: 3f01 subs r7, #1\n" + " 3e: 60b7 str r7, [r6, #8]\n" + " 40: d004 beq.n 0x4c\n" + " 42: e00f b.n 0x64\n" + " 44: 46c0 nop ; (mov r8, r8)\n" + " 46: 46c0 nop ; (mov r8, r8)\n" + " 48: 46c0 nop ; (mov r8, r8)\n" + " 4a: 46c0 nop ; (mov r8, r8)\n" + " 4c: 2643 movs r6, #67 ; 0x43\n" + " 4e: 4276 negs r6, r6\n" + " 50: a702 add r7, pc, #8 ; (adr r7, 0x5c)\n" + " 52: 19f6 adds r6, r6, r7\n" + " 54: 9f00 ldr r7, [sp, #0]\n" + " 56: 607e str r6, [r7, #4]\n" + " 58: 6897 ldr r7, [r2, #8]\n" + " 5a: 9e05 ldr r6, [sp, #20]\n" + " 5c: 9705 str r7, [sp, #20]\n" + " 5e: 46b6 mov lr, r6\n" + " 60: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 62: 46c0 nop ; (mov r8, r8)\n" + % label 2 + % {allocate,1,1}. + " 64: 6957 ldr r7, [r2, #20]\n" + " 66: b405 push {r0, r2}\n" + " 68: b082 sub sp, #8\n" + " 6a: 2601 movs r6, #1\n" + " 6c: 9600 str r6, [sp, #0]\n" + " 6e: 9904 ldr r1, [sp, #16]\n" + " 70: 2201 movs r2, #1\n" + " 72: 2300 movs r3, #0\n" + " 74: 47b8 blx r7\n" + " 76: 4607 mov r7, r0\n" + " 78: b002 add sp, #8\n" + " 7a: bc05 pop {r0, r2}\n" + " 7c: 07fe lsls r6, r7, #31\n" + " 7e: d405 bmi.n 0x8c\n" + " 80: 6997 ldr r7, [r2, #24]\n" + " 82: 2282 movs r2, #130 ; 0x82\n" + " 84: 9e05 ldr r6, [sp, #20]\n" + " 86: 9705 str r7, [sp, #20]\n" + " 88: 46b6 mov lr, r6\n" + " 8a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + % {init_yregs,{list,[{y,0}]}}. + %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + " 8c: 6947 ldr r7, [r0, #20]\n" + " 8e: 263b movs r6, #59 ; 0x3b\n" + " 90: 603e str r6, [r7, #0]\n" + % {call,1,{f,3}} + %% call_or_schedule_next(State9, 3), + " 92: 9e00 ldr r6, [sp, #0]\n" + " 94: 6837 ldr r7, [r6, #0]\n" + " 96: 683f ldr r7, [r7, #0]\n" + " 98: 063f lsls r7, r7, #24\n" + " 9a: 4d0c ldr r5, [pc, #48] ; (0xcc)\n" + " 9c: 432f orrs r7, r5\n" + " 9e: 65c7 str r7, [r0, #92] ; 0x5c\n" + " a0: 9d00 ldr r5, [sp, #0]\n" + " a2: 68af ldr r7, [r5, #8]\n" + " a4: 3f01 subs r7, #1\n" + " a6: 60af str r7, [r5, #8]\n" + " a8: d004 beq.n 0xb4\n" + " aa: e013 b.n 0xd4\n" + " ac: 46c0 nop ; (mov r8, r8)\n" + " ae: 46c0 nop ; (mov r8, r8)\n" + " b0: 46c0 nop ; (mov r8, r8)\n" + " b2: 46c0 nop ; (mov r8, r8)\n" + " b4: 259f movs r5, #159 ; 0x9f\n" + " b6: 426d negs r5, r5\n" + " b8: a702 add r7, pc, #8 ; (adr r7, 0xc4)\n" + " ba: 19ed adds r5, r5, r7\n" + " bc: 9f00 ldr r7, [sp, #0]\n" + " be: 607d str r5, [r7, #4]\n" + " c0: 6897 ldr r7, [r2, #8]\n" + " c2: 9e05 ldr r6, [sp, #20]\n" + " c4: 9705 str r7, [sp, #20]\n" + " c6: 46b6 mov lr, r6\n" + " c8: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " ca: 0000 movs r0, r0\n" + "cc: 0340 lsls r0, r0, #13\n" + " ce: 0000 movs r0, r0\n" + %% (continuation) + " d0: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " d2: 46c0 nop ; (mov r8, r8)\n" + % label 3 + " d4: 6857 ldr r7, [r2, #4]\n" + " d6: 9e05 ldr r6, [sp, #20]\n" + " d8: 9705 str r7, [sp, #20]\n" + " da: 46b6 mov lr, r6\n" + " dc: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " de: 46c0 nop ; (mov r8, r8)\n" + % label 0 + " e0: 6857 ldr r7, [r2, #4]\n" + " e2: 9e05 ldr r6, [sp, #20]\n" + " e4: 9705 str r7, [sp, #20]\n" + " e6: 46b6 mov lr, r6\n" + " e8: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> dump_to_bin0(Tail, hex, Acc); dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> From 053dc986a02dac2fa2d9e4c1bfe68ad5e66bb425 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 2 Sep 2025 07:37:35 +0200 Subject: [PATCH 44/97] armv6m: fix tail call that shouldn't be a tail call We can't tail call if we're calling a function with 5+ args on armv6m because additional parameters are passed on the stack and therefore the signature is too different from the native entry point Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 102 ++--------- tests/libs/jit/jit_armv6m_tests.erl | 257 ++++++++++++++-------------- 2 files changed, 144 insertions(+), 215 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 31ea0fb605..e7807245c7 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -559,55 +559,33 @@ call_primitive_last( UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, PrepCall = load_primitive_ptr(Primitive, Temp), Stream1 = StreamModule:append(Stream0, PrepCall), - % Assert that jit_state is the second argument for tail_call_with_jit_state - [FirstArg, jit_state | ArgsT] = Args, - ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT], - % Handle arguments differently for tail calls with 5+ arguments + % Handle arguments differently for 5+ arguments - use direct call without register preservation case length(Args) of NumArgs when NumArgs >= 5 -> - % For tail calls with 5+ args, set first 4 args in registers without stack allocation - State1 = set_args_registers_only( + % For 5+ args, call directly without preserving registers since we return immediately + State1 = set_args( State0#state{ stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs }, - lists:sublist(ArgsForTailCall, 4), + Args, 0 ), - % 5th argument needs to be moved to r5 - FifthArg = lists:nth(5, ArgsForTailCall), - State2 = - case FifthArg of - % Already in r5 - {free, r5} -> - State1; - % Already in r5 - r5 -> - State1; - % Handle {free, Reg} - extract the register and move to r5 - {free, Reg} -> - move_to_native_register(State1, Reg, r5); - _ -> - % Move 5th argument to r5 - move_to_native_register(State1, FifthArg, r5) - end, - % Move function pointer to r1 if it's not already in r1 - #state{stream = Stream2} = State2, - {FinalFuncPtrReg, Stream3} = - case Temp of - % Already in r1, no move needed - r1 -> - {r1, Stream2}; - _ -> - % Move from Temp register to r1 - MoveToR1 = jit_armv6m_asm:mov(r1, Temp), - {r1, StreamModule:append(Stream2, MoveToR1)} - end, - State3 = tail_call_with_jit_state_stack( - State2#state{stream = Stream3}, FinalFuncPtrReg, NumArgs - ); + #state{stream = Stream2} = State1, + % Call the function pointer directly + Call = jit_armv6m_asm:blx(Temp), + Stream3 = StreamModule:append(Stream2, Call), + % Deallocate stack space that was allocated for 5+ arguments + DeallocateArgs = jit_armv6m_asm:add(sp, sp, 8), + Stream4 = StreamModule:append(Stream3, DeallocateArgs), + % Return: pop prolog registers and return + PopCode = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]), + Stream5 = StreamModule:append(Stream4, PopCode), + State3 = State1#state{stream = Stream5}; _ -> - % For 4 or fewer args, use standard argument setup + % For 4 or fewer args, use tail call + [FirstArg, jit_state | ArgsT] = Args, + ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT], State1 = set_args( State0#state{ stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs @@ -657,50 +635,6 @@ tail_call_with_jit_state_registers_only( Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}. -tail_call_with_jit_state_stack( - #state{ - stream_module = StreamModule, - stream = Stream0 - } = State, - FuncPtrReg, - NumArgs -) when NumArgs >= 5 -> - % Tail call with 5 or 6 arguments - need to handle 5th (and 6th) stack parameters - % 5th argument is in r5, 6th argument (if present) is in r6, function pointer in FuncPtrReg - % Restore lr first (using r7 as temp since r6 might contain 6th arg), then r7, then r6 - - % Load lr value to r7 (temp) - LoadLRtoR7 = jit_armv6m_asm:ldr(r7, {sp, 20}), - % Move to lr - MoveLR = jit_armv6m_asm:mov(lr, r7), - % Restore r7 from stack - RestoreR7 = jit_armv6m_asm:ldr(r7, {sp, 16}), - % Store 5th arg where r7 was - Store5thArg = jit_armv6m_asm:str(r5, {sp, 16}), - % Store function ptr where lr was - StoreFuncPtr = jit_armv6m_asm:str(FuncPtrReg, {sp, 20}), - - % Handle 6th argument if present (NumArgs == 6) - {Store6thArg, RestoreR6, PopAndJump} = - case NumArgs of - 5 -> - % For 5 args: restore r6 from stack, pop r1,r4,r5,pc - RestoreR6_5 = jit_armv6m_asm:ldr(r6, {sp, 12}), - PopAndJump_5 = jit_armv6m_asm:pop([r1, r4, r5, pc]), - {<<>>, RestoreR6_5, PopAndJump_5}; - 6 -> - % For 6 args: store r6 (6th arg) where r6 was saved, pop r1,r4,r5,r6,pc - Store6thArg_6 = jit_armv6m_asm:str(r6, {sp, 12}), - PopAndJump_6 = jit_armv6m_asm:pop([r1, r4, r5, r6, pc]), - {Store6thArg_6, <<>>, PopAndJump_6} - end, - - Code = - <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1}. - %%----------------------------------------------------------------------------- %% @doc Emit a return of a value if it's not equal to ctx. %% This logic is used to break out to the scheduler, typically after signal diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index fd863028b0..4b33cdf98d 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -91,17 +91,15 @@ call_primitive_5_args_test() -> Dump = << " 0: 6957 ldr r7, [r2, #20]\n" - " 2: 2210 movs r2, #16\n" - " 4: 2320 movs r3, #32\n" - " 6: 2502 movs r5, #2\n" - " 8: 4639 mov r1, r7\n" - " a: 9f05 ldr r7, [sp, #20]\n" - " c: 46be mov lr, r7\n" - " e: 9f04 ldr r7, [sp, #16]\n" - " 10: 9504 str r5, [sp, #16]\n" - " 12: 9105 str r1, [sp, #20]\n" - " 14: 9e03 ldr r6, [sp, #12]\n" - " 16: bd32 pop {r1, r4, r5, pc}" + " 2: b082 sub sp, #8\n" + " 4: 2602 movs r6, #2\n" + " 6: 9600 str r6, [sp, #0]\n" + " 8: 9902 ldr r1, [sp, #8]\n" + " a: 2210 movs r2, #16\n" + " c: 2320 movs r3, #32\n" + " e: 47b8 blx r7\n" + " 10: b002 add sp, #8\n" + " 12: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -204,17 +202,18 @@ call_ext_only_test() -> " 1a: 46c0 nop ; (mov r8, r8)\n" " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 1e: 6917 ldr r7, [r2, #16]\n" - " 20: 2220 movs r2, #32\n" - " 22: 2302 movs r3, #2\n" - " 24: 2502 movs r5, #2\n" - " 26: 4639 mov r1, r7\n" - " 28: 9f05 ldr r7, [sp, #20]\n" - " 2a: 46be mov lr, r7\n" - " 2c: 9f04 ldr r7, [sp, #16]\n" - " 2e: 9504 str r5, [sp, #16]\n" - " 30: 9105 str r1, [sp, #20]\n" - " 32: 9603 str r6, [sp, #12]\n" - " 34: bd72 pop {r1, r4, r5, r6, pc}" + " 20: b082 sub sp, #8\n" + " 22: 2601 movs r6, #1\n" + " 24: 4276 negs r6, r6\n" + " 26: 9601 str r6, [sp, #4]\n" + " 28: 2602 movs r6, #2\n" + " 2a: 9600 str r6, [sp, #0]\n" + " 2c: 9902 ldr r1, [sp, #8]\n" + " 2e: 222c movs r2, #44 ; 0x2c\n" + " 30: 2302 movs r3, #2\n" + " 32: 47b8 blx r7\n" + " 34: b002 add sp, #8\n" + " 36: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -226,7 +225,9 @@ call_ext_only_unaligned_test() -> State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]), Stream = ?BACKEND:stream(State3), Dump = << + % State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}), " 0: 6019 str r1, [r3, #0]\n" + % State2 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State1), " 2: 9e00 ldr r6, [sp, #0]\n" " 4: 68b7 ldr r7, [r6, #8]\n" " 6: 3f01 subs r7, #1\n" @@ -241,18 +242,20 @@ call_ext_only_unaligned_test() -> " 18: 46b6 mov lr, r6\n" " 1a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + % State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]), " 1e: 6917 ldr r7, [r2, #16]\n" - " 20: 2220 movs r2, #32\n" - " 22: 2302 movs r3, #2\n" - " 24: 2502 movs r5, #2\n" - " 26: 4639 mov r1, r7\n" - " 28: 9f05 ldr r7, [sp, #20]\n" - " 2a: 46be mov lr, r7\n" - " 2c: 9f04 ldr r7, [sp, #16]\n" - " 2e: 9504 str r5, [sp, #16]\n" - " 30: 9105 str r1, [sp, #20]\n" - " 32: 9603 str r6, [sp, #12]\n" - " 34: bd72 pop {r1, r4, r5, r6, pc}" + " 20: b082 sub sp, #8\n" + " 22: 2601 movs r6, #1\n" + " 24: 4276 negs r6, r6\n" + " 26: 9601 str r6, [sp, #4]\n" + " 28: 2602 movs r6, #2\n" + " 2a: 9600 str r6, [sp, #0]\n" + " 2c: 9902 ldr r1, [sp, #8]\n" + " 2e: 222c movs r2, #44 ; 0x2c\n" + " 30: 2302 movs r3, #2\n" + " 32: 47b8 blx r7\n" + " 34: b002 add sp, #8\n" + " 36: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -264,23 +267,21 @@ call_primitive_last_5_args_test() -> ]), Stream = ?BACKEND:stream(State2), Dump = << + % {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), " 0: 6987 ldr r7, [r0, #24]\n" + % State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [... " 2: 6cd6 ldr r6, [r2, #76] ; 0x4c\n" - " 4: 2204 movs r2, #4\n" - " 6: 4b01 ldr r3, [pc, #4] ; (0xc)\n" - " 8: e002 b.n 0x10\n" - " a: 0000 movs r0, r0\n" - " c: 080b lsrs r3, r1, #32\n" - " e: 0000 movs r0, r0\n" - " 10: 463d mov r5, r7\n" - " 12: 4631 mov r1, r6\n" - " 14: 9f05 ldr r7, [sp, #20]\n" - " 16: 46be mov lr, r7\n" - " 18: 9f04 ldr r7, [sp, #16]\n" - " 1a: 9504 str r5, [sp, #16]\n" - " 1c: 9105 str r1, [sp, #20]\n" - " 1e: 9e03 ldr r6, [sp, #12]\n" - " 20: bd32 pop {r1, r4, r5, pc}" + " 4: b082 sub sp, #8\n" + " 6: 9700 str r7, [sp, #0]\n" + " 8: 9902 ldr r1, [sp, #8]\n" + " a: 2208 movs r2, #8\n" + " c: 4b00 ldr r3, [pc, #0] ; (0x10)\n" + " e: e001 b.n 0x14\n" + " 10: 02cb lsrs r3, r1, #16\n" + " 12: 0000 movs r0, r0\n" + " 14: 47b0 blx r6\n" + " 16: b002 add sp, #8\n" + " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -290,6 +291,7 @@ call_ext_last_test() -> State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]), Stream = ?BACKEND:stream(State2), Dump = << + % State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), " 0: 9e00 ldr r6, [sp, #0]\n" " 2: 68b7 ldr r7, [r6, #8]\n" " 4: 3f01 subs r7, #1\n" @@ -305,18 +307,19 @@ call_ext_last_test() -> " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 1a: 46c0 nop ; (mov r8, r8)\n" " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + % State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]), " 1e: 6917 ldr r7, [r2, #16]\n" - " 20: 2220 movs r2, #32\n" - " 22: 2302 movs r3, #2\n" - " 24: 2502 movs r5, #2\n" - " 26: 4639 mov r1, r7\n" - " 28: 9f05 ldr r7, [sp, #20]\n" - " 2a: 46be mov lr, r7\n" - " 2c: 9f04 ldr r7, [sp, #16]\n" - " 2e: 9504 str r5, [sp, #16]\n" - " 30: 9105 str r1, [sp, #20]\n" - " 32: 9603 str r6, [sp, #12]\n" - " 34: bd72 pop {r1, r4, r5, r6, pc}" + " 20: b082 sub sp, #8\n" + " 22: 260a movs r6, #10\n" + " 24: 9601 str r6, [sp, #4]\n" + " 26: 2602 movs r6, #2\n" + " 28: 9600 str r6, [sp, #0]\n" + " 2a: 9902 ldr r1, [sp, #8]\n" + " 2c: 222a movs r2, #42 ; 0x2a\n" + " 2e: 2302 movs r3, #2\n" + " 30: 47b8 blx r7\n" + " 32: b002 add sp, #8\n" + " 34: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2007,26 +2010,24 @@ call_ext_test() -> " 20: 6837 ldr r7, [r6, #0]\n" " 22: 683f ldr r7, [r7, #0]\n" " 24: 063f lsls r7, r7, #24\n" - " 26: 4d08 ldr r5, [pc, #32] ; (0x48)\n" + " 26: 4d07 ldr r5, [pc, #28] ; (0x44)\n" " 28: 432f orrs r7, r5\n" " 2a: 65c7 str r7, [r0, #92] ; 0x5c\n" " 2c: 6917 ldr r7, [r2, #16]\n" - " 2e: 2202 movs r2, #2\n" - " 30: 2305 movs r3, #5\n" - " 32: 2501 movs r5, #1\n" - " 34: 426d negs r5, r5\n" - " 36: 4639 mov r1, r7\n" - " 38: 9f05 ldr r7, [sp, #20]\n" - " 3a: 46be mov lr, r7\n" - " 3c: 9f04 ldr r7, [sp, #16]\n" - " 3e: 9504 str r5, [sp, #16]\n" - " 40: 9105 str r1, [sp, #20]\n" - " 42: 9e03 ldr r6, [sp, #12]\n" - " 44: bd32 pop {r1, r4, r5, pc}\n" + " 2e: b082 sub sp, #8\n" + " 30: 2601 movs r6, #1\n" + " 32: 4276 negs r6, r6\n" + " 34: 9600 str r6, [sp, #0]\n" + " 36: 9902 ldr r1, [sp, #8]\n" + " 38: 2202 movs r2, #2\n" + " 3a: 2305 movs r3, #5\n" + " 3c: 47b8 blx r7\n" + " 3e: b002 add sp, #8\n" + " 40: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 42: 0000 movs r0, r0\n" + " 44: 0120 lsls r0, r4, #4\n" " 46: 0000 movs r0, r0\n" - " 48: 0130 lsls r0, r6, #4\n" - " 4a: 0000 movs r0, r0\n" - " 4c: b5f2 push {r1, r4, r5, r6, r7, lr}" + " 48: b5f2 push {r1, r4, r5, r6, r7, lr}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2081,64 +2082,58 @@ call_fun_test() -> " 24: 2403 movs r4, #3\n" " 26: 4025 ands r5, r4\n" " 28: 2d02 cmp r5, #2\n" - " 2a: d00e beq.n 0x4a\n" + " 2a: d00c beq.n 0x46\n" " 2c: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" - " 2e: 222e movs r2, #46 ; 0x2e\n" - " 30: 4b00 ldr r3, [pc, #0] ; (0x34)\n" - " 32: e001 b.n 0x38\n" - " 34: 020b lsls r3, r1, #8\n" - " 36: 0000 movs r0, r0\n" - " 38: 4635 mov r5, r6\n" - " 3a: 4639 mov r1, r7\n" - " 3c: 9f05 ldr r7, [sp, #20]\n" - " 3e: 46be mov lr, r7\n" - " 40: 9f04 ldr r7, [sp, #16]\n" - " 42: 9504 str r5, [sp, #16]\n" - " 44: 9105 str r1, [sp, #20]\n" - " 46: 9e03 ldr r6, [sp, #12]\n" - " 48: bd32 pop {r1, r4, r5, pc}\n" - " 4a: 2503 movs r5, #3\n" - " 4c: 43ae bics r6, r5\n" - " 4e: 6836 ldr r6, [r6, #0]\n" - " 50: 4635 mov r5, r6\n" - " 52: 243f movs r4, #63 ; 0x3f\n" - " 54: 4025 ands r5, r4\n" - " 56: 2d14 cmp r5, #20\n" - " 58: d00f beq.n 0x7a\n" - " 5a: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" - " 5c: 225c movs r2, #92 ; 0x5c\n" - " 5e: 4b01 ldr r3, [pc, #4] ; (0x64)\n" - " 60: e002 b.n 0x68\n" - " 62: 0000 movs r0, r0\n" - " 64: 020b lsls r3, r1, #8\n" + " 2e: b082 sub sp, #8\n" + " 30: 9600 str r6, [sp, #0]\n" + " 32: 9902 ldr r1, [sp, #8]\n" + " 34: 2232 movs r2, #50 ; 0x32\n" + " 36: 4b01 ldr r3, [pc, #4] ; (0x3c)\n" + " 38: e002 b.n 0x40\n" + " 3a: 0000 movs r0, r0\n" + " 3c: 018b lsls r3, r1, #6\n" + " 3e: 0000 movs r0, r0\n" + " 40: 47b8 blx r7\n" + " 42: b002 add sp, #8\n" + " 44: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 46: 2503 movs r5, #3\n" + " 48: 43ae bics r6, r5\n" + " 4a: 6836 ldr r6, [r6, #0]\n" + " 4c: 4635 mov r5, r6\n" + " 4e: 243f movs r4, #63 ; 0x3f\n" + " 50: 4025 ands r5, r4\n" + " 52: 2d14 cmp r5, #20\n" + " 54: d00b beq.n 0x6e\n" + " 56: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" + " 58: b082 sub sp, #8\n" + " 5a: 9600 str r6, [sp, #0]\n" + " 5c: 9902 ldr r1, [sp, #8]\n" + " 5e: 225c movs r2, #92 ; 0x5c\n" + " 60: 4b00 ldr r3, [pc, #0] ; (0x64)\n" + " 62: e001 b.n 0x68\n" + " 64: 018b lsls r3, r1, #6\n" " 66: 0000 movs r0, r0\n" - " 68: 4635 mov r5, r6\n" - " 6a: 4639 mov r1, r7\n" - " 6c: 9f05 ldr r7, [sp, #20]\n" - " 6e: 46be mov lr, r7\n" - " 70: 9f04 ldr r7, [sp, #16]\n" - " 72: 9504 str r5, [sp, #16]\n" - " 74: 9105 str r1, [sp, #20]\n" - " 76: 9e03 ldr r6, [sp, #12]\n" - " 78: bd32 pop {r1, r4, r5, pc}\n" - " 7a: 9d00 ldr r5, [sp, #0]\n" - " 7c: 682e ldr r6, [r5, #0]\n" - " 7e: 6836 ldr r6, [r6, #0]\n" - " 80: 0636 lsls r6, r6, #24\n" - " 82: 4c05 ldr r4, [pc, #20] ; (0x98)\n" - " 84: 4326 orrs r6, r4\n" - " 86: 65c6 str r6, [r0, #92] ; 0x5c\n" - " 88: 2680 movs r6, #128 ; 0x80\n" - " 8a: 5996 ldr r6, [r2, r6]\n" - " 8c: 463a mov r2, r7\n" - " 8e: 2300 movs r3, #0\n" - " 90: 9f05 ldr r7, [sp, #20]\n" - " 92: 9605 str r6, [sp, #20]\n" - " 94: 46be mov lr, r7\n" - " 96: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 98: 0270 lsls r0, r6, #9\n" - " 9a: 0000 movs r0, r0\n" - " 9c: b5f2 push {r1, r4, r5, r6, r7, lr}" + " 68: 47b8 blx r7\n" + " 6a: b002 add sp, #8\n" + " 6c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 6e: 9d00 ldr r5, [sp, #0]\n" + " 70: 682e ldr r6, [r5, #0]\n" + " 72: 6836 ldr r6, [r6, #0]\n" + " 74: 0636 lsls r6, r6, #24\n" + " 76: 4c05 ldr r4, [pc, #20] ; (0x8c)\n" + " 78: 4326 orrs r6, r4\n" + " 7a: 65c6 str r6, [r0, #92] ; 0x5c\n" + " 7c: 2680 movs r6, #128 ; 0x80\n" + " 7e: 5996 ldr r6, [r2, r6]\n" + " 80: 463a mov r2, r7\n" + " 82: 2300 movs r3, #0\n" + " 84: 9f05 ldr r7, [sp, #20]\n" + " 86: 9605 str r6, [sp, #20]\n" + " 88: 46be mov lr, r7\n" + " 8a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 8c: 0240 lsls r0, r0, #9\n" + " 8e: 0000 movs r0, r0\n" + " 90: b5f2 push {r1, r4, r5, r6, r7, lr}" >>, ?assertEqual(dump_to_bin(Dump), Stream). From a31a0ba267502aace6bb2267a03c440add269029 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 2 Sep 2025 22:10:45 +0200 Subject: [PATCH 45/97] armv6m: fix return lines and labels function Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 2 +- tests/libs/jit/jit_armv6m_tests.erl | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index e7807245c7..25711be0a9 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -2751,7 +2751,7 @@ return_labels_and_lines( % Unaligned - use offset 8 with 2-byte padding {jit_armv6m_asm:adr(r0, 8), <<0:16>>} end, - I2 = jit_armv6m_asm:bx(lr), + I2 = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]), LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, LinesTable = <<<> || {Line, Offset} <- SortedLines>>, Stream1 = StreamModule:append( diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 4b33cdf98d..d274b2da76 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1884,16 +1884,16 @@ return_labels_and_lines_test() -> State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), Stream = ?BACKEND:stream(State3), - % Should have generated adr + bx lr + labels table + lines table - % adr = 4 bytes, bx = 2 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes + % Should have generated adr + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table + % adr = 4 bytes, pop = 2 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes % Total minimum: 30 bytes ?assert(byte_size(Stream) >= 30), - % Expected: adr r0, + bx lr + labels table + lines table + % Expected: adr r0, + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table % The data tables start at offset 4, so adr should be adr r0, 4 not adr r0, 8 Dump = << " 0: a000 add r0, pc, #0 ; (adr r0, 0x4)\n" - " 2: 4770 bx lr\n" + " 2: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 4: 0200 lsls r0, r0, #8\n" " 6: 0100 lsls r0, r0, #4\n" " 8: 0000 movs r0, r0\n" @@ -1933,7 +1933,7 @@ return_labels_and_lines_unaligned_test() -> Dump = << " 0: 4770 bx lr\n" "2: a001 add r0, pc, #4 ; (adr r0, 0x8)\n" - "4: 4770 bx lr\n" + "4: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" "6: 0000 movs r0, r0\n" "8: 0200 lsls r0, r0, #8\n" "a: 0100 lsls r0, r0, #4\n" From bee611819b01d6643b3d83f4b581c73262f3e954 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 2 Sep 2025 23:54:00 +0200 Subject: [PATCH 46/97] armv6m: handle register exhaustion Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 82 ++++++++++++++++++++++------- tests/libs/jit/jit_armv6m_tests.erl | 37 +++++++++++-- 2 files changed, 96 insertions(+), 23 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 25711be0a9..fff18286fd 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -529,7 +529,13 @@ call_primitive( available_regs = RestRegs, used_regs = [TempReg | UsedRegs] }, - call_func_ptr(StateCall, {free, TempReg}, Args). + call_func_ptr(StateCall, {free, TempReg}, Args); +call_primitive( + #state{available_regs = []} = State, + Primitive, + Args +) -> + call_func_ptr(State, {primitive, Primitive}, Args). %%----------------------------------------------------------------------------- %% @doc Emit a jump (call without return) to a primitive with arguments. This @@ -1291,7 +1297,7 @@ shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, %% @param Args arguments to pass to the function %% @return Updated backend state and return register %%----------------------------------------------------------------------------- --spec call_func_ptr(state(), {free, armv6m_register()}, [arg()]) -> +-spec call_func_ptr(state(), {free, armv6m_register()} | {primitive, non_neg_integer()}, [arg()]) -> {state(), armv6m_register()}. call_func_ptr( #state{ @@ -1300,7 +1306,7 @@ call_func_ptr( available_regs = AvailableRegs0, used_regs = UsedRegs0 } = State0, - {free, FuncPtrReg}, + FuncPtrTuple, Args ) -> FreeRegs = lists:flatmap( @@ -1309,7 +1315,7 @@ call_func_ptr( ({free, Reg}) when is_atom(Reg) -> [Reg]; (_) -> [] end, - [{free, FuncPtrReg} | Args] + [FuncPtrTuple | Args] ), UsedRegs1 = UsedRegs0 -- FreeRegs, SavedRegsBase = [?CTX_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], @@ -1325,6 +1331,7 @@ call_func_ptr( [PaddingReg | _] = AvailableRegs1, SavedRegsBase ++ [PaddingReg]; _ -> + PaddingReg = undefined, SavedRegsBase end, @@ -1342,36 +1349,76 @@ call_func_ptr( end, Args ), - SavedRegsForTemps = SavedRegs -- [?CTX_REG, ?NATIVE_INTERFACE_REG, FuncPtrReg] -- ArgsRegs, - State1 = set_args( - State0#state{stream = Stream1, available_regs = SavedRegsForTemps ++ AvailableRegs0}, - Args, - length(SavedRegs) * 4 - ), - #state{stream = Stream2} = State1, + SavedRegsForTemps0 = SavedRegs -- [?CTX_REG, ?NATIVE_INTERFACE_REG] -- ArgsRegs, + ParameterRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + {State1, FuncPtrReg} = + case FuncPtrTuple of + {free, FuncPtrReg0} -> + % If FuncPtrReg is in parameter regs, we must swap it with a free reg. + case lists:member(FuncPtrReg0, ParameterRegs) of + true -> + [FuncPtrReg1 | _] = SavedRegsForTemps0 -- ArgsRegs, + MovInstr = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0), + SavedRegsForTemps1 = SavedRegsForTemps0 -- [FuncPtrReg1], + { + State0#state{ + stream = StreamModule:append(Stream1, MovInstr), + available_regs = + SavedRegsForTemps1 ++ [FuncPtrReg0] ++ AvailableRegs0 + }, + FuncPtrReg1 + }; + false -> + SavedRegsForTemps1 = SavedRegsForTemps0 -- [FuncPtrReg0], + { + State0#state{ + stream = Stream1, + available_regs = SavedRegsForTemps1 ++ AvailableRegs0 + }, + FuncPtrReg0 + } + end; + {primitive, Primitive} -> + [FuncPtrReg0 | _] = + ((SavedRegsForTemps0 ++ AvailableRegs0) -- ArgsRegs) -- ParameterRegs, + SetArgsAvailableRegs = SavedRegsForTemps0 ++ AvailableRegs0 -- [FuncPtrReg0], + PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0), + Stream2 = StreamModule:append(Stream1, PrepCall), + {State0#state{stream = Stream2, available_regs = SetArgsAvailableRegs}, FuncPtrReg0} + end, + + State2 = set_args(State1, Args, length(SavedRegs) * 4), + #state{stream = Stream3} = State2, % Call the function pointer (using BLX for call with return) Call = jit_armv6m_asm:blx(FuncPtrReg), - Stream4 = StreamModule:append(Stream2, Call), + Stream4 = StreamModule:append(Stream3, Call), % For result, we need a free register (including FuncPtrReg) but ideally % not the one used for padding. If none are available (all 8 registers % were pushed to the stack), we write the result to the stack position % of FuncPtrReg - Stream5 = + {Stream5, UsedRegs2} = case length(SavedRegs) of - 8 -> + 8 when element(1, FuncPtrTuple) =:= free -> % We use FuncPtrReg then as we know it's available. % Calculate stack offset: register number * 4 bytes ResultReg = FuncPtrReg, StackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4, StoreResult = jit_armv6m_asm:str(r0, {sp, StackOffset}), - StreamModule:append(Stream4, StoreResult); + {StreamModule:append(Stream4, StoreResult), [ResultReg | UsedRegs1]}; + 8 when PaddingReg =/= undefined -> + % We use PaddingReg then as we know it's available. + % Calculate stack offset: register number * 4 bytes + ResultReg = PaddingReg, + StackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4, + StoreResult = jit_armv6m_asm:str(r0, {sp, StackOffset}), + {StreamModule:append(Stream4, StoreResult), [PaddingReg | UsedRegs1]}; _ -> % Use any free that is not in SavedRegs [ResultReg | _] = AvailableRegs1 -- SavedRegs, MoveResult = jit_armv6m_asm:mov(ResultReg, r0), - StreamModule:append(Stream4, MoveResult) + {StreamModule:append(Stream4, MoveResult), [ResultReg | UsedRegs1]} end, % Deallocate stack space if we allocated it for 5+ arguments @@ -1388,9 +1435,8 @@ call_func_ptr( AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), - UsedRegs2 = [ResultReg | UsedRegs1], { - State1#state{ + State2#state{ stream = Stream7, available_regs = AvailableRegs3, used_regs = UsedRegs2 diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index d274b2da76..76cff29f89 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -3129,11 +3129,38 @@ call_func_ptr_register_exhaustion_test_() -> " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" " c: b4ff push {r0, r1, r2, r3, r4, r5, r6, r7}\n" - " e: 4630 mov r0, r6\n" - " 10: 4619 mov r1, r3\n" - " 12: 4788 blx r1\n" - " 14: 9001 str r0, [sp, #4]\n" - " 16: bcff pop {r0, r1, r2, r3, r4, r5, r6, r7}" + " e: 460c mov r4, r1\n" + " 10: 4630 mov r0, r6\n" + " 12: 4619 mov r1, r3\n" + " 14: 47a0 blx r4\n" + " 16: 9004 str r0, [sp, #16]\n" + " 18: bcff pop {r0, r1, r2, r3, r4, r5, r6, r7}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {primitive, 2}, + [{free, r6}, r3] + ), + ?assertEqual(ResultReg, r6), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 6a05 ldr r5, [r0, #32]\n" + " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" + " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" + " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" + " c: b4ff push {r0, r1, r2, r3, r4, r5, r6, r7}\n" + " e: 6894 ldr r4, [r2, #8]\n" + " 10: 4630 mov r0, r6\n" + " 12: 4619 mov r1, r3\n" + " 14: 47a0 blx r4\n" + " 16: 9006 str r0, [sp, #24]\n" + " 18: bcff pop {r0, r1, r2, r3, r4, r5, r6, r7}" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) From 5efea7705d346fec433a80df02b282a9a49878e7 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 2 Sep 2025 23:54:22 +0200 Subject: [PATCH 47/97] armv6m: precompile libraries Signed-off-by: Paul Guyot --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d23b067683..490aaccdc0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,7 +64,7 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH) endif() endif() -set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") +set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR From d2e6eb3926253f2e11a6ecc07f9a7ed70f15b1c1 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 3 Sep 2025 08:19:59 +0200 Subject: [PATCH 48/97] armv6m: fix far branches to set thumb bit Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 4 +++- tests/libs/jit/jit_armv6m_tests.erl | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index fff18286fd..f2c4b106a7 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -442,7 +442,9 @@ update_branches( % Calculate the relative offset for the literal value % This is the offset from the add instruction's PC to the target % The add instruction is at Offset + 2, so PC = Offset + 2 + 4 = Offset + 6 - AddPCOffset = Offset + 6, + % We also need to set thumb bit to 1, so eventually we only substract 5. + AddPCOffset = Offset + 5, + % Set thumb bit for bx instruction - target address must be odd for Thumb mode RelativeOffset = LabelOffset - AddPCOffset, if diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 76cff29f89..b9f7210e00 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1693,7 +1693,7 @@ is_boolean_far_test() -> " a: 4e01 ldr r6, [pc, #4] ; (0x10)\n" " c: 447e add r6, pc\n" " e: 4730 bx r6\n" - " 10: 0ff0 lsrs r0, r6, #31\n" + " 10: 0ff1 lsrs r0, r6, #31\n" " 12: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1729,7 +1729,7 @@ is_boolean_far_unaligned_test() -> " e: 447e add r6, pc\n" " 10: 4730 bx r6\n" " 12: 46c0 nop ; (mov r8, r8)\n" - " 14: 0fee lsrs r6, r5, #31\n" + " 14: 0fef lsrs r6, r5, #31\n" " 16: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). From 9d4dd0ee0efe14bd053ebcb6d0c496e8490de048 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 3 Sep 2025 08:25:02 +0200 Subject: [PATCH 49/97] armv6m: minimal compilation for rp2 and stm32 Signed-off-by: Paul Guyot --- src/platforms/rp2/src/lib/CMakeLists.txt | 1 + src/platforms/rp2/src/lib/jit_stream_flash.c | 34 +++++++++++++++ src/platforms/rp2/src/lib/sys.c | 9 ++++ .../rp2/tests/test_erl_sources/CMakeLists.txt | 41 +++++++++++++++---- src/platforms/stm32/CMakeLists.txt | 8 ++-- src/platforms/stm32/src/lib/CMakeLists.txt | 1 + .../stm32/src/lib/jit_stream_flash.c | 34 +++++++++++++++ src/platforms/stm32/src/lib/sys.c | 9 ++++ 8 files changed, 127 insertions(+), 10 deletions(-) create mode 100644 src/platforms/rp2/src/lib/jit_stream_flash.c create mode 100644 src/platforms/stm32/src/lib/jit_stream_flash.c diff --git a/src/platforms/rp2/src/lib/CMakeLists.txt b/src/platforms/rp2/src/lib/CMakeLists.txt index 957e346539..3cc69b56a3 100644 --- a/src/platforms/rp2/src/lib/CMakeLists.txt +++ b/src/platforms/rp2/src/lib/CMakeLists.txt @@ -31,6 +31,7 @@ set(HEADER_FILES set(SOURCE_FILES gpiodriver.c + jit_stream_flash.c networkdriver.c otp_crypto_platform.c platform_defaultatoms.c diff --git a/src/platforms/rp2/src/lib/jit_stream_flash.c b/src/platforms/rp2/src/lib/jit_stream_flash.c new file mode 100644 index 0000000000..77dfcca908 --- /dev/null +++ b/src/platforms/rp2/src/lib/jit_stream_flash.c @@ -0,0 +1,34 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "context.h" +#include "jit.h" +#include "term.h" + +ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) +{ + UNUSED(ctx); + UNUSED(jit_stream); + return NULL; +} + +#endif diff --git a/src/platforms/rp2/src/lib/sys.c b/src/platforms/rp2/src/lib/sys.c index aeffd72870..ac4c9c717c 100644 --- a/src/platforms/rp2/src/lib/sys.c +++ b/src/platforms/rp2/src/lib/sys.c @@ -474,3 +474,12 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) struct RP2PlatformData *platform = global->platform_data; SMP_MUTEX_UNLOCK(platform->random_mutex); } + +#ifndef AVM_NO_JIT +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ + UNUSED(size); + // We need to set the Thumb bit + return (ModuleNativeEntryPoint) ((uintptr_t) (native_code + offset) | 1); +} +#endif diff --git a/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt b/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt index cbdf581eef..b203d168b2 100644 --- a/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt +++ b/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt @@ -19,12 +19,30 @@ # include(ExternalProject) +if(NOT AVM_DISABLE_JIT) +set(host_atomvm_jit_target "--target=jit") +else() +set(host_atomvm_jit_target "") +endif() ExternalProject_Add(HostAtomVM SOURCE_DIR ../../../../../../ INSTALL_COMMAND cmake -E echo "Skipping install step." - BUILD_COMMAND cmake --build . --target=atomvmlib --target=PackBEAM --target=UF2Tool + BUILD_COMMAND cmake --build . --target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM --target=UF2Tool ) +macro(jit_precompile module_name) + if(NOT AVM_DISABLE_JIT) + add_custom_command( + OUTPUT ${AVM_JIT_TARGET_ARCH}/${module_name}.beam + COMMAND mkdir -p ${AVM_JIT_TARGET_ARCH} + && erl -pa HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/ ${module_name}.beam + DEPENDS ${module_name}.beam HostAtomVM + COMMENT "Compiling ${module_name}.beam to ${AVM_JIT_TARGET_ARCH}" + VERBATIM + ) + endif() +endmacro() + function(compile_erlang module_name module_src_dir) add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam" @@ -33,6 +51,7 @@ function(compile_erlang module_name module_src_dir) COMMENT "Compiling ${module_name}.erl" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) + jit_precompile(${module_name}) set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam") endfunction() @@ -41,18 +60,26 @@ compile_erlang(test_clocks "") compile_erlang(test_smp "") compile_erlang(test_crypto ../../../esp32/test/main/test_erl_sources/) +set(erlang_test_beams + test_clocks.beam + test_smp.beam + test_crypto.beam +) + +if(NOT AVM_DISABLE_JIT) + set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams}) + list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/) + list(APPEND erlang_test_beams ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) +endif() + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/rp2_test_modules.avm" COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i rp2_test_modules.avm HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm - test_clocks.beam - test_smp.beam - test_crypto.beam + ${erlang_test_beams} DEPENDS HostAtomVM - "${CMAKE_CURRENT_BINARY_DIR}/test_clocks.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_smp.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_crypto.beam" + ${erlang_test_beams} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} VERBATIM ) diff --git a/src/platforms/stm32/CMakeLists.txt b/src/platforms/stm32/CMakeLists.txt index 569b0a5a17..51489ca5bf 100644 --- a/src/platforms/stm32/CMakeLists.txt +++ b/src/platforms/stm32/CMakeLists.txt @@ -36,9 +36,6 @@ option(AVM_CONFIG_REBOOT_ON_NOT_OK "Reboot when application exits with non 'ok' option(AVM_DISABLE_GPIO_NIFS "Disable GPIO nifs (input and output)" OFF) option(AVM_DISABLE_GPIO_PORT_DRIVER "Disable GPIO 'port' driver (input, output, and interrupts)" OFF) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON FORCE) - set(AVM_DISABLE_SMP ON FORCE) set(AVM_DISABLE_TASK_DRIVER ON FORCE) @@ -85,6 +82,11 @@ if (NOT CMAKE_TOOLCHAIN_FILE) endif () mark_as_advanced(CMAKE_TOOLCHAIN_FILE) +option(AVM_DISABLE_JIT "Disable just in time compilation." ON) +if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "armv6m") +endif() + if ((NOT ${CMAKE_C_COMPILER_ID} STREQUAL "GNU") OR (NOT ${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") OR (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 7.2.1)) diff --git a/src/platforms/stm32/src/lib/CMakeLists.txt b/src/platforms/stm32/src/lib/CMakeLists.txt index f1846c070d..536d21cc88 100644 --- a/src/platforms/stm32/src/lib/CMakeLists.txt +++ b/src/platforms/stm32/src/lib/CMakeLists.txt @@ -33,6 +33,7 @@ set(HEADER_FILES set(SOURCE_FILES gpio_driver.c + jit_stream_flash.c platform_nifs.c sys.c ../../../../libAtomVM/portnifloader.c diff --git a/src/platforms/stm32/src/lib/jit_stream_flash.c b/src/platforms/stm32/src/lib/jit_stream_flash.c new file mode 100644 index 0000000000..77dfcca908 --- /dev/null +++ b/src/platforms/stm32/src/lib/jit_stream_flash.c @@ -0,0 +1,34 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "context.h" +#include "jit.h" +#include "term.h" + +ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) +{ + UNUSED(ctx); + UNUSED(jit_stream); + return NULL; +} + +#endif diff --git a/src/platforms/stm32/src/lib/sys.c b/src/platforms/stm32/src/lib/sys.c index c65a39cab3..262a2be8e1 100644 --- a/src/platforms/stm32/src/lib/sys.c +++ b/src/platforms/stm32/src/lib/sys.c @@ -296,3 +296,12 @@ void sys_init_icache() __dsb; __isb; } + +#ifndef AVM_NO_JIT +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ + UNUSED(size); + // We need to set the Thumb bit + return (ModuleNativeEntryPoint) ((uintptr_t) (native_code + offset) | 1); +} +#endif From 0b35683e3dd30ca30b388326286994974c36c119 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 4 Sep 2025 00:13:02 +0200 Subject: [PATCH 50/97] armv6m: fix compilation with large y_reg accesses, fix y_reg in args Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 137 +++++++++++++------- tests/libs/jit/jit_armv6m_tests.erl | 191 ++++++++++++++++++++++------ 2 files changed, 244 insertions(+), 84 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index f2c4b106a7..d62c2c4f8b 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1699,10 +1699,13 @@ set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, {ptr, I = jit_armv6m_asm:ldr(Reg, {Source, 0}), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; -set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, {y_reg, X}, Reg) -> - I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS), - I2 = jit_armv6m_asm:ldr(Reg, {Reg, X * 8}), - Stream1 = StreamModule:append(Stream0, <>), +set_args1( + #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State, + {y_reg, X}, + Reg +) -> + Code = ldr_y_reg(Reg, X, AvailRegs), + Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg) when ?IS_GPR(ArgReg) @@ -1737,23 +1740,19 @@ move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) -> I1 = jit_armv6m_asm:str(Src, {Reg, 0}), Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), State0#state{stream = Stream1}; -move_to_vm_register(#state{available_regs = [Temp | _]} = State0, Src, {y_reg, Y}) when +move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when is_atom(Src) -> - I1 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), - I2 = jit_armv6m_asm:str(Src, {Temp, Y * 4}), - Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), + Code = str_y_reg(Src, Y, Temp1, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), State0#state{stream = Stream1}; % Source is an integer to y_reg (optimized: ldr first, then movs) -move_to_vm_register(#state{available_regs = [Temp1, Temp2 | _]} = State0, N, {y_reg, Y}) when +move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when is_integer(N), N >= 0, N =< 255 -> - I1 = jit_armv6m_asm:ldr(Temp1, ?Y_REGS), - I2 = jit_armv6m_asm:movs(Temp2, N), - I3 = jit_armv6m_asm:str(Temp2, {Temp1, Y * 4}), - Stream1 = (State0#state.stream_module):append( - State0#state.stream, <> - ), + I1 = jit_armv6m_asm:movs(Temp2, N), + YCode = str_y_reg(Temp2, Y, Temp1, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), State0#state{stream = Stream1}; % Source is an integer (0-255 for movs, negative values need different handling) move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when @@ -1787,9 +1786,8 @@ move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, R State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) -> - I1 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), - I2 = jit_armv6m_asm:ldr(Temp, {Temp, Y * 4}), - Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), + Code = ldr_y_reg(Temp, Y, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), State1#state{available_regs = AR0}; % term_to_float @@ -1845,29 +1843,27 @@ move_array_element( Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | _]} = + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} = State, Reg, Index, {y_reg, Y} ) when is_atom(Reg) andalso is_integer(Index) -> - I1 = jit_armv6m_asm:ldr(Temp1, ?Y_REGS), - I2 = jit_armv6m_asm:ldr(Temp2, {Reg, Index * 4}), - I3 = jit_armv6m_asm:str(Temp2, {Temp1, Y * 4}), - Code = <>, + I1 = jit_armv6m_asm:ldr(Temp2, {Reg, Index * 4}), + YCode = str_y_reg(Temp2, Y, Temp1, AT), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; move_array_element( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} = State, {free, Reg}, Index, {y_reg, Y} ) when is_integer(Index) -> - I1 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), - I2 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}), - I3 = jit_armv6m_asm:str(Reg, {Temp, Y * 4}), - Code = <>, + I1 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}), + YCode = str_y_reg(Reg, Y, Temp, AT), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; move_array_element( @@ -1924,7 +1920,7 @@ move_array_element( #state{ stream_module = StreamModule, stream = Stream0, - available_regs = [Temp | _] = AvailableRegs0, + available_regs = [Temp | AT] = AvailableRegs0, used_regs = UsedRegs0 } = State, Reg, @@ -1932,14 +1928,14 @@ move_array_element( {y_reg, Y} ) when is_atom(IndexReg) -> I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2), - I2 = jit_armv6m_asm:ldr(Temp, ?Y_REGS), - I3 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), - I4 = jit_armv6m_asm:str(IndexReg, {Temp, Y * 4}), + I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}), + Code = str_y_reg(IndexReg, Y, Temp, AT), + I3 = Code, {AvailableRegs1, UsedRegs1} = free_reg( AvailableRegs0, UsedRegs0, IndexReg ), Stream1 = StreamModule:append( - Stream0, <> + Stream0, <> ), State#state{ available_regs = AvailableRegs1, @@ -2084,9 +2080,7 @@ move_to_native_register( } = State, {y_reg, Y} ) -> - I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS), - I2 = jit_armv6m_asm:ldr(Reg, {Reg, Y * 4}), - Code = <>, + Code = ldr_y_reg(Reg, Y, AvailT), Stream1 = StreamModule:append(Stream0, Code), {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}; move_to_native_register( @@ -2132,11 +2126,11 @@ move_to_native_register( Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; move_to_native_register( - #state{stream_module = StreamModule, stream = Stream0} = State, {y_reg, Y}, RegDst + #state{stream_module = StreamModule, stream = Stream0, available_regs = AT} = State, + {y_reg, Y}, + RegDst ) -> - I1 = jit_armv6m_asm:ldr(RegDst, ?Y_REGS), - I2 = jit_armv6m_asm:ldr(RegDst, {RegDst, Y * 4}), - Code = <>, + Code = ldr_y_reg(RegDst, Y, AT), Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; move_to_native_register( @@ -2183,13 +2177,12 @@ copy_to_native_register(State, Reg) -> move_to_native_register(State, Reg). move_to_cp( - #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | AvailT]} = State, {y_reg, Y} ) -> - I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS), - I2 = jit_armv6m_asm:ldr(Reg, {Reg, Y * 4}), - I3 = jit_armv6m_asm:str(Reg, ?CP), - Code = <>, + I1 = ldr_y_reg(Reg, Y, AvailT), + I2 = jit_armv6m_asm:str(Reg, ?CP), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}. @@ -2809,6 +2802,60 @@ return_labels_and_lines( ), State#state{stream = Stream1}. +%% Helper function to generate str instruction with y_reg offset, handling large offsets +str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 -> + % Small offset - use immediate addressing + I1 = jit_armv6m_asm:ldr(TempReg, ?Y_REGS), + I2 = jit_armv6m_asm:str(SrcReg, {TempReg, Y * 4}), + <>; +str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) -> + % Large offset - use register arithmetic with second available register + Offset = Y * 4, + I1 = jit_armv6m_asm:ldr(TempReg1, ?Y_REGS), + I2 = jit_armv6m_asm:movs(TempReg2, Offset), + I3 = jit_armv6m_asm:add(TempReg2, TempReg1), + I4 = jit_armv6m_asm:str(SrcReg, {TempReg2, 0}), + <>; +str_y_reg(SrcReg, Y, TempReg1, []) -> + % Large offset - no additional registers available, use IP_REG as second temp + Offset = Y * 4, + I1 = jit_armv6m_asm:ldr(TempReg1, ?Y_REGS), + I2 = jit_armv6m_asm:mov(?IP_REG, TempReg1), + I3 = jit_armv6m_asm:movs(TempReg1, Offset), + I4 = jit_armv6m_asm:add(TempReg1, ?IP_REG), + I5 = jit_armv6m_asm:str(SrcReg, {TempReg1, 0}), + <>. + +%% Helper function to generate ldr instruction with y_reg offset, handling large offsets +ldr_y_reg(DstReg, Y, [TempReg | _]) when Y * 4 =< 124 -> + % Small offset - use immediate addressing + I1 = jit_armv6m_asm:ldr(TempReg, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(DstReg, {TempReg, Y * 4}), + <>; +ldr_y_reg(DstReg, Y, [TempReg | _]) -> + % Large offset - use DstReg as second temp register for arithmetic + Offset = Y * 4, + I1 = jit_armv6m_asm:ldr(TempReg, ?Y_REGS), + I2 = jit_armv6m_asm:movs(DstReg, Offset), + I3 = jit_armv6m_asm:add(DstReg, TempReg), + I4 = jit_armv6m_asm:ldr(DstReg, {DstReg, 0}), + <>; +ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 -> + % Small offset, no registers available - use DstReg as temp + I1 = jit_armv6m_asm:ldr(DstReg, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(DstReg, {DstReg, Y * 4}), + <>; +ldr_y_reg(DstReg, Y, []) -> + % Large offset, no registers available - use IP_REG as temp register + % Note: IP_REG (r12) can only be used with mov, not ldr directly + Offset = Y * 4, + I1 = jit_armv6m_asm:ldr(DstReg, ?Y_REGS), + I2 = jit_armv6m_asm:mov(?IP_REG, DstReg), + I3 = jit_armv6m_asm:movs(DstReg, Offset), + I4 = jit_armv6m_asm:add(DstReg, ?IP_REG), + I5 = jit_armv6m_asm:ldr(DstReg, {DstReg, 0}), + <>. + free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), true = lists:member(Reg, UsedRegs0), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index b9f7210e00..cf6bb0cc43 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -405,8 +405,8 @@ move_to_cp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 683f ldr r7, [r7, #0]\n" + " 0: 6946 ldr r6, [r0, #20]\n" + " 2: 6837 ldr r7, [r6, #0]\n" " 4: 65c7 str r7, [r0, #92] ; 0x5c" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1505,11 +1505,11 @@ get_list_test() -> " 0: 6987 ldr r7, [r0, #24]\n" " 2: 2603 movs r6, #3\n" " 4: 43b7 bics r7, r6\n" - " 6: 6946 ldr r6, [r0, #20]\n" - " 8: 687d ldr r5, [r7, #4]\n" + " 6: 687d ldr r5, [r7, #4]\n" + " 8: 6946 ldr r6, [r0, #20]\n" " a: 6075 str r5, [r6, #4]\n" - " c: 6946 ldr r6, [r0, #20]\n" - " e: 683d ldr r5, [r7, #0]\n" + " c: 683d ldr r5, [r7, #0]\n" + " e: 6946 ldr r6, [r0, #20]\n" " 10: 6035 str r5, [r6, #0]" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1975,8 +1975,8 @@ gc_bif2_test() -> " 14: 9600 str r6, [sp, #0]\n" " 16: 2100 movs r1, #0\n" " 18: 2203 movs r2, #3\n" - " 1a: 6943 ldr r3, [r0, #20]\n" - " 1c: 681b ldr r3, [r3, #0]\n" + " 1a: 6946 ldr r6, [r0, #20]\n" + " 1c: 6833 ldr r3, [r6, #0]\n" " 1e: 47b8 blx r7\n" " 20: 4607 mov r7, r0\n" " 22: b002 add sp, #8\n" @@ -2169,15 +2169,15 @@ move_to_vm_register_test_() -> end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 2600 movs r6, #0\n" + " 0: 2600 movs r6, #0\n" + " 2: 6947 ldr r7, [r0, #20]\n" " 4: 60be str r6, [r7, #8]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 2600 movs r6, #0\n" + " 0: 2600 movs r6, #0\n" + " 2: 6947 ldr r7, [r0, #20]\n" " 4: 653e str r6, [r7, #80] ; 0x50" >>) end), @@ -2196,15 +2196,15 @@ move_to_vm_register_test_() -> end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 2}, << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 262a movs r6, #42 ; 0x2a\n" + " 0: 262a movs r6, #42 ; 0x2a\n" + " 2: 6947 ldr r7, [r0, #20]\n" " 4: 60be str r6, [r7, #8]" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 262a movs r6, #42 ; 0x2a\n" + " 0: 262a movs r6, #42 ; 0x2a\n" + " 2: 6947 ldr r7, [r0, #20]\n" " 4: 653e str r6, [r7, #80] ; 0x50" >>) end), @@ -2247,16 +2247,16 @@ move_to_vm_register_test_() -> %% Test: y_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 683f ldr r7, [r7, #0]\n" + " 0: 6946 ldr r6, [r0, #20]\n" + " 2: 6837 ldr r7, [r6, #0]\n" " 4: 6247 str r7, [r0, #36] ; 0x24" >>) end), %% Test: y_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 687f ldr r7, [r7, #4]\n" + " 0: 6946 ldr r6, [r0, #20]\n" + " 2: 6877 ldr r7, [r6, #4]\n" " 4: 6247 str r7, [r0, #36] ; 0x24" >>) end), @@ -2362,11 +2362,21 @@ move_to_vm_register_test_() -> %% Test: y_reg to x_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 6fff ldr r7, [r7, #124] ; 0x7c\n" + " 0: 6946 ldr r6, [r0, #20]\n" + " 2: 6ff7 ldr r7, [r6, #124] ; 0x7c\n" " 4: 6547 str r7, [r0, #84] ; 0x54" >>) end), + %% Test: Large y_reg index (32) that exceeds str immediate offset limit + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 32}, << + " 0: 262a movs r6, #42 ; 0x2a\n" + " 2: 6947 ldr r7, [r0, #20]\n" + " 4: 2580 movs r5, #128 ; 0x80\n" + " 6: 443d add r5, r7\n" + " 8: 602e str r6, [r5, #0]" + >>) + end), %% Test: Negative immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, -1, {x_reg, 0}, << @@ -2407,8 +2417,8 @@ move_array_element_test_() -> %% move_array_element: reg[x] to y_reg ?_test(begin move_array_element_test0(State0, r3, 1, {y_reg, 2}, << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 685e ldr r6, [r3, #4]\n" + " 0: 685e ldr r6, [r3, #4]\n" + " 2: 6947 ldr r7, [r0, #20]\n" " 4: 60be str r6, [r7, #8]" >>) end), @@ -2421,8 +2431,8 @@ move_array_element_test_() -> %% move_array_element: reg[x] to y_reg ?_test(begin move_array_element_test0(State0, r3, 7, {y_reg, 31}, << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 69de ldr r6, [r3, #28]\n" + " 0: 69de ldr r6, [r3, #28]\n" + " 2: 6947 ldr r7, [r0, #20]\n" " 4: 67fe str r6, [r7, #124] ; 0x7c" >>) end), @@ -2459,8 +2469,8 @@ move_array_element_test_() -> move_array_element_test0(State1, r3, {free, Reg}, {y_reg, 31}, << " 0: 691f ldr r7, [r3, #16]\n" " 2: 00bf lsls r7, r7, #2\n" - " 4: 6946 ldr r6, [r0, #20]\n" - " 6: 59df ldr r7, [r3, r7]\n" + " 4: 59df ldr r7, [r3, r7]\n" + " 6: 6946 ldr r6, [r0, #20]\n" " 8: 67f7 str r7, [r6, #124] ; 0x7c" >>) end), @@ -2542,8 +2552,8 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, r3, r4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 68bf ldr r7, [r7, #8]\n" + " 0: 6946 ldr r6, [r0, #20]\n" + " 2: 68b7 ldr r7, [r6, #8]\n" " 4: 4626 mov r6, r4\n" " 6: 00b6 lsls r6, r6, #2\n" " 8: 519f str r7, [r3, r6]" @@ -2670,8 +2680,8 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(r7, Reg), Dump = << - " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 68ff ldr r7, [r7, #12]" + " 0: 6946 ldr r6, [r0, #20]\n" + " 2: 68f7 ldr r7, [r6, #12]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2716,8 +2726,8 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 6941 ldr r1, [r0, #20]\n" - " 2: 6889 ldr r1, [r1, #8]" + " 0: 6947 ldr r7, [r0, #20]\n" + " 2: 68b9 ldr r1, [r7, #8]" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2901,14 +2911,117 @@ set_args1_y_reg_test() -> " 2: 00bf lsls r7, r7, #2\n" " 4: 59d7 ldr r7, [r2, r7]\n" " 6: b405 push {r0, r2}\n" - " 8: 6940 ldr r0, [r0, #20]\n" - " a: 6a80 ldr r0, [r0, #40] ; 0x28\n" + " 8: 6946 ldr r6, [r0, #20]\n" + " a: 6970 ldr r0, [r6, #20]\n" " c: 47b8 blx r7\n" " e: 4607 mov r7, r0\n" " 10: bc05 pop {r0, r2}" >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test large Y register read (Y=32, offset=128, exceeds 124-byte limit) +large_y_reg_read_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Move from a large Y register (32 * 4 = 128 bytes, exceeds 124-byte immediate limit) + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 32}), + Stream = ?BACKEND:stream(State1), + % Expected: uses helper with temp register since offset 128 > 124 + Dump = << + " 0: 6946 ldr r6, [r0, #20]\n" + " 2: 2780 movs r7, #128 ; 0x80\n" + " 4: 4437 add r7, r6\n" + " 6: 683f ldr r7, [r7, #0]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(r7, Reg). + +%% Test large Y register write with available temp registers +large_y_reg_write_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get a native register first + {State1, SrcReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + % Move to a large Y register (40 * 4 = 160 bytes) + State2 = ?BACKEND:move_to_vm_register(State1, SrcReg, {y_reg, 40}), + Stream = ?BACKEND:stream(State2), + % Expected: uses helper with two temp registers since we have registers available + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 6946 ldr r6, [r0, #20]\n" + " 4: 25a0 movs r5, #160 ; 0xa0\n" + " 6: 4435 add r5, r6\n" + " 8: 602f str r7, [r5, #0]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test large Y register read with limited registers (uses IP_REG fallback) +large_y_reg_read_register_exhaustion_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate most available registers to simulate near-exhaustion (leave 1 for the y_reg helper) + {State1, _} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, _} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, _} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, _} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + % Leave one register available so the y_reg helper can work, but it will need IP_REG fallback + {StateFinal, ResultReg} = ?BACKEND:move_to_native_register(State5, {y_reg, 35}), + Stream = ?BACKEND:stream(StateFinal), + % Expected: uses IP_REG (r12) fallback sequence + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 6a05 ldr r5, [r0, #32]\n" + " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" + " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" + " a: 6941 ldr r1, [r0, #20]\n" + " c: 468c mov ip, r1\n" + " e: 218c movs r1, #140 ; 0x8c\n" + " 10: 4461 add r1, ip\n" + " 12: 6809 ldr r1, [r1, #0]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(r1, ResultReg). + +%% Test large Y register write with register exhaustion (uses IP_REG fallback) +large_y_reg_write_register_exhaustion_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get a source register first + {State1, SrcReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + % Allocate most remaining registers to simulate exhaustion + {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, r4} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + % Try to write to large Y register when only one temp register is available + StateFinal = ?BACKEND:move_to_vm_register(State5, SrcReg, {y_reg, 50}), + Stream = ?BACKEND:stream(StateFinal), + % Expected: uses IP_REG (r12) fallback sequence + Dump = << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 69c6 ldr r6, [r0, #28]\n" + " 4: 6a05 ldr r5, [r0, #32]\n" + " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n" + " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n" + " a: 6941 ldr r1, [r0, #20]\n" + " c: 468c mov ip, r1\n" + " e: 21c8 movs r1, #200 ; 0xc8\n" + " 10: 4461 add r1, ip\n" + " 12: 600f str r7, [r1, #0]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test boundary case: Y=31 (124 bytes, exactly at limit, should use direct addressing) +y_reg_boundary_direct_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 31}), + Stream = ?BACKEND:stream(State1), + % Expected: uses direct addressing since 31 * 4 = 124 <= 124 + Dump = << + " 0: 6946 ldr r6, [r0, #20]\n" + " 2: 6ff7 ldr r7, [r6, #124] ; 0x7c" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(r7, Reg). + %% Test debugger function debugger_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -3274,8 +3387,8 @@ add_beam_test() -> " 8a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" % {init_yregs,{list,[{y,0}]}}. %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), - " 8c: 6947 ldr r7, [r0, #20]\n" - " 8e: 263b movs r6, #59 ; 0x3b\n" + " 8c: 263b movs r6, #59 ; 0x3b\n" + " 8e: 6947 ldr r7, [r0, #20]\n" " 90: 603e str r6, [r7, #0]\n" % {call,1,{f,3}} %% call_or_schedule_next(State9, 3), @@ -3308,7 +3421,7 @@ add_beam_test() -> " c6: 46b6 mov lr, r6\n" " c8: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " ca: 0000 movs r0, r0\n" - "cc: 0340 lsls r0, r0, #13\n" + " cc: 0340 lsls r0, r0, #13\n" " ce: 0000 movs r0, r0\n" %% (continuation) " d0: b5f2 push {r1, r4, r5, r6, r7, lr}\n" From c9fc0199654a65bce0bdfcce2a6fe5d2c7d054f2 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 4 Sep 2025 07:26:55 +0200 Subject: [PATCH 51/97] armv6m: fix thumb bit in a long jump Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 3 ++- tests/libs/jit/jit_armv6m_tests.erl | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index d62c2c4f8b..daa223accc 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -726,7 +726,8 @@ branch_to_label_code( % Calculate the literal value: target - PC_at_add_instruction % The add instruction is at Offset + 2, so PC = Offset + 2 + 4 = Offset + 6 - AddPCValue = Offset + 6, + % We also need to set thumb bit to 1, so eventually we only subtract 5. + AddPCValue = Offset + 5, LiteralValue = LabelOffset - AddPCValue, if diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index cf6bb0cc43..53c0af3dfc 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1757,7 +1757,7 @@ is_boolean_far_known_test() -> " a: 4e01 ldr r6, [pc, #4] ; (0x10)\n" " c: 447e add r6, pc\n" " e: 4730 bx r6\n" - " 10: 0ff0 lsrs r0, r6, #31\n" + " 10: 0ff1 lsrs r1, r6, #31\n" " 12: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1793,7 +1793,7 @@ is_boolean_far_known_unaligned_test() -> " e: 447e add r6, pc\n" " 10: 4730 bx r6\n" " 12: 46c0 nop ; (mov r8, r8)\n" - " 14: 0fee lsrs r6, r5, #31\n" + " 14: 0fef lsrs r7, r5, #31\n" " 16: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). From 5c1aa67e6e2abfc0f0c7878f73c159aa37637fec Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 4 Sep 2025 21:37:06 +0200 Subject: [PATCH 52/97] armv6m: fix offset calculation with set_continuation_to_label Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 37 +++++--------- tests/libs/jit/jit_armv6m_tests.erl | 76 ++++++++++++++--------------- 2 files changed, 51 insertions(+), 62 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index daa223accc..d962dde93f 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -2206,42 +2206,31 @@ set_continuation_to_label( } = State, Label ) -> - Offset = StreamModule:offset(Stream0), % Calculate jump table entry offset JumpTableEntryOffset = Label * ?JUMP_TABLE_ENTRY_SIZE, - % Assume mov_immediate will be at most 10 bytes - MaxMovImmediateSize = 10, - EstimatedAdrOffset = Offset + MaxMovImmediateSize, - % +4 for adr base, +4 for minimum adr offset - EstimatedAdrPC = (EstimatedAdrOffset band (bnot 3)) + 4 + 4, - RelativeOffset = JumpTableEntryOffset - EstimatedAdrPC, - - % Generate mov_immediate with the relative offset + 1 (to set thumb bit) - State1 = mov_immediate(State, Temp2, RelativeOffset + 1), - Stream1 = State1#state.stream, - ActualMovImmediateSize = StreamModule:offset(Stream1) - Offset, + % First emit the adr instruction with a known offset (we'll use 4 for now) + I1 = jit_armv6m_asm:adr(Temp1, 4), + Stream1 = StreamModule:append(Stream0, I1), - % Calculate where adr instruction will actually be - ActualAdrOffset = Offset + ActualMovImmediateSize, - ActualAdrPC = (ActualAdrOffset band (bnot 3)) + 4, + % Calculate the actual ADR PC: ADR reads from (PC+4) aligned to 4-byte boundary + AdrOffset = StreamModule:offset(Stream1), + AdrPC = (AdrOffset + 4) band (bnot 3), - % Calculate the correct adr offset: ActualAdrPC + (AdrOffset - 4) + RelativeOffset = JumpTableEntryOffset - % So: AdrOffset = JumpTableEntryOffset - ActualAdrPC - RelativeOffset + 4 - AdrOffset = JumpTableEntryOffset - ActualAdrPC - RelativeOffset + 4, - % Ensure adr offset is multiple of 4 and within range - AdrOffset = ((AdrOffset + 3) div 4) * 4, + % Calculate what we need to load: JumpTableEntryOffset - (AdrPC + 4) + 1 (for thumb bit) + % Since ADR will add AdrPC + 4, we need to subtract 4 from our immediate + ImmediateValue = JumpTableEntryOffset - AdrPC - 3, - % Get PC address using adr - I1 = jit_armv6m_asm:adr(Temp1, AdrOffset), + % Generate mov_immediate to load the calculated offset + State1 = mov_immediate(State#state{stream = Stream1}, Temp2, ImmediateValue), % Add PC + offset (with thumb bit set), load jit_state, and store continuation I2 = jit_armv6m_asm:adds(Temp2, Temp2, Temp1), I3 = jit_armv6m_asm:ldr(Temp1, {sp, ?STACK_OFFSET_JITSTATE}), I4 = jit_armv6m_asm:str(Temp2, ?JITSTATE_CONTINUATION(Temp1)), - Code = <>, - Stream2 = StreamModule:append(Stream1, Code), + Code = <>, + Stream2 = StreamModule:append(State1#state.stream, Code), State1#state{stream = Stream2}. %% @doc Set the contination to a given offset diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 53c0af3dfc..6a712256b9 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1212,9 +1212,9 @@ call_only_or_schedule_next_and_label_relocation_test() -> " 32: 46c0 nop ; (mov r8, r8)\n" " 34: 46c0 nop ; (mov r8, r8)\n" " 36: 46c0 nop ; (mov r8, r8)\n" - " 38: 262f movs r6, #47 ; 0x2f\n" - " 3a: 4276 negs r6, r6\n" - " 3c: a702 add r7, pc, #8 ; (adr r7, 0x48)\n" + " 38: a700 add r7, pc, #0 ; (adr r7, 0x3c)\n" + " 3a: 2627 movs r6, #39 ; 0x27\n" + " 3c: 4276 negs r6, r6\n" " 3e: 19f6 adds r6, r6, r7\n" " 40: 9f00 ldr r7, [sp, #0]\n" " 42: 607e str r6, [r7, #4]\n" @@ -1285,9 +1285,9 @@ call_only_or_schedule_next_and_label_relocation_unaligned_test() -> " 36: 46c0 nop ; (mov r8, r8)\n" " 38: 46c0 nop ; (mov r8, r8)\n" " 3a: 46c0 nop ; (mov r8, r8)\n" - " 3c: 2633 movs r6, #51 ; 0x33\n" - " 3e: 4276 negs r6, r6\n" - " 40: a702 add r7, pc, #8 ; (adr r7, 0x4c)\n" + " 3c: a700 add r7, pc, #0 ; (adr r7, 0x40)\n" + " 3e: 262b movs r6, #43 ; 0x2b\n" + " 40: 4276 negs r6, r6\n" " 42: 19f6 adds r6, r6, r7\n" " 44: 9f00 ldr r7, [sp, #0]\n" " 46: 607e str r6, [r7, #4]\n" @@ -1346,19 +1346,19 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> " 132: 46c0 nop ; (mov r8, r8)\n" " 134: 46c0 nop ; (mov r8, r8)\n" " 136: 46c0 nop ; (mov r8, r8)\n" - " 138: 4e00 ldr r6, [pc, #0] ; (0x13c)\n" - " 13a: e001 b.n 0x140\n" - " 13c: fed1 ffff mrc2 15, 6, pc, cr1, cr15, {7}\n" - " 140: a701 add r7, pc, #4 ; (adr r7, 0x148)\n" - " 142: 19f6 adds r6, r6, r7\n" - " 144: 9f00 ldr r7, [sp, #0]\n" - " 146: 607e str r6, [r7, #4]\n" - " 148: 6897 ldr r7, [r2, #8]\n" - " 14a: 9e05 ldr r6, [sp, #20]\n" - " 14c: 9705 str r7, [sp, #20]\n" - " 14e: 46b6 mov lr, r6\n" - " 150: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 152: 46c0 nop ; (mov r8, r8)\n" + " 138: a700 add r7, pc, #0 ; (adr r7, 0x13c)\n" + " 13a: 4e01 ldr r6, [pc, #4] ; (0x140)\n" + " 13c: e002 b.n 0x144\n" + " 13e: 0000 movs r0, r0\n" + " 140: fed9 ffff mrc2 15, 6, pc, cr9, cr15, {7}\n" + " 144: 19f6 adds r6, r6, r7\n" + " 146: 9f00 ldr r7, [sp, #0]\n" + " 148: 607e str r6, [r7, #4]\n" + " 14a: 6897 ldr r7, [r2, #8]\n" + " 14c: 9e05 ldr r6, [sp, #20]\n" + " 14e: 9705 str r7, [sp, #20]\n" + " 150: 46b6 mov lr, r6\n" + " 152: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 154: 6817 ldr r7, [r2, #0]\n" " 156: 9e05 ldr r6, [sp, #20]\n" " 158: 9705 str r7, [sp, #20]\n" @@ -1410,19 +1410,19 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> " 132: 46c0 nop ; (mov r8, r8)\n" " 134: 46c0 nop ; (mov r8, r8)\n" " 136: 46c0 nop ; (mov r8, r8)\n" - " 138: 4e00 ldr r6, [pc, #0] ; (0x13c)\n" - " 13a: e001 b.n 0x140\n" - " 13c: fed1 ffff mrc2 15, 6, pc, cr1, cr15, {7}\n" - " 140: a701 add r7, pc, #4 ; (adr r7, 0x148)\n" - " 142: 19f6 adds r6, r6, r7\n" - " 144: 9f00 ldr r7, [sp, #0]\n" - " 146: 607e str r6, [r7, #4]\n" - " 148: 6897 ldr r7, [r2, #8]\n" - " 14a: 9e05 ldr r6, [sp, #20]\n" - " 14c: 9705 str r7, [sp, #20]\n" - " 14e: 46b6 mov lr, r6\n" - " 150: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 152: 46c0 nop ; (mov r8, r8)\n" + " 138: a700 add r7, pc, #0 ; (adr r7, 0x13c)\n" + " 13a: 4e01 ldr r6, [pc, #4] ; (0x140)\n" + " 13c: e002 b.n 0x144\n" + " 13e: 0000 movs r0, r0\n" + " 140: fed9 ffff mrc2 15, 6, pc, cr9, cr15, {7}\n" + " 144: 19f6 adds r6, r6, r7\n" + " 146: 9f00 ldr r7, [sp, #0]\n" + " 148: 607e str r6, [r7, #4]\n" + " 14a: 6897 ldr r7, [r2, #8]\n" + " 14c: 9e05 ldr r6, [sp, #20]\n" + " 14e: 9705 str r7, [sp, #20]\n" + " 150: 46b6 mov lr, r6\n" + " 152: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 154: 6817 ldr r7, [r2, #0]\n" " 156: 9e05 ldr r6, [sp, #20]\n" " 158: 9705 str r7, [sp, #20]\n" @@ -3351,9 +3351,9 @@ add_beam_test() -> " 46: 46c0 nop ; (mov r8, r8)\n" " 48: 46c0 nop ; (mov r8, r8)\n" " 4a: 46c0 nop ; (mov r8, r8)\n" - " 4c: 2643 movs r6, #67 ; 0x43\n" - " 4e: 4276 negs r6, r6\n" - " 50: a702 add r7, pc, #8 ; (adr r7, 0x5c)\n" + " 4c: a700 add r7, pc, #0 ; (adr r7, 0x50)\n" + " 4e: 263b movs r6, #59 ; 0x3b\n" + " 50: 4276 negs r6, r6\n" " 52: 19f6 adds r6, r6, r7\n" " 54: 9f00 ldr r7, [sp, #0]\n" " 56: 607e str r6, [r7, #4]\n" @@ -3409,9 +3409,9 @@ add_beam_test() -> " ae: 46c0 nop ; (mov r8, r8)\n" " b0: 46c0 nop ; (mov r8, r8)\n" " b2: 46c0 nop ; (mov r8, r8)\n" - " b4: 259f movs r5, #159 ; 0x9f\n" - " b6: 426d negs r5, r5\n" - " b8: a702 add r7, pc, #8 ; (adr r7, 0xc4)\n" + " b4: a700 add r7, pc, #0 ; (adr r7, 0xb8)\n" + " b6: 2597 movs r5, #151 ; 0x97\n" + " b8: 426d negs r5, r5\n" " ba: 19ed adds r5, r5, r7\n" " bc: 9f00 ldr r7, [sp, #0]\n" " be: 607d str r5, [r7, #4]\n" From 4b285666171a1cca55b71898fdf421b8831721b5 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 5 Sep 2025 06:10:56 +0200 Subject: [PATCH 53/97] armv6m: fix reverse condition in if_block optimization Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 8 ++++---- tests/libs/jit/jit_armv6m_tests.erl | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index d962dde93f..b77ccdb688 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1130,8 +1130,8 @@ if_block_cond( % Low bits mask: use lsls to shift high bits away ShiftAmount = 32 - BitCount, TestCode0 = jit_armv6m_asm:lsls(Temp, Reg, ShiftAmount), - % branch if not zero (any low bit was set) - {TestCode0, ne}; + % branch if zero (no low bit was set) + {TestCode0, eq}; no_optimization -> % General case: use mov+tst TestCode0 = jit_armv6m_asm:movs(Temp, Val), @@ -2614,11 +2614,12 @@ call_only_or_schedule_next( % Look up label once to avoid duplicate lookup in helper LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels), + BccOffset = StreamModule:offset(Stream1), + State4 = case LabelLookupResult of {Label, LabelOffset} -> % Label is known, check if we can optimize the conditional branch - BccOffset = StreamModule:offset(Stream1), % After bcc instruction BranchOffset = BccOffset + 2, Rel = LabelOffset - BranchOffset, @@ -2647,7 +2648,6 @@ call_only_or_schedule_next( end; false -> % Label not known, get the far branch size for the skip - BccOffset = StreamModule:offset(Stream1), FarSeqOffset = BccOffset + 2, {State1, FarCodeBlock} = branch_to_label_code(State0, FarSeqOffset, Label, false), FarSeqSize = byte_size(FarCodeBlock), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 6a712256b9..3686c1b5be 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -874,7 +874,7 @@ if_block_test_() -> " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" " 4: 077d lsls r5, r7, #29\n" - " 6: d100 bne.n 0xa\n" + " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), @@ -913,7 +913,7 @@ if_block_test_() -> " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" " 4: 077d lsls r5, r7, #29\n" - " 6: d100 bne.n 0xa\n" + " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), @@ -1050,7 +1050,7 @@ bitwise_and_optimization_test_() -> " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" " 4: 07bd lsls r5, r7, #30\n" - " 6: d100 bne.n 0xa\n" + " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), @@ -1070,7 +1070,7 @@ bitwise_and_optimization_test_() -> " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" " 4: 073d lsls r5, r7, #28\n" - " 6: d100 bne.n 0xa\n" + " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), @@ -1090,7 +1090,7 @@ bitwise_and_optimization_test_() -> " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n" " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n" " 4: 06bd lsls r5, r7, #26\n" - " 6: d100 bne.n 0xa\n" + " 6: d000 beq.n 0xa\n" " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), From 5959345e91684e5c004f7d246a7d5c90569c5c61 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 7 Sep 2025 08:32:02 +0200 Subject: [PATCH 54/97] armv6m: fix offset in cp to jump table Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 14 +++++++------- tests/libs/jit/jit_armv6m_tests.erl | 12 ++++++------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index b77ccdb688..9c2ff082bf 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -2202,24 +2202,24 @@ set_continuation_to_label( #state{ stream_module = StreamModule, stream = Stream0, + offset = JumpTableOffset, available_regs = [Temp1, Temp2 | _] } = State, Label ) -> % Calculate jump table entry offset - JumpTableEntryOffset = Label * ?JUMP_TABLE_ENTRY_SIZE, + JumpTableEntryOffset = (Label * ?JUMP_TABLE_ENTRY_SIZE) + JumpTableOffset, - % First emit the adr instruction with a known offset (we'll use 4 for now) + AdrOffset = StreamModule:offset(Stream0), + % ADR Temp, +.4 means we're storing PC value in Temp1. + % For example, if AdrOffset is 0x0808034c, Temp1 will contain 0x08080350 I1 = jit_armv6m_asm:adr(Temp1, 4), Stream1 = StreamModule:append(Stream0, I1), - % Calculate the actual ADR PC: ADR reads from (PC+4) aligned to 4-byte boundary - AdrOffset = StreamModule:offset(Stream1), AdrPC = (AdrOffset + 4) band (bnot 3), - % Calculate what we need to load: JumpTableEntryOffset - (AdrPC + 4) + 1 (for thumb bit) - % Since ADR will add AdrPC + 4, we need to subtract 4 from our immediate - ImmediateValue = JumpTableEntryOffset - AdrPC - 3, + % Calculate what we need to load: JumpTableEntryOffset - AdrPC + 1 (for thumb bit) + ImmediateValue = JumpTableEntryOffset + 1 - AdrPC, % Generate mov_immediate to load the calculated offset State1 = mov_immediate(State#state{stream = Stream1}, Temp2, ImmediateValue), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 3686c1b5be..9ae9fa4d50 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1213,7 +1213,7 @@ call_only_or_schedule_next_and_label_relocation_test() -> " 34: 46c0 nop ; (mov r8, r8)\n" " 36: 46c0 nop ; (mov r8, r8)\n" " 38: a700 add r7, pc, #0 ; (adr r7, 0x3c)\n" - " 3a: 2627 movs r6, #39 ; 0x27\n" + " 3a: 2623 movs r6, #35 ; 0x23\n" " 3c: 4276 negs r6, r6\n" " 3e: 19f6 adds r6, r6, r7\n" " 40: 9f00 ldr r7, [sp, #0]\n" @@ -1286,7 +1286,7 @@ call_only_or_schedule_next_and_label_relocation_unaligned_test() -> " 38: 46c0 nop ; (mov r8, r8)\n" " 3a: 46c0 nop ; (mov r8, r8)\n" " 3c: a700 add r7, pc, #0 ; (adr r7, 0x40)\n" - " 3e: 262b movs r6, #43 ; 0x2b\n" + " 3e: 2627 movs r6, #39 ; 0x27\n" " 40: 4276 negs r6, r6\n" " 42: 19f6 adds r6, r6, r7\n" " 44: 9f00 ldr r7, [sp, #0]\n" @@ -1350,7 +1350,7 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> " 13a: 4e01 ldr r6, [pc, #4] ; (0x140)\n" " 13c: e002 b.n 0x144\n" " 13e: 0000 movs r0, r0\n" - " 140: fed9 ffff mrc2 15, 6, pc, cr9, cr15, {7}\n" + " 140: fedd ffff stcl2 15, cr13, [sp, #-1020] ; 0xfffffc04\n" " 144: 19f6 adds r6, r6, r7\n" " 146: 9f00 ldr r7, [sp, #0]\n" " 148: 607e str r6, [r7, #4]\n" @@ -1414,7 +1414,7 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> " 13a: 4e01 ldr r6, [pc, #4] ; (0x140)\n" " 13c: e002 b.n 0x144\n" " 13e: 0000 movs r0, r0\n" - " 140: fed9 ffff mrc2 15, 6, pc, cr9, cr15, {7}\n" + " 140: fedd ffff stcl2 15, cr13, [sp, #-1020] ; 0xfffffc04\n" " 144: 19f6 adds r6, r6, r7\n" " 146: 9f00 ldr r7, [sp, #0]\n" " 148: 607e str r6, [r7, #4]\n" @@ -3352,7 +3352,7 @@ add_beam_test() -> " 48: 46c0 nop ; (mov r8, r8)\n" " 4a: 46c0 nop ; (mov r8, r8)\n" " 4c: a700 add r7, pc, #0 ; (adr r7, 0x50)\n" - " 4e: 263b movs r6, #59 ; 0x3b\n" + " 4e: 2637 movs r6, #55 ; 0x37\n" " 50: 4276 negs r6, r6\n" " 52: 19f6 adds r6, r6, r7\n" " 54: 9f00 ldr r7, [sp, #0]\n" @@ -3410,7 +3410,7 @@ add_beam_test() -> " b0: 46c0 nop ; (mov r8, r8)\n" " b2: 46c0 nop ; (mov r8, r8)\n" " b4: a700 add r7, pc, #0 ; (adr r7, 0xb8)\n" - " b6: 2597 movs r5, #151 ; 0x97\n" + " b6: 2593 movs r5, #147 ; 0x93\n" " b8: 426d negs r5, r5\n" " ba: 19ed adds r5, r5, r7\n" " bc: 9f00 ldr r7, [sp, #0]\n" From 2f9360b354f064cf82c122515b94bc4f66c4fb6a Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 7 Sep 2025 21:25:57 +0200 Subject: [PATCH 55/97] armv6m: fix register usage with if_block_cond Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 5 +++-- tests/libs/jit/jit_armv6m_tests.erl | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 9c2ff082bf..2f2a077ca2 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -957,8 +957,9 @@ if_block_cond( I2/binary >>, Stream1 = StreamModule:append(Stream0, Code), - State1 = State0#state{stream = Stream1}, - {State1, ge, byte_size(I1)}; + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, ge, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0} ) -> diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 9ae9fa4d50..46ec6ae77d 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -998,7 +998,7 @@ if_block_test_() -> " 8: 3602 adds r6, #2" >>, ?assertEqual(dump_to_bin(Dump), Stream), - ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) end), ?_test(begin State1 = ?BACKEND:if_block( From 3bc1de2ecaddc43cf13076f59f71f71fcd3df583 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 7 Sep 2025 21:26:17 +0200 Subject: [PATCH 56/97] armv6m: add get_array_element with {free, Reg} Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 2f2a077ca2..e8f7995f3e 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1946,8 +1946,19 @@ move_array_element( }. %% @doc move reg[x] to a vm or native register --spec get_array_element(state(), armv6m_register(), non_neg_integer()) -> +-spec get_array_element(state(), armv6m_register() | {free, armv6m_register()}, non_neg_integer()) -> {state(), armv6m_register()}. +get_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + {free, Reg}, + Index +) -> + I1 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}), + Stream1 = StreamModule:append(Stream0, <>), + {State#state{stream = Stream1}, Reg}; get_array_element( #state{ stream_module = StreamModule, From ecb6caef6132de9809e5e65f1e07b32c697b65fd Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 7 Sep 2025 21:26:41 +0200 Subject: [PATCH 57/97] armv6m: add move_to_native_register/2,3 with {x_reg, extra} Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index e8f7995f3e..04a29fab99 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -2070,6 +2070,18 @@ move_to_native_register( -> State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT}, {move_to_native_register(State1, Imm, Reg), Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {x_reg, extra} +) -> + I1 = jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; move_to_native_register( #state{ stream_module = StreamModule, @@ -2130,6 +2142,12 @@ move_to_native_register( I1 = jit_armv6m_asm:ldr(RegDst, {Reg, 0}), Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst +) -> + I1 = jit_armv6m_asm:ldr(RegDst, ?X_REG(?MAX_REG)), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; move_to_native_register( #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst ) when From 6d668cb750dca17676ad00ac469d1e63c97c73a7 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 7 Sep 2025 21:27:01 +0200 Subject: [PATCH 58/97] armv6m: fix register usage with get_module_index Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 6 +++++- tests/libs/jit/jit_armv6m_tests.erl | 26 +++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 04a29fab99..d5facdeb6c 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -2323,7 +2323,11 @@ get_module_index( Code = <>, Stream1 = StreamModule:append(Stream0, Code), { - State#state{stream = Stream1, available_regs = AvailableT, used_regs = [Reg | UsedRegs0]}, + State#state{ + stream = Stream1, + available_regs = [TempJitState | AvailableT], + used_regs = [Reg | UsedRegs0] + }, Reg }. diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 46ec6ae77d..835355e973 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -2010,8 +2010,8 @@ call_ext_test() -> " 20: 6837 ldr r7, [r6, #0]\n" " 22: 683f ldr r7, [r7, #0]\n" " 24: 063f lsls r7, r7, #24\n" - " 26: 4d07 ldr r5, [pc, #28] ; (0x44)\n" - " 28: 432f orrs r7, r5\n" + " 26: 4e07 ldr r6, [pc, #28] ; (0x44)\n" + " 28: 4337 orrs r7, r6\n" " 2a: 65c7 str r7, [r0, #92] ; 0x5c\n" " 2c: 6917 ldr r7, [r2, #16]\n" " 2e: b082 sub sp, #8\n" @@ -2120,8 +2120,8 @@ call_fun_test() -> " 70: 682e ldr r6, [r5, #0]\n" " 72: 6836 ldr r6, [r6, #0]\n" " 74: 0636 lsls r6, r6, #24\n" - " 76: 4c05 ldr r4, [pc, #20] ; (0x8c)\n" - " 78: 4326 orrs r6, r4\n" + " 76: 4d05 ldr r5, [pc, #20] ; (0x8c)\n" + " 78: 432e orrs r6, r5\n" " 7a: 65c6 str r6, [r0, #92] ; 0x5c\n" " 7c: 2680 movs r6, #128 ; 0x80\n" " 7e: 5996 ldr r6, [r2, r6]\n" @@ -3396,13 +3396,13 @@ add_beam_test() -> " 94: 6837 ldr r7, [r6, #0]\n" " 96: 683f ldr r7, [r7, #0]\n" " 98: 063f lsls r7, r7, #24\n" - " 9a: 4d0c ldr r5, [pc, #48] ; (0xcc)\n" - " 9c: 432f orrs r7, r5\n" + " 9a: 4e0c ldr r6, [pc, #48] ; (0xcc)\n" + " 9c: 4337 orrs r7, r6\n" " 9e: 65c7 str r7, [r0, #92] ; 0x5c\n" - " a0: 9d00 ldr r5, [sp, #0]\n" - " a2: 68af ldr r7, [r5, #8]\n" + " a0: 9e00 ldr r6, [sp, #0]\n" + " a2: 68b7 ldr r7, [r6, #8]\n" " a4: 3f01 subs r7, #1\n" - " a6: 60af str r7, [r5, #8]\n" + " a6: 60b7 str r7, [r6, #8]\n" " a8: d004 beq.n 0xb4\n" " aa: e013 b.n 0xd4\n" " ac: 46c0 nop ; (mov r8, r8)\n" @@ -3410,11 +3410,11 @@ add_beam_test() -> " b0: 46c0 nop ; (mov r8, r8)\n" " b2: 46c0 nop ; (mov r8, r8)\n" " b4: a700 add r7, pc, #0 ; (adr r7, 0xb8)\n" - " b6: 2593 movs r5, #147 ; 0x93\n" - " b8: 426d negs r5, r5\n" - " ba: 19ed adds r5, r5, r7\n" + " b6: 2693 movs r6, #147 ; 0x93\n" + " b8: 4276 negs r6, r6\n" + " ba: 19f6 adds r6, r6, r7\n" " bc: 9f00 ldr r7, [sp, #0]\n" - " be: 607d str r5, [r7, #4]\n" + " be: 607e str r6, [r7, #4]\n" " c0: 6897 ldr r7, [r2, #8]\n" " c2: 9e05 ldr r6, [sp, #20]\n" " c4: 9705 str r7, [sp, #20]\n" From a48a2f4bc6a9baf0055eec02e0cca822eb2f3f95 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 9 Sep 2025 23:51:20 +0200 Subject: [PATCH 59/97] armv6m: fix branch target with call_only Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index d5facdeb6c..fd179703ad 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -2655,8 +2655,7 @@ call_only_or_schedule_next( {Label, LabelOffset} -> % Label is known, check if we can optimize the conditional branch % After bcc instruction - BranchOffset = BccOffset + 2, - Rel = LabelOffset - BranchOffset, + Rel = LabelOffset - BccOffset, if Rel >= -252 andalso Rel =< 258 andalso (Rel rem 2) =:= 0 -> From 539ed8c6b5baa51c6d9ebb986d15208f9a24dc79 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 13 Sep 2025 19:37:38 +0200 Subject: [PATCH 60/97] armv6m: fix int64 arguments for 32 bits platforms Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 54 ++++++++++++++++++++++++++--- tests/libs/jit/jit_armv6m_tests.erl | 44 +++++++++++++++++++++++ 2 files changed, 94 insertions(+), 4 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index fd179703ad..e8dbab10c6 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -144,7 +144,7 @@ -type vm_register() :: {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, armv6m_register()}. -type value() :: immediate() | vm_register() | armv6m_register() | {ptr, armv6m_register()}. --type arg() :: ctx | jit_state | offset | value() | {free, value()}. +-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}. -type maybe_free_armv6m_register() :: {free, armv6m_register()} | armv6m_register(). @@ -1354,7 +1354,7 @@ call_func_ptr( Args ), SavedRegsForTemps0 = SavedRegs -- [?CTX_REG, ?NATIVE_INTERFACE_REG] -- ArgsRegs, - ParameterRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + ParameterRegs = parameter_regs(Args), {State1, FuncPtrReg} = case FuncPtrTuple of {free, FuncPtrReg0} -> @@ -1575,6 +1575,21 @@ set_args_registers_only( parameter_regs(Args) -> parameter_regs0(Args, ?PARAMETER_REGS, []). +% AAPCS32 helper: align to even register for 64-bit arguments + +% r0 is even, use (r0,r1) +align_to_even_register([r0, r1 | Rest]) -> [r0, r1 | Rest]; +% r1 is odd, skip to (r2,r3) +align_to_even_register([r1, r2 | Rest]) -> [r2, r3 | Rest]; +% r2 is even, use (r2,r3) +align_to_even_register([r2, r3 | Rest]) -> [r2, r3 | Rest]; +% r3 is odd, no pair available +align_to_even_register([r3]) -> []; +% No registers available +align_to_even_register([]) -> []; +% Other cases +align_to_even_register(_) -> []. + parameter_regs0([], _, Acc) -> lists:reverse(Acc); parameter_regs0([Special | T], [GPReg | GPRegsT], Acc) when @@ -1596,6 +1611,16 @@ parameter_regs0([{fp_reg, _} | T], [GPRegA, GPRegB | GPRegsT], Acc) -> parameter_regs0(T, GPRegsT, [GPRegB, GPRegA | Acc]); parameter_regs0([Int | T], [GPReg | GPRegsT], Acc) when is_integer(Int) -> parameter_regs0(T, GPRegsT, [GPReg | Acc]); +% AAPCS32: 64-bit arguments require double-word alignment (even register number) +parameter_regs0([{avm_int64_t, _} | T], GPRegs, Acc) -> + % Find the next even-numbered register position for AAPCS32 alignment + case align_to_even_register(GPRegs) of + [GPRegA, GPRegB | GPRegsT] -> + parameter_regs0(T, GPRegsT, [GPRegB, GPRegA | Acc]); + _ -> + % Not enough registers available, use stack + parameter_regs0(T, [], [stack, stack | Acc]) + end; % Handle stack parameters when we run out of registers parameter_regs0([_Arg | T], [], Acc) -> parameter_regs0(T, [], [stack | Acc]). @@ -1666,6 +1691,23 @@ set_args0(State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], Av false = lists:member(?CTX_REG, ArgsRegs), State1 = set_args1(State, Arg, ?CTX_REG), set_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); +% Handle 64-bit arguments that need two registers according to AAPCS32 +set_args0( + State, + [{avm_int64_t, Value} | ArgsT], + [_ArgReg | ArgsRegs], + [ParamRegLo, ParamRegHi | ParamRegs], + AvailGP, + StackOffset +) when is_integer(Value) -> + % Split the 64-bit value into two 32-bit parts + LowPart = Value band 16#FFFFFFFF, + HighPart = (Value bsr 32) band 16#FFFFFFFF, + % Set up the low 32 bits in the first register + State1 = set_args1(State, LowPart, ParamRegLo), + % Set up the high 32 bits in the second register + State2 = set_args1(State1, HighPart, ParamRegHi), + set_args0(State2, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); set_args0( #state{stream_module = StreamModule} = State, [Arg | ArgsT], @@ -1716,7 +1758,10 @@ set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, ArgReg Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; set_args1(State, Arg, Reg) when is_integer(Arg) -> - mov_immediate(State, Reg, Arg). + mov_immediate(State, Reg, Arg); +set_args1(State, {avm_int64_t, Value}, Reg) when is_integer(Value) -> + % For now, just store the lower 32 bits - this needs proper AAPCS32 register pair support + mov_immediate(State, Reg, Value band 16#FFFFFFFF). %%----------------------------------------------------------------------------- %% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg) @@ -2911,7 +2956,8 @@ args_regs(Args) -> ({fp_reg, _}) -> ?CTX_REG; ({free, {x_reg, _}}) -> ?CTX_REG; ({free, {y_reg, _}}) -> ?CTX_REG; - ({free, {fp_reg, _}}) -> ?CTX_REG + ({free, {fp_reg, _}}) -> ?CTX_REG; + ({avm_int64_t, _}) -> imm end, Args ). diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 835355e973..c261ed7564 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -3090,6 +3090,50 @@ jump_table_large_labels_test() -> Stream = ?BACKEND:stream(State1), ?assertEqual((512 + 1) * 12, byte_size(Stream)). +alloc_boxed_integer_fragment_small_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ + ctx, {avm_int64_t, 42} + ]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 6bd7 ldr r7, [r2, #60] ; 0x3c\n" + " 2: b405 push {r0, r2}\n" + " 4: 222a movs r2, #42 ; 0x2a\n" + " 6: 2300 movs r3, #0\n" + " 8: 47b8 blx r7\n" + " a: 4607 mov r7, r0\n" + " c: bc05 pop {r0, r2}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +alloc_boxed_integer_fragment_large_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ + ctx, {avm_int64_t, 16#123456789ABCDEF0} + ]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 6bd7 ldr r7, [r2, #60] ; 0x3c\n" + " 2: b405 push {r0, r2}\n" + " 4: 4a00 ldr r2, [pc, #0] ; (0x8)\n" + " 6: e001 b.n 0xc\n" + " 8: def0 udf #240 ; 0xf0\n" + " a: 9abc ldr r2, [sp, #752] ; 0x2f0\n" + " c: 4b00 ldr r3, [pc, #0] ; (0x10)\n" + " e: e001 b.n 0x14\n" + " 10: 5678 ldrsb r0, [r7, r1]\n" + " 12: 1234 asrs r4, r6, #8\n" + " 14: 47b8 blx r7\n" + " 16: 4607 mov r7, r0\n" + " 18: bc05 pop {r0, r2}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). From d809b756885e4ac0fdb96f50e18331c145feeab1 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 15 Sep 2025 00:04:04 +0200 Subject: [PATCH 61/97] armv6m: fix float registers Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 4 ++-- tests/libs/jit/jit_armv6m_tests.erl | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index e8dbab10c6..c8f1452e07 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1845,9 +1845,9 @@ move_to_vm_register( {fp_reg, F} ) -> I1 = jit_armv6m_asm:ldr(Temp1, ?FP_REGS), - I2 = jit_armv6m_asm:ldr(Temp2, {Reg, 8}), + I2 = jit_armv6m_asm:ldr(Temp2, {Reg, 4}), I3 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8}), - I4 = jit_armv6m_asm:ldr(Temp2, {Reg, 12}), + I4 = jit_armv6m_asm:ldr(Temp2, {Reg, 8}), I5 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8 + 4}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index c261ed7564..ded1af1665 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -2741,9 +2741,9 @@ move_to_native_register_test_() -> Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 6e06 ldr r6, [r0, #96] ; 0x60\n" - " 4: 68bd ldr r5, [r7, #8]\n" + " 4: 687d ldr r5, [r7, #4]\n" " 6: 61b5 str r5, [r6, #24]\n" - " 8: 68fd ldr r5, [r7, #12]\n" + " 8: 68bd ldr r5, [r7, #8]\n" " a: 61f5 str r5, [r6, #28]" >>, ?assertEqual(dump_to_bin(Dump), Stream) From f6111a324d92807a669fd4ba7474a189de1f232f Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 16 Sep 2025 23:08:58 +0200 Subject: [PATCH 62/97] armv6m: fix register allocation when calling functions Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 226 ++++++++++++++++++++++------ tests/libs/jit/jit_armv6m_tests.erl | 18 +-- 2 files changed, 185 insertions(+), 59 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index c8f1452e07..b10151affd 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -428,7 +428,11 @@ update_branches( % Keep far branch sequence, calculate correct ldr immediate and update literal % Calculate where the literal should be placed (same logic as generation) - LdrOffset = Offset, + LdrOffset = + case TempReg of + ?IP_REG -> Offset + 2; + _ -> Offset + end, % ldr + add + bx = 6 bytes AfterInstructionsOffset = Offset + 6, AlignedLiteralOffset = ((AfterInstructionsOffset + 3) band (bnot 3)), @@ -443,12 +447,40 @@ update_branches( % This is the offset from the add instruction's PC to the target % The add instruction is at Offset + 2, so PC = Offset + 2 + 4 = Offset + 6 % We also need to set thumb bit to 1, so eventually we only substract 5. - AddPCOffset = Offset + 5, + % If IP_REG, add is at Offset + 8 + AddPCOffset = + case TempReg of + ?IP_REG -> Offset + 11; + _ -> Offset + 5 + end, % Set thumb bit for bx instruction - target address must be odd for Thumb mode RelativeOffset = LabelOffset - AddPCOffset, - if - Size =:= 12 -> + case {TempReg, Size} of + {?IP_REG, 18} -> + % 18-byte sequence with alignment + I1 = jit_armv6m_asm:push([r0]), + I2 = jit_armv6m_asm:ldr(r0, {pc, LdrImmediate}), + I3 = jit_armv6m_asm:mov(?IP_REG, r0), + I4 = jit_armv6m_asm:pop([r0]), + I5 = jit_armv6m_asm:add(?IP_REG, pc), + I6 = jit_armv6m_asm:bx(?IP_REG), + I7 = jit_armv6m_asm:nop(), + I8 = <>, + <>; + {?IP_REG, 16} -> + % 16-byte sequence without alignment + I1 = jit_armv6m_asm:push([r0]), + I2 = jit_armv6m_asm:ldr(r0, {pc, LdrImmediate}), + I3 = jit_armv6m_asm:mov(?IP_REG, r0), + I4 = jit_armv6m_asm:pop([r0]), + I5 = jit_armv6m_asm:add(?IP_REG, pc), + I6 = jit_armv6m_asm:bx(?IP_REG), + I7 = <>, + <>; + {_, 12} -> % 12-byte sequence with alignment I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), I2 = jit_armv6m_asm:add(TempReg, pc), @@ -456,8 +488,7 @@ update_branches( I4 = jit_armv6m_asm:nop(), I5 = <>, <>; - % Size =:= 10 - true -> + {_, 10} -> % 10-byte sequence without alignment I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), I2 = jit_armv6m_asm:add(TempReg, pc), @@ -793,8 +824,61 @@ branch_to_label_code( Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; +branch_to_label_code( + #state{available_regs = [], branches = Branches} = State0, Offset, Label, false +) -> + % Calculate alignment for literal pool + LdrOffset = Offset + 2, + % push + ldr + mov + pop + add + bx = 12 bytes + AfterInstructionsOffset = Offset + 12, + % Round up to 4-byte boundary + AlignedLiteralOffset = ((AfterInstructionsOffset + 3) band (bnot 3)), + PaddingSize = AlignedLiteralOffset - AfterInstructionsOffset, + + % Calculate PC-relative offset for ldr instruction + % For ldr rd, [pc, #imm]: effective address = (PC+4 aligned to 4) + imm + + % PC aligned down + PCAtLdrExecution = (LdrOffset + 4) band (bnot 3), + LdrImmediate = AlignedLiteralOffset - PCAtLdrExecution, + + {CodeBlock, SequenceSize} = + if + PaddingSize > 0 -> + % Need alignment padding + I1 = jit_armv6m_asm:push([r0]), + I2 = jit_armv6m_asm:ldr(r0, {pc, LdrImmediate}), + I3 = jit_armv6m_asm:mov(?IP_REG, r0), + I4 = jit_armv6m_asm:pop([r0]), + I5 = jit_armv6m_asm:add(?IP_REG, pc), + I6 = jit_armv6m_asm:bx(?IP_REG), + I7 = jit_armv6m_asm:nop(), + % Placeholder offset + I8 = <<0:32/little>>, + Seq = + <>, + {Seq, byte_size(Seq)}; + true -> + % No alignment padding needed + I1 = jit_armv6m_asm:push([r0]), + I2 = jit_armv6m_asm:ldr(r0, {pc, LdrImmediate}), + I3 = jit_armv6m_asm:mov(?IP_REG, r0), + I4 = jit_armv6m_asm:pop([r0]), + I5 = jit_armv6m_asm:add(?IP_REG, pc), + I6 = jit_armv6m_asm:bx(?IP_REG), + % Placeholder offset + I7 = <<0:32/little>>, + Seq = + <>, + {Seq, byte_size(Seq)} + end, + % Add relocation entry + Reloc = {Label, Offset, {far_branch, SequenceSize, ?IP_REG}}, + State1 = State0#state{branches = [Reloc | Branches]}, + {State1, CodeBlock}; branch_to_label_code(#state{available_regs = []}, _Offset, _Label, _LabelLookup) -> - error(no_available_registers). + error({no_available_registers, _LabelLookup}). %%----------------------------------------------------------------------------- %% @doc Emit an if block, i.e. emit a test of a condition and conditionnally @@ -1342,106 +1426,148 @@ call_func_ptr( Stream1 = push_registers(SavedRegs, StreamModule, Stream0), % Set up arguments following ARM AAPCS calling convention - % Since we pushed registers to stack, those saved registers can now be used as temporaries - - ArgsRegs = lists:flatmap( + % First four args are passed in r0-r4, but 5th and 6th are passed + % on the stack. + {RegArgs, StackArgs} = + case Args of + [Arg1, Arg2, Arg3, Arg4 | StackArgs0] -> {[Arg1, Arg2, Arg3, Arg4], StackArgs0}; + _ -> {Args, []} + end, + RegArgsRegs = lists:flatmap( fun ({free, {ptr, Reg}}) -> [Reg]; ({free, Reg}) when is_atom(Reg) -> [Reg]; (Reg) when is_atom(Reg) -> [Reg]; (_) -> [] end, - Args + RegArgs + ), + StackArgsRegs = lists:flatmap( + fun + ({free, {ptr, Reg}}) -> [Reg]; + ({free, Reg}) when is_atom(Reg) -> [Reg]; + (Reg) when is_atom(Reg) -> [Reg]; + (_) -> [] + end, + StackArgs ), - SavedRegsForTemps0 = SavedRegs -- [?CTX_REG, ?NATIVE_INTERFACE_REG] -- ArgsRegs, + + % We pushed registers to stack, so we can use these registers we saved + % and the currently available registers to push values to the stack. + SetArgsPushStackAvailableArgs = (UsedRegs1 -- (RegArgsRegs ++ StackArgsRegs)) ++ AvailableRegs0, + State1 = State0#state{ + available_regs = SetArgsPushStackAvailableArgs, + used_regs = ?AVAILABLE_REGS -- SetArgsPushStackAvailableArgs, + stream = Stream1 + }, + State2 = + case StackArgs of + [] -> State1; + [Arg5] -> set_args_push_stack(State1, Arg5, undefined); + [Arg5, Args6] -> set_args_push_stack(State1, Arg5, Args6) + end, + + SetArgsRegsOnlyAvailableArgs = State2#state.available_regs, ParameterRegs = parameter_regs(Args), - {State1, FuncPtrReg} = + {Stream3, SetArgsAvailableRegs, FuncPtrReg} = case FuncPtrTuple of {free, FuncPtrReg0} -> % If FuncPtrReg is in parameter regs, we must swap it with a free reg. case lists:member(FuncPtrReg0, ParameterRegs) of true -> - [FuncPtrReg1 | _] = SavedRegsForTemps0 -- ArgsRegs, + case SetArgsRegsOnlyAvailableArgs of + [] -> + io:format( + "UsedRegs1 = ~p\nAvailableRegs0 = ~p\nArgs = ~p\nSavedRegs = ~p\nFuncPtrReg0 = ~p\n", + [UsedRegs1, AvailableRegs0, Args, SavedRegs, FuncPtrReg0] + ); + _ -> + ok + end, + [FuncPtrReg1 | _] = SetArgsRegsOnlyAvailableArgs, MovInstr = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0), - SavedRegsForTemps1 = SavedRegsForTemps0 -- [FuncPtrReg1], + SetArgsAvailableArgs1 = + SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1] ++ [FuncPtrReg0], { - State0#state{ - stream = StreamModule:append(Stream1, MovInstr), - available_regs = - SavedRegsForTemps1 ++ [FuncPtrReg0] ++ AvailableRegs0 - }, + StreamModule:append(State2#state.stream, MovInstr), + SetArgsAvailableArgs1, FuncPtrReg1 }; false -> - SavedRegsForTemps1 = SavedRegsForTemps0 -- [FuncPtrReg0], - { - State0#state{ - stream = Stream1, - available_regs = SavedRegsForTemps1 ++ AvailableRegs0 - }, - FuncPtrReg0 - } + SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], + {State2#state.stream, SetArgsAvailableArgs1, FuncPtrReg0} end; {primitive, Primitive} -> - [FuncPtrReg0 | _] = - ((SavedRegsForTemps0 ++ AvailableRegs0) -- ArgsRegs) -- ParameterRegs, - SetArgsAvailableRegs = SavedRegsForTemps0 ++ AvailableRegs0 -- [FuncPtrReg0], + [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs, + SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0), - Stream2 = StreamModule:append(Stream1, PrepCall), - {State0#state{stream = Stream2, available_regs = SetArgsAvailableRegs}, FuncPtrReg0} + Stream2 = StreamModule:append(State2#state.stream, PrepCall), + {Stream2, SetArgsAvailableRegs1, FuncPtrReg0} end, - State2 = set_args(State1, Args, length(SavedRegs) * 4), - #state{stream = Stream3} = State2, + State3 = State2#state{ + available_regs = SetArgsAvailableRegs, + used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs, + stream = Stream3 + }, + + % Exclude argument registers from available_regs to prevent mov_immediate from overwriting them + StackOffset = + case StackArgs of + [] -> length(SavedRegs) * 4; + _ -> length(SavedRegs) * 4 + 8 + end, + State4 = set_args_registers_only(State3, RegArgs, StackOffset), + Stream4 = State4#state.stream, % Call the function pointer (using BLX for call with return) Call = jit_armv6m_asm:blx(FuncPtrReg), - Stream4 = StreamModule:append(Stream3, Call), + Stream5 = StreamModule:append(Stream4, Call), % For result, we need a free register (including FuncPtrReg) but ideally % not the one used for padding. If none are available (all 8 registers % were pushed to the stack), we write the result to the stack position % of FuncPtrReg - {Stream5, UsedRegs2} = + {Stream6, UsedRegs2} = case length(SavedRegs) of 8 when element(1, FuncPtrTuple) =:= free -> % We use FuncPtrReg then as we know it's available. % Calculate stack offset: register number * 4 bytes ResultReg = FuncPtrReg, - StackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4, - StoreResult = jit_armv6m_asm:str(r0, {sp, StackOffset}), - {StreamModule:append(Stream4, StoreResult), [ResultReg | UsedRegs1]}; + StoreResultStackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4, + StoreResult = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset}), + {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]}; 8 when PaddingReg =/= undefined -> % We use PaddingReg then as we know it's available. % Calculate stack offset: register number * 4 bytes ResultReg = PaddingReg, - StackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4, - StoreResult = jit_armv6m_asm:str(r0, {sp, StackOffset}), - {StreamModule:append(Stream4, StoreResult), [PaddingReg | UsedRegs1]}; + StoreResultStackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4, + StoreResult = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset}), + {StreamModule:append(Stream5, StoreResult), [PaddingReg | UsedRegs1]}; _ -> % Use any free that is not in SavedRegs [ResultReg | _] = AvailableRegs1 -- SavedRegs, MoveResult = jit_armv6m_asm:mov(ResultReg, r0), - {StreamModule:append(Stream4, MoveResult), [ResultReg | UsedRegs1]} + {StreamModule:append(Stream5, MoveResult), [ResultReg | UsedRegs1]} end, % Deallocate stack space if we allocated it for 5+ arguments - Stream6 = + Stream7 = case length(Args) >= 5 of true -> DeallocateArgs = jit_armv6m_asm:add(sp, 8), - StreamModule:append(Stream5, DeallocateArgs); + StreamModule:append(Stream6, DeallocateArgs); false -> - Stream5 + Stream6 end, - Stream7 = pop_registers(lists:reverse(SavedRegs), StreamModule, Stream6), + Stream8 = pop_registers(lists:reverse(SavedRegs), StreamModule, Stream7), AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), { - State2#state{ - stream = Stream7, + State4#state{ + stream = Stream8, available_regs = AvailableRegs3, used_regs = UsedRegs2 }, diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index ded1af1665..9952e538b2 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -3134,13 +3134,6 @@ alloc_boxed_integer_fragment_large_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -dump_to_bin(Dump) -> - dump_to_bin0(Dump, addr, []). - --define(IS_HEX_DIGIT(C), - ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) -). - %% Test for stack alignment issue in call_func_ptr %% When we have an odd number of saved registers, the stack becomes misaligned %% before the function call, violating ARM AAPCS which requires 8-byte alignment @@ -3257,8 +3250,8 @@ call_func_ptr_register_exhaustion_test_() -> " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n" " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n" " e: b082 sub sp, #8\n" - " 10: 2101 movs r1, #1\n" - " 12: 9100 str r1, [sp, #0]\n" + " 10: 2401 movs r4, #1\n" + " 12: 9400 str r4, [sp, #0]\n" " 14: 9908 ldr r1, [sp, #32]\n" " 16: 461a mov r2, r3\n" " 18: 460b mov r3, r1\n" @@ -3486,6 +3479,13 @@ add_beam_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +dump_to_bin(Dump) -> + dump_to_bin0(Dump, addr, []). + +-define(IS_HEX_DIGIT(C), + ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) +). + dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> dump_to_bin0(Tail, hex, Acc); dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> From 5eaa412c7b7a2eaa9bdc67e12ed5d507981bd4ff Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 17 Sep 2025 08:19:07 +0200 Subject: [PATCH 63/97] armv6m: fix comment in static assert Signed-off-by: Paul Guyot --- src/libAtomVM/jit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index accf990802..6e4a2b8a11 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -85,7 +85,7 @@ _Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation _Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_aarch64.erl"); #elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M _Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_armv6m.erl"); -_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x30 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in jit/src/jit_armv6m.erl"); _Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_armv6m.erl"); _Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_armv6m.erl"); _Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_armv6m.erl"); From aefbd9010531bd71710a97eb4ea31a5835fa2b1e Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 17 Sep 2025 08:57:16 +0200 Subject: [PATCH 64/97] armv6m: fix mvns encoding Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m_asm.erl | 4 ++-- tests/libs/jit/jit_armv6m_asm_tests.erl | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl index 7ad73f311e..6410e03952 100644 --- a/libs/jit/src/jit_armv6m_asm.erl +++ b/libs/jit/src/jit_armv6m_asm.erl @@ -331,8 +331,8 @@ mvns(Rd, Rm) when -> RdNum = reg_to_num(Rd), RmNum = reg_to_num(Rm), - %% Thumb MOVS register: 0000000000mmmdddd - <<(16#43D0 bor (RmNum bsl 3) bor RdNum):16/little>>. + %% Thumb MVNS register: 0100001111mmmdddd + <<(16#43C0 bor (RmNum bsl 3) bor RdNum):16/little>>. %% ARMv6-M Thumb MOV instruction - handle both immediate and register moves -spec mov(arm_gpr_register(), arm_gpr_register() | arm_gpr_register()) -> binary(). diff --git a/tests/libs/jit/jit_armv6m_asm_tests.erl b/tests/libs/jit/jit_armv6m_asm_tests.erl index a46e370f36..75c66019cc 100644 --- a/tests/libs/jit/jit_armv6m_asm_tests.erl +++ b/tests/libs/jit/jit_armv6m_asm_tests.erl @@ -597,6 +597,24 @@ bkpt_test_() -> ) ]. +mvns_test_() -> + [ + %% ARMv6-M Thumb MVNS instructions (register only, low registers) + %% MVNS Rd, Rm - bitwise NOT (performs ~Rm -> Rd, sets flags) + ?_assertEqual( + asm(<<16#43e3:16/little>>, "mvns r3, r4"), + jit_armv6m_asm:mvns(r3, r4) + ), + ?_assertEqual( + asm(<<16#43f3:16/little>>, "mvns r3, r6"), + jit_armv6m_asm:mvns(r3, r6) + ), + ?_assertEqual( + asm(<<16#43c8:16/little>>, "mvns r0, r1"), + jit_armv6m_asm:mvns(r0, r1) + ) + ]. + asm(Bin, Str) -> case erlang:system_info(machine) of "ATOM" -> From 63cd6c18204d327d1e6c8817040f74977269429d Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 17 Sep 2025 21:18:10 +0200 Subject: [PATCH 65/97] armv6m: use binutils helper for asm tests Signed-off-by: Paul Guyot --- tests/libs/jit/jit_aarch64_asm_tests.erl | 2 - tests/libs/jit/jit_armv6m_asm_tests.erl | 614 ++++++----------------- 2 files changed, 143 insertions(+), 473 deletions(-) diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index cf053da995..11c7fe392f 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -20,9 +20,7 @@ -module(jit_aarch64_asm_tests). --ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). --endif. -export([ list_to_integer/1, diff --git a/tests/libs/jit/jit_armv6m_asm_tests.erl b/tests/libs/jit/jit_armv6m_asm_tests.erl index 75c66019cc..eefe6781ef 100644 --- a/tests/libs/jit/jit_armv6m_asm_tests.erl +++ b/tests/libs/jit/jit_armv6m_asm_tests.erl @@ -20,125 +20,71 @@ -module(jit_armv6m_asm_tests). --ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). --endif. + +-define(_assertAsmEqual(Bin, Str, Value), + ?_assertEqual(jit_tests_common:asm(arm, Bin, Str), Value) +). adds_test_() -> [ - ?_assertEqual( - asm(<<16#3038:16/little>>, "adds r0, #56"), jit_armv6m_asm:adds(r0, 56) - ), - ?_assertEqual( - asm(<<16#3038:16/little>>, "adds r0, r0, #56"), jit_armv6m_asm:adds(r0, r0, 56) - ), - ?_assertEqual( - asm(<<16#3000:16/little>>, "adds r0, #0"), jit_armv6m_asm:adds(r0, 0) - ), - ?_assertEqual( - asm(<<16#3101:16/little>>, "adds r1, #1"), jit_armv6m_asm:adds(r1, 1) - ), - ?_assertEqual( - asm(<<16#1C42:16/little>>, "adds r2, r0, #1"), jit_armv6m_asm:adds(r2, r0, 1) - ), - ?_assertEqual( - asm(<<16#18c9:16/little>>, "adds r1, r1, r3"), jit_armv6m_asm:adds(r1, r1, r3) - ), - ?_assertEqual( - asm(<<16#1850:16/little>>, "adds r0, r2, r1"), jit_armv6m_asm:adds(r0, r2, r1) - ) + ?_assertAsmEqual(<<16#3038:16/little>>, "adds r0, #56", jit_armv6m_asm:adds(r0, 56)), + ?_assertAsmEqual( + <<16#3038:16/little>>, "adds r0, r0, #56", jit_armv6m_asm:adds(r0, r0, 56) + ), + ?_assertAsmEqual(<<16#3000:16/little>>, "adds r0, #0", jit_armv6m_asm:adds(r0, 0)), + ?_assertAsmEqual(<<16#3101:16/little>>, "adds r1, #1", jit_armv6m_asm:adds(r1, 1)), + ?_assertAsmEqual(<<16#1C42:16/little>>, "adds r2, r0, #1", jit_armv6m_asm:adds(r2, r0, 1)), + ?_assertAsmEqual(<<16#18c9:16/little>>, "adds r1, r1, r3", jit_armv6m_asm:adds(r1, r1, r3)), + ?_assertAsmEqual(<<16#1850:16/little>>, "adds r0, r2, r1", jit_armv6m_asm:adds(r0, r2, r1)) ]. add_test_() -> [ %% ARMv6-M Thumb ADD instructions (register, high registers supported) %% ADD Rd, Rm - adds register value to register (supports PC) - ?_assertEqual( - asm(<<16#449f:16/little>>, "add pc, r3"), jit_armv6m_asm:add(pc, r3) - ), - ?_assertEqual( - asm(<<16#4440:16/little>>, "add r0, r8"), jit_armv6m_asm:add(r0, r8) - ), - ?_assertEqual( - asm(<<16#4488:16/little>>, "add r8, r1"), jit_armv6m_asm:add(r8, r1) - ), - ?_assertEqual( - asm(<<16#44c9:16/little>>, "add r9, r9"), jit_armv6m_asm:add(r9, r9) - ), - ?_assertEqual( - asm(<<16#4419:16/little>>, "add r1, r3"), jit_armv6m_asm:add(r1, r3) - ) + ?_assertAsmEqual(<<16#449f:16/little>>, "add pc, r3", jit_armv6m_asm:add(pc, r3)), + ?_assertAsmEqual(<<16#4440:16/little>>, "add r0, r8", jit_armv6m_asm:add(r0, r8)), + ?_assertAsmEqual(<<16#4488:16/little>>, "add r8, r1", jit_armv6m_asm:add(r8, r1)), + ?_assertAsmEqual(<<16#44c9:16/little>>, "add r9, r9", jit_armv6m_asm:add(r9, r9)), + ?_assertAsmEqual(<<16#4419:16/little>>, "add r1, r3", jit_armv6m_asm:add(r1, r3)) ]. subs_test_() -> [ - ?_assertEqual( - asm(<<16#3f38:16/little>>, "subs r7, #56"), jit_armv6m_asm:subs(r7, 56) - ), - ?_assertEqual( - asm(<<16#3f38:16/little>>, "subs r7, r7, #56"), jit_armv6m_asm:subs(r7, r7, 56) + ?_assertAsmEqual(<<16#3f38:16/little>>, "subs r7, #56", jit_armv6m_asm:subs(r7, 56)), + ?_assertAsmEqual( + <<16#3f38:16/little>>, "subs r7, r7, #56", jit_armv6m_asm:subs(r7, r7, 56) ), - ?_assertEqual( - asm(<<16#3800:16/little>>, "subs r0, #0"), jit_armv6m_asm:subs(r0, 0) - ), - ?_assertEqual( - asm(<<16#1e42:16/little>>, "subs r2, r0, #1"), jit_armv6m_asm:subs(r2, r0, 1) - ), - ?_assertEqual( - asm(<<16#1ad1:16/little>>, "subs r1, r2, r3"), jit_armv6m_asm:subs(r1, r2, r3) - ) + ?_assertAsmEqual(<<16#3800:16/little>>, "subs r0, #0", jit_armv6m_asm:subs(r0, 0)), + ?_assertAsmEqual(<<16#1e42:16/little>>, "subs r2, r0, #1", jit_armv6m_asm:subs(r2, r0, 1)), + ?_assertAsmEqual(<<16#1ad1:16/little>>, "subs r1, r2, r3", jit_armv6m_asm:subs(r1, r2, r3)) ]. sub_test_() -> [ - ?_assertEqual( - asm(<<16#B082:16/little>>, "sub sp, #8"), jit_armv6m_asm:sub(sp, 8) - ), - ?_assertEqual( - asm(<<16#B082:16/little>>, "sub sp, sp, #8"), jit_armv6m_asm:sub(sp, sp, 8) - ), - ?_assertEqual( - asm(<<16#B080:16/little>>, "sub sp, #0"), jit_armv6m_asm:sub(sp, 0) - ), - ?_assertEqual( - asm(<<16#B084:16/little>>, "sub sp, #16"), jit_armv6m_asm:sub(sp, 16) - ), - ?_assertEqual( - asm(<<16#B0FF:16/little>>, "sub sp, #508"), jit_armv6m_asm:sub(sp, 508) - ) + ?_assertAsmEqual(<<16#B082:16/little>>, "sub sp, #8", jit_armv6m_asm:sub(sp, 8)), + ?_assertAsmEqual(<<16#B082:16/little>>, "sub sp, sp, #8", jit_armv6m_asm:sub(sp, sp, 8)), + ?_assertAsmEqual(<<16#B080:16/little>>, "sub sp, #0", jit_armv6m_asm:sub(sp, 0)), + ?_assertAsmEqual(<<16#B084:16/little>>, "sub sp, #16", jit_armv6m_asm:sub(sp, 16)), + ?_assertAsmEqual(<<16#B0FF:16/little>>, "sub sp, #508", jit_armv6m_asm:sub(sp, 508)) ]. muls_test_() -> [ - ?_assertEqual( - asm(<<16#4359:16/little>>, "muls r1, r3"), jit_armv6m_asm:muls(r1, r3) - ), - ?_assertEqual( - asm(<<16#4348:16/little>>, "muls r0, r1"), jit_armv6m_asm:muls(r0, r1) - ) + ?_assertAsmEqual(<<16#4359:16/little>>, "muls r1, r3", jit_armv6m_asm:muls(r1, r3)), + ?_assertAsmEqual(<<16#4348:16/little>>, "muls r0, r1", jit_armv6m_asm:muls(r0, r1)) ]. b_test_() -> [ %% Thumb B (unconditional) encoding tests - ARMv6-M 16-bit only - ?_assertEqual( - asm(<<16#E7FE:16/little>>, "b .+0"), jit_armv6m_asm:b(0) - ), - ?_assertEqual( - asm(<<16#E006:16/little>>, "b .+16"), jit_armv6m_asm:b(16) - ), - ?_assertEqual( - asm(<<16#E7DE:16/little>>, "b .-64"), jit_armv6m_asm:b(-64) - ), - ?_assertEqual( - asm(<<16#E000:16/little>>, "b .+4"), jit_armv6m_asm:b(4) - ), - ?_assertEqual( - asm(<<16#E3FF:16/little>>, "b .+2050"), jit_armv6m_asm:b(2050) - ), - ?_assertEqual( - asm(<<16#E400:16/little>>, "b .-2044"), jit_armv6m_asm:b(-2044) - ), + ?_assertAsmEqual(<<16#E7FE:16/little>>, "b .+0", jit_armv6m_asm:b(0)), + ?_assertAsmEqual(<<16#E006:16/little>>, "b .+16", jit_armv6m_asm:b(16)), + ?_assertAsmEqual(<<16#E7DE:16/little>>, "b .-64", jit_armv6m_asm:b(-64)), + ?_assertAsmEqual(<<16#E000:16/little>>, "b .+4", jit_armv6m_asm:b(4)), + ?_assertAsmEqual(<<16#E3FF:16/little>>, "b .+2050", jit_armv6m_asm:b(2050)), + ?_assertAsmEqual(<<16#E400:16/little>>, "b .-2044", jit_armv6m_asm:b(-2044)), %% Test error cases for offsets too large for ARMv6-M ?_assertError({unencodable_offset, 2052}, jit_armv6m_asm:b(2052)), ?_assertError({unencodable_offset, -2046}, jit_armv6m_asm:b(-2046)) @@ -147,60 +93,42 @@ b_test_() -> blx_test_() -> [ %% Thumb BLX (register) encoding tests - ?_assertEqual( - asm(<<16#4780:16/little>>, "blx r0"), jit_armv6m_asm:blx(r0) - ), - ?_assertEqual( - asm(<<16#4788:16/little>>, "blx r1"), jit_armv6m_asm:blx(r1) - ), - ?_assertEqual( - asm(<<16#47E8:16/little>>, "blx r13"), jit_armv6m_asm:blx(r13) - ) + ?_assertAsmEqual(<<16#4780:16/little>>, "blx r0", jit_armv6m_asm:blx(r0)), + ?_assertAsmEqual(<<16#4788:16/little>>, "blx r1", jit_armv6m_asm:blx(r1)), + ?_assertAsmEqual(<<16#47E8:16/little>>, "blx r13", jit_armv6m_asm:blx(r13)) ]. bx_test_() -> [ %% Thumb BX (branch exchange) encoding tests - ?_assertEqual( - asm(<<16#4700:16/little>>, "bx r0"), jit_armv6m_asm:bx(r0) - ), - ?_assertEqual( - asm(<<16#4708:16/little>>, "bx r1"), jit_armv6m_asm:bx(r1) - ), - ?_assertEqual( - asm(<<16#4768:16/little>>, "bx r13"), jit_armv6m_asm:bx(r13) - ) + ?_assertAsmEqual(<<16#4700:16/little>>, "bx r0", jit_armv6m_asm:bx(r0)), + ?_assertAsmEqual(<<16#4708:16/little>>, "bx r1", jit_armv6m_asm:bx(r1)), + ?_assertAsmEqual(<<16#4768:16/little>>, "bx r13", jit_armv6m_asm:bx(r13)) ]. ldr_test_() -> [ %% ARMv6-M Thumb LDR immediate offset (0-124, multiple of 4) - ?_assertEqual( - asm(<<16#6889:16/little>>, "ldr r1, [r1, #8]"), - jit_armv6m_asm:ldr(r1, {r1, 8}) + ?_assertAsmEqual( + <<16#6889:16/little>>, "ldr r1, [r1, #8]", jit_armv6m_asm:ldr(r1, {r1, 8}) ), - ?_assertEqual( - asm(<<16#6982:16/little>>, "ldr r2, [r0, #24]"), - jit_armv6m_asm:ldr(r2, {r0, 24}) + ?_assertAsmEqual( + <<16#6982:16/little>>, "ldr r2, [r0, #24]", jit_armv6m_asm:ldr(r2, {r0, 24}) ), %% SP-relative load (0-1020, multiple of 4) - ?_assertEqual( - asm(<<16#9f00:16/little>>, "ldr r7, [sp, #0]"), - jit_armv6m_asm:ldr(r7, {sp, 0}) + ?_assertAsmEqual( + <<16#9f00:16/little>>, "ldr r7, [sp, #0]", jit_armv6m_asm:ldr(r7, {sp, 0}) ), - ?_assertEqual( - asm(<<16#9801:16/little>>, "ldr r0, [sp, #4]"), - jit_armv6m_asm:ldr(r0, {sp, 4}) + ?_assertAsmEqual( + <<16#9801:16/little>>, "ldr r0, [sp, #4]", jit_armv6m_asm:ldr(r0, {sp, 4}) ), %% PC-relative load (0-1020, multiple of 4) - ?_assertEqual( - asm(<<16#4a18:16/little>>, "ldr r2, [pc, #96]"), - jit_armv6m_asm:ldr(r2, {pc, 96}) + ?_assertAsmEqual( + <<16#4a18:16/little>>, "ldr r2, [pc, #96]", jit_armv6m_asm:ldr(r2, {pc, 96}) ), %% Register offset - ?_assertEqual( - asm(<<16#58d1:16/little>>, "ldr r1, [r2, r3]"), - jit_armv6m_asm:ldr(r1, {r2, r3}) + ?_assertAsmEqual( + <<16#58d1:16/little>>, "ldr r1, [r2, r3]", jit_armv6m_asm:ldr(r1, {r2, r3}) ) ]. @@ -208,293 +136,141 @@ movs_test_() -> [ %% ARMv6-M Thumb MOVS instructions (sets flags) %% MOVS immediate (8-bit only, 0-255) - ?_assertEqual( - asm(<<16#2000:16/little>>, "movs r0, #0"), - jit_armv6m_asm:movs(r0, 0) - ), - ?_assertEqual( - asm(<<16#2101:16/little>>, "movs r1, #1"), - jit_armv6m_asm:movs(r1, 1) - ), - ?_assertEqual( - asm(<<16#22ff:16/little>>, "movs r2, #255"), - jit_armv6m_asm:movs(r2, 255) - ), + ?_assertAsmEqual(<<16#2000:16/little>>, "movs r0, #0", jit_armv6m_asm:movs(r0, 0)), + ?_assertAsmEqual(<<16#2101:16/little>>, "movs r1, #1", jit_armv6m_asm:movs(r1, 1)), + ?_assertAsmEqual(<<16#22ff:16/little>>, "movs r2, #255", jit_armv6m_asm:movs(r2, 255)), %% MOVS register - low registers only (r0-r7) - ?_assertEqual( - asm(<<16#0008:16/little>>, "movs r0, r1"), - jit_armv6m_asm:movs(r0, r1) - ), - ?_assertEqual( - asm(<<16#001a:16/little>>, "movs r2, r3"), - jit_armv6m_asm:movs(r2, r3) - ) + ?_assertAsmEqual(<<16#0008:16/little>>, "movs r0, r1", jit_armv6m_asm:movs(r0, r1)), + ?_assertAsmEqual(<<16#001a:16/little>>, "movs r2, r3", jit_armv6m_asm:movs(r2, r3)) ]. mov_test_() -> [ %% ARMv6-M Thumb MOV instructions (no flags, for high registers) %% MOV register - requires at least one high register (r8-r15) - ?_assertEqual( - asm(<<16#4680:16/little>>, "mov r8, r0"), - jit_armv6m_asm:mov(r8, r0) - ), - ?_assertEqual( - asm(<<16#4640:16/little>>, "mov r0, r8"), - jit_armv6m_asm:mov(r0, r8) - ), - ?_assertEqual( - asm(<<16#46c8:16/little>>, "mov r8, r9"), - jit_armv6m_asm:mov(r8, r9) - ), - ?_assertEqual( - asm(<<16#46c0:16/little>>, "mov r8, r8"), - jit_armv6m_asm:mov(r8, r8) - ), - ?_assertEqual( - asm(<<16#4619:16/little>>, "mov r1, r3"), - jit_armv6m_asm:mov(r1, r3) - ), - ?_assertEqual( - asm(<<16#46c0:16/little>>, "nop"), - jit_armv6m_asm:nop() - ) + ?_assertAsmEqual(<<16#4680:16/little>>, "mov r8, r0", jit_armv6m_asm:mov(r8, r0)), + ?_assertAsmEqual(<<16#4640:16/little>>, "mov r0, r8", jit_armv6m_asm:mov(r0, r8)), + ?_assertAsmEqual(<<16#46c8:16/little>>, "mov r8, r9", jit_armv6m_asm:mov(r8, r9)), + ?_assertAsmEqual(<<16#46c0:16/little>>, "mov r8, r8", jit_armv6m_asm:mov(r8, r8)), + ?_assertAsmEqual(<<16#4619:16/little>>, "mov r1, r3", jit_armv6m_asm:mov(r1, r3)), + ?_assertAsmEqual(<<16#46c0:16/little>>, "nop", jit_armv6m_asm:nop()) ]. str_test_() -> [ %% ARMv6-M Thumb STR immediate offset (0-124, multiple of 4) - ?_assertEqual( - asm(<<16#6089:16/little>>, "str r1, [r1, #8]"), - jit_armv6m_asm:str(r1, {r1, 8}) + ?_assertAsmEqual( + <<16#6089:16/little>>, "str r1, [r1, #8]", jit_armv6m_asm:str(r1, {r1, 8}) ), - ?_assertEqual( - asm(<<16#6182:16/little>>, "str r2, [r0, #24]"), - jit_armv6m_asm:str(r2, {r0, 24}) + ?_assertAsmEqual( + <<16#6182:16/little>>, "str r2, [r0, #24]", jit_armv6m_asm:str(r2, {r0, 24}) ), %% SP-relative store (0-1020, multiple of 4) - ?_assertEqual( - asm(<<16#9700:16/little>>, "str r7, [sp, #0]"), - jit_armv6m_asm:str(r7, {sp, 0}) + ?_assertAsmEqual( + <<16#9700:16/little>>, "str r7, [sp, #0]", jit_armv6m_asm:str(r7, {sp, 0}) ), - ?_assertEqual( - asm(<<16#9001:16/little>>, "str r0, [sp, #4]"), - jit_armv6m_asm:str(r0, {sp, 4}) + ?_assertAsmEqual( + <<16#9001:16/little>>, "str r0, [sp, #4]", jit_armv6m_asm:str(r0, {sp, 4}) ), %% Register offset - ?_assertEqual( - asm(<<16#50d1:16/little>>, "str r1, [r2, r3]"), - jit_armv6m_asm:str(r1, {r2, r3}) + ?_assertAsmEqual( + <<16#50d1:16/little>>, "str r1, [r2, r3]", jit_armv6m_asm:str(r1, {r2, r3}) ) ]. cmp_test_() -> [ %% ARMv6-M Thumb CMP register (low registers only) - ?_assertEqual( - asm(<<16#4288:16/little>>, "cmp r0, r1"), - jit_armv6m_asm:cmp(r0, r1) - ), - ?_assertEqual( - asm(<<16#42bb:16/little>>, "cmp r3, r7"), - jit_armv6m_asm:cmp(r3, r7) - ), + ?_assertAsmEqual(<<16#4288:16/little>>, "cmp r0, r1", jit_armv6m_asm:cmp(r0, r1)), + ?_assertAsmEqual(<<16#42bb:16/little>>, "cmp r3, r7", jit_armv6m_asm:cmp(r3, r7)), %% ARMv6-M Thumb CMP immediate (8-bit, 0-255, low registers only) - ?_assertEqual( - asm(<<16#2800:16/little>>, "cmp r0, #0"), - jit_armv6m_asm:cmp(r0, 0) - ), - ?_assertEqual( - asm(<<16#2805:16/little>>, "cmp r0, #5"), - jit_armv6m_asm:cmp(r0, 5) - ), - ?_assertEqual( - asm(<<16#2fff:16/little>>, "cmp r7, #255"), - jit_armv6m_asm:cmp(r7, 255) - ) + ?_assertAsmEqual(<<16#2800:16/little>>, "cmp r0, #0", jit_armv6m_asm:cmp(r0, 0)), + ?_assertAsmEqual(<<16#2805:16/little>>, "cmp r0, #5", jit_armv6m_asm:cmp(r0, 5)), + ?_assertAsmEqual(<<16#2fff:16/little>>, "cmp r7, #255", jit_armv6m_asm:cmp(r7, 255)) ]. ands_test_() -> [ %% ARMv6-M Thumb ANDS register (2-operand: Rd = Rd AND Rm) - ?_assertEqual( - asm(<<16#4008:16/little>>, "ands r0, r1"), - jit_armv6m_asm:ands(r0, r1) - ), - ?_assertEqual( - asm(<<16#4011:16/little>>, "ands r1, r2"), - jit_armv6m_asm:ands(r1, r2) - ), - ?_assertEqual( - asm(<<16#401a:16/little>>, "ands r2, r3"), - jit_armv6m_asm:ands(r2, r3) - ) + ?_assertAsmEqual(<<16#4008:16/little>>, "ands r0, r1", jit_armv6m_asm:ands(r0, r1)), + ?_assertAsmEqual(<<16#4011:16/little>>, "ands r1, r2", jit_armv6m_asm:ands(r1, r2)), + ?_assertAsmEqual(<<16#401a:16/little>>, "ands r2, r3", jit_armv6m_asm:ands(r2, r3)) ]. orrs_test_() -> [ %% ARMv6-M Thumb ORRS register (2-operand: Rd = Rd OR Rm, sets flags) - ?_assertEqual( - asm(<<16#4308:16/little>>, "orrs r0, r1"), - jit_armv6m_asm:orrs(r0, r1) - ), - ?_assertEqual( - asm(<<16#4311:16/little>>, "orrs r1, r2"), - jit_armv6m_asm:orrs(r1, r2) - ), - ?_assertEqual( - asm(<<16#431a:16/little>>, "orrs r2, r3"), - jit_armv6m_asm:orrs(r2, r3) - ) + ?_assertAsmEqual(<<16#4308:16/little>>, "orrs r0, r1", jit_armv6m_asm:orrs(r0, r1)), + ?_assertAsmEqual(<<16#4311:16/little>>, "orrs r1, r2", jit_armv6m_asm:orrs(r1, r2)), + ?_assertAsmEqual(<<16#431a:16/little>>, "orrs r2, r3", jit_armv6m_asm:orrs(r2, r3)) ]. bics_test_() -> [ - ?_assertEqual( - asm(<<16#4391:16/little>>, "bics r1, r2"), - jit_armv6m_asm:bics(r1, r2) - ), - ?_assertEqual( - asm(<<16#43a3:16/little>>, "bics r3, r4"), - jit_armv6m_asm:bics(r3, r4) - ) + ?_assertAsmEqual(<<16#4391:16/little>>, "bics r1, r2", jit_armv6m_asm:bics(r1, r2)), + ?_assertAsmEqual(<<16#43a3:16/little>>, "bics r3, r4", jit_armv6m_asm:bics(r3, r4)) ]. negs_test_() -> [ - ?_assertEqual( - asm(<<16#4251:16/little>>, "negs r1, r2"), - jit_armv6m_asm:negs(r1, r2) - ), - ?_assertEqual( - asm(<<16#4263:16/little>>, "negs r3, r4"), - jit_armv6m_asm:negs(r3, r4) - ) + ?_assertAsmEqual(<<16#4251:16/little>>, "negs r1, r2", jit_armv6m_asm:negs(r1, r2)), + ?_assertAsmEqual(<<16#4263:16/little>>, "negs r3, r4", jit_armv6m_asm:negs(r3, r4)) ]. rsbs_test_() -> [ - ?_assertEqual( - asm(<<16#4251:16/little>>, "rsbs r1, r2, 0"), - jit_armv6m_asm:rsbs(r1, r2, 0) - ), - ?_assertEqual( - asm(<<16#4263:16/little>>, "rsbs r3, r4, 0"), - jit_armv6m_asm:rsbs(r3, r4, 0) - ) + ?_assertAsmEqual(<<16#4251:16/little>>, "rsbs r1, r2, 0", jit_armv6m_asm:rsbs(r1, r2, 0)), + ?_assertAsmEqual(<<16#4263:16/little>>, "rsbs r3, r4, 0", jit_armv6m_asm:rsbs(r3, r4, 0)) ]. lsls_test_() -> [ %% ARMv6-M Thumb LSLS immediate shift (1-31) - ?_assertEqual( - asm(<<16#0148:16/little>>, "lsls r0, r1, #5"), - jit_armv6m_asm:lsls(r0, r1, 5) - ), - ?_assertEqual( - asm(<<16#0212:16/little>>, "lsls r2, r2, #8"), - jit_armv6m_asm:lsls(r2, r2, 8) - ), + ?_assertAsmEqual(<<16#0148:16/little>>, "lsls r0, r1, #5", jit_armv6m_asm:lsls(r0, r1, 5)), + ?_assertAsmEqual(<<16#0212:16/little>>, "lsls r2, r2, #8", jit_armv6m_asm:lsls(r2, r2, 8)), %% LSLS register shift - ?_assertEqual( - asm(<<16#409a:16/little>>, "lsls r2, r3"), - jit_armv6m_asm:lsls(r2, r3) - ) + ?_assertAsmEqual(<<16#409a:16/little>>, "lsls r2, r3", jit_armv6m_asm:lsls(r2, r3)) ]. lsrs_test_() -> [ %% ARMv6-M Thumb LSRS immediate shift (1-32) - ?_assertEqual( - asm(<<16#0948:16/little>>, "lsrs r0, r1, #5"), - jit_armv6m_asm:lsrs(r0, r1, 5) - ), - ?_assertEqual( - asm(<<16#0a12:16/little>>, "lsrs r2, r2, #8"), - jit_armv6m_asm:lsrs(r2, r2, 8) - ), + ?_assertAsmEqual(<<16#0948:16/little>>, "lsrs r0, r1, #5", jit_armv6m_asm:lsrs(r0, r1, 5)), + ?_assertAsmEqual(<<16#0a12:16/little>>, "lsrs r2, r2, #8", jit_armv6m_asm:lsrs(r2, r2, 8)), %% LSRS register shift - ?_assertEqual( - asm(<<16#40da:16/little>>, "lsrs r2, r3"), - jit_armv6m_asm:lsrs(r2, r3) - ) + ?_assertAsmEqual(<<16#40da:16/little>>, "lsrs r2, r3", jit_armv6m_asm:lsrs(r2, r3)) ]. tst_test_() -> [ %% ARMv6-M Thumb TST instructions (register only, low registers) %% TST Rn, Rm - test bits (performs Rn & Rm, updates flags) - ?_assertEqual( - asm(<<16#4208:16/little>>, "tst r0, r1"), - jit_armv6m_asm:tst(r0, r1) - ), - ?_assertEqual( - asm(<<16#421a:16/little>>, "tst r2, r3"), - jit_armv6m_asm:tst(r2, r3) - ), - ?_assertEqual( - asm(<<16#4239:16/little>>, "tst r1, r7"), - jit_armv6m_asm:tst(r1, r7) - ) + ?_assertAsmEqual(<<16#4208:16/little>>, "tst r0, r1", jit_armv6m_asm:tst(r0, r1)), + ?_assertAsmEqual(<<16#421a:16/little>>, "tst r2, r3", jit_armv6m_asm:tst(r2, r3)), + ?_assertAsmEqual(<<16#4239:16/little>>, "tst r1, r7", jit_armv6m_asm:tst(r1, r7)) ]. bcc_test_() -> [ %% Thumb conditional branch encoding tests - ARMv6-M 16-bit only - ?_assertEqual( - asm(<<16#D0FE:16/little>>, "beq .+0"), jit_armv6m_asm:bcc(eq, 0) - ), - ?_assertEqual( - asm(<<16#D1FE:16/little>>, "bne .+0"), jit_armv6m_asm:bcc(ne, 0) - ), - ?_assertEqual( - asm(<<16#D1DE:16/little>>, "bne .-64"), jit_armv6m_asm:bcc(ne, -64) - ), - ?_assertEqual( - asm(<<16#D03E:16/little>>, "beq .+128"), jit_armv6m_asm:bcc(eq, 128) - ), - ?_assertEqual( - asm(<<16#D23E:16/little>>, "bcs .+128"), jit_armv6m_asm:bcc(cs, 128) - ), - ?_assertEqual( - asm(<<16#D33E:16/little>>, "bcc .+128"), jit_armv6m_asm:bcc(cc, 128) - ), - ?_assertEqual( - asm(<<16#D43E:16/little>>, "bmi .+128"), jit_armv6m_asm:bcc(mi, 128) - ), - ?_assertEqual( - asm(<<16#D53E:16/little>>, "bpl .+128"), jit_armv6m_asm:bcc(pl, 128) - ), - ?_assertEqual( - asm(<<16#D63E:16/little>>, "bvs .+128"), jit_armv6m_asm:bcc(vs, 128) - ), - ?_assertEqual( - asm(<<16#D83E:16/little>>, "bhi .+128"), jit_armv6m_asm:bcc(hi, 128) - ), - ?_assertEqual( - asm(<<16#D93E:16/little>>, "bls .+128"), jit_armv6m_asm:bcc(ls, 128) - ), - ?_assertEqual( - asm(<<16#DA3E:16/little>>, "bge .+128"), jit_armv6m_asm:bcc(ge, 128) - ), - ?_assertEqual( - asm(<<16#DB3E:16/little>>, "blt .+128"), jit_armv6m_asm:bcc(lt, 128) - ), - ?_assertEqual( - asm(<<16#DC3E:16/little>>, "bgt .+128"), jit_armv6m_asm:bcc(gt, 128) - ), - ?_assertEqual( - asm(<<16#DD3E:16/little>>, "ble .+128"), jit_armv6m_asm:bcc(le, 128) - ), - ?_assertEqual( - asm(<<16#E03E:16/little>>, "bal .+128"), jit_armv6m_asm:bcc(al, 128) - ), - ?_assertEqual( - asm(<<16#D07F:16/little>>, "beq .+258"), jit_armv6m_asm:bcc(eq, 258) - ), - ?_assertEqual( - asm(<<16#D180:16/little>>, "bne .-252"), jit_armv6m_asm:bcc(ne, -252) - ), + ?_assertAsmEqual(<<16#D0FE:16/little>>, "beq .+0", jit_armv6m_asm:bcc(eq, 0)), + ?_assertAsmEqual(<<16#D1FE:16/little>>, "bne .+0", jit_armv6m_asm:bcc(ne, 0)), + ?_assertAsmEqual(<<16#D1DE:16/little>>, "bne .-64", jit_armv6m_asm:bcc(ne, -64)), + ?_assertAsmEqual(<<16#D03E:16/little>>, "beq .+128", jit_armv6m_asm:bcc(eq, 128)), + ?_assertAsmEqual(<<16#D23E:16/little>>, "bcs .+128", jit_armv6m_asm:bcc(cs, 128)), + ?_assertAsmEqual(<<16#D33E:16/little>>, "bcc .+128", jit_armv6m_asm:bcc(cc, 128)), + ?_assertAsmEqual(<<16#D43E:16/little>>, "bmi .+128", jit_armv6m_asm:bcc(mi, 128)), + ?_assertAsmEqual(<<16#D53E:16/little>>, "bpl .+128", jit_armv6m_asm:bcc(pl, 128)), + ?_assertAsmEqual(<<16#D63E:16/little>>, "bvs .+128", jit_armv6m_asm:bcc(vs, 128)), + ?_assertAsmEqual(<<16#D83E:16/little>>, "bhi .+128", jit_armv6m_asm:bcc(hi, 128)), + ?_assertAsmEqual(<<16#D93E:16/little>>, "bls .+128", jit_armv6m_asm:bcc(ls, 128)), + ?_assertAsmEqual(<<16#DA3E:16/little>>, "bge .+128", jit_armv6m_asm:bcc(ge, 128)), + ?_assertAsmEqual(<<16#DB3E:16/little>>, "blt .+128", jit_armv6m_asm:bcc(lt, 128)), + ?_assertAsmEqual(<<16#DC3E:16/little>>, "bgt .+128", jit_armv6m_asm:bcc(gt, 128)), + ?_assertAsmEqual(<<16#DD3E:16/little>>, "ble .+128", jit_armv6m_asm:bcc(le, 128)), + ?_assertAsmEqual(<<16#E03E:16/little>>, "bal .+128", jit_armv6m_asm:bcc(al, 128)), + ?_assertAsmEqual(<<16#D07F:16/little>>, "beq .+258", jit_armv6m_asm:bcc(eq, 258)), + ?_assertAsmEqual(<<16#D180:16/little>>, "bne .-252", jit_armv6m_asm:bcc(ne, -252)), %% Test error cases for offsets too large for ARMv6-M ?_assertError({unencodable_offset, 260}, jit_armv6m_asm:bcc(eq, 260)), ?_assertError({unencodable_offset, -254}, jit_armv6m_asm:bcc(ne, -254)) @@ -505,51 +281,28 @@ adr_test_() -> %% ARMv6-M Thumb ADR (PC-relative address) - implemented as ADD Rd, PC, #imm %% adr(Rd, N) means "Rd = current_PC + N" where PC is instruction address %% Range: 4-1024, must be multiple of 4 - ?_assertEqual( - asm(<<16#a000:16/little>>, "adr r0, .+4"), - jit_armv6m_asm:adr(r0, 4) - ), - ?_assertEqual( - asm(<<16#a101:16/little>>, "adr r1, .+8"), - jit_armv6m_asm:adr(r1, 8) - ), - ?_assertEqual( - asm(<<16#a202:16/little>>, "adr r2, .+12"), - jit_armv6m_asm:adr(r2, 12) - ), - ?_assertEqual( - asm(<<16#a708:16/little>>, "adr r7, .+36"), - jit_armv6m_asm:adr(r7, 36) - ), + ?_assertAsmEqual(<<16#a000:16/little>>, "adr r0, .+4", jit_armv6m_asm:adr(r0, 4)), + ?_assertAsmEqual(<<16#a101:16/little>>, "adr r1, .+8", jit_armv6m_asm:adr(r1, 8)), + ?_assertAsmEqual(<<16#a202:16/little>>, "adr r2, .+12", jit_armv6m_asm:adr(r2, 12)), + ?_assertAsmEqual(<<16#a708:16/little>>, "adr r7, .+36", jit_armv6m_asm:adr(r7, 36)), %% Test maximum offset value (1024 bytes) - ?_assertEqual( - asm(<<16#a0ff:16/little>>, "adr r0, .+1024"), - jit_armv6m_asm:adr(r0, 1024) - ) + ?_assertAsmEqual(<<16#a0ff:16/little>>, "adr r0, .+1024", jit_armv6m_asm:adr(r0, 1024)) ]. push_test_() -> [ %% ARMv6-M Thumb PUSH instruction (low registers + optional LR) %% Single register push - ?_assertEqual( - asm(<<16#b401:16/little>>, "push {r0}"), - jit_armv6m_asm:push([r0]) - ), + ?_assertAsmEqual(<<16#b401:16/little>>, "push {r0}", jit_armv6m_asm:push([r0])), %% Multiple register push - ?_assertEqual( - asm(<<16#b407:16/little>>, "push {r0, r1, r2}"), - jit_armv6m_asm:push([r0, r1, r2]) + ?_assertAsmEqual( + <<16#b407:16/little>>, "push {r0, r1, r2}", jit_armv6m_asm:push([r0, r1, r2]) ), %% Push with LR - ?_assertEqual( - asm(<<16#b500:16/little>>, "push {lr}"), - jit_armv6m_asm:push([lr]) - ), + ?_assertAsmEqual(<<16#b500:16/little>>, "push {lr}", jit_armv6m_asm:push([lr])), %% Push registers + LR - ?_assertEqual( - asm(<<16#b507:16/little>>, "push {r0, r1, r2, lr}"), - jit_armv6m_asm:push([r0, r1, r2, lr]) + ?_assertAsmEqual( + <<16#b507:16/little>>, "push {r0, r1, r2, lr}", jit_armv6m_asm:push([r0, r1, r2, lr]) ) ]. @@ -557,115 +310,34 @@ pop_test_() -> [ %% ARMv6-M Thumb POP instruction (low registers + optional PC) %% Single register pop - ?_assertEqual( - asm(<<16#bc01:16/little>>, "pop {r0}"), - jit_armv6m_asm:pop([r0]) - ), + ?_assertAsmEqual(<<16#bc01:16/little>>, "pop {r0}", jit_armv6m_asm:pop([r0])), %% Multiple register pop - ?_assertEqual( - asm(<<16#bc07:16/little>>, "pop {r0, r1, r2}"), - jit_armv6m_asm:pop([r0, r1, r2]) + ?_assertAsmEqual( + <<16#bc07:16/little>>, "pop {r0, r1, r2}", jit_armv6m_asm:pop([r0, r1, r2]) ), %% Pop with PC - ?_assertEqual( - asm(<<16#bd00:16/little>>, "pop {pc}"), - jit_armv6m_asm:pop([pc]) - ), + ?_assertAsmEqual(<<16#bd00:16/little>>, "pop {pc}", jit_armv6m_asm:pop([pc])), %% Pop registers + PC - ?_assertEqual( - asm(<<16#bd07:16/little>>, "pop {r0, r1, r2, pc}"), - jit_armv6m_asm:pop([r0, r1, r2, pc]) + ?_assertAsmEqual( + <<16#bd07:16/little>>, "pop {r0, r1, r2, pc}", jit_armv6m_asm:pop([r0, r1, r2, pc]) ) ]. bkpt_test_() -> [ %% BKPT #0 - ?_assertEqual( - asm(<<16#be00:16/little>>, "bkpt #0"), - jit_armv6m_asm:bkpt(0) - ), + ?_assertAsmEqual(<<16#be00:16/little>>, "bkpt #0", jit_armv6m_asm:bkpt(0)), %% BKPT #1 - ?_assertEqual( - asm(<<16#be01:16/little>>, "bkpt #1"), - jit_armv6m_asm:bkpt(1) - ), + ?_assertAsmEqual(<<16#be01:16/little>>, "bkpt #1", jit_armv6m_asm:bkpt(1)), %% BKPT #255 - ?_assertEqual( - asm(<<16#beff:16/little>>, "bkpt #255"), - jit_armv6m_asm:bkpt(255) - ) + ?_assertAsmEqual(<<16#beff:16/little>>, "bkpt #255", jit_armv6m_asm:bkpt(255)) ]. mvns_test_() -> [ %% ARMv6-M Thumb MVNS instructions (register only, low registers) %% MVNS Rd, Rm - bitwise NOT (performs ~Rm -> Rd, sets flags) - ?_assertEqual( - asm(<<16#43e3:16/little>>, "mvns r3, r4"), - jit_armv6m_asm:mvns(r3, r4) - ), - ?_assertEqual( - asm(<<16#43f3:16/little>>, "mvns r3, r6"), - jit_armv6m_asm:mvns(r3, r6) - ), - ?_assertEqual( - asm(<<16#43c8:16/little>>, "mvns r0, r1"), - jit_armv6m_asm:mvns(r0, r1) - ) + ?_assertAsmEqual(<<16#43e3:16/little>>, "mvns r3, r4", jit_armv6m_asm:mvns(r3, r4)), + ?_assertAsmEqual(<<16#43f3:16/little>>, "mvns r3, r6", jit_armv6m_asm:mvns(r3, r6)), + ?_assertAsmEqual(<<16#43c8:16/little>>, "mvns r0, r1", jit_armv6m_asm:mvns(r0, r1)) ]. - -asm(Bin, Str) -> - case erlang:system_info(machine) of - "ATOM" -> - Bin; - "BEAM" -> - case os:cmd("which arm-elf-as") of - [] -> - Bin; - _ -> - ok = file:write_file( - "test.S", ".arch armv6-m\n.thumb\n.syntax unified\n" ++ Str ++ "\n" - ), - Dump = os:cmd( - "arm-elf-as -c test.S -o test.o && arm-elf-objdump -j .text -D test.o" - ), - DumpBin = list_to_binary(Dump), - DumpLines = binary:split(DumpBin, <<"\n">>, [global]), - AsmBin = asm_lines(DumpLines, <<>>), - if - AsmBin =:= Bin -> - ok; - true -> - io:format( - "-------------------------------------------\n" - "~s\n" - "-------------------------------------------\n", - [Dump] - ) - end, - ?assertEqual(AsmBin, Bin), - Bin - end - end. - -asm_lines([<<" ", Tail/binary>> | T], Acc) -> - [_Offset, HexStr0] = binary:split(Tail, <<":\t">>), - [HexStr, _] = binary:split(HexStr0, <<"\t">>), - AssembledBin = hex_to_bin(HexStr, <<>>), - asm_lines(T, <>); -asm_lines([_OtherLine | T], Acc) -> - asm_lines(T, Acc); -asm_lines([], Acc) -> - Acc. - -hex_to_bin(<<>>, Acc) -> - Acc; -hex_to_bin(<<" ", Tail/binary>>, Acc) -> - hex_to_bin(Tail, Acc); -hex_to_bin(HexStr, Acc) -> - [HexChunk, Rest] = binary:split(HexStr, <<" ">>), - NumBits = byte_size(HexChunk) * 4, - HexVal = binary_to_integer(HexChunk, 16), - NewAcc = <>, - hex_to_bin(Rest, NewAcc). From 2998842f2fcee0b36945ab0f8291b8ff5f13a7d4 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 19 Sep 2025 07:08:42 +0200 Subject: [PATCH 66/97] armv6m: fix register parameters Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 388 +++++++++++----------------- tests/libs/jit/jit_armv6m_tests.erl | 59 +++-- 2 files changed, 198 insertions(+), 249 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index b10151affd..b4b3e938a1 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -194,6 +194,9 @@ -define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). -define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). -define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). +-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X), + is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000 +). %% ARMv6-M register allocation: %% - r0: context pointer (reserved) @@ -599,42 +602,51 @@ call_primitive_last( PrepCall = load_primitive_ptr(Primitive, Temp), Stream1 = StreamModule:append(Stream0, PrepCall), + State1 = State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + + % Preprocess offset special arg + Args1 = lists:map( + fun(Arg) -> + case Arg of + offset -> StreamModule:offset(Stream1); + _ -> Arg + end + end, + Args + ), + % Handle arguments differently for 5+ arguments - use direct call without register preservation - case length(Args) of - NumArgs when NumArgs >= 5 -> - % For 5+ args, call directly without preserving registers since we return immediately - State1 = set_args( - State0#state{ - stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs - }, - Args, - 0 - ), - #state{stream = Stream2} = State1, - % Call the function pointer directly - Call = jit_armv6m_asm:blx(Temp), - Stream3 = StreamModule:append(Stream2, Call), - % Deallocate stack space that was allocated for 5+ arguments - DeallocateArgs = jit_armv6m_asm:add(sp, sp, 8), - Stream4 = StreamModule:append(Stream3, DeallocateArgs), - % Return: pop prolog registers and return - PopCode = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]), - Stream5 = StreamModule:append(Stream4, PopCode), - State3 = State1#state{stream = Stream5}; - _ -> - % For 4 or fewer args, use tail call - [FirstArg, jit_state | ArgsT] = Args, - ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT], - State1 = set_args( - State0#state{ - stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs - }, - ArgsForTailCall, - 0 - ), - State3 = tail_call_with_jit_state_registers_only(State1, Temp) - end, - State3#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. + State4 = + case Args1 of + [Arg1, Arg2, Arg3, Arg4, Arg5 | Arg6L] -> + State2 = + case Arg6L of + [Arg6] -> + set_stack_args(State1, Arg5, Arg6); + [] -> + set_stack_args(State1, Arg5, undefined) + end, + State3 = set_registers_args(State2, [Arg1, Arg2, Arg3, Arg4], 8), + #state{stream = Stream2} = State3, + % Call the function pointer directly + Call = jit_armv6m_asm:blx(Temp), + Stream3 = StreamModule:append(Stream2, Call), + % Deallocate stack space that was allocated for 5+ arguments + DeallocateArgs = jit_armv6m_asm:add(sp, sp, 8), + Stream4 = StreamModule:append(Stream3, DeallocateArgs), + % Return: pop prolog registers and return + PopCode = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]), + Stream5 = StreamModule:append(Stream4, PopCode), + State3#state{stream = Stream5}; + [FirstArg, jit_state | ArgsT] -> + % For 4 or fewer args, use tail call + ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT], + State2 = set_registers_args(State1, ArgsForTailCall, 0), + tail_call_with_jit_state_registers_only(State2, Temp) + end, + State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. %%----------------------------------------------------------------------------- %% @doc Tail call to address in register, restoring prolog registers including @@ -1428,29 +1440,22 @@ call_func_ptr( % Set up arguments following ARM AAPCS calling convention % First four args are passed in r0-r4, but 5th and 6th are passed % on the stack. + Args1 = lists:map( + fun(Arg) -> + case Arg of + offset -> StreamModule:offset(Stream1); + _ -> Arg + end + end, + Args + ), {RegArgs, StackArgs} = - case Args of + case Args1 of [Arg1, Arg2, Arg3, Arg4 | StackArgs0] -> {[Arg1, Arg2, Arg3, Arg4], StackArgs0}; _ -> {Args, []} end, - RegArgsRegs = lists:flatmap( - fun - ({free, {ptr, Reg}}) -> [Reg]; - ({free, Reg}) when is_atom(Reg) -> [Reg]; - (Reg) when is_atom(Reg) -> [Reg]; - (_) -> [] - end, - RegArgs - ), - StackArgsRegs = lists:flatmap( - fun - ({free, {ptr, Reg}}) -> [Reg]; - ({free, Reg}) when is_atom(Reg) -> [Reg]; - (Reg) when is_atom(Reg) -> [Reg]; - (_) -> [] - end, - StackArgs - ), + RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs), + StackArgsRegs = lists:flatmap(fun arg_to_reg_list/1, StackArgs), % We pushed registers to stack, so we can use these registers we saved % and the currently available registers to push values to the stack. @@ -1463,31 +1468,22 @@ call_func_ptr( State2 = case StackArgs of [] -> State1; - [Arg5] -> set_args_push_stack(State1, Arg5, undefined); - [Arg5, Args6] -> set_args_push_stack(State1, Arg5, Args6) + [Arg5] -> set_stack_args(State1, Arg5, undefined); + [Arg5, Args6] -> set_stack_args(State1, Arg5, Args6) end, SetArgsRegsOnlyAvailableArgs = State2#state.available_regs, - ParameterRegs = parameter_regs(Args), + ParameterRegs = parameter_regs(RegArgs), {Stream3, SetArgsAvailableRegs, FuncPtrReg} = case FuncPtrTuple of {free, FuncPtrReg0} -> % If FuncPtrReg is in parameter regs, we must swap it with a free reg. case lists:member(FuncPtrReg0, ParameterRegs) of true -> - case SetArgsRegsOnlyAvailableArgs of - [] -> - io:format( - "UsedRegs1 = ~p\nAvailableRegs0 = ~p\nArgs = ~p\nSavedRegs = ~p\nFuncPtrReg0 = ~p\n", - [UsedRegs1, AvailableRegs0, Args, SavedRegs, FuncPtrReg0] - ); - _ -> - ok - end, [FuncPtrReg1 | _] = SetArgsRegsOnlyAvailableArgs, MovInstr = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0), SetArgsAvailableArgs1 = - SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1] ++ [FuncPtrReg0], + (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ [FuncPtrReg0], { StreamModule:append(State2#state.stream, MovInstr), SetArgsAvailableArgs1, @@ -1511,13 +1507,12 @@ call_func_ptr( stream = Stream3 }, - % Exclude argument registers from available_regs to prevent mov_immediate from overwriting them StackOffset = case StackArgs of [] -> length(SavedRegs) * 4; _ -> length(SavedRegs) * 4 + 8 end, - State4 = set_args_registers_only(State3, RegArgs, StackOffset), + State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset), Stream4 = State4#state.stream, % Call the function pointer (using BLX for call with return) @@ -1574,6 +1569,11 @@ call_func_ptr( ResultReg }. +arg_to_reg_list({free, {ptr, Reg}}) -> [Reg]; +arg_to_reg_list({free, Reg}) when is_atom(Reg) -> [Reg]; +arg_to_reg_list(Reg) when is_atom(Reg) -> [Reg]; +arg_to_reg_list(_) -> []. + push_registers(SavedRegs, StreamModule, Stream0) when length(SavedRegs) > 0 -> StreamModule:append(Stream0, jit_armv6m_asm:push(SavedRegs)); push_registers([], _StreamModule, Stream0) -> @@ -1585,27 +1585,10 @@ pop_registers(SavedRegs, StreamModule, Stream0) when length(SavedRegs) > 0 -> pop_registers([], _StreamModule, Stream0) -> Stream0. --spec set_args(state(), [arg()], non_neg_integer()) -> state(). -% Handle 5 parameters: handle 5th on stack first, then first 4 in registers r0-r3 -set_args(State, [Arg1, Arg2, Arg3, Arg4, Arg5], StackOffset) -> - % Handle 5th argument on stack first (with alignment) - this may free registers - State1 = set_args_push_stack(State, Arg5, undefined), - % Then set up first 4 arguments in registers using existing logic - set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4], StackOffset + 8); -% Handle 6 parameters: handle 5th and 6th on stack first, then first 4 in registers r0-r3 -set_args(State, [Arg1, Arg2, Arg3, Arg4, Arg5, Arg6], StackOffset) -> - % Handle 5th and 6th arguments on stack first (no alignment needed) - this may free registers - State1 = set_args_push_stack(State, Arg5, Arg6), - % Then set up first 4 arguments in registers using existing logic - set_args_registers_only(State1, [Arg1, Arg2, Arg3, Arg4], StackOffset + 8); -% Handle up to 4 parameters: all in registers r0-r3 -set_args(State, Args, StackOffset) when length(Args) =< 4 -> - set_args_registers_only(State, Args, StackOffset). - %% @doc Handle 5th and optionally 6th arguments on stack. %% For 5 args: push 5th arg at sp+0 with 4-byte padding at sp+4 for 8-byte alignment %% For 6 args: push 5th arg at sp+0, 6th arg at sp+4 (2×4 bytes = 8-byte aligned, no padding) -set_args_push_stack( +set_stack_args( #state{stream_module = StreamModule, stream = Stream0} = State0, Arg5, Arg6 ) -> % Decrement stack pointer by 8 bytes once @@ -1664,24 +1647,21 @@ set_args_push_stack( end, State2. -set_args_registers_only( - #state{stream = Stream0, stream_module = StreamModule, used_regs = UsedRegs} = State0, +set_registers_args(State0, Args, StackOffset) -> + ParamRegs = parameter_regs(Args), + set_registers_args(State0, Args, ParamRegs, StackOffset). + +set_registers_args( + #state{used_regs = UsedRegs} = State0, Args, + ParamRegs, StackOffset ) -> - ParamRegs = parameter_regs(Args), ArgsRegs = args_regs(Args), - AvailableScratchGP = - ?SCRATCH_REGS -- ParamRegs -- ArgsRegs -- UsedRegs, - Offset = StreamModule:offset(Stream0), - Args1 = [ - case Arg of - offset -> Offset; - _ -> Arg - end - || Arg <- Args - ], - State1 = set_args0(State0, Args1, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset), + AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs, + State1 = set_registers_args0( + State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset + ), Stream1 = State1#state.stream, NewUsedRegs = lists:foldl( fun @@ -1701,55 +1681,17 @@ set_args_registers_only( parameter_regs(Args) -> parameter_regs0(Args, ?PARAMETER_REGS, []). -% AAPCS32 helper: align to even register for 64-bit arguments - -% r0 is even, use (r0,r1) -align_to_even_register([r0, r1 | Rest]) -> [r0, r1 | Rest]; -% r1 is odd, skip to (r2,r3) -align_to_even_register([r1, r2 | Rest]) -> [r2, r3 | Rest]; -% r2 is even, use (r2,r3) -align_to_even_register([r2, r3 | Rest]) -> [r2, r3 | Rest]; -% r3 is odd, no pair available -align_to_even_register([r3]) -> []; -% No registers available -align_to_even_register([]) -> []; -% Other cases -align_to_even_register(_) -> []. - +% AAPCS32: 64-bit arguments require double-word alignment (even register number) parameter_regs0([], _, Acc) -> lists:reverse(Acc); -parameter_regs0([Special | T], [GPReg | GPRegsT], Acc) when - Special =:= ctx orelse Special =:= jit_state orelse Special =:= jit_state_tail_call orelse - Special =:= offset --> - parameter_regs0(T, GPRegsT, [GPReg | Acc]); -parameter_regs0([{free, Free} | T], GPRegs, Acc) -> - parameter_regs0([Free | T], GPRegs, Acc); -parameter_regs0([{ptr, Reg} | T], [GPReg | GPRegsT], Acc) when ?IS_GPR(Reg) -> - parameter_regs0(T, GPRegsT, [GPReg | Acc]); -parameter_regs0([Reg | T], [GPReg | GPRegsT], Acc) when ?IS_GPR(Reg) -> - parameter_regs0(T, GPRegsT, [GPReg | Acc]); -parameter_regs0([{x_reg, _} | T], [GPReg | GPRegsT], Acc) -> - parameter_regs0(T, GPRegsT, [GPReg | Acc]); -parameter_regs0([{y_reg, _} | T], [GPReg | GPRegsT], Acc) -> - parameter_regs0(T, GPRegsT, [GPReg | Acc]); -parameter_regs0([{fp_reg, _} | T], [GPRegA, GPRegB | GPRegsT], Acc) -> - parameter_regs0(T, GPRegsT, [GPRegB, GPRegA | Acc]); -parameter_regs0([Int | T], [GPReg | GPRegsT], Acc) when is_integer(Int) -> - parameter_regs0(T, GPRegsT, [GPReg | Acc]); -% AAPCS32: 64-bit arguments require double-word alignment (even register number) -parameter_regs0([{avm_int64_t, _} | T], GPRegs, Acc) -> - % Find the next even-numbered register position for AAPCS32 alignment - case align_to_even_register(GPRegs) of - [GPRegA, GPRegB | GPRegsT] -> - parameter_regs0(T, GPRegsT, [GPRegB, GPRegA | Acc]); - _ -> - % Not enough registers available, use stack - parameter_regs0(T, [], [stack, stack | Acc]) - end; -% Handle stack parameters when we run out of registers -parameter_regs0([_Arg | T], [], Acc) -> - parameter_regs0(T, [], [stack | Acc]). +parameter_regs0([{avm_int64_t, _} | T], [r0, r1 | Rest], Acc) -> + parameter_regs0(T, Rest, [r1, r0 | Acc]); +parameter_regs0([{avm_int64_t, _} | T], [r1, r2, r3 | Rest], Acc) -> + parameter_regs0(T, Rest, [r3, r2 | Acc]); +parameter_regs0([{avm_int64_t, _} | T], [r2, r3 | Rest], Acc) -> + parameter_regs0(T, Rest, [r3, r2 | Acc]); +parameter_regs0([_Other | T], [Reg | Rest], Acc) -> + parameter_regs0(T, Rest, [Reg | Acc]). replace_reg(Args, Reg1, Reg2) -> replace_reg0(Args, Reg1, Reg2, []). @@ -1761,133 +1703,113 @@ replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> replace_reg0([Other | T], Reg, Replacement, Acc) -> replace_reg0(T, Reg, Replacement, [Other | Acc]). -set_args0(State, [], [], [], _AvailGP, _StackOffset) -> +set_registers_args0(State, [], [], [], _AvailGP, _StackOffset) -> State; -set_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset) -> - set_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset); -set_args0( +set_registers_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset) -> + set_registers_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset); +set_registers_args0( State, [ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset ) -> - set_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); -set_args0( - #state{stream_module = StreamModule, stream = Stream0} = State, - [jit_state | ArgsT], - [jit_state | ArgsRegs], - [ParamReg | ParamRegs], - AvailGP, - StackOffset -) -> - % jit_state is loaded from a fixed stack location, so we don't need to check - % for register conflicts like other arguments - it can overwrite any existing - % register content since it comes from stack - % After stack space allocation for parameters, jit_state is at higher offset - JitStateOffset = ?STACK_OFFSET_JITSTATE + StackOffset, - I = jit_armv6m_asm:ldr(ParamReg, {sp, JitStateOffset}), - Stream1 = StreamModule:append(Stream0, I), - set_args0(State#state{stream = Stream1}, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); -set_args0( - State, - [jit_state_tail_call | ArgsT], - [jit_state | ArgsRegs], - [ParamReg | ParamRegs], - AvailGP, - StackOffset -) -> - false = lists:member(ParamReg, ArgsRegs), - % For tail calls, jit_state will be restored by pop - skip generating load instruction - set_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); -% Handle stack parameters - load argument into temp register and push to stack -set_args0( - #state{stream_module = StreamModule} = State, - [Arg | ArgsT], - [stack | ArgsRegs], - [stack | ParamRegs], - [TempReg | _] = AvailGP, - StackOffset -) -> - % Generate code to set up argument in temp register - State1 = set_args1(State, Arg, TempReg), - % Decrement stack pointer by 4 bytes and store argument - DecSP = jit_armv6m_asm:sub(sp, sp, 4), - StoreInstr = jit_armv6m_asm:str(TempReg, {sp, 0}), - Stream1 = StreamModule:append(State1#state.stream, <>), - set_args0(State1#state{stream = Stream1}, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); -% ctx is special as we need it to access x_reg/y_reg/fp_reg -set_args0(State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset) -> - false = lists:member(?CTX_REG, ArgsRegs), - State1 = set_args1(State, Arg, ?CTX_REG), - set_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); + set_registers_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); % Handle 64-bit arguments that need two registers according to AAPCS32 -set_args0( +set_registers_args0( State, [{avm_int64_t, Value} | ArgsT], - [_ArgReg | ArgsRegs], - [ParamRegLo, ParamRegHi | ParamRegs], + ArgsRegs, + ParamRegs, AvailGP, StackOffset ) when is_integer(Value) -> - % Split the 64-bit value into two 32-bit parts LowPart = Value band 16#FFFFFFFF, HighPart = (Value bsr 32) band 16#FFFFFFFF, - % Set up the low 32 bits in the first register - State1 = set_args1(State, LowPart, ParamRegLo), - % Set up the high 32 bits in the second register - State2 = set_args1(State1, HighPart, ParamRegHi), - set_args0(State2, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); -set_args0( - #state{stream_module = StreamModule} = State, + set_registers_args0( + State, [LowPart, HighPart | ArgsT], [imm | ArgsRegs], ParamRegs, AvailGP, StackOffset + ); +% ctx is special as we need it to access x_reg/y_reg/fp_reg and we don't +% want to replace it +set_registers_args0( + State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset +) -> + false = lists:member(?CTX_REG, ArgsRegs), + State1 = set_registers_args1(State, Arg, ?CTX_REG, StackOffset), + set_registers_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); +set_registers_args0( + #state{stream_module = StreamModule} = State0, [Arg | ArgsT], - [_ArgReg | ArgsRegs], - [ParamReg | ParamRegs], - [Avail | AvailGPT] = AvailGP, + [_ArgReg | ArgsRegsT], + [ParamReg | ParamRegsT], + AvailGP, StackOffset ) -> - State1 = set_args1(State, Arg, ParamReg), - case lists:member(ParamReg, ArgsRegs) of + case lists:member(ParamReg, ArgsRegsT) of false -> - set_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); + State1 = set_registers_args1(State0, Arg, ParamReg, StackOffset), + set_registers_args0(State1, ArgsT, ArgsRegsT, ParamRegsT, AvailGP, StackOffset); true -> + [Avail | AvailGPT] = AvailGP, I = jit_armv6m_asm:mov(Avail, ParamReg), - Stream1 = StreamModule:append(State1#state.stream, I), + Stream1 = StreamModule:append(State0#state.stream, I), + State1 = set_registers_args1( + State0#state{stream = Stream1}, Arg, ParamReg, StackOffset + ), NewArgsT = replace_reg(ArgsT, ParamReg, Avail), - set_args0( - State1#state{stream = Stream1}, NewArgsT, ArgsRegs, ParamRegs, AvailGPT, StackOffset + set_registers_args0( + State1, NewArgsT, ArgsRegsT, ParamRegsT, AvailGPT, StackOffset ) end. -set_args1(State, Reg, Reg) -> +set_registers_args1(State, Reg, Reg, _Offset) -> + State; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, jit_state, ParamReg, StackOffset +) -> + JitStateOffset = ?STACK_OFFSET_JITSTATE + StackOffset, + I = jit_armv6m_asm:ldr(ParamReg, {sp, JitStateOffset}), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +% For tail calls, jit_state will be restored by pop - skip generating load instruction +set_registers_args1(State, jit_state_tail_call, r1, _StackOffset) -> State; -set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, Reg) -> +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, + {x_reg, extra}, + Reg, + _StackOffset +) -> I = jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; -set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, Reg) -> +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, Reg, _StackOffset +) -> I = jit_armv6m_asm:ldr(Reg, ?X_REG(X)), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; -set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Source}, Reg) -> +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Source}, Reg, _StackOffset +) -> I = jit_armv6m_asm:ldr(Reg, {Source, 0}), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; -set_args1( +set_registers_args1( #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State, {y_reg, X}, - Reg + Reg, + _StackOffset ) -> Code = ldr_y_reg(Reg, X, AvailRegs), Stream1 = StreamModule:append(Stream0, Code), State#state{stream = Stream1}; -set_args1(#state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg) when +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg, _StackOffset +) when ?IS_GPR(ArgReg) -> I = jit_armv6m_asm:mov(Reg, ArgReg), Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}; -set_args1(State, Arg, Reg) when is_integer(Arg) -> - mov_immediate(State, Reg, Arg); -set_args1(State, {avm_int64_t, Value}, Reg) when is_integer(Value) -> - % For now, just store the lower 32 bits - this needs proper AAPCS32 register pair support - mov_immediate(State, Reg, Value band 16#FFFFFFFF). +set_registers_args1(State, Value, Reg, _StackOffset) when ?IS_SIGNED_OR_UNSIGNED_INT32_T(Value) -> + mov_immediate(State, Reg, Value). %%----------------------------------------------------------------------------- %% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 9952e538b2..0a7c52f69c 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -209,7 +209,7 @@ call_ext_only_test() -> " 28: 2602 movs r6, #2\n" " 2a: 9600 str r6, [sp, #0]\n" " 2c: 9902 ldr r1, [sp, #8]\n" - " 2e: 222c movs r2, #44 ; 0x2c\n" + " 2e: 2220 movs r2, #32\n" " 30: 2302 movs r3, #2\n" " 32: 47b8 blx r7\n" " 34: b002 add sp, #8\n" @@ -251,7 +251,7 @@ call_ext_only_unaligned_test() -> " 28: 2602 movs r6, #2\n" " 2a: 9600 str r6, [sp, #0]\n" " 2c: 9902 ldr r1, [sp, #8]\n" - " 2e: 222c movs r2, #44 ; 0x2c\n" + " 2e: 2220 movs r2, #32\n" " 30: 2302 movs r3, #2\n" " 32: 47b8 blx r7\n" " 34: b002 add sp, #8\n" @@ -274,7 +274,7 @@ call_primitive_last_5_args_test() -> " 4: b082 sub sp, #8\n" " 6: 9700 str r7, [sp, #0]\n" " 8: 9902 ldr r1, [sp, #8]\n" - " a: 2208 movs r2, #8\n" + " a: 2204 movs r2, #4\n" " c: 4b00 ldr r3, [pc, #0] ; (0x10)\n" " e: e001 b.n 0x14\n" " 10: 02cb lsrs r3, r1, #16\n" @@ -308,14 +308,14 @@ call_ext_last_test() -> " 1a: 46c0 nop ; (mov r8, r8)\n" " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" % State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]), - " 1e: 6917 ldr r7, [r2, #16]\n" + "1e: 6917 ldr r7, [r2, #16]\n" " 20: b082 sub sp, #8\n" " 22: 260a movs r6, #10\n" " 24: 9601 str r6, [sp, #4]\n" " 26: 2602 movs r6, #2\n" " 28: 9600 str r6, [sp, #0]\n" " 2a: 9902 ldr r1, [sp, #8]\n" - " 2c: 222a movs r2, #42 ; 0x2a\n" + " 2c: 2220 movs r2, #32\n" " 2e: 2302 movs r3, #2\n" " 30: 47b8 blx r7\n" " 32: b002 add sp, #8\n" @@ -1911,7 +1911,7 @@ return_labels_and_lines_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -%% Test return_labels_and_lines/2 with unaligned offset - should fail +%% Test return_labels_and_lines/2 with unaligned offset return_labels_and_lines_unaligned_test() -> % Create a new state with a 2-byte instruction already in the stream % to simulate starting at an odd offset (offset 2 instead of 0) @@ -1952,7 +1952,7 @@ return_labels_and_lines_unaligned_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -%% Test call_primitive with {free, {x_reg, X}} that causes the jit_precompile bug +%% Test call_primitive with {free, {x_reg, X}} gc_bif2_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]), @@ -1984,6 +1984,32 @@ gc_bif2_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test case where parameter value is in r1 +memory_ensure_free_with_roots_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [ + ctx, jit_state, {free, r1}, 4, 1 + ]), + + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 27b0 movs r7, #176 ; 0xb0\n" + " 2: 59d7 ldr r7, [r2, r7]\n" + " 4: b405 push {r0, r2}\n" + " 6: b082 sub sp, #8\n" + " 8: 2601 movs r6, #1\n" + " a: 9600 str r6, [sp, #0]\n" + " c: 460e mov r6, r1\n" + " e: 9904 ldr r1, [sp, #16]\n" + " 10: 4632 mov r2, r6\n" + " 12: 2304 movs r3, #4\n" + " 14: 47b8 blx r7\n" + " 16: 4607 mov r7, r0\n" + " 18: b002 add sp, #8\n" + " 1a: bc05 pop {r0, r2}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -2087,7 +2113,7 @@ call_fun_test() -> " 2e: b082 sub sp, #8\n" " 30: 9600 str r6, [sp, #0]\n" " 32: 9902 ldr r1, [sp, #8]\n" - " 34: 2232 movs r2, #50 ; 0x32\n" + " 34: 222e movs r2, #46 ; 0x2e\n" " 36: 4b01 ldr r3, [pc, #4] ; (0x3c)\n" " 38: e002 b.n 0x40\n" " 3a: 0000 movs r0, r0\n" @@ -2108,7 +2134,7 @@ call_fun_test() -> " 58: b082 sub sp, #8\n" " 5a: 9600 str r6, [sp, #0]\n" " 5c: 9902 ldr r1, [sp, #8]\n" - " 5e: 225c movs r2, #92 ; 0x5c\n" + " 5e: 2258 movs r2, #88 ; 0x58\n" " 60: 4b00 ldr r3, [pc, #0] ; (0x64)\n" " 62: e001 b.n 0x68\n" " 64: 018b lsls r3, r1, #6\n" @@ -3252,13 +3278,14 @@ call_func_ptr_register_exhaustion_test_() -> " e: b082 sub sp, #8\n" " 10: 2401 movs r4, #1\n" " 12: 9400 str r4, [sp, #0]\n" - " 14: 9908 ldr r1, [sp, #32]\n" - " 16: 461a mov r2, r3\n" - " 18: 460b mov r3, r1\n" - " 1a: 47b0 blx r6\n" - " 1c: 4606 mov r6, r0\n" - " 1e: b002 add sp, #8\n" - " 20: bcb7 pop {r0, r1, r2, r4, r5, r7}" + " 14: 460f mov r7, r1\n" + " 16: 9908 ldr r1, [sp, #32]\n" + " 18: 461a mov r2, r3\n" + " 1a: 463b mov r3, r7\n" + " 1c: 47b0 blx r6\n" + " 1e: 4606 mov r6, r0\n" + " 20: b002 add sp, #8\n" + " 22: bcb7 pop {r0, r1, r2, r4, r5, r7}" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(r6, ResultReg) From 125df6a862b7e1cd062f04454920401cbd22adbe Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 19 Sep 2025 18:39:19 +0200 Subject: [PATCH 67/97] armv6m: fix result register bug Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 4 +- tests/libs/jit/jit_armv6m_tests.erl | 64 ++++++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index b4b3e938a1..863b844d17 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1526,9 +1526,9 @@ call_func_ptr( {Stream6, UsedRegs2} = case length(SavedRegs) of 8 when element(1, FuncPtrTuple) =:= free -> - % We use FuncPtrReg then as we know it's available. + % We use original FuncPtrReg then as we know it's available. % Calculate stack offset: register number * 4 bytes - ResultReg = FuncPtrReg, + ResultReg = element(2, FuncPtrTuple), StoreResultStackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4, StoreResult = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset}), {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]}; diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 0a7c52f69c..6baf0eb385 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1870,6 +1870,68 @@ wait_timeout_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test OP_WAIT pattern that uses set_continuation_to_label +wait_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:set_continuation_to_label(State2, Label), + State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 4b01 ldr r3, [pc, #4] ; (0x8)\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop ; (mov r8, r8)\n" + " 8: 0000 movs r0, r0\n" + " a: 0000 movs r0, r0\n" + " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop ; (mov r8, r8)\n" + " 14: 0000 movs r0, r0\n" + " 16: 0000 movs r0, r0\n" + " 18: 4b01 ldr r3, [pc, #4] ; (0x20)\n" + " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1c: 449f add pc, r3\n" + " 1e: 46c0 nop ; (mov r8, r8)\n" + " 20: 0000 movs r0, r0\n" + " 22: 0000 movs r0, r0\n" + " 24: 4b01 ldr r3, [pc, #4] ; (0x2c)\n" + " 26: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 28: 449f add pc, r3\n" + " 2a: 46c0 nop ; (mov r8, r8)\n" + " 2c: 0000 movs r0, r0\n" + " 2e: 0000 movs r0, r0\n" + " 30: 4b01 ldr r3, [pc, #4] ; (0x38)\n" + " 32: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 34: 449f add pc, r3\n" + " 36: 46c0 nop ; (mov r8, r8)\n" + " 38: 0000 movs r0, r0\n" + " 3a: 0000 movs r0, r0\n" + " 3c: 4b01 ldr r3, [pc, #4] ; (0x44)\n" + " 3e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 40: 449f add pc, r3\n" + " 42: 46c0 nop ; (mov r8, r8)\n" + " 44: 0000 movs r0, r0\n" + " 46: 0000 movs r0, r0\n" + " 48: a700 add r7, pc, #0 ; (adr r7, 0x4c)\n" + " 4a: 2633 movs r6, #51 ; 0x33\n" + " 4c: 4276 negs r6, r6\n" + " 4e: 19f6 adds r6, r6, r7\n" + " 50: 9f00 ldr r7, [sp, #0]\n" + " 52: 607e str r6, [r7, #4]\n" + " 54: 6f57 ldr r7, [r2, #116] ; 0x74\n" + " 56: 9e05 ldr r6, [sp, #20]\n" + " 58: 9705 str r7, [sp, #20]\n" + " 5a: 46b6 mov lr, r6\n" + " 5c: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + %% Test return_labels_and_lines/2 function return_labels_and_lines_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -3310,7 +3372,7 @@ call_func_ptr_register_exhaustion_test_() -> " 10: 4630 mov r0, r6\n" " 12: 4619 mov r1, r3\n" " 14: 47a0 blx r4\n" - " 16: 9004 str r0, [sp, #16]\n" + " 16: 9001 str r0, [sp, #4]\n" " 18: bcff pop {r0, r1, r2, r3, r4, r5, r6, r7}" >>, ?assertEqual(dump_to_bin(Dump), Stream) From 4827fb190e6f9d7cdecab19cc89f430754df6a7f Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 19 Sep 2025 18:39:44 +0200 Subject: [PATCH 68/97] armv6m: pack uf2 binaries Signed-off-by: Paul Guyot --- CMakeModules/BuildErlang.cmake | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/CMakeModules/BuildErlang.cmake b/CMakeModules/BuildErlang.cmake index e19fb6d622..76cfa4c2f0 100644 --- a/CMakeModules/BuildErlang.cmake +++ b/CMakeModules/BuildErlang.cmake @@ -194,6 +194,24 @@ macro(pack_lib avm_name) ) set(target_deps ${target_deps} ${avm_name}-pico.uf2 ${avm_name}-pico2.uf2) + if(NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) + add_custom_command( + OUTPUT ${avm_name}-armv6m-pico.uf2 + DEPENDS ${avm_name}-armv6m.avm UF2Tool + COMMAND ${CMAKE_BINARY_DIR}/tools/uf2tool/uf2tool create -o ${avm_name}-armv6m-pico.uf2 -s 0x10100000 ${avm_name}-armv6m.avm + COMMENT "Creating UF2 file ${avm_name}-armv6m.uf2" + VERBATIM + ) + add_custom_command( + OUTPUT ${avm_name}-armv6m-pico2.uf2 + DEPENDS ${avm_name}-armv6m.avm UF2Tool + COMMAND ${CMAKE_BINARY_DIR}/tools/uf2tool/uf2tool create -o ${avm_name}-armv6m-pico2.uf2 -f data -s 0x10100000 ${avm_name}-armv6m.avm + COMMENT "Creating UF2 file ${avm_name}-armv6m.uf2" + VERBATIM + ) + set(target_deps ${target_deps} ${avm_name}-armv6m-pico.uf2 ${avm_name}-armv6m-pico2.uf2) + endif() + add_custom_target( ${avm_name} ALL DEPENDS ${target_deps} From 22a9bb8e7c75c85d962892c8c75dae66236cb7ba Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 11:07:31 +0200 Subject: [PATCH 69/97] armv6m: add an assert for sizeof(size_t) Signed-off-by: Paul Guyot --- src/libAtomVM/jit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 6e4a2b8a11..c0d63cf9a5 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -94,6 +94,10 @@ _Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_armv6m.erl"); _Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_armv6m.erl"); _Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_armv6m.erl"); + +_Static_assert(sizeof(avm_float_t) == 0x8, "sizeof(avm_float_t) is 0x8 in jit/src/jit_armv6m.erl"); +_Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); + #else #error Unknown jit target #endif From 485f4d6bd4eba7f74c87685d280bb7d64750794b Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 12 Oct 2025 11:34:58 +0200 Subject: [PATCH 70/97] armv6m: Add target to test.c Signed-off-by: Paul Guyot --- tests/test.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test.c b/tests/test.c index 79aa2ec121..577572c967 100644 --- a/tests/test.c +++ b/tests/test.c @@ -708,6 +708,11 @@ int test_modules_execution(bool beam, bool skip, int count, char **item) perror("Error: cannot find aarch64 directory"); return EXIT_FAILURE; } +#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M + if (chdir("armv6m") != 0) { + perror("Error: cannot find armv6m directory"); + return EXIT_FAILURE; + } #else #error Unknown JIT target #endif From 9715d9f92b0b9438766cb699deb7ce049ebd269e Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 26 Sep 2025 22:58:51 +0200 Subject: [PATCH 71/97] armv6m: Fix offset for unaligned far branches Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 137 ++++++++-------------------- tests/libs/jit/jit_armv6m_tests.erl | 8 +- 2 files changed, 43 insertions(+), 102 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 863b844d17..cc4dd1287c 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -430,72 +430,58 @@ update_branches( true -> % Keep far branch sequence, calculate correct ldr immediate and update literal - % Calculate where the literal should be placed (same logic as generation) - LdrOffset = - case TempReg of - ?IP_REG -> Offset + 2; - _ -> Offset - end, - % ldr + add + bx = 6 bytes - AfterInstructionsOffset = Offset + 6, - AlignedLiteralOffset = ((AfterInstructionsOffset + 3) band (bnot 3)), - - % Calculate correct PC-relative offset for ldr instruction - - % PC aligned down - PCAtLdrExecution = (LdrOffset + 4) band (bnot 3), - LdrImmediate = AlignedLiteralOffset - PCAtLdrExecution, - - % Calculate the relative offset for the literal value - % This is the offset from the add instruction's PC to the target - % The add instruction is at Offset + 2, so PC = Offset + 2 + 4 = Offset + 6 - % We also need to set thumb bit to 1, so eventually we only substract 5. - % If IP_REG, add is at Offset + 8 - AddPCOffset = - case TempReg of - ?IP_REG -> Offset + 11; - _ -> Offset + 5 - end, % Set thumb bit for bx instruction - target address must be odd for Thumb mode - RelativeOffset = LabelOffset - AddPCOffset, + % So we substract 1 less + % ldr requires align PC + % add rx, pc doesn't and reads pc+4 whatever the alignment case {TempReg, Size} of {?IP_REG, 18} -> % 18-byte sequence with alignment + % Unaligned I1 = jit_armv6m_asm:push([r0]), - I2 = jit_armv6m_asm:ldr(r0, {pc, LdrImmediate}), + % Aligned + I2 = jit_armv6m_asm:ldr(r0, {pc, 8}), I3 = jit_armv6m_asm:mov(?IP_REG, r0), I4 = jit_armv6m_asm:pop([r0]), I5 = jit_armv6m_asm:add(?IP_REG, pc), I6 = jit_armv6m_asm:bx(?IP_REG), I7 = jit_armv6m_asm:nop(), + RelativeOffset = LabelOffset - Offset - 11, I8 = <>, <>; {?IP_REG, 16} -> % 16-byte sequence without alignment + % Aligned I1 = jit_armv6m_asm:push([r0]), - I2 = jit_armv6m_asm:ldr(r0, {pc, LdrImmediate}), + % Unaligned + I2 = jit_armv6m_asm:ldr(r0, {pc, 8}), I3 = jit_armv6m_asm:mov(?IP_REG, r0), I4 = jit_armv6m_asm:pop([r0]), I5 = jit_armv6m_asm:add(?IP_REG, pc), I6 = jit_armv6m_asm:bx(?IP_REG), + RelativeOffset = LabelOffset - Offset - 11, I7 = <>, <>; {_, 12} -> % 12-byte sequence with alignment - I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + % Aligned + I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), I2 = jit_armv6m_asm:add(TempReg, pc), I3 = jit_armv6m_asm:bx(TempReg), I4 = jit_armv6m_asm:nop(), + RelativeOffset = LabelOffset - Offset - 5, I5 = <>, <>; {_, 10} -> % 10-byte sequence without alignment - I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + % Unaligned + I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), I2 = jit_armv6m_asm:add(TempReg, pc), I3 = jit_armv6m_asm:bx(TempReg), + RelativeOffset = LabelOffset - Offset - 5, I4 = <>, <> end @@ -752,42 +738,23 @@ branch_to_label_code( #state{available_regs = [TempReg | _]} = State0, Offset, Label, {Label, LabelOffset} ) -> % Far branch: use register-based sequence, need temporary register - % Calculate alignment for literal pool - LdrOffset = Offset, - % ldr + add + bx = 6 bytes - AfterInstructionsOffset = Offset + 6, - % Round up to 4-byte boundary - AlignedLiteralOffset = ((AfterInstructionsOffset + 3) band (bnot 3)), - PaddingSize = AlignedLiteralOffset - AfterInstructionsOffset, - - % Calculate PC-relative offset for ldr instruction - % For ldr rd, [pc, #imm]: effective address = (PC+4 aligned to 4) + imm - - % PC aligned down - PCAtLdrExecution = (LdrOffset + 4) band (bnot 3), - LdrImmediate = AlignedLiteralOffset - PCAtLdrExecution, - - % Calculate the literal value: target - PC_at_add_instruction - % The add instruction is at Offset + 2, so PC = Offset + 2 + 4 = Offset + 6 - % We also need to set thumb bit to 1, so eventually we only subtract 5. - AddPCValue = Offset + 5, - LiteralValue = LabelOffset - AddPCValue, - if - PaddingSize > 0 -> - % Need alignment padding - I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + Offset rem 4 =:= 0 -> + % Aligned + I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), I2 = jit_armv6m_asm:add(TempReg, pc), I3 = jit_armv6m_asm:bx(TempReg), - % Padding + % Unaligned : need nop I4 = jit_armv6m_asm:nop(), + LiteralValue = LabelOffset - Offset - 5, I5 = <>, CodeBlock = <>; true -> - % No alignment padding needed - I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + % Unaligned + I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), I2 = jit_armv6m_asm:add(TempReg, pc), I3 = jit_armv6m_asm:bx(TempReg), + LiteralValue = LabelOffset - Offset - 5, I4 = <>, CodeBlock = <> end, @@ -795,36 +762,22 @@ branch_to_label_code( branch_to_label_code( #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false ) -> - % Calculate alignment for literal pool - LdrOffset = Offset, - % ldr + add + bx = 6 bytes - AfterInstructionsOffset = Offset + 6, - % Round up to 4-byte boundary - AlignedLiteralOffset = ((AfterInstructionsOffset + 3) band (bnot 3)), - PaddingSize = AlignedLiteralOffset - AfterInstructionsOffset, - - % Calculate PC-relative offset for ldr instruction - % For ldr rd, [pc, #imm]: effective address = (PC+4 aligned to 4) + imm - - % PC aligned down - PCAtLdrExecution = (LdrOffset + 4) band (bnot 3), - LdrImmediate = AlignedLiteralOffset - PCAtLdrExecution, - {CodeBlock, SequenceSize} = if - PaddingSize > 0 -> - % Need alignment padding - I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + Offset rem 4 =:= 0 -> + % Aligned + I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), I2 = jit_armv6m_asm:add(TempReg, pc), I3 = jit_armv6m_asm:bx(TempReg), + % Unaligned : need nop I4 = jit_armv6m_asm:nop(), % Placeholder offset I5 = <<0:32/little>>, Seq = <>, {Seq, byte_size(Seq)}; true -> - % No alignment padding needed - I1 = jit_armv6m_asm:ldr(TempReg, {pc, LdrImmediate}), + % Unaligned + I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), I2 = jit_armv6m_asm:add(TempReg, pc), I3 = jit_armv6m_asm:bx(TempReg), % Placeholder offset @@ -839,31 +792,18 @@ branch_to_label_code( branch_to_label_code( #state{available_regs = [], branches = Branches} = State0, Offset, Label, false ) -> - % Calculate alignment for literal pool - LdrOffset = Offset + 2, - % push + ldr + mov + pop + add + bx = 12 bytes - AfterInstructionsOffset = Offset + 12, - % Round up to 4-byte boundary - AlignedLiteralOffset = ((AfterInstructionsOffset + 3) band (bnot 3)), - PaddingSize = AlignedLiteralOffset - AfterInstructionsOffset, - - % Calculate PC-relative offset for ldr instruction - % For ldr rd, [pc, #imm]: effective address = (PC+4 aligned to 4) + imm - - % PC aligned down - PCAtLdrExecution = (LdrOffset + 4) band (bnot 3), - LdrImmediate = AlignedLiteralOffset - PCAtLdrExecution, - {CodeBlock, SequenceSize} = if - PaddingSize > 0 -> - % Need alignment padding + Offset rem 4 =/= 0 -> + % Unaligned I1 = jit_armv6m_asm:push([r0]), - I2 = jit_armv6m_asm:ldr(r0, {pc, LdrImmediate}), + % Aligned + I2 = jit_armv6m_asm:ldr(r0, {pc, 8}), I3 = jit_armv6m_asm:mov(?IP_REG, r0), I4 = jit_armv6m_asm:pop([r0]), I5 = jit_armv6m_asm:add(?IP_REG, pc), I6 = jit_armv6m_asm:bx(?IP_REG), + % Unaligned : need nop I7 = jit_armv6m_asm:nop(), % Placeholder offset I8 = <<0:32/little>>, @@ -872,9 +812,10 @@ branch_to_label_code( I8/binary>>, {Seq, byte_size(Seq)}; true -> - % No alignment padding needed + % Aligned I1 = jit_armv6m_asm:push([r0]), - I2 = jit_armv6m_asm:ldr(r0, {pc, LdrImmediate}), + % Unaligned + I2 = jit_armv6m_asm:ldr(r0, {pc, 8}), I3 = jit_armv6m_asm:mov(?IP_REG, r0), I4 = jit_armv6m_asm:pop([r0]), I5 = jit_armv6m_asm:add(?IP_REG, pc), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 6baf0eb385..10d4b5f418 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1721,15 +1721,15 @@ is_boolean_far_unaligned_test() -> Dump = << " 0: 4770 bx lr\n" " 2: 6987 ldr r7, [r0, #24]\n" - " 4: 2f4b cmp r7, #75 ; 0x4b\n" + " 4: 2f4b cmp r7, #75 @ 0x4b\n" " 6: d007 beq.n 0x18\n" " 8: 2f0b cmp r7, #11\n" " a: d005 beq.n 0x18\n" - " c: 4e01 ldr r6, [pc, #4] ; (0x14)\n" + " c: 4e01 ldr r6, [pc, #4] @ (0x14)\n" " e: 447e add r6, pc\n" " 10: 4730 bx r6\n" - " 12: 46c0 nop ; (mov r8, r8)\n" - " 14: 0fef lsrs r6, r5, #31\n" + " 12: 46c0 nop @ (mov r8, r8)\n" + " 14: 0fef lsrs r7, r5, #31\n" " 16: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). From 3639f6fa92c1b1b6c4b657d0f6a45d18a282ffb8 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 28 Sep 2025 07:45:44 +0200 Subject: [PATCH 72/97] armv6m: Fix if_block test of equality of two free regs Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index cc4dd1287c..205df4bcbe 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1058,6 +1058,19 @@ if_block_cond( State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, {State2, ne, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {{free, RegA}, '==', {free, RegB}} +) -> + % Compare two free registers: cmp RegA, RegB; beq + I1 = jit_armv6m_asm:cmp(RegA, RegB), + Stream1 = StreamModule:append(Stream0, I1), + I2 = jit_armv6m_asm:bcc(ne, 0), + Stream2 = StreamModule:append(Stream1, I2), + State1 = State0#state{stream = Stream2}, + State2 = if_block_free_reg({free, RegA}, State1), + State3 = if_block_free_reg({free, RegB}, State2), + {State3, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, {RegOrTuple, '==', Val} From 8dcae31429ac025e377e9a709f361815d291bdd8 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 28 Sep 2025 18:14:02 +0200 Subject: [PATCH 73/97] armv6m: fix call to func ptr when registers are exhausted Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 53 +++++++++++++++++++++-------- tests/libs/jit/jit_armv6m_tests.erl | 37 ++++++++++++++++++++ 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 205df4bcbe..94f7132a57 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1403,12 +1403,12 @@ call_func_ptr( end, Args ), - {RegArgs, StackArgs} = + {RegArgs0, StackArgs} = case Args1 of [Arg1, Arg2, Arg3, Arg4 | StackArgs0] -> {[Arg1, Arg2, Arg3, Arg4], StackArgs0}; _ -> {Args, []} end, - RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs), + RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0), StackArgsRegs = lists:flatmap(fun arg_to_reg_list/1, StackArgs), % We pushed registers to stack, so we can use these registers we saved @@ -1427,32 +1427,55 @@ call_func_ptr( end, SetArgsRegsOnlyAvailableArgs = State2#state.available_regs, - ParameterRegs = parameter_regs(RegArgs), - {Stream3, SetArgsAvailableRegs, FuncPtrReg} = + ParameterRegs = parameter_regs(RegArgs0), + {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} = case FuncPtrTuple of {free, FuncPtrReg0} -> % If FuncPtrReg is in parameter regs, we must swap it with a free reg. case lists:member(FuncPtrReg0, ParameterRegs) of true -> - [FuncPtrReg1 | _] = SetArgsRegsOnlyAvailableArgs, - MovInstr = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0), - SetArgsAvailableArgs1 = - (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ [FuncPtrReg0], - { - StreamModule:append(State2#state.stream, MovInstr), - SetArgsAvailableArgs1, - FuncPtrReg1 - }; + case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of + [] -> + % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0 + % that is not in ParameterRegs + [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs, + [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs, + MovInstr1 = jit_armv6m_asm:mov(NewArgReg, FuncPtrReg1), + MovInstr2 = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0), + SetArgsAvailableArgs1 = + (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ + [FuncPtrReg0], + RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg), + { + StreamModule:append( + State2#state.stream, <> + ), + SetArgsAvailableArgs1, + FuncPtrReg1, + RegArgs1 + }; + [FuncPtrReg1 | _] -> + MovInstr = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0), + SetArgsAvailableArgs1 = + (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ + [FuncPtrReg0], + { + StreamModule:append(State2#state.stream, MovInstr), + SetArgsAvailableArgs1, + FuncPtrReg1, + RegArgs0 + } + end; false -> SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], - {State2#state.stream, SetArgsAvailableArgs1, FuncPtrReg0} + {State2#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0} end; {primitive, Primitive} -> [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs, SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0), Stream2 = StreamModule:append(State2#state.stream, PrepCall), - {Stream2, SetArgsAvailableRegs1, FuncPtrReg0} + {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0} end, State3 = State2#state{ diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 10d4b5f418..287e8f9acc 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -180,6 +180,43 @@ call_primitive_extended_regs_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_primitive_few_free_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, r7} = ?BACKEND:move_to_native_register(State0, 1), + {State2, r6} = ?BACKEND:move_to_native_register(State1, 2), + {State3, r5} = ?BACKEND:move_to_native_register(State2, 3), + {State4, r4} = ?BACKEND:move_to_native_register(State3, 4), + {State5, r3} = ?BACKEND:move_to_native_register(State4, 5), + {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [ + r6, r7, {free, r4}, r5, {free, r3} + ]), + State7 = ?BACKEND:free_native_registers(State6, [ResultReg, r6, r7, r5]), + ?BACKEND:assert_all_native_free(State7), + Stream = ?BACKEND:stream(State7), + Dump = << + " 0: 2701 movs r7, #1\n" + " 2: 2602 movs r6, #2\n" + " 4: 2503 movs r5, #3\n" + " 6: 2404 movs r4, #4\n" + " 8: 2305 movs r3, #5\n" + " a: 21e4 movs r1, #228 @ 0xe4\n" + " c: 5851 ldr r1, [r2, r1]\n" + " e: b4e7 push {r0, r1, r2, r5, r6, r7}\n" + " 10: b082 sub sp, #8\n" + " 12: 9300 str r3, [sp, #0]\n" + " 14: 4633 mov r3, r6\n" + " 16: 460e mov r6, r1\n" + " 18: 4618 mov r0, r3\n" + " 1a: 4639 mov r1, r7\n" + " 1c: 4622 mov r2, r4\n" + " 1e: 462b mov r3, r5\n" + " 20: 47b0 blx r6\n" + " 22: 4604 mov r4, r0\n" + " 24: b002 add sp, #8\n" + " 26: bce7 pop {r0, r1, r2, r5, r6, r7}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_only_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), From 208d025c27fe558c14bad7ff514ef5a5a8e423f6 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 6 Oct 2025 23:13:38 +0200 Subject: [PATCH 74/97] armv6m: Run tests on armhf+jit Signed-off-by: Paul Guyot --- .github/workflows/build-and-test.yaml | 17 ++++++++++++++++- CMakeLists.txt | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index fcc45fd033..1f1a70d3c6 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -321,7 +321,7 @@ jobs: cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" jit_target_arch: "aarch64" - # armhf build + # armhf builds - os: "ubuntu-24.04" cc: "arm-linux-gnueabihf-gcc" cxx: "arm-linux-gnueabihf-g++" @@ -336,6 +336,21 @@ jobs: arch: "armhf" library-arch: arm-linux-gnueabihf + - os: "ubuntu-24.04" + cc: "arm-linux-gnueabihf-gcc" + cxx: "arm-linux-gnueabihf-g++" + # -D_FILE_OFFSET_BITS=64 is required for making atomvm:posix_readdir/1 test work + # otherwise readdir will fail due to 64 bits inode numbers with 32 bit ino_t + cflags: "-mcpu=cortex-a7 -mfloat-abi=hard -O2 -mthumb -mthumb-interwork -D_FILE_OFFSET_BITS=64" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: "-DAVM_DISABLE_JIT=OFF -DAVM_JIT_TARGET_ARCH=armv6m -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/armhf_toolchain.cmake" + compiler_pkgs: "crossbuild-essential-armhf libc6-dbg:armhf zlib1g-dev:armhf libmbedtls-dev:armhf qemu-user qemu-user-binfmt binfmt-support" + arch: "armhf" + library-arch: arm-linux-gnueabihf + jit_target_arch: "armv6m" + # s390x build - os: "ubuntu-24.04" cc: "s390x-linux-gnu-gcc" diff --git a/CMakeLists.txt b/CMakeLists.txt index 490aaccdc0..add17a3cd6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,7 +60,7 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$") set(AVM_JIT_TARGET_ARCH "armv6m") else() - message(FATAL "JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") + message(FATAL_ERROR "JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") endif() endif() From 7509d3117dd418fd9899b24165dddb6165739a7d Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 29 Sep 2025 21:50:09 +0200 Subject: [PATCH 75/97] armv6m: optimize and 0xFFFFFF Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 94f7132a57..827384f04d 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -2405,6 +2405,11 @@ get_module_index( %% JIT currentl calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to %% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool %% by using BICS for -4. +and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) -> + I1 = jit_armv6m_asm:lsls(Reg, Reg, 8), + I2 = jit_armv6m_asm:lsrs(Reg, Reg, 8), + Stream1 = StreamModule:append(Stream0, <>), + State0#state{stream = Stream1}; and_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, From 5088b64d6abff9ececb08c102ecd25d449c47471 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 14 Oct 2025 08:29:27 +0200 Subject: [PATCH 76/97] JIT: bump timeout for test_jit.avm with valgrind Signed-off-by: Paul Guyot --- .github/workflows/build-and-test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 1f1a70d3c6..9ece61a36f 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -601,7 +601,7 @@ jobs: - name: "Test: test_jit.avm with valgrind" if: matrix.library-arch == '' && matrix.otp != '21' && matrix.otp != '22' - timeout-minutes: 30 + timeout-minutes: 60 working-directory: build run: | ulimit -c unlimited From e1f43561271f697d5d616412fd7905fcc6f010bb Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 20 Sep 2025 17:33:21 +0200 Subject: [PATCH 77/97] JIT: add float32 variant for single precision floats Signed-off-by: Paul Guyot --- CMakeLists.txt | 2 +- CMakeModules/BuildErlang.cmake | 42 +++++++++++++++++---------------- libs/jit/include/jit.hrl | 1 + libs/jit/src/jit.erl | 12 ++++++++-- libs/jit/src/jit_aarch64.erl | 17 +++++++++---- libs/jit/src/jit_armv6m.erl | 31 +++++++++++++++++------- libs/jit/src/jit_precompile.erl | 41 ++++++++++++++++++++++++++------ libs/jit/src/jit_x86_64.erl | 17 +++++++++---- src/libAtomVM/jit.c | 7 +++++- src/libAtomVM/jit.h | 1 + src/libAtomVM/module.c | 8 ++++++- src/libAtomVM/nifs.c | 19 +++++++++++++++ src/libAtomVM/nifs.gperf | 1 + 13 files changed, 151 insertions(+), 48 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index add17a3cd6..32484ee851 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,7 +64,7 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH) endif() endif() -set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") +set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR diff --git a/CMakeModules/BuildErlang.cmake b/CMakeModules/BuildErlang.cmake index 76cfa4c2f0..3b10565cc7 100644 --- a/CMakeModules/BuildErlang.cmake +++ b/CMakeModules/BuildErlang.cmake @@ -77,8 +77,10 @@ macro(pack_precompiled_archive avm_name) else() set(jit_deps "jit") endif() - foreach(jit_target_arch ${AVM_PRECOMPILED_TARGETS}) + foreach(jit_target_arch_variant ${AVM_PRECOMPILED_TARGETS}) set(pack_precompile_archive_${avm_name}_beams "") + # Extract base architecture for module dependencies + string(REGEX REPLACE "\\+.*$" "" jit_target_arch "${jit_target_arch_variant}") set(jit_compiler_modules ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_precompile.beam @@ -89,14 +91,14 @@ macro(pack_precompiled_archive avm_name) foreach(module_name IN LISTS ${PACK_ARCHIVE_MODULES} PACK_ARCHIVE_MODULES PACK_ARCHIVE_UNPARSED_ARGUMENTS) add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/${module_name}.beam - COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/ - && erl -pa ${CMAKE_BINARY_DIR}/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${jit_target_arch} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/ ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/${module_name}.beam + COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/ + && erl -pa ${CMAKE_BINARY_DIR}/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${jit_target_arch_variant} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/ ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam ${jit_compiler_modules} ${jit_deps} - COMMENT "Compiling ${module_name}.beam to ${jit_target_arch}" + COMMENT "Compiling ${module_name}.beam to ${jit_target_arch_variant}" VERBATIM ) - set(pack_precompile_archive_${avm_name}_beams ${pack_precompile_archive_${avm_name}_beams} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/${module_name}.beam) + set(pack_precompile_archive_${avm_name}_beams ${pack_precompile_archive_${avm_name}_beams} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/${module_name}.beam) endforeach() if(AVM_RELEASE) @@ -106,20 +108,20 @@ macro(pack_precompiled_archive avm_name) endif() add_custom_command( - OUTPUT ${avm_name}-${jit_target_arch}.avm + OUTPUT ${avm_name}-${jit_target_arch_variant}.avm DEPENDS ${pack_precompile_archive_${avm_name}_beams} PackBEAM - COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch}.avm ${pack_precompile_archive_${avm_name}_beams} - COMMENT "Packing archive ${avm_name}-${jit_target_arch}.avm" + COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch_variant}.avm ${pack_precompile_archive_${avm_name}_beams} + COMMENT "Packing archive ${avm_name}-${jit_target_arch_variant}.avm" VERBATIM ) add_custom_target( - ${avm_name}_${jit_target_arch} ALL - DEPENDS ${avm_name}-${jit_target_arch}.avm + ${avm_name}_${jit_target_arch_variant} ALL + DEPENDS ${avm_name}-${jit_target_arch_variant}.avm ) # Ensure source beams are built before precompilation - add_dependencies(${avm_name}_${jit_target_arch} ${avm_name}_emu) + add_dependencies(${avm_name}_${jit_target_arch_variant} ${avm_name}_emu) # Make main target depend on precompiled targets - add_dependencies(${avm_name} ${avm_name}_${jit_target_arch}) + add_dependencies(${avm_name} ${avm_name}_${jit_target_arch_variant}) endforeach() endif() endmacro() @@ -159,23 +161,23 @@ macro(pack_lib avm_name) set(target_deps ${avm_name}.avm) if(NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) - foreach(jit_target_arch ${AVM_PRECOMPILED_TARGETS}) + foreach(jit_target_arch_variant ${AVM_PRECOMPILED_TARGETS}) # Build JIT archives list for this specific target architecture - set(pack_lib_${avm_name}_jit_archives_${jit_target_arch} ${CMAKE_BINARY_DIR}/libs/jit/src/jit-${jit_target_arch}.avm) + set(pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant} ${CMAKE_BINARY_DIR}/libs/jit/src/jit-${jit_target_arch_variant}.avm) foreach(archive_name ${ARGN}) if(${archive_name} STREQUAL "estdlib") - set(pack_lib_${avm_name}_jit_archives_${jit_target_arch} ${pack_lib_${avm_name}_jit_archives_${jit_target_arch}} ${CMAKE_BINARY_DIR}/libs/${archive_name}/src/${archive_name}-${jit_target_arch}.avm) + set(pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant} ${pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant}} ${CMAKE_BINARY_DIR}/libs/${archive_name}/src/${archive_name}-${jit_target_arch_variant}.avm) endif() endforeach() add_custom_command( - OUTPUT ${avm_name}-${jit_target_arch}.avm + OUTPUT ${avm_name}-${jit_target_arch_variant}.avm DEPENDS ${pack_lib_${avm_name}_archive_targets} PackBEAM - COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch}.avm ${pack_lib_${avm_name}_jit_archives_${jit_target_arch}} ${pack_lib_${avm_name}_archives} - COMMENT "Packing lib ${avm_name}-${jit_target_arch}.avm" + COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch_variant}.avm ${pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant}} ${pack_lib_${avm_name}_archives} + COMMENT "Packing lib ${avm_name}-${jit_target_arch_variant}.avm" VERBATIM ) - set(target_deps ${target_deps} ${avm_name}-${jit_target_arch}.avm) + set(target_deps ${target_deps} ${avm_name}-${jit_target_arch_variant}.avm) endforeach() endif() add_custom_command( diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index 6c08a80661..b006c5f34f 100644 --- a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -25,5 +25,6 @@ -define(JIT_ARCH_ARMV6M, 3). -define(JIT_VARIANT_PIC, 1). +-define(JIT_VARIANT_FLOAT32, 2). -define(MAX_REG, 16). diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index d30f52e7ed..1a7ffa6160 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -30,7 +30,8 @@ % NIFs -export([ stream_module/0, - backend_module/0 + backend_module/0, + variant/0 ]). -export_type([ @@ -3702,9 +3703,16 @@ stream(MaxSize) -> backend_module() -> erlang:nif_error(undefined). +%% @doc Get the JIT variant suitable for runtime compilation +%% @return The JIT variant for this platform and float precision +-spec variant() -> non_neg_integer(). +variant() -> + erlang:nif_error(undefined). + %% @doc Instantiate backend for this platform %% @return A tuple with the backend module and the backend state for this platform backend({StreamModule, Stream}) -> BackendModule = ?MODULE:backend_module(), - BackendState = BackendModule:new(?JIT_VARIANT_PIC, StreamModule, Stream), + Variant = ?MODULE:variant(), + BackendState = BackendModule:new(Variant, StreamModule, Stream), {BackendModule, BackendState}. diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 3449a0a997..45289f7311 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -133,7 +133,8 @@ branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], available_regs :: [aarch64_register()], used_regs :: [aarch64_register()], - labels :: [{integer() | reference(), integer()}] + labels :: [{integer() | reference(), integer()}], + variant :: non_neg_integer() }). -type state() :: #state{}. @@ -167,6 +168,13 @@ -define(X_REG(N), {?CTX_REG, 16#30 + (N * ?WORD_SIZE)}). -define(CP, {?CTX_REG, 16#B8}). -define(FP_REGS, {?CTX_REG, 16#C0}). +-define(FP_REG_OFFSET(State, F), + (F * + case (State)#state.variant band ?JIT_VARIANT_FLOAT32 of + 0 -> 8; + _ -> 4 + end) +). -define(BS, {?CTX_REG, 16#C8}). -define(BS_OFFSET, {?CTX_REG, 16#D0}). -define(JITSTATE_MODULE, {?JITSTATE_REG, 0}). @@ -216,7 +224,7 @@ word_size() -> ?WORD_SIZE. %% @return New backend state %%----------------------------------------------------------------------------- -spec new(any(), module(), stream()) -> state(). -new(_Variant, StreamModule, Stream) -> +new(Variant, StreamModule, Stream) -> #state{ stream_module = StreamModule, stream = Stream, @@ -224,7 +232,8 @@ new(_Variant, StreamModule, Stream) -> offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], - labels = [] + labels = [], + variant = Variant }. %%----------------------------------------------------------------------------- @@ -1273,7 +1282,7 @@ move_to_vm_register( ) -> I1 = jit_aarch64_asm:ldr(Reg, {Reg, ?WORD_SIZE}), I2 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), - I3 = jit_aarch64_asm:str(Reg, {Temp, F * ?WORD_SIZE}), + I3 = jit_aarch64_asm:str(Reg, {Temp, ?FP_REG_OFFSET(State0, F)}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State1 = free_native_register(State0, Reg), diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 827384f04d..f51e30d2c3 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -136,7 +136,8 @@ branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], available_regs :: [armv6m_register()], used_regs :: [armv6m_register()], - labels :: [{integer() | reference(), integer()}] + labels :: [{integer() | reference(), integer()}], + variant :: non_neg_integer() }). -type state() :: #state{}. @@ -239,7 +240,7 @@ word_size() -> 4. %% @return New backend state %%----------------------------------------------------------------------------- -spec new(any(), module(), stream()) -> state(). -new(_Variant, StreamModule, Stream) -> +new(Variant, StreamModule, Stream) -> #state{ stream_module = StreamModule, stream = Stream, @@ -247,7 +248,8 @@ new(_Variant, StreamModule, Stream) -> offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], - labels = [] + labels = [], + variant = Variant }. %%----------------------------------------------------------------------------- @@ -1864,17 +1866,30 @@ move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, State1#state{available_regs = AR0}; % term_to_float move_to_vm_register( - #state{stream_module = StreamModule, available_regs = [Temp1, Temp2 | _], stream = Stream0} = + #state{ + stream_module = StreamModule, + available_regs = [Temp1, Temp2 | _], + stream = Stream0, + variant = Variant + } = State0, {free, {ptr, Reg, 1}}, {fp_reg, F} ) -> I1 = jit_armv6m_asm:ldr(Temp1, ?FP_REGS), I2 = jit_armv6m_asm:ldr(Temp2, {Reg, 4}), - I3 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8}), - I4 = jit_armv6m_asm:ldr(Temp2, {Reg, 8}), - I5 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8 + 4}), - Code = <>, + case Variant band ?JIT_VARIANT_FLOAT32 of + 0 -> + % Double precision: write both 32-bit parts + I3 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8}), + I4 = jit_armv6m_asm:ldr(Temp2, {Reg, 8}), + I5 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8 + 4}), + Code = <>; + _ -> + % Single precision: write only first 32-bit part + I3 = jit_armv6m_asm:str(Temp2, {Temp1, F * 4}), + Code = <> + end, Stream1 = StreamModule:append(Stream0, Code), State1 = free_native_register(State0, Reg), State1#state{stream = Stream1}. diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index dfcb19dcc4..cd9646790d 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -28,6 +28,28 @@ start() -> [Target, Dir | Files] = init:get_plain_arguments(), lists:foreach(fun(File) -> compile(Target, Dir, File) end, Files). +%% @doc Parse target string to extract base architecture and requested variant +%% Examples: +%% "armv6m" -> {"armv6m", ?JIT_VARIANT_PIC} +%% "armv6m+float32" -> {"armv6m", ?JIT_VARIANT_PIC + ?JIT_VARIANT_FLOAT32} +%% "x86_64" -> {"x86_64", ?JIT_VARIANT_PIC} +parse_target(Target) -> + case string:split(Target, "+", all) of + [BaseTarget] -> + {BaseTarget, ?JIT_VARIANT_PIC}; + [BaseTarget | Variants] -> + RequestedVariant = lists:foldl( + fun(Variant, Acc) -> + case Variant of + "float32" -> Acc + ?JIT_VARIANT_FLOAT32 + end + end, + ?JIT_VARIANT_PIC, + Variants + ), + {BaseTarget, RequestedVariant} + end. + compile(Target, Dir, Path) -> try {ok, InitialBinary} = file:read_file(Path), @@ -62,28 +84,33 @@ compile(Target, Dir, Path) -> end, TypeResolver = type_resolver(TypesChunk), - Stream0 = jit_stream_binary:new(0), - <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = - CodeChunk, + % Parse target to extract arch and variant + {BaseTarget, RequestedVariant} = parse_target(Target), + Backend = list_to_atom("jit_" ++ BaseTarget), Arch = - case Target of + case BaseTarget of "x86_64" -> ?JIT_ARCH_X86_64; "aarch64" -> ?JIT_ARCH_AARCH64; "armv6m" -> ?JIT_ARCH_ARMV6M; _ -> error({unsupported_target, Target}) end, + Stream0 = jit_stream_binary:new(0), + <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = + CodeChunk, + Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, Arch, ?JIT_VARIANT_PIC) + Stream0, jit:beam_chunk_header(LabelsCount, Arch, RequestedVariant) ), - Backend = list_to_atom("jit_" ++ Target), - Stream2 = Backend:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), + + Stream2 = Backend:new(RequestedVariant, jit_stream_binary, Stream1), {LabelsCount, Stream3} = jit:compile( CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2 ), NativeCode = Backend:stream(Stream3), UpdatedChunks = FilteredChunks ++ [{"avmN", NativeCode}], + {ok, Binary} = beam_lib:build_module(UpdatedChunks), Basename = filename:basename(Path), UpdatedFile = filename:join(Dir, Basename), diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 5f54e6e512..bd39ad4fdd 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -114,7 +114,8 @@ branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], available_regs :: [x86_64_register()], used_regs :: [x86_64_register()], - labels :: [{integer() | reference(), integer()}] + labels :: [{integer() | reference(), integer()}], + variant :: non_neg_integer() }). -type state() :: #state{}. @@ -156,6 +157,13 @@ -define(X_REG(N), {16#30 + (N * ?WORD_SIZE), ?CTX_REG}). -define(CP, {16#B8, ?CTX_REG}). -define(FP_REGS, {16#C0, ?CTX_REG}). +-define(FP_REG_OFFSET(State, F), + (F * + case (State)#state.variant band ?JIT_VARIANT_FLOAT32 of + 0 -> 8; + _ -> 4 + end) +). -define(BS, {16#C8, ?CTX_REG}). -define(BS_OFFSET, {16#D0, ?CTX_REG}). -define(JITSTATE_MODULE, {0, ?JITSTATE_REG}). @@ -201,7 +209,7 @@ word_size() -> ?WORD_SIZE. %% @return New backend state %%----------------------------------------------------------------------------- -spec new(any(), module(), stream()) -> state(). -new(_Variant, StreamModule, Stream) -> +new(Variant, StreamModule, Stream) -> #state{ stream_module = StreamModule, stream = Stream, @@ -209,7 +217,8 @@ new(_Variant, StreamModule, Stream) -> offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], - labels = [] + labels = [], + variant = Variant }. %%----------------------------------------------------------------------------- @@ -1248,7 +1257,7 @@ move_to_vm_register( ) when is_atom(Reg) -> I1 = jit_x86_64_asm:movq({8, Reg}, Reg), I2 = jit_x86_64_asm:movq(?FP_REGS, Temp), - I3 = jit_x86_64_asm:movq(Reg, {F * 8, Temp}), + I3 = jit_x86_64_asm:movq(Reg, {?FP_REG_OFFSET(State0, F), Temp}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State1 = free_native_register(State0, Reg), diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index c0d63cf9a5..88b73e6bb5 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -95,13 +95,18 @@ _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in j _Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_armv6m.erl"); _Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_armv6m.erl"); -_Static_assert(sizeof(avm_float_t) == 0x8, "sizeof(avm_float_t) is 0x8 in jit/src/jit_armv6m.erl"); _Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); #else #error Unknown jit target #endif +#ifdef AVM_USE_SINGLE_PRECISION +_Static_assert(sizeof(avm_float_t) == 0x4, "sizeof(avm_float_t) is 0x4 for single precision"); +#else +_Static_assert(sizeof(avm_float_t) == 0x8, "sizeof(avm_float_t) is 0x8 for double precision"); +#endif + #define PROCESS_MAYBE_TRAP_RETURN_VALUE(return_value, offset) \ if (term_is_invalid_term(return_value)) { \ if (UNLIKELY(!context_get_flags(ctx, Trap))) { \ diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index 973a253a74..ee53259886 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -175,6 +175,7 @@ enum TrapAndLoadResult #define JIT_ARCH_ARMV6M 3 #define JIT_VARIANT_PIC 1 +#define JIT_VARIANT_FLOAT32 2 #ifndef AVM_NO_JIT diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index 12961f121e..108d5027d8 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -336,7 +336,13 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary fprintf(stderr, "Unknown native code chunk version (%d)\n", ENDIAN_SWAP_16(native_code->version)); } else { for (int arch_index = 0; arch_index < ENDIAN_SWAP_16(native_code->architectures_count); arch_index++) { - if (ENDIAN_SWAP_16(native_code->architectures[arch_index].architecture) == JIT_ARCH_TARGET && ENDIAN_SWAP_16(native_code->architectures[arch_index].variant) == JIT_VARIANT_PIC) { + uint16_t runtime_variant; +#ifdef AVM_USE_SINGLE_PRECISION + runtime_variant = JIT_VARIANT_FLOAT32 | JIT_VARIANT_PIC; +#else + runtime_variant = JIT_VARIANT_PIC; +#endif + if (ENDIAN_SWAP_16(native_code->architectures[arch_index].architecture) == JIT_ARCH_TARGET && ENDIAN_SWAP_16(native_code->architectures[arch_index].variant) == runtime_variant) { size_t offset = ENDIAN_SWAP_32(native_code->info_size) + ENDIAN_SWAP_32(native_code->architectures[arch_index].offset) + sizeof(native_code->info_size); ModuleNativeEntryPoint module_entry_point = sys_map_native_code((const uint8_t *) &native_code->info_size, ENDIAN_SWAP_32(native_code->size), offset); module_set_native_code(mod, ENDIAN_SWAP_32(native_code->labels), module_entry_point); diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 0b0d560dc5..e3d76eba41 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -209,6 +209,7 @@ static term nif_erlang_module_loaded(Context *ctx, int argc, term argv[]); static term nif_erlang_nif_error(Context *ctx, int argc, term argv[]); #ifndef AVM_NO_JIT static term nif_jit_backend_module(Context *ctx, int argc, term argv[]); +static term nif_jit_variant(Context *ctx, int argc, term argv[]); #endif static term nif_lists_reverse(Context *ctx, int argc, term argv[]); static term nif_lists_keyfind(Context *ctx, int argc, term argv[]); @@ -794,6 +795,11 @@ static const struct Nif jit_backend_module_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_jit_backend_module }; + +static const struct Nif jit_variant_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_variant +}; #endif static const struct Nif lists_reverse_nif = { @@ -5686,6 +5692,19 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) #error Unknown JIT target #endif } + +static term nif_jit_variant(Context *ctx, int argc, term argv[]) +{ + UNUSED(ctx); + UNUSED(argc); + UNUSED(argv); + +#ifdef AVM_USE_SINGLE_PRECISION + return term_from_int(JIT_VARIANT_FLOAT32 | JIT_VARIANT_PIC); +#else + return term_from_int(JIT_VARIANT_PIC); +#endif +} #endif static term nif_lists_reverse(Context *ctx, int argc, term argv[]) diff --git a/src/libAtomVM/nifs.gperf b/src/libAtomVM/nifs.gperf index a647c1de04..a4a2591fa0 100644 --- a/src/libAtomVM/nifs.gperf +++ b/src/libAtomVM/nifs.gperf @@ -193,6 +193,7 @@ lists:keyfind/3, &lists_keyfind_nif lists:keymember/3, &lists_keymember_nif lists:member/2, &lists_member_nif jit:backend_module/0, IF_HAVE_JIT(&jit_backend_module_nif) +jit:variant/0, IF_HAVE_JIT(&jit_variant_nif) lists:reverse/1, &lists_reverse_nif lists:reverse/2, &lists_reverse_nif maps:from_keys/2, &maps_from_keys_nif From 4e7c3820c452322f99959b66fde5ff185a6ee2fa Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 21 Sep 2025 21:41:26 +0200 Subject: [PATCH 78/97] JIT: optimize op return Signed-off-by: Paul Guyot --- libs/jit/src/jit.erl | 29 ++++++++-- libs/jit/src/jit_aarch64.erl | 66 ++++++++++++++++++++++- libs/jit/src/jit_armv6m.erl | 80 +++++++++++++++++++++++++++- libs/jit/src/jit_x86_64.erl | 62 ++++++++++++++++++++- tests/libs/jit/jit_aarch64_tests.erl | 15 ++++++ tests/libs/jit/jit_armv6m_tests.erl | 21 ++++++++ tests/libs/jit/jit_x86_64_tests.erl | 15 ++++++ 7 files changed, 277 insertions(+), 11 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 1a7ffa6160..af2f6b9457 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -351,11 +351,30 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_RETURN\n", []), - MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RETURN, [ - ctx, jit_state - ]), - ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest, MMod, MSt1, State0); + % Optimized return: check if returning within same module + {MSt1, CpReg} = MMod:move_to_native_register(MSt0, cp), + {MSt2, ModuleIndexReg} = MMod:get_module_index(MSt1), + % Extract module index from cp (upper 8 bits: cp >> 24) + MSt3 = MMod:shift_right(MSt2, CpReg, 24), + % Compare extracted module index with current module index + MSt4 = MMod:if_block( + MSt3, + {{free, CpReg}, '==', {free, ModuleIndexReg}}, + % Same module: fast intra-module return + fun(BSt0) -> + % Restore original cp value and extract offset (lower 24 bits) + {BSt1, CpReg2} = MMod:move_to_native_register(BSt0, cp), + % Mask to get lower 24 bits and shift right by 2 for offset + BSt2 = MMod:and_(BSt1, CpReg2, 16#FFFFFF), + BSt3 = MMod:shift_right(BSt2, CpReg2, 2), + % Jump to continuation (this is a tail call) + MMod:jump_to_continuation(BSt3, {free, CpReg2}) + end + ), + % Different module: use existing slow path + MSt5 = MMod:call_primitive_last(MSt4, ?PRIM_RETURN, [ctx, jit_state]), + ?ASSERT_ALL_NATIVE_FREE(MSt5), + first_pass(Rest, MMod, MSt5, State0); % 20 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 45289f7311..8e5410aa9b 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -37,6 +37,7 @@ call_primitive_with_cp/3, return_if_not_equal_to_ctx/2, jump_to_label/2, + jump_to_continuation/2, if_block/3, if_else_block/4, shift_right/3, @@ -156,7 +157,8 @@ | {'(int)', maybe_free_aarch64_register(), '!=', aarch64_register() | integer()} | {'(bool)', maybe_free_aarch64_register(), '==', false} | {'(bool)', maybe_free_aarch64_register(), '!=', false} - | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', integer()}. + | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, aarch64_register()}, '==', {free, aarch64_register()}}. % ctx->e is 0x28 % ctx->x is 0x30 @@ -529,6 +531,40 @@ jump_to_label( State#state{stream = Stream1, branches = [Reloc | AccBranches]} end. +%%----------------------------------------------------------------------------- +%% @doc Jump to a continuation address stored in a register. +%% This is used for optimized intra-module returns. +%% @end +%% @param State current backend state +%% @param OffsetReg register containing the continuation offset +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_continuation( + #state{ + stream_module = StreamModule, + stream = Stream0, + offset = BaseOffset, + available_regs = [TempReg | _] + } = State, + {free, OffsetReg} +) -> + % Calculate absolute address: native_code_base + target_offset + % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) + CurrentStreamOffset = StreamModule:offset(Stream0), + NetOffset = BaseOffset - CurrentStreamOffset, + + % Get native code base address into temporary register + I1 = jit_aarch64_asm:adr(TempReg, NetOffset), + % Add target offset to get final absolute address + I2 = jit_aarch64_asm:add(TempReg, TempReg, OffsetReg), + % Indirect branch to the calculated absolute address + I3 = jit_aarch64_asm:br(TempReg), + + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + % Free all registers since this is a tail jump + State#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}. + %% @private -spec rewrite_branch_instruction( jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, integer() @@ -792,6 +828,20 @@ if_block_cond( State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, {State2, ne, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {{free, Reg1}, '==', {free, Reg2}} +) -> + % Compare two free registers + I1 = jit_aarch64_asm:cmp(Reg1, Reg2), + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + % Free both registers + State1 = if_block_free_reg({free, Reg1}, State0), + State2 = if_block_free_reg({free, Reg2}, State1), + State3 = State2#state{stream = Stream1}, + {State3, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {'(bool)', RegOrTuple, '==', false} @@ -1559,7 +1609,19 @@ move_to_array_element( %% @param Value value to move (can be an immediate, vm register, pointer, or native register) %% @return Tuple of {Updated backend state, Native register containing the value} %%----------------------------------------------------------------------------- --spec move_to_native_register(state(), value()) -> {state(), aarch64_register()}. +-spec move_to_native_register(state(), value() | cp) -> {state(), aarch64_register()}. +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + cp +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?CP), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; move_to_native_register(State, Reg) when is_atom(Reg) -> {State, Reg}; move_to_native_register( diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index f51e30d2c3..9409059a89 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -37,6 +37,7 @@ call_primitive_with_cp/3, return_if_not_equal_to_ctx/2, jump_to_label/2, + jump_to_continuation/2, if_block/3, if_else_block/4, shift_right/3, @@ -159,7 +160,8 @@ | {'(int)', maybe_free_armv6m_register(), '!=', armv6m_register() | integer()} | {'(bool)', maybe_free_armv6m_register(), '==', false} | {'(bool)', maybe_free_armv6m_register(), '!=', false} - | {maybe_free_armv6m_register(), '&', non_neg_integer(), '!=', integer()}. + | {maybe_free_armv6m_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, armv6m_register()}, '==', {free, armv6m_register()}}. % ctx->e is 0x28 % ctx->x is 0x30 @@ -729,6 +731,68 @@ jump_to_label( Stream1 = StreamModule:append(Stream0, CodeBlock), State1#state{stream = Stream1}. +%%----------------------------------------------------------------------------- +%% @doc Jump to address in continuation pointer register +%% The continuation points to a function prologue, so we need to compute +%% the target address using PIC and use function epilogue to jump. +%% @end +%% @param State current backend state +%% @param {free, OffsetReg} register containing the offset value +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_continuation( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + offset = BaseOffset + } = State0, + {free, OffsetReg} +) -> + % ARM v6-M PIC implementation using one temp register: + % 1. Use ADR to get PC into temp register + % 2. Add PC to OffsetReg to get intermediate value + % 3. Load base offset immediate into temp + % 4. Add base offset to get final target address + % 5. Use function epilogue pattern to jump + + AdrOffset = StreamModule:offset(Stream0), + % ADR Temp, +4 stores PC+4 in Temp + I1 = jit_armv6m_asm:adr(Temp, 4), + + % Add PC to OffsetReg: OffsetReg = OffsetReg + PC + I2 = jit_armv6m_asm:adds(OffsetReg, OffsetReg, Temp), + + Stream1 = StreamModule:append(Stream0, <>), + + % PC is aligned down to 4-byte boundary + AdrPC = (AdrOffset + 4) band (bnot 3), + + % Calculate what we need to add: BaseOffset - AdrPC + 1 for thumb bit + ImmediateValue = BaseOffset - AdrPC + 1, + + % Generate mov_immediate to load the calculated base offset into Temp + State1 = mov_immediate(State0#state{stream = Stream1}, Temp, ImmediateValue), + + % Add base offset to get final target address: OffsetReg = OffsetReg + BaseOffset + I3 = jit_armv6m_asm:adds(OffsetReg, OffsetReg, Temp), + + % Function epilogue pattern: + % Load saved LR to temp register (LR is at sp+20) + I4 = jit_armv6m_asm:ldr(Temp, {sp, 20}), + % Store target address to LR position on stack + I5 = jit_armv6m_asm:str(OffsetReg, {sp, 20}), + % Move saved LR to LR register + I6 = jit_armv6m_asm:mov(lr, Temp), + % Pop prolog registers: {r1,r4,r5,r6,r7,lr} where lr is now target address + % This restores jit_state in r1 and branches to target via pc + I7 = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]), + + Code = <>, + Stream2 = StreamModule:append(State1#state.stream, Code), + % Free all registers as this is a terminal instruction + State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}. + branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) when LabelOffset - Offset =< 2050, LabelOffset - Offset >= -2044 -> @@ -2135,7 +2199,19 @@ move_to_array_element( State2 = State1#state{stream = Stream1}, free_native_register(State2, ValueReg). --spec move_to_native_register(state(), value()) -> {state(), armv6m_register()}. +-spec move_to_native_register(state(), value() | cp) -> {state(), armv6m_register()}. +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + cp +) -> + I1 = jit_armv6m_asm:ldr(Reg, ?CP), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; move_to_native_register(State, Reg) when is_atom(Reg) -> {State, Reg}; move_to_native_register( diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index bd39ad4fdd..710b0063db 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -37,6 +37,7 @@ call_primitive_with_cp/3, return_if_not_equal_to_ctx/2, jump_to_label/2, + jump_to_continuation/2, if_block/3, if_else_block/4, shift_right/3, @@ -139,7 +140,8 @@ | {'(int)', maybe_free_x86_64_register(), '!=', x86_64_register() | integer()} | {'(bool)', maybe_free_x86_64_register(), '==', false} | {'(bool)', maybe_free_x86_64_register(), '!=', false} - | {maybe_free_x86_64_register(), '&', non_neg_integer(), '!=', integer()}. + | {maybe_free_x86_64_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, x86_64_register()}, '==', {free, x86_64_register()}}. -define(WORD_SIZE, 8). @@ -522,6 +524,42 @@ jump_to_label( State#state{stream = Stream1, branches = [Reloc | AccBranches]} end. +%%----------------------------------------------------------------------------- +%% @doc Jump to a continuation address stored in a register. +%% This is used for optimized intra-module returns. +%% @end +%% @param State current backend state +%% @param OffsetReg register containing the continuation offset +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_continuation( + #state{ + stream_module = StreamModule, + stream = Stream0, + offset = BaseOffset, + available_regs = [TempReg | _] + } = State, + {free, OffsetReg} +) -> + % Calculate absolute address: native_code_base + target_offset + % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) + % Similar to aarch64 approach but using leaq for PC-relative addressing + CurrentStreamOffset = StreamModule:offset(Stream0), + NetOffset = BaseOffset - CurrentStreamOffset - 7, + + % Get native code base address using PC-relative lea: leaq NetOffset(%rip), TempReg + I1 = jit_x86_64_asm:leaq({rip, NetOffset}, TempReg), + 7 = byte_size(I1), + % Add target offset to get final absolute address: addq OffsetReg, TempReg + I2 = jit_x86_64_asm:addq(OffsetReg, TempReg), + % Indirect jump to the calculated absolute address: jmpq *TempReg + I3 = jit_x86_64_asm:jmpq({TempReg}), + + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + % Free all registers since this is a tail jump + State#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}. + %%----------------------------------------------------------------------------- %% @doc Emit an if block, i.e. emit a test of a condition and conditionnally %% execute a block. @@ -731,6 +769,14 @@ if_block_cond0( {RelocJZOffset, I3} = jit_x86_64_asm:jnz_rel8(1), State1 = if_block_free_reg(RegOrTuple, State0), {State1, <>, byte_size(I1) + byte_size(I2) + RelocJZOffset}; +if_block_cond0(State0, {{free, Reg1}, '==', {free, Reg2}}) -> + % Compare two free registers + I1 = jit_x86_64_asm:cmpq(Reg2, Reg1), + {RelocJNZOffset, I2} = jit_x86_64_asm:jnz_rel8(1), + % Free both registers + State1 = if_block_free_reg({free, Reg1}, State0), + State2 = if_block_free_reg({free, Reg2}, State1), + {State2, <>, byte_size(I1) + RelocJNZOffset}; if_block_cond0( State0, {'(int)', RegOrTuple, '==', Val} @@ -1571,7 +1617,19 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. --spec move_to_native_register(state(), value()) -> {state(), x86_64_register()}. +-spec move_to_native_register(state(), value() | cp) -> {state(), x86_64_register()}. +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + cp +) -> + I1 = jit_x86_64_asm:movq(?CP, Reg), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; move_to_native_register(State, Reg) when is_atom(Reg) -> {State, Reg}; move_to_native_register( diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 18bdcf88cb..c6c164a640 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -1719,6 +1719,21 @@ mul_test_() -> ] end}. +%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, r0}), + Stream = ?BACKEND:stream(State1), + % Expected: adr x7, NetOffset; add x7, x7, x0; br x7 + % With default offset 0, NetOffset = 0 - 0 = 0, temp register is r7 + Dump = + << + " 0: 10000007 adr x7, 0x0\n" + " 4: 8b0000e7 add x7, x7, x0\n" + " 8: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 287e8f9acc..5c15571412 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -3443,6 +3443,27 @@ call_func_ptr_register_exhaustion_test_() -> ] end}. +%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, r0}), + Stream = ?BACKEND:stream(State1), + % Expected: armv6m PIC sequence with function epilogue pattern + % Based on actual generated output + Dump = + << + " 0: a700 add r7, pc, #0 ; (adr r7, 0x4)\n" + " 2: 19c0 adds r0, r0, r7\n" + " 4: 2703 movs r7, #3\n" + " 6: 427f negs r7, r7\n" + " 8: 19c0 adds r0, r0, r7\n" + " a: 9f05 ldr r7, [sp, #20]\n" + " c: 9005 str r0, [sp, #20]\n" + " e: 46be mov lr, r7\n" + " 10: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + %% Mimic part of add.beam add_beam_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index abdb0d6773..cc8e9ddf14 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -1559,6 +1559,21 @@ move_to_array_element_test_() -> ] end}. +%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, rax}), + Stream = ?BACKEND:stream(State1), + % Expected: leaq -0x7(%rip), %rax; addq %rax, %rax; jmpq *%rax + % With default offset 0, NetOffset = 0 - 0 = 0, but RIP-relative needs adjustment for instruction length + Dump = + << + " 0: 48 8d 05 f9 ff ff ff lea -0x7(%rip),%rax\n" + " 7: 48 01 c0 add %rax,%rax\n" + " a: ff e0 jmpq *%rax" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). From 4621b7d2fc3599d5306247c7dce2e091d84c9b3f Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 29 Sep 2025 08:59:35 +0200 Subject: [PATCH 79/97] JIT: optimize shift_right for platforms with 3 operands Signed-off-by: Paul Guyot --- libs/jit/src/jit.erl | 184 +++++++++++++-------------- libs/jit/src/jit_aarch64.erl | 22 +++- libs/jit/src/jit_armv6m.erl | 21 ++- libs/jit/src/jit_x86_64.erl | 22 +++- tests/libs/jit/jit_aarch64_tests.erl | 39 ++++-- tests/libs/jit/jit_armv6m_tests.erl | 39 ++++-- tests/libs/jit/jit_x86_64_tests.erl | 40 ++++-- 7 files changed, 235 insertions(+), 132 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index af2f6b9457..1992bf5841 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -352,29 +352,28 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_RETURN\n", []), % Optimized return: check if returning within same module - {MSt1, CpReg} = MMod:move_to_native_register(MSt0, cp), + {MSt1, CpReg0} = MMod:move_to_native_register(MSt0, cp), {MSt2, ModuleIndexReg} = MMod:get_module_index(MSt1), % Extract module index from cp (upper 8 bits: cp >> 24) - MSt3 = MMod:shift_right(MSt2, CpReg, 24), + {MSt3, CpReg1} = MMod:shift_right(MSt2, CpReg0, 24), % Compare extracted module index with current module index MSt4 = MMod:if_block( MSt3, - {{free, CpReg}, '==', {free, ModuleIndexReg}}, + {{free, CpReg1}, '==', {free, ModuleIndexReg}}, % Same module: fast intra-module return fun(BSt0) -> - % Restore original cp value and extract offset (lower 24 bits) - {BSt1, CpReg2} = MMod:move_to_native_register(BSt0, cp), % Mask to get lower 24 bits and shift right by 2 for offset - BSt2 = MMod:and_(BSt1, CpReg2, 16#FFFFFF), - BSt3 = MMod:shift_right(BSt2, CpReg2, 2), + BSt1 = MMod:and_(BSt0, CpReg0, 16#FFFFFF), + {BSt3, CPReg1} = MMod:shift_right(BSt1, {free, CpReg0}, 2), % Jump to continuation (this is a tail call) - MMod:jump_to_continuation(BSt3, {free, CpReg2}) + MMod:jump_to_continuation(BSt3, {free, CPReg1}) end ), + MSt5 = MMod:free_native_registers(MSt4, [CpReg0]), % Different module: use existing slow path - MSt5 = MMod:call_primitive_last(MSt4, ?PRIM_RETURN, [ctx, jit_state]), - ?ASSERT_ALL_NATIVE_FREE(MSt5), - first_pass(Rest, MMod, MSt5, State0); + MSt6 = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]), + ?ASSERT_ALL_NATIVE_FREE(MSt6), + first_pass(Rest, MMod, MSt6, State0); % 20 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -778,11 +777,10 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, Reg} = MMod:move_to_native_register(MSt1, Arg1), MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg), - MSt5 = MMod:shift_right(MSt4, Reg, 6), - MSt6 = cond_jump_to_label({Reg, '!=', Arity}, Label, MMod, MSt5), - MSt7 = MMod:free_native_registers(MSt6, [Reg]), - ?ASSERT_ALL_NATIVE_FREE(MSt7), - first_pass(Rest3, MMod, MSt7, State0); + {MSt5, ArityReg} = MMod:shift_right(MSt4, {free, Reg}, 6), + MSt6 = cond_jump_to_label({{free, ArityReg}, '!=', Arity}, Label, MMod, MSt5), + ?ASSERT_ALL_NATIVE_FREE(MSt6), + first_pass(Rest3, MMod, MSt6, State0); % 59 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -1176,19 +1174,20 @@ first_pass(<>, MMod, MSt0, State0) -> MSt6, {IndexOrModuleReg, '&', ?TERM_IMMED2_TAG_MASK, '!=', ?TERM_IMMED2_ATOM}, fun(BSt0) -> - BSt1 = MMod:shift_right(BSt0, IndexOrModuleReg, 4), + {BSt1, IndexReg} = MMod:shift_right(BSt0, {free, IndexOrModuleReg}, 4), {BSt2, FunArity} = MMod:call_primitive(BSt1, ?PRIM_MODULE_GET_FUN_ARITY, [ - ModuleReg, IndexOrModuleReg + ModuleReg, {free, IndexReg} ]), cond_jump_to_label({'(int)', {free, FunArity}, '!=', Arity}, Label, MMod, BSt2) end, fun(BSt0) -> - {BSt1, FunArity} = MMod:get_array_element(BSt0, FuncPtr, 3), - BSt2 = MMod:shift_right(BSt1, FunArity, 4), - cond_jump_to_label({'(int)', {free, FunArity}, '!=', Arity}, Label, MMod, BSt2) + BSt1 = MMod:free_native_registers(BSt0, [IndexOrModuleReg]), + {BSt2, FunArity} = MMod:get_array_element(BSt1, FuncPtr, 3), + {BSt3, FunArityReg} = MMod:shift_right(BSt2, {free, FunArity}, 4), + cond_jump_to_label({'(int)', {free, FunArityReg}, '!=', Arity}, Label, MMod, BSt3) end ), - MSt8 = MMod:free_native_registers(MSt7, [FuncPtr, IndexOrModuleReg, ModuleReg, Arity]), + MSt8 = MMod:free_native_registers(MSt7, [FuncPtr, ModuleReg, Arity]), ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest3, MMod, MSt8, State0); % 117 @@ -1277,7 +1276,7 @@ first_pass(<>, MMod, MSt0, State0) -> {FlagsValue, Rest6} = decode_literal(Rest5), {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, Src), {MSt4, BSBinaryReg0} = MMod:get_array_element(MSt3, MatchStateRegPtr, 1), - {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2), + {MSt5, BSOffsetReg0} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2), MSt6 = if Unit =/= 8 -> @@ -1291,22 +1290,22 @@ first_pass(<>, MMod, MSt0, State0) -> true -> MSt5 end, - MSt7 = MMod:if_block(MSt6, {BSOffsetReg, '&', 16#7, '!=', 0}, fun(BlockSt) -> + MSt7 = MMod:if_block(MSt6, {BSOffsetReg0, '&', 16#7, '!=', 0}, fun(BlockSt) -> MMod:call_primitive_last(BlockSt, ?PRIM_RAISE_ERROR, [ctx, jit_state, offset, ?BADARG_ATOM]) end), - MSt8 = MMod:shift_right(MSt7, BSOffsetReg, 3), + {MSt8, BSOffsetReg1} = MMod:shift_right(MSt7, {free, BSOffsetReg0}, 3), MSt9 = MMod:and_(MSt8, BSBinaryReg0, ?TERM_PRIMARY_CLEAR_MASK), {MSt10, SizeReg} = MMod:get_array_element(MSt9, {free, BSBinaryReg0}, 1), {MSt13, SizeValue} = if Size =:= ?ALL_ATOM -> - MSt11 = MMod:sub(MSt10, SizeReg, BSOffsetReg), + MSt11 = MMod:sub(MSt10, SizeReg, BSOffsetReg1), {MSt11, SizeReg}; is_integer(Size) -> % SizeReg is binary size % SizeVal is a constant MSt11 = MMod:sub(MSt10, SizeReg, Size bsl 4), - MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg}, Fail, MMod, MSt11), + MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg1}, Fail, MMod, MSt11), {MSt12, Size bsl 4}; true -> {MSt11, SizeValReg} = MMod:move_to_native_register(MSt10, Size), @@ -1314,20 +1313,20 @@ first_pass(<>, MMod, MSt0, State0) -> MSt11, {SizeValReg, '==', ?ALL_ATOM}, fun(BSt0) -> - BSt1 = MMod:sub(BSt0, SizeReg, BSOffsetReg), + BSt1 = MMod:sub(BSt0, SizeReg, BSOffsetReg1), MMod:free_native_registers(BSt1, [SizeValReg]) end, fun(BSt0) -> {BSt1, SizeValReg} = term_to_int(SizeValReg, 0, MMod, BSt0), BSt2 = MMod:sub(BSt1, SizeReg, SizeValReg), - BSt3 = cond_jump_to_label({SizeReg, '<', BSOffsetReg}, Fail, MMod, BSt2), + BSt3 = cond_jump_to_label({SizeReg, '<', BSOffsetReg1}, Fail, MMod, BSt2), BSt4 = MMod:move_to_native_register(BSt3, SizeValReg, SizeReg), MMod:free_native_registers(BSt4, [SizeValReg]) end ), {MSt12, SizeReg} end, - {MSt14, NewOffsetReg} = MMod:copy_to_native_register(MSt13, BSOffsetReg), + {MSt14, NewOffsetReg} = MMod:copy_to_native_register(MSt13, BSOffsetReg1), MSt15 = MMod:add(MSt14, NewOffsetReg, SizeValue), MSt16 = MMod:shift_left(MSt15, NewOffsetReg, 3), % Write new offset @@ -1344,7 +1343,7 @@ first_pass(<>, MMod, MSt0, State0) -> BSBinaryReg1, Live, {free, HeapSizeReg}, MMod, MSt23 ), {MSt25, ResultTerm} = MMod:call_primitive(MSt24, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ - ctx, {free, BSBinaryReg2}, {free, BSOffsetReg}, {free, SizeValue} + ctx, {free, BSBinaryReg2}, {free, BSOffsetReg1}, {free, SizeValue} ]), {MSt26, Dest, Rest7} = decode_dest(Rest6, MMod, MSt25), ?TRACE("OP_BS_GET_BINARY2 ~p,~p,~p,~p,~p,~p,~p\n", [ @@ -1935,13 +1934,13 @@ first_pass( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2 ), MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt5, TagReg} = MMod:get_array_element(MSt4, Reg, 0), + {MSt5, TagReg0} = MMod:get_array_element(MSt4, Reg, 0), MSt6 = cond_jump_to_label( - {TagReg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_TUPLE}, Label, MMod, MSt5 + {TagReg0, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_TUPLE}, Label, MMod, MSt5 ), - MSt7 = MMod:shift_right(MSt6, TagReg, 6), - MSt8 = cond_jump_to_label({TagReg, '!=', Arity}, Label, MMod, MSt7), - MSt9 = MMod:free_native_registers(MSt8, [TagReg]), + {MSt7, TagReg1} = MMod:shift_right(MSt6, {free, TagReg0}, 6), + MSt8 = cond_jump_to_label({TagReg1, '!=', Arity}, Label, MMod, MSt7), + MSt9 = MMod:free_native_registers(MSt8, [TagReg1]), MSt10 = MMod:move_array_element(MSt9, Reg, 1, Reg), {MSt11, AtomReg} = case maps:find(AtomResolver(AtomIndex), ?DEFAULT_ATOMS) of @@ -2282,15 +2281,19 @@ first_pass( {MSt7, (BinaryTotalSize div 8), term_binary_heap_size((BinaryTotalSize div 8), MMod) + Alloc}; true -> - MSt8 = MMod:shift_right(MSt7, BinaryTotalSize, 3), - {MSt9, BinaryTotalSize0} = MMod:copy_to_native_register(MSt8, BinaryTotalSize), - {MSt10, AllocSizeReg} = term_binary_heap_size({free, BinaryTotalSize0}, MMod, MSt9), + {MSt8, BinaryTotalSizeBytes} = MMod:shift_right(MSt7, {free, BinaryTotalSize}, 3), + {MSt9, BinaryTotalSizeBytes0} = MMod:copy_to_native_register( + MSt8, BinaryTotalSizeBytes + ), + {MSt10, AllocSizeReg} = term_binary_heap_size( + {free, BinaryTotalSizeBytes0}, MMod, MSt9 + ), case Alloc of 0 -> - {MSt10, BinaryTotalSize, AllocSizeReg}; + {MSt10, BinaryTotalSizeBytes, AllocSizeReg}; _ -> MSt11 = MMod:add(MSt10, AllocSizeReg, Alloc), - {MSt11, BinaryTotalSize, AllocSizeReg} + {MSt11, BinaryTotalSizeBytes, AllocSizeReg} end end, {MSt13, MemoryEnsureFreeReg} = MMod:call_primitive( @@ -2899,33 +2902,32 @@ first_pass_bs_match_binary( ]) end, MatchedBytes = MatchedBits div 8, - {MSt2, BSOffseBytesReg} = MMod:copy_to_native_register(MSt1, BSOffsetReg), - MSt3 = MMod:shift_right(MSt2, BSOffseBytesReg, 3), - {MSt4, RemainingBytesReg} = MMod:get_array_element(MSt3, BSBinaryReg, 1), - MSt5 = MMod:sub(MSt4, RemainingBytesReg, BSOffseBytesReg), - MSt6 = cond_jump_to_label({RemainingBytesReg, '<', MatchedBytes}, Fail, MMod, MSt5), - MSt7 = MMod:free_native_registers(MSt6, [RemainingBytesReg]), - {MSt8, HeapSizeReg} = MMod:call_primitive(MSt7, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ + {MSt2, BSOffseBytesReg} = MMod:shift_right(MSt1, BSOffsetReg, 3), + {MSt3, RemainingBytesReg} = MMod:get_array_element(MSt2, BSBinaryReg, 1), + MSt4 = MMod:sub(MSt3, RemainingBytesReg, BSOffseBytesReg), + MSt5 = cond_jump_to_label({RemainingBytesReg, '<', MatchedBytes}, Fail, MMod, MSt4), + MSt6 = MMod:free_native_registers(MSt5, [RemainingBytesReg]), + {MSt7, HeapSizeReg} = MMod:call_primitive(MSt6, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ BSBinaryReg, MatchedBytes ]), - {MSt9, NewMatchState} = memory_ensure_free_with_extra_root( - MatchState, Live, {free, HeapSizeReg}, MMod, MSt8 + {MSt8, NewMatchState} = memory_ensure_free_with_extra_root( + MatchState, Live, {free, HeapSizeReg}, MMod, MSt7 ), % Restore BSBinaryReg as it may have been gc'd as well - {MSt10, MatchStateReg0} = MMod:copy_to_native_register(MSt9, NewMatchState), - MSt11 = MMod:and_(MSt10, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), - MSt12 = MMod:move_array_element(MSt11, MatchStateReg0, 1, BSBinaryReg), - MSt13 = MMod:free_native_registers(MSt12, [MatchStateReg0]), - {MSt14, ResultTerm} = MMod:call_primitive(MSt13, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ + {MSt9, MatchStateReg0} = MMod:copy_to_native_register(MSt8, NewMatchState), + MSt10 = MMod:and_(MSt9, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), + MSt11 = MMod:move_array_element(MSt10, MatchStateReg0, 1, BSBinaryReg), + MSt12 = MMod:free_native_registers(MSt11, [MatchStateReg0]), + {MSt13, ResultTerm} = MMod:call_primitive(MSt12, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ ctx, BSBinaryReg, {free, BSOffseBytesReg}, MatchedBytes ]), - MSt15 = MMod:and_(MSt14, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt16, Dest, Rest5} = decode_dest(Rest4, MMod, MSt15), + MSt14 = MMod:and_(MSt13, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt15, Dest, Rest5} = decode_dest(Rest4, MMod, MSt14), ?TRACE("~p},", [Dest]), - MSt17 = MMod:move_to_vm_register(MSt16, ResultTerm, Dest), - MSt18 = MMod:free_native_registers(MSt17, [ResultTerm]), - MSt19 = MMod:add(MSt18, BSOffsetReg, MatchedBits), - {J0 - 5, Rest5, NewMatchState, BSOffsetReg, MSt19}. + MSt16 = MMod:move_to_vm_register(MSt15, ResultTerm, Dest), + MSt17 = MMod:free_native_registers(MSt16, [ResultTerm]), + MSt18 = MMod:add(MSt17, BSOffsetReg, MatchedBits), + {J0 - 5, Rest5, NewMatchState, BSOffsetReg, MSt18}. first_pass_bs_match_get_tail(MatchState, BSBinaryReg, BSOffsetReg, J0, Rest0, MMod, MSt0) -> {Live, Rest1} = decode_literal(Rest0), @@ -2945,32 +2947,31 @@ do_get_tail( MatchState, Live, BSOffsetReg, BSBinaryReg, MMod, MSt0 ) -> MSt1 = cond_raise_badarg({BSOffsetReg, '&', 2#111, '!=', 0}, MMod, MSt0), - {MSt2, BSOffseBytesReg} = MMod:copy_to_native_register(MSt1, BSOffsetReg), - MSt3 = MMod:shift_right(MSt2, BSOffseBytesReg, 3), - {MSt4, TailBytesReg0} = MMod:get_array_element(MSt3, BSBinaryReg, 1), - MSt5 = MMod:sub(MSt4, TailBytesReg0, BSOffseBytesReg), - {MSt6, HeapSizeReg} = MMod:call_primitive(MSt5, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ + {MSt2, BSOffseBytesReg} = MMod:shift_right(MSt1, BSOffsetReg, 3), + {MSt3, TailBytesReg0} = MMod:get_array_element(MSt2, BSBinaryReg, 1), + MSt4 = MMod:sub(MSt3, TailBytesReg0, BSOffseBytesReg), + {MSt5, HeapSizeReg} = MMod:call_primitive(MSt4, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ BSBinaryReg, {free, TailBytesReg0} ]), - {MSt7, NewMatchState} = memory_ensure_free_with_extra_root( - MatchState, Live, {free, HeapSizeReg}, MMod, MSt6 + {MSt6, NewMatchState} = memory_ensure_free_with_extra_root( + MatchState, Live, {free, HeapSizeReg}, MMod, MSt5 ), % Restore BSBinaryReg as it may have been gc'd as well - {MSt8, MatchStateReg0} = MMod:copy_to_native_register(MSt7, NewMatchState), - MSt9 = MMod:and_(MSt8, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), - MSt10 = MMod:move_array_element(MSt9, MatchStateReg0, 1, BSBinaryReg), - MSt11 = MMod:free_native_registers(MSt10, [MatchStateReg0]), - MSt12 = MMod:and_(MSt11, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt13, TailBytesReg1} = MMod:get_array_element(MSt12, BSBinaryReg, 1), - MSt14 = MMod:sub(MSt13, TailBytesReg0, BSOffseBytesReg), - MSt15 = MMod:add(MSt14, BSBinaryReg, ?TERM_PRIMARY_BOXED), - {MSt16, ResultTerm} = MMod:call_primitive(MSt15, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ + {MSt7, MatchStateReg0} = MMod:copy_to_native_register(MSt6, NewMatchState), + MSt8 = MMod:and_(MSt7, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), + MSt9 = MMod:move_array_element(MSt8, MatchStateReg0, 1, BSBinaryReg), + MSt10 = MMod:free_native_registers(MSt9, [MatchStateReg0]), + MSt11 = MMod:and_(MSt10, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt12, TailBytesReg1} = MMod:get_array_element(MSt11, BSBinaryReg, 1), + MSt13 = MMod:sub(MSt12, TailBytesReg0, BSOffseBytesReg), + MSt14 = MMod:add(MSt13, BSBinaryReg, ?TERM_PRIMARY_BOXED), + {MSt15, ResultTerm} = MMod:call_primitive(MSt14, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ ctx, BSBinaryReg, {free, BSOffseBytesReg}, TailBytesReg1 ]), - MSt17 = MMod:shift_left(MSt16, TailBytesReg1, 3), - MSt18 = MMod:add(MSt17, BSOffsetReg, TailBytesReg1), - MSt19 = MMod:free_native_registers(MSt18, [TailBytesReg1]), - {MSt19, ResultTerm, NewMatchState}. + MSt16 = MMod:shift_left(MSt15, TailBytesReg1, 3), + MSt17 = MMod:add(MSt16, BSOffsetReg, TailBytesReg1), + MSt18 = MMod:free_native_registers(MSt17, [TailBytesReg1]), + {MSt18, ResultTerm, NewMatchState}. first_pass_bs_match_equal_colon_equal( Fail, MatchState, BSBinaryReg, BSOffsetReg, J0, Rest0, MMod, MSt0 @@ -2998,9 +2999,8 @@ first_pass_bs_match_equal_colon_equal( {MSt5, IntValue} = MMod:get_array_element(MSt4, {free, Result}, 1), cond_jump_to_label({{free, IntValue}, '!=', PatternValue}, Fail, MMod, MSt5); _ -> - MSt4 = MMod:shift_right(MSt3, Result, 4), - MSt5 = cond_jump_to_label({Result, '!=', PatternValue}, Fail, MMod, MSt4), - MMod:free_native_registers(MSt5, [Result]) + {MSt4, ResultInt} = MMod:shift_right(MSt3, {free, Result}, 4), + cond_jump_to_label({{free, ResultInt}, '!=', PatternValue}, Fail, MMod, MSt4) end, MSt7 = MMod:add(MSt6, BSOffsetReg, Size), {J0 - 3, Rest3, MatchState, BSOffsetReg, MSt7}. @@ -3241,8 +3241,8 @@ term_to_int({literal, Val}, _FailLabel, _MMod, MSt0) when is_integer(Val) -> % Optimized case: when we have type information showing this is an integer, skip the type check term_to_int({typed, Term, {t_integer, _Range}}, _FailLabel, MMod, MSt0) -> {MSt1, Reg} = MMod:move_to_native_register(MSt0, Term), - MSt2 = MMod:shift_right(MSt1, Reg, 4), - {MSt2, Reg}; + {MSt2, IntReg} = MMod:shift_right(MSt1, {free, Reg}, 4), + {MSt2, IntReg}; term_to_int({typed, Term, _NonIntegerType}, FailLabel, MMod, MSt0) -> % Type information shows it's not an integer, fall back to generic path term_to_int(Term, FailLabel, MMod, MSt0); @@ -3251,8 +3251,8 @@ term_to_int(Term, FailLabel, MMod, MSt0) -> MSt2 = cond_raise_badarg_or_jump_to_fail_label( {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, FailLabel, MMod, MSt1 ), - MSt3 = MMod:shift_right(MSt2, Reg, 4), - {MSt3, Reg}. + {MSt3, IntReg} = MMod:shift_right(MSt2, {free, Reg}, 4), + {MSt3, IntReg}. first_pass_float3(Primitive, Rest0, MMod, MSt0, State0) -> {Label, Rest1} = decode_label(Rest0), @@ -3610,8 +3610,8 @@ term_get_tuple_arity(Tuple, MMod, MSt0) -> end, MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = MMod:move_array_element(MSt2, Reg, 0, Reg), - MSt4 = MMod:shift_right(MSt3, Reg, 6), - {MSt4, Reg}. + {MSt4, ArityReg} = MMod:shift_right(MSt3, {free, Reg}, 6), + {MSt4, ArityReg}. term_get_map_size(Map, MMod, MSt0) -> {MSt1, MapKeys} = term_get_map_keys(Map, MMod, MSt0), @@ -3658,7 +3658,7 @@ term_binary_heap_size({free, Reg}, MMod, MSt0) -> {Reg, '<', ?REFC_BINARY_MIN_32}, fun(BSt0) -> BSt1 = MMod:add(BSt0, Reg, 3), - BSt2 = MMod:shift_right(BSt1, Reg, 2), + {BSt2, Reg} = MMod:shift_right(BSt1, {free, Reg}, 2), MMod:add(BSt2, Reg, 1 + ?BINARY_HEADER_SIZE) end, fun(BSt0) -> @@ -3674,7 +3674,7 @@ term_binary_heap_size({free, Reg}, MMod, MSt0) -> {Reg, '<', ?REFC_BINARY_MIN_64}, fun(BSt0) -> BSt1 = MMod:add(BSt0, Reg, 7), - BSt2 = MMod:shift_right(BSt1, Reg, 3), + {BSt2, Reg} = MMod:shift_right(BSt1, {free, Reg}, 3), MMod:add(BSt2, Reg, 1 + ?BINARY_HEADER_SIZE) end, fun(BSt0) -> diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 8e5410aa9b..1eba4fba80 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -983,13 +983,29 @@ merge_used_regs(State, []) -> %% @param Shift number of bits to shift %% @return new state %%----------------------------------------------------------------------------- --spec shift_right(state(), aarch64_register(), non_neg_integer()) -> state(). -shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when +-spec shift_right(#state{}, maybe_free_aarch64_register(), non_neg_integer()) -> + {#state{}, aarch64_register()}. +shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when ?IS_GPR(Reg) andalso is_integer(Shift) -> I = jit_aarch64_asm:lsr(Reg, Reg, Shift), Stream1 = StreamModule:append(Stream0, I), - State#state{stream = Stream1}. + {State#state{stream = Stream1}, Reg}; +shift_right( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [ResultReg | T], + used_regs = UR + } = State, + Reg, + Shift +) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I = jit_aarch64_asm:lsr(ResultReg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. %%----------------------------------------------------------------------------- %% @doc Emit a shift register left by a fixed number of bits, effectively diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 9409059a89..ce683cf5ab 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -1386,12 +1386,29 @@ merge_used_regs(State, []) -> %% @param Shift number of bits to shift %% @return new state %%----------------------------------------------------------------------------- -shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when +-spec shift_right(#state{}, maybe_free_armv6m_register(), non_neg_integer()) -> + {#state{}, armv6m_register()}. +shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when ?IS_GPR(Reg) andalso is_integer(Shift) -> I = jit_armv6m_asm:lsrs(Reg, Reg, Shift), Stream1 = StreamModule:append(Stream0, I), - State#state{stream = Stream1}. + {State#state{stream = Stream1}, Reg}; +shift_right( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [ResultReg | T], + used_regs = UR + } = State, + Reg, + Shift +) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I = jit_armv6m_asm:lsrs(ResultReg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. %%----------------------------------------------------------------------------- %% @doc Emit a shift register left by a fixed number of bits, effectively diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 710b0063db..df8e7cf1d6 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -877,12 +877,30 @@ merge_used_regs(State, []) -> %% @param Shift number of bits to shift %% @return new state %%----------------------------------------------------------------------------- -shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when +-spec shift_right(#state{}, maybe_free_x86_64_register(), non_neg_integer()) -> + {#state{}, x86_64_register()}. +shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when ?IS_GPR(Reg) andalso is_integer(Shift) -> I = jit_x86_64_asm:shrq(Shift, Reg), Stream1 = StreamModule:append(Stream0, I), - State#state{stream = Stream1}. + {State#state{stream = Stream1}, Reg}; +shift_right( + #state{ + stream_module = StreamModule, + available_regs = [ResultReg | T], + used_regs = UR, + stream = Stream0 + } = State, + Reg, + Shift +) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I1 = jit_x86_64_asm:movq(Reg, ResultReg), + I2 = jit_x86_64_asm:shrq(Shift, ResultReg), + Stream1 = StreamModule:append(Stream0, <>), + {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. %%----------------------------------------------------------------------------- %% @doc Emit a shift register left by a fixed number of bits, effectively diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index c6c164a640..23291a400c 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -760,17 +760,34 @@ if_else_block_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -shift_right_test() -> - State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:shift_right(State1, Reg, 3), - Stream = ?BACKEND:stream(State2), - Dump = - << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: d343fce7 lsr x7, x7, #3" - >>, - ?assertEqual(dump_to_bin(Dump), Stream). +shift_right_test_() -> + [ + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg} = ?BACKEND:shift_right(State1, {free, Reg}, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: d343fce7 lsr x7, x7, #3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, OtherReg} = ?BACKEND:shift_right(State1, Reg, 3), + ?assertNotEqual(OtherReg, Reg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: d343fce8 lsr x8, x7, #3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. shift_left_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 5c15571412..ceaf926d7d 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1183,17 +1183,34 @@ if_else_block_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -shift_right_test() -> - State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:shift_right(State1, Reg, 3), - Stream = ?BACKEND:stream(State2), - Dump = - << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 08ff lsrs r7, r7, #3" - >>, - ?assertEqual(dump_to_bin(Dump), Stream). +shift_right_test_() -> + [ + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg} = ?BACKEND:shift_right(State1, {free, Reg}, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 08ff lsrs r7, r7, #3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, OtherReg} = ?BACKEND:shift_right(State1, Reg, 3), + ?assertNotEqual(OtherReg, Reg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 08fe lsrs r6, r7, #3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. shift_left_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index cc8e9ddf14..9aa86b6427 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -820,17 +820,35 @@ if_else_block_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -shift_right_test() -> - State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:shift_right(State1, Reg, 3), - Stream = ?BACKEND:stream(State2), - Dump = - << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 48 c1 e8 03 shr $0x3,%rax" - >>, - ?assertEqual(dump_to_bin(Dump), Stream). +shift_right_test_() -> + [ + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg} = ?BACKEND:shift_right(State1, {free, Reg}, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 48 c1 e8 03 shr $0x3,%rax" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, OtherReg} = ?BACKEND:shift_right(State1, Reg, 3), + ?assertNotEqual(OtherReg, Reg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 49 89 c3 mov %rax,%r11\n" + " 7: 49 c1 eb 03 shr $0x3,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. shift_left_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), From f99f1dc8f72c8f4459a26bb8ed60b8c073d2c70f Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 29 Sep 2025 21:49:52 +0200 Subject: [PATCH 80/97] JIT: factorize tail calls to reduce binary size Use a cache to remember tail calls that were already implemented and replace further implementations of the same tail call with a jump to the previous implementation. Coverage shows that all cases are covered in libs/estdlib/src and libs/jit/src: OP_RETURN: 50 misses, 1735 hits (97%) OP_JUMP/OP_CALL_LAST/OP_CALL_ONLY: 656 misses, 389 hits (37%) OP_CALL_LAST: 220 misses, 206 hits (48%) OP_FUNC_INFO: 58 misses, 1619 hits (97%) Signed-off-by: Paul Guyot --- libs/jit/src/jit.erl | 110 ++++++++++++++++++++++------- libs/jit/src/jit_aarch64.erl | 8 +++ libs/jit/src/jit_armv6m.erl | 35 ++++++---- libs/jit/src/jit_x86_64.erl | 8 +++ tests/libs/jit/jit_tests.erl | 129 ++++++++++++++++++++++++++++------- 5 files changed, 227 insertions(+), 63 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 1992bf5841..71d38c615f 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -100,7 +100,8 @@ labels_count :: pos_integer(), atom_resolver :: fun((integer()) -> atom()), literal_resolver :: fun((integer()) -> any()), - type_resolver :: fun((integer()) -> any()) + type_resolver :: fun((integer()) -> any()), + tail_cache :: [{tuple(), non_neg_integer()}] }). -type stream() :: any(). @@ -142,7 +143,8 @@ compile( labels_count = LabelsCount, atom_resolver = AtomResolver, literal_resolver = LiteralResolver, - type_resolver = TypeResolver + type_resolver = TypeResolver, + tail_cache = [] }, {State1, MSt2} = first_pass(Opcodes, MMod, MSt1, State0), MSt3 = second_pass(MMod, MSt2, State1), @@ -170,18 +172,30 @@ first_pass( ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest1, MMod, MSt1, State0); % 2 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {_ModuleAtom, Rest1} = decode_atom(Rest0), {_FunctionName, Rest2} = decode_atom(Rest1), {_Arity, Rest3} = decode_literal(Rest2), ?TRACE("OP_FUNC_INFO ~p, ~p, ~p\n", [_ModuleAtom, _FunctionName, _Arity]), - % Implement function clause at the previous label. (TODO: optimize it out to save space) - MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RAISE_ERROR, [ - ctx, jit_state, offset, ?FUNCTION_CLAUSE_ATOM - ]), - ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest3, MMod, MSt1, State0); + % Implement function clause at the previous label. + Offset = MMod:offset(MSt0), + {MSt1, OffsetReg} = MMod:move_to_native_register(MSt0, Offset), + TailCacheKey = {call_primitive_last, ?PRIM_RAISE_ERROR, [OffsetReg, ?FUNCTION_CLAUSE_ATOM]}, + State1 = + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + MSt3 = MMod:call_primitive_last(MSt1, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, {free, OffsetReg}, ?FUNCTION_CLAUSE_ATOM + ]), + State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, CacheOffset} -> + MSt2 = MMod:jump_to_offset(MSt1, CacheOffset), + MSt3 = MMod:free_native_registers(MSt2, [OffsetReg]), + State0 + end, + ?ASSERT_ALL_NATIVE_FREE(MSt3), + first_pass(Rest3, MMod, MSt3, State1); % 3 first_pass( <>, MMod, MSt0, #state{labels_count = LabelsCount} = State @@ -203,26 +217,56 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest2, MMod, MSt1, State0); % 5 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), {NWords, Rest3} = decode_literal(Rest2), ?TRACE("OP_CALL_LAST ~p, ~p, ~p\n", [_Arity, Label, NWords]), - MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}), - MSt2 = MMod:increment_sp(MSt1, NWords + 1), - MSt3 = MMod:call_only_or_schedule_next(MSt2, Label), + TailCacheKey0 = {op_call_last, NWords, Label}, + case lists:keyfind(TailCacheKey0, 1, TC) of + false -> + Offset0 = MMod:offset(MSt0), + MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}), + MSt2 = MMod:increment_sp(MSt1, NWords + 1), + TailCacheKey1 = {op_call_only, Label}, + case lists:keyfind(TailCacheKey1, 1, TC) of + false -> + Offset1 = MMod:offset(MSt2), + MSt3 = MMod:call_only_or_schedule_next(MSt2, Label), + State1 = State0#state{ + tail_cache = [{TailCacheKey1, Offset1}, {TailCacheKey0, Offset0} | TC] + }; + {TailCacheKey1, Offset1} -> + MSt3 = MMod:jump_to_offset(MSt2, Offset1), + State1 = State0#state{ + tail_cache = [{TailCacheKey0, Offset0} | TC] + } + end; + {TailCacheKey0, Offset0} -> + MSt3 = MMod:jump_to_offset(MSt0, Offset0), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt3), - first_pass(Rest3, MMod, MSt3, State0); + first_pass(Rest3, MMod, MSt3, State1); % 6 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), ?TRACE("OP_CALL_ONLY ~p, ~p\n", [_Arity, Label]), - MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + TailCacheKey = {op_call_only, Label}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt0), + MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + State1 = State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, Offset} -> + MSt1 = MMod:jump_to_offset(MSt0, Offset), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest2, MMod, MSt1, State0); + first_pass(Rest2, MMod, MSt1, State1); % 7 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -348,7 +392,7 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 19 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_RETURN\n", []), % Optimized return: check if returning within same module @@ -371,9 +415,18 @@ first_pass(<>, MMod, MSt0, State0) -> ), MSt5 = MMod:free_native_registers(MSt4, [CpReg0]), % Different module: use existing slow path - MSt6 = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]), + TailCacheKey = {call_primitive_last, ?PRIM_RETURN}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt5), + MSt6 = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]), + State1 = State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, Offset} -> + MSt6 = MMod:jump_to_offset(MSt5, Offset), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt6), - first_pass(Rest, MMod, MSt6, State0); + first_pass(Rest, MMod, MSt6, State1); % 20 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -836,13 +889,22 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 61 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), ?TRACE("OP_JUMP ~p\n", [Label]), - MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), - ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest1, MMod, MSt1, State0); + TailCacheKey = {op_call_only, Label}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt0), + MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + ?ASSERT_ALL_NATIVE_FREE(MSt1), + first_pass(Rest1, MMod, MSt1, State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}); + {TailCacheKey, Offset} -> + MSt1 = MMod:jump_to_offset(MSt0, Offset), + ?ASSERT_ALL_NATIVE_FREE(MSt1), + first_pass(Rest1, MMod, MSt1, State0) + end; % 62 % Same implementation as OP_TRY, to confirm. first_pass(<>, MMod, MSt0, State0) -> diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 1eba4fba80..be1b62f9b4 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -38,6 +38,7 @@ return_if_not_equal_to_ctx/2, jump_to_label/2, jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -531,6 +532,13 @@ jump_to_label( State#state{stream = Stream1, branches = [Reloc | AccBranches]} end. +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + Rel = TargetOffset - Offset, + I1 = jit_aarch64_asm:b(Rel), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + %%----------------------------------------------------------------------------- %% @doc Jump to a continuation address stored in a register. %% This is used for optimized intra-module returns. diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index ce683cf5ab..f792bc1f58 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -38,6 +38,7 @@ return_if_not_equal_to_ctx/2, jump_to_label/2, jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -731,6 +732,12 @@ jump_to_label( Stream1 = StreamModule:append(Stream0, CodeBlock), State1#state{stream = Stream1}. +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), + Stream1 = StreamModule:append(Stream0, CodeBlock), + State#state{stream = Stream1}. + %%----------------------------------------------------------------------------- %% @doc Jump to address in continuation pointer register %% The continuation points to a function prologue, so we need to compute @@ -793,15 +800,14 @@ jump_to_continuation( % Free all registers as this is a terminal instruction State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}. -branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) when - LabelOffset - Offset =< 2050, LabelOffset - Offset >= -2044 +branch_to_offset_code(_State, Offset, TargetOffset) when + TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044 -> % Near branch: use direct B instruction - Rel = LabelOffset - Offset, - CodeBlock = jit_armv6m_asm:b(Rel), - {State, CodeBlock}; -branch_to_label_code( - #state{available_regs = [TempReg | _]} = State0, Offset, Label, {Label, LabelOffset} + Rel = TargetOffset - Offset, + jit_armv6m_asm:b(Rel); +branch_to_offset_code( + #state{available_regs = [TempReg | _]}, Offset, TargetOffset ) -> % Far branch: use register-based sequence, need temporary register if @@ -812,19 +818,22 @@ branch_to_label_code( I3 = jit_armv6m_asm:bx(TempReg), % Unaligned : need nop I4 = jit_armv6m_asm:nop(), - LiteralValue = LabelOffset - Offset - 5, + LiteralValue = TargetOffset - Offset - 5, I5 = <>, - CodeBlock = <>; + <>; true -> % Unaligned I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), I2 = jit_armv6m_asm:add(TempReg, pc), I3 = jit_armv6m_asm:bx(TempReg), - LiteralValue = LabelOffset - Offset - 5, + LiteralValue = TargetOffset - Offset - 5, I4 = <>, - CodeBlock = <> - end, - {State0, CodeBlock}; + <> + end. + +branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> + CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), + {State, CodeBlock}; branch_to_label_code( #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false ) -> diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index df8e7cf1d6..26b08de0d7 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -38,6 +38,7 @@ return_if_not_equal_to_ctx/2, jump_to_label/2, jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -524,6 +525,13 @@ jump_to_label( State#state{stream = Stream1, branches = [Reloc | AccBranches]} end. +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + RelOffset = TargetOffset - Offset, + I1 = jit_x86_64_asm:jmp(RelOffset), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + %%----------------------------------------------------------------------------- %% @doc Jump to a continuation address stored in a register. %% This is used for optimized intra-module returns. diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl index 72a356ae3c..3683287938 100644 --- a/tests/libs/jit/jit_tests.erl +++ b/tests/libs/jit/jit_tests.erl @@ -62,6 +62,44 @@ <<0, 0, 0, 3, 0, 0, 0, 2, 15, 255, 0, 16>> ). +% Code chunk from bool_min2.erl - tests tail-call cache optimization +% This module has multiple return opcodes which trigger the tail-call cache: +% - The first return creates a cached implementation +% - Subsequent returns use jump_to_offset to jump back to the cached code +-define(CODE_CHUNK_3, + <<16#00, 16#00, 16#00, 16#10, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#B2, 16#00, + 16#00, 16#00, 16#09, 16#00, 16#00, 16#00, 16#03, 16#01, 16#10, 16#99, 16#10, 16#02, 16#12, + 16#22, 16#00, 16#01, 16#20, 16#0C, 16#10, 16#00, 16#AC, 16#17, 16#10, 16#04, 16#40, 16#32, + 16#23, 16#40, 16#32, 16#33, 16#40, 16#32, 16#13, 16#40, 16#42, 16#43, 16#40, 16#32, 16#03, + 16#99, 16#20, 16#04, 16#50, 16#45, 16#04, 16#10, 16#65, 16#40, 16#03, 16#04, 16#40, 16#42, + 16#23, 16#40, 16#42, 16#33, 16#40, 16#32, 16#13, 16#40, 16#42, 16#43, 16#40, 16#42, 16#03, + 16#99, 16#30, 16#04, 16#50, 16#45, 16#04, 16#10, 16#65, 16#99, 16#20, 16#7D, 16#05, 16#10, + 16#00, 16#57, 16#04, 16#10, 16#57, 16#03, 16#10, 16#03, 16#12, 16#10, 16#13, 16#01, 16#30, + 16#99, 16#40, 16#02, 16#12, 16#72, 16#50, 16#01, 16#40, 16#99, 16#50, 16#0B, 16#05, 16#10, + 16#03, 16#13, 16#03, 16#0B, 16#05, 16#10, 16#23, 16#33, 16#13, 16#0B, 16#05, 16#20, 16#57, + 16#03, 16#20, 16#57, 16#13, 16#20, 16#03, 16#0A, 16#05, 16#30, 16#43, 16#13, 16#0B, 16#05, + 16#20, 16#57, 16#03, 16#20, 16#57, 16#13, 16#20, 16#03, 16#13, 16#01, 16#50, 16#99, 16#60, + 16#02, 16#12, 16#B2, 16#10, 16#01, 16#60, 16#3B, 16#03, 16#55, 16#17, 16#40, 16#32, 16#85, + 16#42, 16#75, 16#01, 16#70, 16#40, 16#11, 16#03, 16#13, 16#01, 16#80, 16#40, 16#01, 16#03, + 16#13, 16#03>> +). +-define(ATU8_CHUNK_3, + <<16#FF, 16#FF, 16#FF, 16#F5, 16#90, 16#62, 16#6F, 16#6F, 16#6C, 16#5F, 16#6D, 16#69, 16#6E, + 16#32, 16#50, 16#73, 16#74, 16#61, 16#72, 16#74, 16#50, 16#66, 16#61, 16#6C, 16#73, 16#65, + 16#40, 16#74, 16#72, 16#75, 16#65, 16#60, 16#65, 16#72, 16#6C, 16#61, 16#6E, 16#67, 16#10, + 16#2B, 16#10, 16#66, 16#30, 16#61, 16#6E, 16#64, 16#20, 16#6F, 16#72, 16#30, 16#6E, 16#6F, + 16#74, 16#B0, 16#6F, 16#6E, 16#65, 16#5F, 16#69, 16#66, 16#5F, 16#74, 16#72, 16#75, 16#65>> +). +-define(TYPE_CHUNK_3, + <<16#00, 16#00, 16#00, 16#03, 16#00, 16#00, 16#00, 16#03, 16#0F, 16#FF, 16#30, 16#20, 16#00, + 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, + 16#00, 16#01, 16#00, 16#01>> +). +-define(LINE_CHUNK_3, + <<16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#07, 16#00, + 16#00, 16#00, 16#06, 16#00, 16#00, 16#00, 16#00, 16#41, 16#51, 16#61, 16#81, 16#91, 16#B1>> +). + compile_minimal_x86_64_test() -> Stream0 = jit_stream_binary:new(0), <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_0, @@ -105,24 +143,33 @@ check_labels_table0(N, <>) -> check_labels_table0 check_lines_table(<>) -> ok. -term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> - % Compile CODE_CHUNK_1 which contains a typed register for term_to_int optimization +backend_to_arch(jit_x86_64) -> ?JIT_ARCH_X86_64; +backend_to_arch(jit_aarch64) -> ?JIT_ARCH_AARCH64; +backend_to_arch(jit_armv6m) -> ?JIT_ARCH_ARMV6M. + +compile_stream_for_backend(Backend, CodeChunk, AtomChunk, TypeChunk) -> Stream0 = jit_stream_binary:new(0), - <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_1, + <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk, + Arch = backend_to_arch(Backend), Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) + Stream0, jit:beam_chunk_header(LabelsCount, Arch, ?JIT_VARIANT_PIC) ), - Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), + Stream2 = Backend:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), - AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_1), + AtomResolver = jit_precompile:atom_resolver(AtomChunk), LiteralResolver = fun(_) -> test_literal end, - TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_1), + TypeResolver = jit_precompile:type_resolver(TypeChunk), % Compile with typed register support - {_LabelsCount, Stream3} = jit:compile( - ?CODE_CHUNK_1, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2 + {LabelsCount, Stream3} = jit:compile( + CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2 + ), + Backend:stream(Stream3). + +term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> + CompiledCode = compile_stream_for_backend( + jit_x86_64, ?CODE_CHUNK_1, ?ATU8_CHUNK_1, ?TYPE_CHUNK_1 ), - CompiledCode = jit_x86_64:stream(Stream3), % Check the reading of x[1] is immediatly followed by a shift right. % 15c: 4c 8b 5f 38 mov 0x38(%rdi),%r11 @@ -183,23 +230,9 @@ term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> ok. verify_is_function_typed_optimization_x86_64_test() -> - % Compile CODE_CHUNK_1 which contains a typed register for term_to_int optimization - Stream0 = jit_stream_binary:new(0), - <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_2, - Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) + CompiledCode = compile_stream_for_backend( + jit_x86_64, ?CODE_CHUNK_2, ?ATU8_CHUNK_2, ?TYPE_CHUNK_2 ), - Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), - - AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_2), - LiteralResolver = fun(_) -> test_literal end, - TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_2), - - % Compile with typed register support - {_LabelsCount, Stream3} = jit:compile( - ?CODE_CHUNK_2, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2 - ), - CompiledCode = jit_x86_64:stream(Stream3), % Check that call to allocate is directly followed by the building the cp % for call @@ -250,3 +283,47 @@ verify_is_function_typed_optimization_x86_64_test() -> ) ), ok. + +tail_call_cache_armv6m_test() -> + CompiledCode = compile_stream_for_backend( + jit_armv6m, ?CODE_CHUNK_3, ?ATU8_CHUNK_3, ?TYPE_CHUNK_3 + ), + + % Check that we have the following pattern: + % 8c: 278c movs r7, #140 @ 0x8c + % 8e: 6816 ldr r6, [r2, #0] + % 90: 463a mov r2, r7 + % 92: 4b01 ldr r3, [pc, #4] @ (0x98) + % 94: e002 b.n 0x9c + % 96: 0000 movs r0, r0 + % 98: 01cb lsls r3, r1, #7 + % 9a: 0000 movs r0, r0 + % 9c: 9f05 ldr r7, [sp, #20] + % 9e: 9605 str r6, [sp, #20] + % a0: 46be mov lr, r7 + + % Check for the first return implementation (call_primitive_last for PRIM_RETURN) + % Pattern: movs r7, #140 / ldr r6, [r2, #0] / mov r2, r7 + % 278c 6816 463a + ?assertMatch( + {_, _}, + binary:match(CompiledCode, <<16#278c:16/little, 16#6816:16/little, 16#463a:16/little>>) + ), + + % 3f0: 4f00 ldr r7, [pc, #0] @ (0x3f4) + % 3f2: e001 b.n 0x3f8 + % 3f4: 03f0 lsls r0, r6, #15 + % 3f6: 0000 movs r0, r0 + % 3f8: e648 b.n 0x8c + + % Check for tail-call cache jump: ldr r7, [pc, #0] followed by b.n (backward branch) + % Pattern: 4f00 e6f5 (ldr r7, [pc, #0] / b.n 0x8c) + ?assertMatch( + {_, _}, + binary:match( + CompiledCode, + <<16#4f00:16/little, 16#e001:16/little, 16#03f0:16/little, 0:16/little, + 16#e648:16/little>> + ) + ), + ok. From 933af32ecdbcda3aade34e331eddb16bc6f5a750 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 29 Sep 2025 23:24:21 +0200 Subject: [PATCH 81/97] armv6m: use literal pool to reduce binary size Signed-off-by: Paul Guyot --- libs/estdlib/src/code_server.erl | 5 +- libs/jit/src/jit_armv6m.erl | 89 +++-- tests/libs/jit/jit_armv6m_tests.erl | 577 +++++++++++++++------------- tests/libs/jit/jit_tests.erl | 41 +- 4 files changed, 379 insertions(+), 333 deletions(-) diff --git a/libs/estdlib/src/code_server.erl b/libs/estdlib/src/code_server.erl index 427d5fa529..69aa359327 100644 --- a/libs/estdlib/src/code_server.erl +++ b/libs/estdlib/src/code_server.erl @@ -174,11 +174,12 @@ load(Module) -> BackendModule, BackendState0 ), - Stream1 = BackendModule:stream(BackendState1), + BackendState2 = BackendModule:flush(BackendState1), + Stream1 = BackendModule:stream(BackendState2), code_server:set_native_code(Module, LabelsCount, Stream1), End = erlang:system_time(millisecond), io:format("~B ms (bytecode: ~B bytes, native code: ~B bytes)\n", [ - End - Start, byte_size(Code), BackendModule:offset(BackendState1) + End - Start, byte_size(Code), BackendModule:offset(BackendState2) ]) catch T:V:S -> diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index f792bc1f58..81271bf540 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -139,7 +139,8 @@ available_regs :: [armv6m_register()], used_regs :: [armv6m_register()], labels :: [{integer() | reference(), integer()}], - variant :: non_neg_integer() + variant :: non_neg_integer(), + literal_pool :: [{non_neg_integer(), armv6m_register(), non_neg_integer()}] }). -type state() :: #state{}. @@ -252,7 +253,8 @@ new(Variant, StreamModule, Stream) -> available_regs = ?AVAILABLE_REGS, used_regs = [], labels = [], - variant = Variant + variant = Variant, + literal_pool = [] }. %%----------------------------------------------------------------------------- @@ -637,7 +639,8 @@ call_primitive_last( State2 = set_registers_args(State1, ArgsForTailCall, 0), tail_call_with_jit_state_registers_only(State2, Temp) end, - State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. + State5 = State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}, + flush_literal_pool(State5). %%----------------------------------------------------------------------------- %% @doc Tail call to address in register, restoring prolog registers including @@ -730,13 +733,15 @@ jump_to_label( Offset = StreamModule:offset(Stream0), {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), Stream1 = StreamModule:append(Stream0, CodeBlock), - State1#state{stream = Stream1}. + State2 = State1#state{stream = Stream1}, + flush_literal_pool(State2). jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> Offset = StreamModule:offset(Stream0), CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), Stream1 = StreamModule:append(Stream0, CodeBlock), - State#state{stream = Stream1}. + State2 = State#state{stream = Stream1}, + flush_literal_pool(State2). %%----------------------------------------------------------------------------- %% @doc Jump to address in continuation pointer register @@ -798,7 +803,8 @@ jump_to_continuation( Code = <>, Stream2 = StreamModule:append(State1#state.stream, Code), % Free all registers as this is a terminal instruction - State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}. + State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}, + flush_literal_pool(State2). branch_to_offset_code(_State, Offset, TargetOffset) when TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044 @@ -1741,7 +1747,7 @@ set_registers_args( UsedRegs, Args ), - State0#state{ + State1#state{ stream = Stream1, available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) @@ -2625,41 +2631,42 @@ mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Re I2 = jit_armv6m_asm:negs(Reg, Reg), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; -mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - %% Use a literal pool with a branch instruction (branch-over pattern) - %% Calculate where literal will be placed (must be word-aligned) - %% After LDR (2 bytes) + Branch (2 bytes) = 4 bytes from current position - CurrentOffset = StreamModule:offset(Stream0), - OffsetAfterInstructions = CurrentOffset + 4, - %% Find next word-aligned position for literal - LiteralPosition = - case OffsetAfterInstructions rem 4 of - % Already aligned - 0 -> OffsetAfterInstructions; - % Add 2 bytes padding to align - _ -> OffsetAfterInstructions + 2 +mov_immediate( + #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State, Reg, Val +) -> + LdrInstructionAddr = StreamModule:offset(Stream0), + I1 = jit_armv6m_asm:ldr(Reg, {pc, 0}), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1, literal_pool = [{LdrInstructionAddr, Reg, Val} | LP]}. + +flush_literal_pool(#state{literal_pool = []} = State) -> + State; +flush_literal_pool( + #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State +) -> + % Align + Offset = StreamModule:offset(Stream0), + Stream1 = + if + Offset rem 4 =:= 0 -> Stream0; + true -> StreamModule:append(Stream0, <<0:16>>) end, - PaddingNeeded = LiteralPosition - OffsetAfterInstructions, - - %% Calculate LDR PC-relative offset - %% PC = (current_instruction_address & ~3) + 4 - LdrInstructionAddr = CurrentOffset, - LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, - LiteralOffset = LiteralPosition - LdrPC, - - %% Generate: ldr rTemp, [pc, #LiteralOffset] ; Load from literal - I1 = jit_armv6m_asm:ldr(Reg, {pc, LiteralOffset}), - %% Calculate branch offset - %% Branch is at CurrentOffset + 2, need to jump past literal - BranchPosition = CurrentOffset + 2, - % After the 4-byte literal - TargetPosition = LiteralPosition + 4, - BranchOffset = TargetPosition - BranchPosition, - I2 = jit_armv6m_asm:b(BranchOffset), - %% Generate padding if needed (just zeros) - Padding = <<0:(PaddingNeeded * 8)>>, - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}. + % Lay all values and update ldr instructions + Stream2 = lists:foldl( + fun({LdrInstructionAddr, Reg, Val}, AccStream) -> + LiteralPosition = StreamModule:offset(AccStream), + LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, + LiteralOffset = LiteralPosition - LdrPC, + LdrInstruction = jit_armv6m_asm:ldr(Reg, {pc, LiteralOffset}), + AccStream1 = StreamModule:append(AccStream, <>), + StreamModule:replace( + AccStream1, LdrInstructionAddr, LdrInstruction + ) + end, + Stream1, + lists:reverse(LP) + ), + State#state{stream = Stream2, literal_pool = []}. sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when (Val >= 0 andalso Val =< 255) orelse is_atom(Val) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index ceaf926d7d..c7cf14ae75 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -312,13 +312,13 @@ call_primitive_last_5_args_test() -> " 6: 9700 str r7, [sp, #0]\n" " 8: 9902 ldr r1, [sp, #8]\n" " a: 2204 movs r2, #4\n" - " c: 4b00 ldr r3, [pc, #0] ; (0x10)\n" - " e: e001 b.n 0x14\n" - " 10: 02cb lsrs r3, r1, #16\n" - " 12: 0000 movs r0, r0\n" - " 14: 47b0 blx r6\n" - " 16: b002 add sp, #8\n" - " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n" + " e: 47b0 blx r6\n" + " 10: b002 add sp, #8\n" + " 12: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + % Literal pool + " 14: 02cb lsls r3, r1, #11\n" + " 16: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -535,17 +535,19 @@ if_block_test_() -> ?BACKEND:add(BSt0, RegB, 2) end ), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 4d00 ldr r5, [pc, #0] ; (0x8)\n" - " 6: da04 bge.n 0x12\n" - " 8: 0400 lsls r0, r0, #16\n" - " a: 0000 movs r0, r0\n" - " c: 42af cmp r7, r5\n" - " e: dafe bge.n 0xe\n" - " 10: 3602 adds r6, #2" + " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" + " 6: da01 bge.n 0xc\n" + " 8: dafe bge.n 0x8\n" + " a: 3602 adds r6, #2\n" + " c: e078 b.n 0x100\n" + " e: 0000 movs r0, r0\n" + " 10: 0400 lsls r0, r0, #16\n" + " 12: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -713,17 +715,19 @@ if_block_test_() -> ?BACKEND:add(BSt0, RegB, 1) end ), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 4d00 ldr r5, [pc, #0] ; (0x8)\n" - " 6: e001 b.n 0xc\n" - " 8: 07cb lsls r3, r1, #31\n" - " a: 0000 movs r0, r0\n" - " c: 42af cmp r7, r5\n" - " e: d000 beq.n 0x12\n" - " 10: 3601 adds r6, #1" + " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" + " 6: 42af cmp r7, r5\n" + " 8: d000 beq.n 0xc\n" + " a: 3601 adds r6, #1\n" + " c: e078 b.n 0x100\n" + " e: 0000 movs r0, r0\n" + " 10: 07cb lsls r3, r1, #31\n" + " 12: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -1395,35 +1399,33 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> " 128: 3f01 subs r7, #1\n" " 12a: 60b7 str r7, [r6, #8]\n" " 12c: d004 beq.n 0x138\n" - " 12e: e011 b.n 0x154\n" + " 12e: e00f b.n 0x150\n" " 130: 46c0 nop ; (mov r8, r8)\n" " 132: 46c0 nop ; (mov r8, r8)\n" " 134: 46c0 nop ; (mov r8, r8)\n" " 136: 46c0 nop ; (mov r8, r8)\n" " 138: a700 add r7, pc, #0 ; (adr r7, 0x13c)\n" - " 13a: 4e01 ldr r6, [pc, #4] ; (0x140)\n" - " 13c: e002 b.n 0x144\n" - " 13e: 0000 movs r0, r0\n" - " 140: fedd ffff stcl2 15, cr13, [sp, #-1020] ; 0xfffffc04\n" - " 144: 19f6 adds r6, r6, r7\n" - " 146: 9f00 ldr r7, [sp, #0]\n" - " 148: 607e str r6, [r7, #4]\n" - " 14a: 6897 ldr r7, [r2, #8]\n" - " 14c: 9e05 ldr r6, [sp, #20]\n" - " 14e: 9705 str r7, [sp, #20]\n" - " 150: 46b6 mov lr, r6\n" - " 152: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 154: 6817 ldr r7, [r2, #0]\n" - " 156: 9e05 ldr r6, [sp, #20]\n" - " 158: 9705 str r7, [sp, #20]\n" - " 15a: 46b6 mov lr, r6\n" - " 15c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 15e: 46c0 nop ; (mov r8, r8)\n" - " 160: 6857 ldr r7, [r2, #4]\n" - " 162: 9e05 ldr r6, [sp, #20]\n" - " 164: 9705 str r7, [sp, #20]\n" - " 166: 46b6 mov lr, r6\n" - " 168: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 13a: 4e04 ldr r6, [pc, #16] ; (0x14c)\n" + " 13c: 19f6 adds r6, r6, r7\n" + " 13e: 9f00 ldr r7, [sp, #0]\n" + " 140: 607e str r6, [r7, #4]\n" + " 142: 6897 ldr r7, [r2, #8]\n" + " 144: 9e05 ldr r6, [sp, #20]\n" + " 146: 9705 str r7, [sp, #20]\n" + " 148: 46b6 mov lr, r6\n" + " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 14c: fedd ffff mrc2 15, 6, pc, cr13, cr15, {7}\n" + " 150: 6817 ldr r7, [r2, #0]\n" + " 152: 9e05 ldr r6, [sp, #20]\n" + " 154: 9705 str r7, [sp, #20]\n" + " 156: 46b6 mov lr, r6\n" + " 158: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 15a: 46c0 nop ; (mov r8, r8)\n" + " 15c: 6857 ldr r7, [r2, #4]\n" + " 15e: 9e05 ldr r6, [sp, #20]\n" + " 160: 9705 str r7, [sp, #20]\n" + " 162: 46b6 mov lr, r6\n" + " 164: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, {_, RelevantBinary} = split_binary(Stream, 16#124), ?assertEqual(dump_to_bin(Dump), RelevantBinary). @@ -1459,35 +1461,33 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> " 128: 3f01 subs r7, #1\n" " 12a: 60b7 str r7, [r6, #8]\n" " 12c: d004 beq.n 0x138\n" - " 12e: e011 b.n 0x154\n" + " 12e: e00f b.n 0x150\n" " 130: 46c0 nop ; (mov r8, r8)\n" " 132: 46c0 nop ; (mov r8, r8)\n" " 134: 46c0 nop ; (mov r8, r8)\n" " 136: 46c0 nop ; (mov r8, r8)\n" " 138: a700 add r7, pc, #0 ; (adr r7, 0x13c)\n" - " 13a: 4e01 ldr r6, [pc, #4] ; (0x140)\n" - " 13c: e002 b.n 0x144\n" - " 13e: 0000 movs r0, r0\n" - " 140: fedd ffff stcl2 15, cr13, [sp, #-1020] ; 0xfffffc04\n" - " 144: 19f6 adds r6, r6, r7\n" - " 146: 9f00 ldr r7, [sp, #0]\n" - " 148: 607e str r6, [r7, #4]\n" - " 14a: 6897 ldr r7, [r2, #8]\n" - " 14c: 9e05 ldr r6, [sp, #20]\n" - " 14e: 9705 str r7, [sp, #20]\n" - " 150: 46b6 mov lr, r6\n" - " 152: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 154: 6817 ldr r7, [r2, #0]\n" - " 156: 9e05 ldr r6, [sp, #20]\n" - " 158: 9705 str r7, [sp, #20]\n" - " 15a: 46b6 mov lr, r6\n" - " 15c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 15e: 46c0 nop ; (mov r8, r8)\n" - " 160: 6857 ldr r7, [r2, #4]\n" - " 162: 9e05 ldr r6, [sp, #20]\n" - " 164: 9705 str r7, [sp, #20]\n" - " 166: 46b6 mov lr, r6\n" - " 168: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 13a: 4e04 ldr r6, [pc, #16] ; (0x14c)\n" + " 13c: 19f6 adds r6, r6, r7\n" + " 13e: 9f00 ldr r7, [sp, #0]\n" + " 140: 607e str r6, [r7, #4]\n" + " 142: 6897 ldr r7, [r2, #8]\n" + " 144: 9e05 ldr r6, [sp, #20]\n" + " 146: 9705 str r7, [sp, #20]\n" + " 148: 46b6 mov lr, r6\n" + " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 14c: fedd ffff mrc2 15, 6, pc, cr13, cr15, {7}\n" + " 150: 6817 ldr r7, [r2, #0]\n" + " 152: 9e05 ldr r6, [sp, #20]\n" + " 154: 9705 str r7, [sp, #20]\n" + " 156: 46b6 mov lr, r6\n" + " 158: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 15a: 46c0 nop ; (mov r8, r8)\n" + " 15c: 6857 ldr r7, [r2, #4]\n" + " 15e: 9e05 ldr r6, [sp, #20]\n" + " 160: 9705 str r7, [sp, #20]\n" + " 162: 46b6 mov lr, r6\n" + " 164: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, {_, RelevantBinary} = split_binary(Stream, 16#122), ?assertEqual(dump_to_bin(Dump), RelevantBinary). @@ -1517,32 +1517,31 @@ call_bif_with_large_literal_integer_test() -> " c: bc05 pop {r0, r2}\n" " e: 6bd6 ldr r6, [r2, #60] ; 0x3c\n" " 10: b4c5 push {r0, r2, r6, r7}\n" - " 12: 4901 ldr r1, [pc, #4] ; (0x18)\n" - " 14: e002 b.n 0x1c\n" - " 16: 0000 movs r0, r0\n" - " 18: e895 3b7f ldmia.w r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n" - " 1c: 47b0 blx r6\n" - " 1e: 4605 mov r5, r0\n" - " 20: bcc5 pop {r0, r2, r6, r7}\n" - " 22: b405 push {r0, r2}\n" - " 24: b082 sub sp, #8\n" - " 26: 9500 str r5, [sp, #0]\n" - " 28: 2100 movs r1, #0\n" - " 2a: 2201 movs r2, #1\n" - " 2c: 6983 ldr r3, [r0, #24]\n" - " 2e: 47b8 blx r7\n" - " 30: 4607 mov r7, r0\n" - " 32: b002 add sp, #8\n" - " 34: bc05 pop {r0, r2}\n" - " 36: 2f00 cmp r7, #0\n" - " 38: d105 bne.n 0x46\n" - " 3a: 6997 ldr r7, [r2, #24]\n" - " 3c: 223c movs r2, #60 ; 0x3c\n" - " 3e: 9e05 ldr r6, [sp, #20]\n" - " 40: 9705 str r7, [sp, #20]\n" - " 42: 46b6 mov lr, r6\n" - " 44: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 46: 6187 str r7, [r0, #24]" + " 12: 490b ldr r1, [pc, #44] ; (0x40)\n" + " 14: 47b0 blx r6\n" + " 16: 4605 mov r5, r0\n" + " 18: bcc5 pop {r0, r2, r6, r7}\n" + " 1a: b405 push {r0, r2}\n" + " 1c: b082 sub sp, #8\n" + " 1e: 9500 str r5, [sp, #0]\n" + " 20: 2100 movs r1, #0\n" + " 22: 2201 movs r2, #1\n" + " 24: 6983 ldr r3, [r0, #24]\n" + " 26: 47b8 blx r7\n" + " 28: 4607 mov r7, r0\n" + " 2a: b002 add sp, #8\n" + " 2c: bc05 pop {r0, r2}\n" + " 2e: 2f00 cmp r7, #0\n" + " 30: d108 bne.n 0x44\n" + " 32: 6997 ldr r7, [r2, #24]\n" + " 34: 2234 movs r2, #52 ; 0x34\n" + " 36: 9e05 ldr r6, [sp, #20]\n" + " 38: 9705 str r7, [sp, #20]\n" + " 3a: 46b6 mov lr, r6\n" + " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 3e: 0000 movs r0, r0\n" + " 40: e895 3b7f ldmia.w r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n" + " 44: 6187 str r7, [r0, #24]" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1879,48 +1878,46 @@ wait_timeout_test() -> Stream = ?BACKEND:stream(State10), Dump = << - " 0: a707 add r7, pc, #28 ; (adr r7, 0x22)\n" + " 0: a706 add r7, pc, #24 ; (adr r7, 0x1c)\n" " 2: 3701 adds r7, #1\n" " 4: 9e00 ldr r6, [sp, #0]\n" " 6: 6077 str r7, [r6, #4]\n" - " 8: 4f00 ldr r7, [pc, #0] ; (0xc)\n" - " a: e001 b.n 0x10\n" - " c: 1388 asrs r0, r1, #14\n" - " e: 0000 movs r0, r0\n" - " 10: 6f96 ldr r6, [r2, #120] ; 0x78\n" - " 14: 463a mov r2, r7\n" - " 16: 232a movs r3, #42 ; 0x2a\n" - " 18: 9f05 ldr r7, [sp, #20]\n" - " 1a: 9605 str r6, [sp, #20]\n" - " 1c: 46be mov lr, r7\n" - " 1e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 20: 46c0 nop ; (mov r8, r8)\n" - " 22: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 24: 6d57 ldr r7, [r2, #84] ; 0x54\n" - " 26: b405 push {r0, r2}\n" - " 28: 9902 ldr r1, [sp, #8]\n" - " 2a: 47b8 blx r7\n" - " 2c: 4607 mov r7, r0\n" - " 2e: bc05 pop {r0, r2}\n" - " 30: 4287 cmp r7, r0\n" - " 32: d001 beq.n 0x38\n" - " 34: 4638 mov r0, r7\n" - " 36: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 38: 2784 movs r7, #132 ; 0x84\n" - " 3a: 59d7 ldr r7, [r2, r7]\n" - " 3c: b405 push {r0, r2}\n" - " 3e: 2102 movs r1, #2\n" - " 40: 47b8 blx r7\n" - " 42: 4607 mov r7, r0\n" - " 44: bc05 pop {r0, r2}\n" - " 46: 2f00 cmp r7, #0\n" - " 48: d105 bne.n 0x56\n" - " 4a: 6fd7 ldr r7, [r2, #124] ; 0x7c\n" - " 4c: 222a movs r2, #42 ; 0x2a\n" - " 4e: 9e05 ldr r6, [sp, #20]\n" - " 50: 9705 str r7, [sp, #20]\n" - " 52: 46b6 mov lr, r6\n" - " 54: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 8: 4f03 ldr r7, [pc, #12] ; (0x18)\n" + " a: 6f96 ldr r6, [r2, #120] ; 0x78\n" + " c: 463a mov r2, r7\n" + " e: 232a movs r3, #42 ; 0x2a\n" + " 10: 9f05 ldr r7, [sp, #20]\n" + " 12: 9605 str r6, [sp, #20]\n" + " 14: 46be mov lr, r7\n" + " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 18: 1388 asrs r0, r1, #14\n" + " 1a: 0000 movs r0, r0\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 6d57 ldr r7, [r2, #84] ; 0x54\n" + " 20: b405 push {r0, r2}\n" + " 22: 9902 ldr r1, [sp, #8]\n" + " 24: 47b8 blx r7\n" + " 26: 4607 mov r7, r0\n" + " 28: bc05 pop {r0, r2}\n" + " 2a: 4287 cmp r7, r0\n" + " 2c: d001 beq.n 0x32\n" + " 2e: 4638 mov r0, r7\n" + " 30: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 32: 2784 movs r7, #132 ; 0x84\n" + " 34: 59d7 ldr r7, [r2, r7]\n" + " 36: b405 push {r0, r2}\n" + " 38: 2102 movs r1, #2\n" + " 3a: 47b8 blx r7\n" + " 3c: 4607 mov r7, r0\n" + " 3e: bc05 pop {r0, r2}\n" + " 40: 2f00 cmp r7, #0\n" + " 42: d105 bne.n 0x50\n" + " 44: 6fd7 ldr r7, [r2, #124] ; 0x7c\n" + " 46: 222a movs r2, #42 ; 0x2a\n" + " 48: 9e05 ldr r6, [sp, #20]\n" + " 4a: 9705 str r7, [sp, #20]\n" + " 4c: 46b6 mov lr, r6\n" + " 4e: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2224,55 +2221,55 @@ call_fun_test() -> " 24: 2403 movs r4, #3\n" " 26: 4025 ands r5, r4\n" " 28: 2d02 cmp r5, #2\n" - " 2a: d00c beq.n 0x46\n" + " 2a: d00b beq.n 0x44\n" " 2c: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" " 2e: b082 sub sp, #8\n" " 30: 9600 str r6, [sp, #0]\n" " 32: 9902 ldr r1, [sp, #8]\n" " 34: 222e movs r2, #46 ; 0x2e\n" - " 36: 4b01 ldr r3, [pc, #4] ; (0x3c)\n" - " 38: e002 b.n 0x40\n" - " 3a: 0000 movs r0, r0\n" - " 3c: 018b lsls r3, r1, #6\n" + " 36: 4b02 ldr r3, [pc, #8] ; (0x40)\n" + " 38: 47b8 blx r7\n" + " 3a: b002 add sp, #8\n" + " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 3e: 0000 movs r0, r0\n" - " 40: 47b8 blx r7\n" - " 42: b002 add sp, #8\n" - " 44: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 46: 2503 movs r5, #3\n" - " 48: 43ae bics r6, r5\n" - " 4a: 6836 ldr r6, [r6, #0]\n" - " 4c: 4635 mov r5, r6\n" - " 4e: 243f movs r4, #63 ; 0x3f\n" - " 50: 4025 ands r5, r4\n" - " 52: 2d14 cmp r5, #20\n" - " 54: d00b beq.n 0x6e\n" - " 56: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" - " 58: b082 sub sp, #8\n" - " 5a: 9600 str r6, [sp, #0]\n" - " 5c: 9902 ldr r1, [sp, #8]\n" - " 5e: 2258 movs r2, #88 ; 0x58\n" - " 60: 4b00 ldr r3, [pc, #0] ; (0x64)\n" - " 62: e001 b.n 0x68\n" - " 64: 018b lsls r3, r1, #6\n" + " 40: 018b lsls r3, r1, #6\n" + " 42: 0000 movs r0, r0\n" + " 44: 2503 movs r5, #3\n" + " 46: 43ae bics r6, r5\n" + " 48: 6836 ldr r6, [r6, #0]\n" + " 4a: 4635 mov r5, r6\n" + " 4c: 243f movs r4, #63 ; 0x3f\n" + " 4e: 4025 ands r5, r4\n" + " 50: 2d14 cmp r5, #20\n" + " 52: d00b beq.n 0x6c\n" + " 54: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" + " 56: b082 sub sp, #8\n" + " 58: 9600 str r6, [sp, #0]\n" + " 5a: 9902 ldr r1, [sp, #8]\n" + " 5c: 2256 movs r2, #86 ; 0x56\n" + " 5e: 4b02 ldr r3, [pc, #8] ; (0x68)\n" + " 60: 47b8 blx r7\n" + " 62: b002 add sp, #8\n" + " 64: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 66: 0000 movs r0, r0\n" - " 68: 47b8 blx r7\n" - " 6a: b002 add sp, #8\n" - " 6c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 6e: 9d00 ldr r5, [sp, #0]\n" - " 70: 682e ldr r6, [r5, #0]\n" - " 72: 6836 ldr r6, [r6, #0]\n" - " 74: 0636 lsls r6, r6, #24\n" - " 76: 4d05 ldr r5, [pc, #20] ; (0x8c)\n" - " 78: 432e orrs r6, r5\n" - " 7a: 65c6 str r6, [r0, #92] ; 0x5c\n" - " 7c: 2680 movs r6, #128 ; 0x80\n" - " 7e: 5996 ldr r6, [r2, r6]\n" - " 80: 463a mov r2, r7\n" - " 82: 2300 movs r3, #0\n" - " 84: 9f05 ldr r7, [sp, #20]\n" - " 86: 9605 str r6, [sp, #20]\n" - " 88: 46be mov lr, r7\n" - " 8a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 68: 018b lsls r3, r1, #6\n" + " 6a: 0000 movs r0, r0\n" + " 6c: 9d00 ldr r5, [sp, #0]\n" + " 6e: 682e ldr r6, [r5, #0]\n" + " 70: 6836 ldr r6, [r6, #0]\n" + " 72: 0636 lsls r6, r6, #24\n" + " 74: 4d05 ldr r5, [pc, #20] ; (0x8c)\n" + " 76: 432e orrs r6, r5\n" + " 78: 65c6 str r6, [r0, #92] ; 0x5c\n" + " 7a: 2680 movs r6, #128 ; 0x80\n" + " 7c: 5996 ldr r6, [r2, r6]\n" + " 7e: 463a mov r2, r7\n" + " 80: 2300 movs r3, #0\n" + " 82: 9f05 ldr r7, [sp, #20]\n" + " 84: 9605 str r6, [sp, #20]\n" + " 86: 46be mov lr, r7\n" + " 88: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 8a: 0000 movs r0, r0\n" " 8c: 0240 lsls r0, r0, #9\n" " 8e: 0000 movs r0, r0\n" " 90: b5f2 push {r1, r4, r5, r6, r7, lr}" @@ -2281,7 +2278,8 @@ call_fun_test() -> move_to_vm_register_test0(State, Source, Dest, Dump) -> State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(dump_to_bin(Dump), Stream). move_to_vm_register_test_() -> @@ -2294,88 +2292,101 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, 0}, << " 0: 2700 movs r7, #0\n" - " 2: 6187 str r7, [r0, #24]" + " 2: 6187 str r7, [r0, #24]\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, extra}, << " 0: 2700 movs r7, #0\n" - " 2: 6587 str r7, [r0, #88] ; 0x58" + " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {ptr, r6}, << " 0: 2700 movs r7, #0\n" - " 2: 6037 str r7, [r6, #0]" + " 2: 6037 str r7, [r6, #0]\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << " 0: 2600 movs r6, #0\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 60be str r6, [r7, #8]" + " 4: 60be str r6, [r7, #8]\n" + " 6: e07b b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << " 0: 2600 movs r6, #0\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 653e str r6, [r7, #80] ; 0x50" + " 4: 653e str r6, [r7, #80] ; 0x50\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << " 0: 272a movs r7, #42 ; 0x2a\n" - " 2: 6187 str r7, [r0, #24]" + " 2: 6187 str r7, [r0, #24]\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, extra}, << " 0: 272a movs r7, #42 ; 0x2a\n" - " 2: 6587 str r7, [r0, #88] ; 0x58" + " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 2}, << " 0: 262a movs r6, #42 ; 0x2a\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 60be str r6, [r7, #8]" + " 4: 60be str r6, [r7, #8]\n" + " 6: e07b b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << " 0: 262a movs r6, #42 ; 0x2a\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 653e str r6, [r7, #80] ; 0x50" + " 4: 653e str r6, [r7, #80] ; 0x50\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 99, {ptr, r3}, << " 0: 2763 movs r7, #99 ; 0x63\n" - " 2: 601f str r7, [r3, #0]" + " 2: 601f str r7, [r3, #0]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << " 0: 69c7 ldr r7, [r0, #28]\n" - " 2: 6207 str r7, [r0, #32]" + " 2: 6207 str r7, [r0, #32]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to ptr ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r1}, << " 0: 69c7 ldr r7, [r0, #28]\n" - " 2: 600f str r7, [r1, #0]" + " 2: 600f str r7, [r1, #0]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: ptr to x_reg ?_test(begin move_to_vm_register_test0(State0, {ptr, r4}, {x_reg, 3}, << " 0: 6827 ldr r7, [r4, #0]\n" - " 2: 6247 str r7, [r0, #36] ; 0x24" + " 2: 6247 str r7, [r0, #36] ; 0x24\n" + " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to y_reg @@ -2383,7 +2394,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 6946 ldr r6, [r0, #20]\n" - " 4: 6077 str r7, [r6, #4]" + " 4: 6077 str r7, [r6, #4]\n" + " 6: e07b b.n 0x100" >>) end), %% Test: y_reg to x_reg @@ -2391,7 +2403,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << " 0: 6946 ldr r6, [r0, #20]\n" " 2: 6837 ldr r7, [r6, #0]\n" - " 4: 6247 str r7, [r0, #36] ; 0x24" + " 4: 6247 str r7, [r0, #36] ; 0x24\n" + " 6: e07b b.n 0x100" >>) end), %% Test: y_reg to y_reg @@ -2399,41 +2412,47 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << " 0: 6946 ldr r6, [r0, #20]\n" " 2: 6877 ldr r7, [r6, #4]\n" - " 4: 6247 str r7, [r0, #36] ; 0x24" + " 4: 6247 str r7, [r0, #36] ; 0x24\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Native register to x_reg ?_test(begin move_to_vm_register_test0(State0, r5, {x_reg, 0}, << - " 0: 6185 str r5, [r0, #24]" + " 0: 6185 str r5, [r0, #24]\n" + " 2: e07d b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, r6, {x_reg, extra}, << - " 0: 6586 str r6, [r0, #88] ; 0x58" + " 0: 6586 str r6, [r0, #88] ; 0x58\n" + " 2: e07d b.n 0x100" >>) end), %% Test: Native register to ptr ?_test(begin move_to_vm_register_test0(State0, r4, {ptr, r3}, << - " 0: 601c str r4, [r3, #0]" + " 0: 601c str r4, [r3, #0]\n" + " 2: e07d b.n 0x100" >>) end), %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, r1, {y_reg, 0}, << " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 6039 str r1, [r7, #0]" + " 2: 6039 str r1, [r7, #0]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: Large immediate to x_reg (32-bit literal pool, aligned case) ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6187 str r7, [r0, #24]" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6187 str r7, [r0, #24]\n" + " 4: e07c b.n 0x100\n" + " 6: 0000 movs r0, r0\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), %% Test: Large immediate to x_reg (32-bit literal pool, unaligned case) @@ -2442,55 +2461,57 @@ move_to_vm_register_test_() -> State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}), %% Then do large immediate which should handle unaligned case State2 = ?BACKEND:move_to_vm_register(State1, 16#12345678, {x_reg, 0}), - Stream = ?BACKEND:stream(State2), + State3 = ?BACKEND:jump_to_offset(State2, 16#100), + Stream = ?BACKEND:stream(State3), Expected = dump_to_bin(<< " 0: 6019 str r1, [r3, #0]\n" " 2: 4f01 ldr r7, [pc, #4] ; (0x8)\n" - " 4: e002 b.n 0xc\n" - " 6: 0000 movs r0, r0\n" + " 4: 6187 str r7, [r0, #24]\n" + " 6: e07b b.n 0x100\n" " 8: 5678 ldrsb r0, [r7, r1]\n" - " a: 1234 asrs r4, r6, #8\n" - " c: 6187 str r7, [r0, #24]" + " a: 1234 asrs r4, r6, #8" >>), ?assertEqual(Expected, Stream) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6587 str r7, [r0, #88] ; 0x58" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 4: e07c b.n 0x100\n" + " 6: 0000 movs r0, r0\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6946 ldr r6, [r0, #20]\n" - " a: 60b7 str r7, [r6, #8]" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6946 ldr r6, [r0, #20]\n" + " 4: 60b7 str r7, [r6, #8]\n" + " 6: e07b b.n 0x100\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6946 ldr r6, [r0, #20]\n" - " a: 6537 str r7, [r6, #80] ; 0x50" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6946 ldr r6, [r0, #20]\n" + " 4: 6537 str r7, [r6, #80] ; 0x50\n" + " 6: e07b b.n 0x100\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), %% Test: Large immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {ptr, r3}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 601f str r7, [r3, #0]" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 601f str r7, [r3, #0]\n" + " 4: e07c b.n 0x100\n" + " 6: 0000 movs r0, r0\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), %% Test: x_reg to y_reg (high index) @@ -2498,7 +2519,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << " 0: 6d47 ldr r7, [r0, #84] ; 0x54\n" " 2: 6946 ldr r6, [r0, #20]\n" - " 4: 67f7 str r7, [r6, #124] ; 0x7c" + " 4: 67f7 str r7, [r6, #124] ; 0x7c\n" + " 6: e07b b.n 0x100" >>) end), %% Test: y_reg to x_reg (high index) @@ -2506,7 +2528,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << " 0: 6946 ldr r6, [r0, #20]\n" " 2: 6ff7 ldr r7, [r6, #124] ; 0x7c\n" - " 4: 6547 str r7, [r0, #84] ; 0x54" + " 4: 6547 str r7, [r0, #84] ; 0x54\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Large y_reg index (32) that exceeds str immediate offset limit @@ -2516,7 +2539,8 @@ move_to_vm_register_test_() -> " 2: 6947 ldr r7, [r0, #20]\n" " 4: 2580 movs r5, #128 ; 0x80\n" " 6: 443d add r5, r7\n" - " 8: 602e str r6, [r5, #0]" + " 8: 602e str r6, [r5, #0]\n" + " a: e079 b.n 0x100" >>) end), %% Test: Negative immediate to x_reg @@ -2524,7 +2548,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, -1, {x_reg, 0}, << " 0: 2701 movs r7, #1\n" " 2: 427f negs r7, r7\n" - " 4: 6187 str r7, [r0, #24]" + " 4: 6187 str r7, [r0, #24]\n" + " 6: e07b b.n 0x100" >>) end) ] @@ -2787,11 +2812,12 @@ move_to_native_register_test_() -> %% move_to_native_register/2: -256 (boundary case, should use literal pool) ?_test(begin {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(r7, Reg), Dump = << " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" + " 2: e07d b.n 0x100\n" " 4: ff00 ffff vmaxnm.f32 , q8, " >>, ?assertEqual(dump_to_bin(Dump), Stream) @@ -2895,7 +2921,9 @@ move_to_native_register_test_() -> add_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:add(State0, Reg, Imm), - Stream = ?BACKEND:stream(State1), + % Force emission of literal pool + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(dump_to_bin(Dump), Stream). add_test_() -> @@ -2907,21 +2935,24 @@ add_test_() -> [ ?_test(begin add_test0(State0, r2, 2, << - " 0: 3202 adds r2, #2" + " 0: 3202 adds r2, #2\n" + " 2: e07d b.n 0x100" >>) end), ?_test(begin add_test0(State0, r2, 256, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 0100 lsls r0, r0, #4\n" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 19d2 adds r2, r2, r7\n" + " 4: e07c b.n 0x100\n" " 6: 0000 movs r0, r0\n" - " 8: 19d2 adds r2, r2, r7" + " 8: 0100 lsls r0, r0, #4\n" + " a: 0000 movs r0, r0" >>) end), ?_test(begin add_test0(State0, r2, r3, << - " 0: 18d2 adds r2, r2, r3" + " 0: 18d2 adds r2, r2, r3\n" + " 2: e07d b.n 0x100" >>) end) ] @@ -2929,7 +2960,9 @@ add_test_() -> sub_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:sub(State0, Reg, Imm), - Stream = ?BACKEND:stream(State1), + % Force emission of literal pool + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(dump_to_bin(Dump), Stream). sub_test_() -> @@ -2941,21 +2974,24 @@ sub_test_() -> [ ?_test(begin sub_test0(State0, r2, 2, << - " 0: 3a02 subs r2, #2" + " 0: 3a02 subs r2, #2\n" + " 2: e07d b.n 0x100" >>) end), ?_test(begin sub_test0(State0, r2, 256, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 0100 lsls r0, r0, #4\n" + " 0: 4f01 ldr r7, [pc, #4] @ (0xc)\n" + " 2: 1bd2 subs r2, r2, r7\n" + " 4: e07c b.n 0x100\n" " 6: 0000 movs r0, r0\n" - " 8: 1bd2 subs r2, r2, r7" + " 8: 0100 lsls r0, r0, #4\n" + " a: 0000 movs r0, r0" >>) end), ?_test(begin sub_test0(State0, r2, r3, << - " 0: 1ad2 subs r2, r2, r3" + " 0: 1ad2 subs r2, r2, r3\n" + " 2: e07d b.n 0x110" >>) end) ] @@ -3256,23 +3292,36 @@ alloc_boxed_integer_fragment_large_test() -> {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ ctx, {avm_int64_t, 16#123456789ABCDEF0} ]), + % Add a call primitive last to emit literal pool + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg} + ]), ?assertEqual(r7, ResultReg), - Stream = ?BACKEND:stream(State1), + Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6bd7 ldr r7, [r2, #60] ; 0x3c\n" + " 0: 6bd7 ldr r7, [r2, #60] @ 0x3c\n" " 2: b405 push {r0, r2}\n" - " 4: 4a00 ldr r2, [pc, #0] ; (0x8)\n" - " 6: e001 b.n 0xc\n" - " 8: def0 udf #240 ; 0xf0\n" - " a: 9abc ldr r2, [sp, #752] ; 0x2f0\n" - " c: 4b00 ldr r3, [pc, #0] ; (0x10)\n" - " e: e001 b.n 0x14\n" - " 10: 5678 ldrsb r0, [r7, r1]\n" - " 12: 1234 asrs r4, r6, #8\n" - " 14: 47b8 blx r7\n" - " 16: 4607 mov r7, r0\n" - " 18: bc05 pop {r0, r2}" + " 4: 4a06 ldr r2, [pc, #24] @ (0x20)\n" + " 6: 4b07 ldr r3, [pc, #28] @ (0x24)\n" + " 8: 47b8 blx r7\n" + " a: 4607 mov r7, r0\n" + " c: bc05 pop {r0, r2}\n" + " e: 6cd6 ldr r6, [r2, #76] @ 0x4c\n" + " 10: b082 sub sp, #8\n" + " 12: 9700 str r7, [sp, #0]\n" + " 14: 9902 ldr r1, [sp, #8]\n" + " 16: 2210 movs r2, #16\n" + " 18: 4b03 ldr r3, [pc, #12] @ (0x28)\n" + " 1a: 47b0 blx r6\n" + " 1c: b002 add sp, #8\n" + " 1e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 20: def0 udf #240 @ 0xf0\n" + " 22: 9abc ldr r2, [sp, #752] @ 0x2f0\n" + " 24: 5678 ldrsb r0, [r7, r1]\n" + " 26: 1234 asrs r4, r6, #8\n" + " 28: 028b lsls r3, r1, #10\n" + " 2a: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl index 3683287938..c7d88480d1 100644 --- a/tests/libs/jit/jit_tests.erl +++ b/tests/libs/jit/jit_tests.erl @@ -290,40 +290,29 @@ tail_call_cache_armv6m_test() -> ), % Check that we have the following pattern: - % 8c: 278c movs r7, #140 @ 0x8c - % 8e: 6816 ldr r6, [r2, #0] - % 90: 463a mov r2, r7 - % 92: 4b01 ldr r3, [pc, #4] @ (0x98) - % 94: e002 b.n 0x9c - % 96: 0000 movs r0, r0 - % 98: 01cb lsls r3, r1, #7 - % 9a: 0000 movs r0, r0 - % 9c: 9f05 ldr r7, [sp, #20] - % 9e: 9605 str r6, [sp, #20] - % a0: 46be mov lr, r7 + % 8c: 278c movs r7, #140 @ 0x8c + % 8e: 6816 ldr r6, [r2, #0] + % 90: 463a mov r2, r7 + % 92: 4b02 ldr r3, [pc, #8] @ (0x9c) + % 94: 9f05 ldr r7, [sp, #20] + % 96: 9605 str r6, [sp, #20] + % 98: 46be mov lr, r7 % Check for the first return implementation (call_primitive_last for PRIM_RETURN) - % Pattern: movs r7, #140 / ldr r6, [r2, #0] / mov r2, r7 - % 278c 6816 463a ?assertMatch( {_, _}, - binary:match(CompiledCode, <<16#278c:16/little, 16#6816:16/little, 16#463a:16/little>>) + binary:match( + CompiledCode, + <<16#278c:16/little, 16#6816:16/little, 16#463a:16/little, 16#4b02:16/little, + 16#9f05:16/little, 16#9605:16/little, 16#46be:16/little>> + ) ), - % 3f0: 4f00 ldr r7, [pc, #0] @ (0x3f4) - % 3f2: e001 b.n 0x3f8 - % 3f4: 03f0 lsls r0, r6, #15 - % 3f6: 0000 movs r0, r0 - % 3f8: e648 b.n 0x8c - % Check for tail-call cache jump: ldr r7, [pc, #0] followed by b.n (backward branch) - % Pattern: 4f00 e6f5 (ldr r7, [pc, #0] / b.n 0x8c) + % 29c: 4f00 ldr r7, [pc, #0] @ (0x2a0) + % 29e: e6f5 b.n 0x8c ?assertMatch( {_, _}, - binary:match( - CompiledCode, - <<16#4f00:16/little, 16#e001:16/little, 16#03f0:16/little, 0:16/little, - 16#e648:16/little>> - ) + binary:match(CompiledCode, <<16#4f00:16/little, 16#e6f5:16/little>>) ), ok. From f1ea3290f48ff8e2b15dced8392f874fd26399a0 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 4 Oct 2025 22:42:54 +0200 Subject: [PATCH 82/97] riscv32: initial commit of asm module Signed-off-by: Paul Guyot --- libs/jit/src/CMakeLists.txt | 1 + libs/jit/src/jit_riscv32_asm.erl | 1000 ++++++++++++++++++++++ tests/libs/jit/CMakeLists.txt | 1 + tests/libs/jit/jit_riscv32_asm_tests.erl | 553 ++++++++++++ tests/libs/jit/jit_tests_common.erl | 7 +- tests/libs/jit/tests.erl | 1 + 6 files changed, 1562 insertions(+), 1 deletion(-) create mode 100644 libs/jit/src/jit_riscv32_asm.erl create mode 100644 tests/libs/jit/jit_riscv32_asm_tests.erl diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 7aad016575..586223b4bc 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -31,6 +31,7 @@ set(ERLANG_MODULES jit_aarch64_asm jit_armv6m jit_armv6m_asm + jit_riscv32_asm jit_x86_64 jit_x86_64_asm ) diff --git a/libs/jit/src/jit_riscv32_asm.erl b/libs/jit/src/jit_riscv32_asm.erl new file mode 100644 index 0000000000..64d42c10ad --- /dev/null +++ b/libs/jit/src/jit_riscv32_asm.erl @@ -0,0 +1,1000 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_asm). + +-export([ + % R-type arithmetic and logical instructions + add/3, + sub/3, + and_/3, + or_/2, + or_/3, + xor_/3, + sll/3, + srl/3, + sra/3, + slt/3, + sltu/3, + % I-type immediate instructions + addi/3, + andi/3, + ori/3, + xori/3, + slli/3, + srli/3, + srai/3, + slti/3, + sltiu/3, + % Load instructions + lw/2, + lw/3, + lh/2, + lh/3, + lhu/2, + lhu/3, + lb/2, + lb/3, + lbu/2, + lbu/3, + % Store instructions + sw/2, + sw/3, + sh/2, + sh/3, + sb/2, + sb/3, + % Branch instructions + beq/3, + bne/3, + blt/3, + bge/3, + bltu/3, + bgeu/3, + % Jump instructions + jal/2, + jalr/3, + jalr/2, + % Upper immediate instructions + lui/2, + auipc/2, + % Pseudo-instructions + nop/0, + li/2, + mv/2, + not_/2, + neg/2, + j/1, + jr/1, + ret/0, + call/2, + % M extension (multiply/divide) + mul/3, + % System instructions + bkpt/1, + ebreak/0 +]). + +-export_type([ + riscv_register/0 +]). + +%% RISC-V 32-bit (RV32I) Assembler +%% +%% This module provides an assembler for the RISC-V 32-bit instruction set. +%% It generates binary machine code for RISC-V instructions following the +%% RV32I base integer instruction set architecture. +%% +%% RISC-V Register Set (32 registers): +%% x0 (zero) - Hardwired zero (reads as 0, writes ignored) +%% x1 (ra) - Return address +%% x2 (sp) - Stack pointer +%% x3 (gp) - Global pointer +%% x4 (tp) - Thread pointer +%% x5 (t0) - Temporary register 0 +%% x6 (t1) - Temporary register 1 +%% x7 (t2) - Temporary register 2 +%% x8 (s0/fp)- Saved register 0 / Frame pointer +%% x9 (s1) - Saved register 1 +%% x10 (a0) - Function argument 0 / Return value 0 +%% x11 (a1) - Function argument 1 / Return value 1 +%% x12 (a2) - Function argument 2 +%% x13 (a3) - Function argument 3 +%% x14 (a4) - Function argument 4 +%% x15 (a5) - Function argument 5 +%% x16 (a6) - Function argument 6 +%% x17 (a7) - Function argument 7 +%% x18 (s2) - Saved register 2 +%% x19 (s3) - Saved register 3 +%% x20 (s4) - Saved register 4 +%% x21 (s5) - Saved register 5 +%% x22 (s6) - Saved register 6 +%% x23 (s7) - Saved register 7 +%% x24 (s8) - Saved register 8 +%% x25 (s9) - Saved register 9 +%% x26 (s10) - Saved register 10 +%% x27 (s11) - Saved register 11 +%% x28 (t3) - Temporary register 3 +%% x29 (t4) - Temporary register 4 +%% x30 (t5) - Temporary register 5 +%% x31 (t6) - Temporary register 6 +%% +%% RISC-V Calling Convention (ILP32): +%% - Arguments: a0-a7 (x10-x17) +%% - Return values: a0-a1 (x10-x11) +%% - Caller-saved: t0-t6, a0-a7 +%% - Callee-saved: s0-s11, sp, ra +%% - Stack grows downward +%% - Stack must be 16-byte aligned at function call boundaries +%% +%% Instruction Encoding: +%% All RV32I instructions are 32 bits (4 bytes). +%% Bit ordering is little-endian within each 32-bit word. +%% +%% See: RISC-V Instruction Set Manual, Volume I: User-Level ISA +%% https://riscv.org/technical/specifications/ +%% https://github.com/riscv/riscv-isa-manual/ + +-type riscv_register() :: + zero + | ra + | sp + | gp + | tp + | t0 + | t1 + | t2 + | s0 + | fp + | s1 + | a0 + | a1 + | a2 + | a3 + | a4 + | a5 + | a6 + | a7 + | s2 + | s3 + | s4 + | s5 + | s6 + | s7 + | s8 + | s9 + | s10 + | s11 + | t3 + | t4 + | t5 + | t6. + +%%----------------------------------------------------------------------------- +%% Helper functions +%%----------------------------------------------------------------------------- + +%% Convert register atoms to register numbers (0-31) +-spec reg_to_num(riscv_register()) -> 0..31. +% ABI names +reg_to_num(zero) -> 0; +reg_to_num(ra) -> 1; +reg_to_num(sp) -> 2; +reg_to_num(gp) -> 3; +reg_to_num(tp) -> 4; +reg_to_num(t0) -> 5; +reg_to_num(t1) -> 6; +reg_to_num(t2) -> 7; +reg_to_num(s0) -> 8; +reg_to_num(fp) -> 8; +reg_to_num(s1) -> 9; +reg_to_num(a0) -> 10; +reg_to_num(a1) -> 11; +reg_to_num(a2) -> 12; +reg_to_num(a3) -> 13; +reg_to_num(a4) -> 14; +reg_to_num(a5) -> 15; +reg_to_num(a6) -> 16; +reg_to_num(a7) -> 17; +reg_to_num(s2) -> 18; +reg_to_num(s3) -> 19; +reg_to_num(s4) -> 20; +reg_to_num(s5) -> 21; +reg_to_num(s6) -> 22; +reg_to_num(s7) -> 23; +reg_to_num(s8) -> 24; +reg_to_num(s9) -> 25; +reg_to_num(s10) -> 26; +reg_to_num(s11) -> 27; +reg_to_num(t3) -> 28; +reg_to_num(t4) -> 29; +reg_to_num(t5) -> 30; +reg_to_num(t6) -> 31. + +%%----------------------------------------------------------------------------- +%% R-type instruction encoding +%%----------------------------------------------------------------------------- + +%% R-type instruction format: +%% funct7 (7) | rs2 (5) | rs1 (5) | funct3 (3) | rd (5) | opcode (7) +%% Bits: 31-25 24-20 19-15 14-12 11-7 6-0 + +-spec encode_r_type( + Opcode :: 0..127, + Rd :: riscv_register(), + Funct3 :: 0..7, + Rs1 :: riscv_register(), + Rs2 :: riscv_register(), + Funct7 :: 0..127 +) -> binary(). +encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7) -> + RdNum = reg_to_num(Rd), + Rs1Num = reg_to_num(Rs1), + Rs2Num = reg_to_num(Rs2), + Instr = + (Funct7 bsl 25) bor + (Rs2Num bsl 20) bor + (Rs1Num bsl 15) bor + (Funct3 bsl 12) bor + (RdNum bsl 7) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% R-type arithmetic and logical instructions +%%----------------------------------------------------------------------------- + +%% ADD - Add +%% rd = rs1 + rs2 +-spec add(riscv_register(), riscv_register(), riscv_register()) -> binary(). +add(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000000 + encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#00). + +%% SUB - Subtract +%% rd = rs1 - rs2 +-spec sub(riscv_register(), riscv_register(), riscv_register()) -> binary(). +sub(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0100000 + encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20). + +%% AND - Bitwise AND +%% rd = rs1 & rs2 +-spec and_(riscv_register(), riscv_register(), riscv_register()) -> binary(). +and_(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 111, Funct7: 0000000 + encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00). + +%% OR - Bitwise OR +%% rd = rs1 | rs2 +-spec or_(riscv_register(), riscv_register(), riscv_register()) -> binary(). +or_(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 110, Funct7: 0000000 + encode_r_type(16#33, Rd, 16#6, Rs1, Rs2, 16#00). + +%% OR - Bitwise OR (in-place) +%% rd = rd | rs +-spec or_(riscv_register(), riscv_register()) -> binary(). +or_(Rd, Rs) -> + or_(Rd, Rd, Rs). + +%% XOR - Bitwise XOR +%% rd = rs1 ^ rs2 +-spec xor_(riscv_register(), riscv_register(), riscv_register()) -> binary(). +xor_(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 100, Funct7: 0000000 + encode_r_type(16#33, Rd, 16#4, Rs1, Rs2, 16#00). + +%% SLL - Shift Left Logical +%% rd = rs1 << rs2[4:0] +-spec sll(riscv_register(), riscv_register(), riscv_register()) -> binary(). +sll(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 001, Funct7: 0000000 + encode_r_type(16#33, Rd, 16#1, Rs1, Rs2, 16#00). + +%% SRL - Shift Right Logical +%% rd = rs1 >> rs2[4:0] (zero-extend) +-spec srl(riscv_register(), riscv_register(), riscv_register()) -> binary(). +srl(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 101, Funct7: 0000000 + encode_r_type(16#33, Rd, 16#5, Rs1, Rs2, 16#00). + +%% SRA - Shift Right Arithmetic +%% rd = rs1 >> rs2[4:0] (sign-extend) +-spec sra(riscv_register(), riscv_register(), riscv_register()) -> binary(). +sra(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 101, Funct7: 0100000 + encode_r_type(16#33, Rd, 16#5, Rs1, Rs2, 16#20). + +%% SLT - Set Less Than +%% rd = (rs1 < rs2) ? 1 : 0 (signed) +-spec slt(riscv_register(), riscv_register(), riscv_register()) -> binary(). +slt(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 010, Funct7: 0000000 + encode_r_type(16#33, Rd, 16#2, Rs1, Rs2, 16#00). + +%% SLTU - Set Less Than Unsigned +%% rd = (rs1 < rs2) ? 1 : 0 (unsigned) +-spec sltu(riscv_register(), riscv_register(), riscv_register()) -> binary(). +sltu(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 011, Funct7: 0000000 + encode_r_type(16#33, Rd, 16#3, Rs1, Rs2, 16#00). + +%%----------------------------------------------------------------------------- +%% I-type instruction encoding +%%----------------------------------------------------------------------------- + +%% I-type instruction format: +%% imm[11:0] (12) | rs1 (5) | funct3 (3) | rd (5) | opcode (7) +%% Bits: 31-20 19-15 14-12 11-7 6-0 + +-spec encode_i_type( + Opcode :: 0..127, + Rd :: riscv_register(), + Funct3 :: 0..7, + Rs1 :: riscv_register(), + Imm :: integer() +) -> binary(). +encode_i_type(Opcode, Rd, Funct3, Rs1, Imm) -> + RdNum = reg_to_num(Rd), + Rs1Num = reg_to_num(Rs1), + % Sign-extend and mask to 12 bits + ImmMasked = Imm band 16#FFF, + Instr = + (ImmMasked bsl 20) bor + (Rs1Num bsl 15) bor + (Funct3 bsl 12) bor + (RdNum bsl 7) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% I-type immediate arithmetic and logical instructions +%%----------------------------------------------------------------------------- + +%% ADDI - Add Immediate +%% rd = rs1 + imm +-spec addi(riscv_register(), riscv_register(), integer()) -> binary(). +addi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 -> + % Opcode: 0010011 (0x13), Funct3: 000 + encode_i_type(16#13, Rd, 16#0, Rs1, Imm); +addi(_Rd, _Rs1, Imm) -> + error({immediate_out_of_range, Imm, -2048, 2047}). + +%% ANDI - AND Immediate +%% rd = rs1 & imm +-spec andi(riscv_register(), riscv_register(), integer()) -> binary(). +andi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 -> + % Opcode: 0010011 (0x13), Funct3: 111 + encode_i_type(16#13, Rd, 16#7, Rs1, Imm); +andi(_Rd, _Rs1, Imm) -> + error({immediate_out_of_range, Imm, -2048, 2047}). + +%% ORI - OR Immediate +%% rd = rs1 | imm +-spec ori(riscv_register(), riscv_register(), integer()) -> binary(). +ori(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 -> + % Opcode: 0010011 (0x13), Funct3: 110 + encode_i_type(16#13, Rd, 16#6, Rs1, Imm); +ori(_Rd, _Rs1, Imm) -> + error({immediate_out_of_range, Imm, -2048, 2047}). + +%% XORI - XOR Immediate +%% rd = rs1 ^ imm +-spec xori(riscv_register(), riscv_register(), integer()) -> binary(). +xori(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 -> + % Opcode: 0010011 (0x13), Funct3: 100 + encode_i_type(16#13, Rd, 16#4, Rs1, Imm); +xori(_Rd, _Rs1, Imm) -> + error({immediate_out_of_range, Imm, -2048, 2047}). + +%% SLTI - Set Less Than Immediate +%% rd = (rs1 < imm) ? 1 : 0 (signed) +-spec slti(riscv_register(), riscv_register(), integer()) -> binary(). +slti(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 -> + % Opcode: 0010011 (0x13), Funct3: 010 + encode_i_type(16#13, Rd, 16#2, Rs1, Imm); +slti(_Rd, _Rs1, Imm) -> + error({immediate_out_of_range, Imm, -2048, 2047}). + +%% SLTIU - Set Less Than Immediate Unsigned +%% rd = (rs1 < imm) ? 1 : 0 (unsigned) +-spec sltiu(riscv_register(), riscv_register(), integer()) -> binary(). +sltiu(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 -> + % Opcode: 0010011 (0x13), Funct3: 011 + encode_i_type(16#13, Rd, 16#3, Rs1, Imm); +sltiu(_Rd, _Rs1, Imm) -> + error({immediate_out_of_range, Imm, -2048, 2047}). + +%%----------------------------------------------------------------------------- +%% I-type immediate shift instructions +%%----------------------------------------------------------------------------- + +%% SLLI - Shift Left Logical Immediate +%% rd = rs1 << shamt +-spec slli(riscv_register(), riscv_register(), 0..31) -> binary(). +slli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 -> + % Opcode: 0010011 (0x13), Funct3: 001, Imm[11:5] = 0000000 + encode_i_type(16#13, Rd, 16#1, Rs1, Shamt); +slli(_Rd, _Rs1, Shamt) -> + error({shift_amount_out_of_range, Shamt, 0, 31}). + +%% SRLI - Shift Right Logical Immediate +%% rd = rs1 >> shamt (zero-extend) +-spec srli(riscv_register(), riscv_register(), 0..31) -> binary(). +srli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 -> + % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0000000 + encode_i_type(16#13, Rd, 16#5, Rs1, Shamt); +srli(_Rd, _Rs1, Shamt) -> + error({shift_amount_out_of_range, Shamt, 0, 31}). + +%% SRAI - Shift Right Arithmetic Immediate +%% rd = rs1 >> shamt (sign-extend) +-spec srai(riscv_register(), riscv_register(), 0..31) -> binary(). +srai(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 -> + % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0100000 + % The encoding uses bit 30 (Imm[10]) to distinguish SRAI from SRLI + ImmWithBit30 = Shamt bor (1 bsl 10), + encode_i_type(16#13, Rd, 16#5, Rs1, ImmWithBit30); +srai(_Rd, _Rs1, Shamt) -> + error({shift_amount_out_of_range, Shamt, 0, 31}). + +%%----------------------------------------------------------------------------- +%% Load instructions (I-type) +%%----------------------------------------------------------------------------- + +%% LW - Load Word +%% rd = mem[rs1 + offset] (32-bit) +-spec lw({riscv_register(), integer()} | riscv_register(), riscv_register() | integer()) -> + binary(). +lw(Rd, {Rs1, Offset}) -> + lw(Rd, Rs1, Offset); +lw(Rd, Rs1) when is_atom(Rs1) -> + lw(Rd, Rs1, 0). + +-spec lw(riscv_register(), riscv_register(), integer()) -> binary(). +lw(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 -> + % Opcode: 0000011 (0x03), Funct3: 010 + encode_i_type(16#03, Rd, 16#2, Rs1, Offset); +lw(_Rd, _Rs1, Offset) -> + error({offset_out_of_range, Offset, -2048, 2047}). + +%% LH - Load Halfword (sign-extended) +%% rd = sign_extend(mem[rs1 + offset][15:0]) +-spec lh({riscv_register(), integer()} | riscv_register(), riscv_register() | integer()) -> + binary(). +lh(Rd, {Rs1, Offset}) -> + lh(Rd, Rs1, Offset); +lh(Rd, Rs1) when is_atom(Rs1) -> + lh(Rd, Rs1, 0). + +-spec lh(riscv_register(), riscv_register(), integer()) -> binary(). +lh(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 -> + % Opcode: 0000011 (0x03), Funct3: 001 + encode_i_type(16#03, Rd, 16#1, Rs1, Offset); +lh(_Rd, _Rs1, Offset) -> + error({offset_out_of_range, Offset, -2048, 2047}). + +%% LHU - Load Halfword Unsigned (zero-extended) +%% rd = zero_extend(mem[rs1 + offset][15:0]) +-spec lhu({riscv_register(), integer()} | riscv_register(), riscv_register() | integer()) -> + binary(). +lhu(Rd, {Rs1, Offset}) -> + lhu(Rd, Rs1, Offset); +lhu(Rd, Rs1) when is_atom(Rs1) -> + lhu(Rd, Rs1, 0). + +-spec lhu(riscv_register(), riscv_register(), integer()) -> binary(). +lhu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 -> + % Opcode: 0000011 (0x03), Funct3: 101 + encode_i_type(16#03, Rd, 16#5, Rs1, Offset); +lhu(_Rd, _Rs1, Offset) -> + error({offset_out_of_range, Offset, -2048, 2047}). + +%% LB - Load Byte (sign-extended) +%% rd = sign_extend(mem[rs1 + offset][7:0]) +-spec lb({riscv_register(), integer()} | riscv_register(), riscv_register() | integer()) -> + binary(). +lb(Rd, {Rs1, Offset}) -> + lb(Rd, Rs1, Offset); +lb(Rd, Rs1) when is_atom(Rs1) -> + lb(Rd, Rs1, 0). + +-spec lb(riscv_register(), riscv_register(), integer()) -> binary(). +lb(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 -> + % Opcode: 0000011 (0x03), Funct3: 000 + encode_i_type(16#03, Rd, 16#0, Rs1, Offset); +lb(_Rd, _Rs1, Offset) -> + error({offset_out_of_range, Offset, -2048, 2047}). + +%% LBU - Load Byte Unsigned (zero-extended) +%% rd = zero_extend(mem[rs1 + offset][7:0]) +-spec lbu({riscv_register(), integer()} | riscv_register(), riscv_register() | integer()) -> + binary(). +lbu(Rd, {Rs1, Offset}) -> + lbu(Rd, Rs1, Offset); +lbu(Rd, Rs1) when is_atom(Rs1) -> + lbu(Rd, Rs1, 0). + +-spec lbu(riscv_register(), riscv_register(), integer()) -> binary(). +lbu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 -> + % Opcode: 0000011 (0x03), Funct3: 100 + encode_i_type(16#03, Rd, 16#4, Rs1, Offset); +lbu(_Rd, _Rs1, Offset) -> + error({offset_out_of_range, Offset, -2048, 2047}). + +%%----------------------------------------------------------------------------- +%% S-type instruction encoding (for stores) +%%----------------------------------------------------------------------------- + +%% S-type instruction format: +%% imm[11:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:0] (5) | opcode (7) +%% Bits: 31-25 24-20 19-15 14-12 11-7 6-0 + +-spec encode_s_type( + Opcode :: 0..127, + Funct3 :: 0..7, + Rs1 :: riscv_register(), + Rs2 :: riscv_register(), + Imm :: integer() +) -> binary(). +encode_s_type(Opcode, Funct3, Rs1, Rs2, Imm) -> + Rs1Num = reg_to_num(Rs1), + Rs2Num = reg_to_num(Rs2), + % Split immediate: imm[11:5] goes to bits 31-25, imm[4:0] goes to bits 11-7 + ImmMasked = Imm band 16#FFF, + Imm11_5 = (ImmMasked bsr 5) band 16#7F, + Imm4_0 = ImmMasked band 16#1F, + Instr = + (Imm11_5 bsl 25) bor + (Rs2Num bsl 20) bor + (Rs1Num bsl 15) bor + (Funct3 bsl 12) bor + (Imm4_0 bsl 7) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% Store instructions (S-type) +%%----------------------------------------------------------------------------- + +%% SW - Store Word +%% mem[rs1 + offset] = rs2[31:0] +-spec sw({riscv_register(), integer()} | riscv_register(), riscv_register() | integer()) -> + binary(). +sw(Rs2, {Rs1, Offset}) -> + sw(Rs1, Rs2, Offset); +sw(Rs2, Rs1) when is_atom(Rs1) -> + sw(Rs1, Rs2, 0). + +-spec sw(riscv_register(), riscv_register(), integer()) -> binary(). +sw(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 -> + % Opcode: 0100011 (0x23), Funct3: 010 + encode_s_type(16#23, 16#2, Rs1, Rs2, Offset); +sw(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -2048, 2047}). + +%% SH - Store Halfword +%% mem[rs1 + offset][15:0] = rs2[15:0] +-spec sh({riscv_register(), integer()} | riscv_register(), riscv_register() | integer()) -> + binary(). +sh(Rs2, {Rs1, Offset}) -> + sh(Rs1, Rs2, Offset); +sh(Rs2, Rs1) when is_atom(Rs1) -> + sh(Rs1, Rs2, 0). + +-spec sh(riscv_register(), riscv_register(), integer()) -> binary(). +sh(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 -> + % Opcode: 0100011 (0x23), Funct3: 001 + encode_s_type(16#23, 16#1, Rs1, Rs2, Offset); +sh(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -2048, 2047}). + +%% SB - Store Byte +%% mem[rs1 + offset][7:0] = rs2[7:0] +-spec sb({riscv_register(), integer()} | riscv_register(), riscv_register() | integer()) -> + binary(). +sb(Rs2, {Rs1, Offset}) -> + sb(Rs1, Rs2, Offset); +sb(Rs2, Rs1) when is_atom(Rs1) -> + sb(Rs1, Rs2, 0). + +-spec sb(riscv_register(), riscv_register(), integer()) -> binary(). +sb(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 -> + % Opcode: 0100011 (0x23), Funct3: 000 + encode_s_type(16#23, 16#0, Rs1, Rs2, Offset); +sb(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -2048, 2047}). + +%%----------------------------------------------------------------------------- +%% B-type instruction encoding (for branches) +%%----------------------------------------------------------------------------- + +%% B-type instruction format: +%% imm[12|10:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:1|11] (5) | opcode (7) +%% Bits: 31-25 24-20 19-15 14-12 11-7 6-0 +%% +%% The immediate is split across the instruction and represents a signed offset +%% in multiples of 2 bytes (must be 2-byte aligned). +%% Range: ±4 KiB (±4096 bytes) + +-spec encode_b_type( + Opcode :: 0..127, + Funct3 :: 0..7, + Rs1 :: riscv_register(), + Rs2 :: riscv_register(), + Offset :: integer() +) -> binary(). +encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset) -> + Rs1Num = reg_to_num(Rs1), + Rs2Num = reg_to_num(Rs2), + % Offset must be 2-byte aligned and in range [-4096, 4094] + % Extract bits: imm[12], imm[10:5], imm[4:1], imm[11] + OffsetMasked = Offset band 16#1FFF, + % imm[12] -> bit 31 + Imm12 = (OffsetMasked bsr 12) band 1, + % imm[10:5] -> bits 30-25 + Imm10_5 = (OffsetMasked bsr 5) band 16#3F, + % imm[4:1] -> bits 11-8 + Imm4_1 = (OffsetMasked bsr 1) band 16#F, + % imm[11] -> bit 7 + Imm11 = (OffsetMasked bsr 11) band 1, + Instr = + (Imm12 bsl 31) bor + (Imm10_5 bsl 25) bor + (Rs2Num bsl 20) bor + (Rs1Num bsl 15) bor + (Funct3 bsl 12) bor + (Imm4_1 bsl 8) bor + (Imm11 bsl 7) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% Branch instructions (B-type) +%%----------------------------------------------------------------------------- + +%% BEQ - Branch if Equal +%% if (rs1 == rs2) pc += offset +-spec beq(riscv_register(), riscv_register(), integer()) -> binary(). +beq(Rs1, Rs2, Offset) when + Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 +-> + % Opcode: 1100011 (0x63), Funct3: 000 + encode_b_type(16#63, 16#0, Rs1, Rs2, Offset); +beq(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +beq(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -4096, 4094}). + +%% BNE - Branch if Not Equal +%% if (rs1 != rs2) pc += offset +-spec bne(riscv_register(), riscv_register(), integer()) -> binary(). +bne(Rs1, Rs2, Offset) when + Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 +-> + % Opcode: 1100011 (0x63), Funct3: 001 + encode_b_type(16#63, 16#1, Rs1, Rs2, Offset); +bne(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +bne(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -4096, 4094}). + +%% BLT - Branch if Less Than (signed) +%% if (rs1 < rs2) pc += offset +-spec blt(riscv_register(), riscv_register(), integer()) -> binary(). +blt(Rs1, Rs2, Offset) when + Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 +-> + % Opcode: 1100011 (0x63), Funct3: 100 + encode_b_type(16#63, 16#4, Rs1, Rs2, Offset); +blt(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +blt(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -4096, 4094}). + +%% BGE - Branch if Greater or Equal (signed) +%% if (rs1 >= rs2) pc += offset +-spec bge(riscv_register(), riscv_register(), integer()) -> binary(). +bge(Rs1, Rs2, Offset) when + Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 +-> + % Opcode: 1100011 (0x63), Funct3: 101 + encode_b_type(16#63, 16#5, Rs1, Rs2, Offset); +bge(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +bge(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -4096, 4094}). + +%% BLTU - Branch if Less Than Unsigned +%% if (rs1 < rs2) pc += offset (unsigned) +-spec bltu(riscv_register(), riscv_register(), integer()) -> binary(). +bltu(Rs1, Rs2, Offset) when + Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 +-> + % Opcode: 1100011 (0x63), Funct3: 110 + encode_b_type(16#63, 16#6, Rs1, Rs2, Offset); +bltu(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +bltu(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -4096, 4094}). + +%% BGEU - Branch if Greater or Equal Unsigned +%% if (rs1 >= rs2) pc += offset (unsigned) +-spec bgeu(riscv_register(), riscv_register(), integer()) -> binary(). +bgeu(Rs1, Rs2, Offset) when + Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 +-> + % Opcode: 1100011 (0x63), Funct3: 111 + encode_b_type(16#63, 16#7, Rs1, Rs2, Offset); +bgeu(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +bgeu(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -4096, 4094}). + +%%----------------------------------------------------------------------------- +%% J-type instruction encoding (for JAL) +%%----------------------------------------------------------------------------- + +%% J-type instruction format (JAL): +%% imm[20|10:1|11|19:12] (20) | rd (5) | opcode (7) +%% Bits: 31-12 11-7 6-0 +%% +%% The immediate represents a signed offset in multiples of 2 bytes. +%% Range: ±1 MiB (±1048576 bytes) + +-spec encode_j_type( + Opcode :: 0..127, Rd :: riscv_register(), Offset :: integer() +) -> binary(). +encode_j_type(Opcode, Rd, Offset) -> + RdNum = reg_to_num(Rd), + % Extract immediate bits: imm[20], imm[10:1], imm[11], imm[19:12] + OffsetMasked = Offset band 16#1FFFFF, + % imm[20] -> bit 31 + Imm20 = (OffsetMasked bsr 20) band 1, + % imm[10:1] -> bits 30-21 + Imm10_1 = (OffsetMasked bsr 1) band 16#3FF, + % imm[11] -> bit 20 + Imm11 = (OffsetMasked bsr 11) band 1, + % imm[19:12] -> bits 19-12 + Imm19_12 = (OffsetMasked bsr 12) band 16#FF, + Instr = + (Imm20 bsl 31) bor + (Imm10_1 bsl 21) bor + (Imm11 bsl 20) bor + (Imm19_12 bsl 12) bor + (RdNum bsl 7) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% U-type instruction encoding (for LUI, AUIPC) +%%----------------------------------------------------------------------------- + +%% U-type instruction format: +%% imm[31:12] (20) | rd (5) | opcode (7) +%% Bits: 31-12 11-7 6-0 + +-spec encode_u_type( + Opcode :: 0..127, Rd :: riscv_register(), Imm :: integer() +) -> binary(). +encode_u_type(Opcode, Rd, Imm) -> + RdNum = reg_to_num(Rd), + % Upper 20 bits of immediate + ImmUpper = (Imm bsr 12) band 16#FFFFF, + Instr = (ImmUpper bsl 12) bor (RdNum bsl 7) bor Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% Jump and link instructions +%%----------------------------------------------------------------------------- + +%% JAL - Jump and Link +%% rd = pc + 4; pc += offset +-spec jal(riscv_register(), integer()) -> binary(). +jal(Rd, Offset) when + Offset >= -1048576, Offset =< 1048574, (Offset rem 2) =:= 0 +-> + % Opcode: 1101111 (0x6F) + encode_j_type(16#6F, Rd, Offset); +jal(_Rd, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +jal(_Rd, Offset) -> + error({offset_out_of_range, Offset, -1048576, 1048574}). + +%% JALR - Jump and Link Register +%% rd = pc + 4; pc = (rs1 + offset) & ~1 +-spec jalr(riscv_register(), riscv_register(), integer()) -> binary(). +jalr(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 -> + % Opcode: 1100111 (0x67), Funct3: 000 + encode_i_type(16#67, Rd, 16#0, Rs1, Offset); +jalr(_Rd, _Rs1, Offset) -> + error({offset_out_of_range, Offset, -2048, 2047}). + +%% JALR - Jump and Link Register (no offset) +%% rd = pc + 4; pc = rs1 & ~1 +-spec jalr(riscv_register(), riscv_register()) -> binary(). +jalr(Rd, Rs1) -> + jalr(Rd, Rs1, 0). + +%%----------------------------------------------------------------------------- +%% Upper immediate instructions +%%----------------------------------------------------------------------------- + +%% LUI - Load Upper Immediate +%% rd = imm << 12 +-spec lui(riscv_register(), integer()) -> binary(). +lui(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF -> + % Opcode: 0110111 (0x37) + encode_u_type(16#37, Rd, Imm bsl 12); +lui(_Rd, Imm) -> + error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}). + +%% AUIPC - Add Upper Immediate to PC +%% rd = pc + (imm << 12) +-spec auipc(riscv_register(), integer()) -> binary(). +auipc(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF -> + % Opcode: 0010111 (0x17) + encode_u_type(16#17, Rd, Imm bsl 12); +auipc(_Rd, Imm) -> + error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}). + +%%----------------------------------------------------------------------------- +%% Pseudo-instructions +%%----------------------------------------------------------------------------- +%% These are convenience instructions that map to actual RV32I instructions + +%% NOP - No Operation +%% Expands to: addi x0, x0, 0 +-spec nop() -> binary(). +nop() -> + addi(zero, zero, 0). + +%% LI - Load Immediate +%% Load a 32-bit immediate value into a register +%% For small immediates (-2048 to 2047): addi rd, x0, imm +%% For larger immediates: lui + addi sequence +-spec li(riscv_register(), integer()) -> binary(). +li(Rd, Imm) when Imm >= -2048, Imm =< 2047 -> + % Small immediate: addi rd, x0, imm + addi(Rd, zero, Imm); +li(Rd, Imm) when Imm >= -16#80000000, Imm =< 16#7FFFFFFF -> + % Large immediate: lui + addi + % Split into upper 20 bits and lower 12 bits + % Need to account for sign extension of lower 12 bits + Lower = Imm band 16#FFF, + % If lower 12 bits has sign bit set, we need to add 1 to upper + UpperRaw = + if + Lower >= 16#800 -> + (Imm bsr 12) + 1; + true -> + Imm bsr 12 + end, + % Mask to 20 bits first, then sign extend if needed + UpperMasked = UpperRaw band 16#FFFFF, + Upper = + if + UpperMasked band 16#80000 =/= 0 -> + % Bit 19 is set, so this is negative in 20-bit representation + % Sign extend from 20 bits + UpperMasked - 16#100000; + true -> + % Positive value + UpperMasked + end, + % Sign extend lower 12 bits + LowerSigned = + if + Lower >= 16#800 -> Lower - 16#1000; + true -> Lower + end, + LuiInstr = lui(Rd, Upper), + AddiInstr = addi(Rd, Rd, LowerSigned), + <>; +li(_Rd, Imm) -> + error({immediate_out_of_range, Imm, -16#80000000, 16#7FFFFFFF}). + +%% MV - Move (copy register) +%% Expands to: addi rd, rs, 0 +-spec mv(riscv_register(), riscv_register()) -> binary(). +mv(Rd, Rs) -> + addi(Rd, Rs, 0). + +%% NOT - Bitwise NOT +%% Expands to: xori rd, rs, -1 +-spec not_(riscv_register(), riscv_register()) -> binary(). +not_(Rd, Rs) -> + xori(Rd, Rs, -1). + +%% NEG - Negate (two's complement) +%% Expands to: sub rd, x0, rs +-spec neg(riscv_register(), riscv_register()) -> binary(). +neg(Rd, Rs) -> + sub(Rd, zero, Rs). + +%% J - Unconditional Jump +%% Expands to: jal x0, offset +-spec j(integer()) -> binary(). +j(Offset) -> + jal(zero, Offset). + +%% JR - Jump Register +%% Expands to: jalr x0, rs, 0 +-spec jr(riscv_register()) -> binary(). +jr(Rs) -> + jalr(zero, Rs, 0). + +%% RET - Return from subroutine +%% Expands to: jalr x0, ra, 0 +-spec ret() -> binary(). +ret() -> + jalr(zero, ra, 0). + +%% CALL - Call function (far call using AUIPC + JALR) +%% This is a two-instruction sequence for calling functions beyond JAL range +%% Expands to: auipc ra, offset[31:12]; jalr ra, ra, offset[11:0] +-spec call(riscv_register(), integer()) -> binary(). +call(Rd, Offset) when Offset >= -16#80000000, Offset =< 16#7FFFFFFF -> + % Split offset into upper 20 bits and lower 12 bits + Lower = Offset band 16#FFF, + % If lower 12 bits has sign bit set, we need to add 1 to upper + Upper = + if + Lower >= 16#800 -> + ((Offset bsr 12) + 1) band 16#FFFFF; + true -> + (Offset bsr 12) band 16#FFFFF + end, + % Sign extend lower 12 bits + LowerSigned = + if + Lower >= 16#800 -> Lower - 16#1000; + true -> Lower + end, + AuipcInstr = auipc(Rd, Upper), + JalrInstr = jalr(ra, Rd, LowerSigned), + <>; +call(_Rd, Offset) -> + error({offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}). + +%% EBREAK - Environment Breakpoint +%% Causes a breakpoint exception to be raised. +%% This is the RISC-V equivalent of ARM's BKPT instruction. +%% Encoding: 0x00100073 +-spec ebreak() -> binary(). +ebreak() -> + <<16#73, 16#00, 16#10, 16#00>>. + +%% BKPT - Breakpoint (for ARM compatibility) +%% In RISC-V, this is implemented as EBREAK. +%% The immediate parameter is ignored for compatibility with ARM. +-spec bkpt(integer()) -> binary(). +bkpt(_Imm) -> + ebreak(). + +%% MUL - Multiply (RV32M extension) +%% Multiplies rs1 by rs2 and places the lower 32 bits in rd +%% Format: mul rd, rs1, rs2 +%% Encoding: R-type with opcode=0x33, funct3=0x0, funct7=0x01 +-spec mul(riscv_register(), riscv_register(), riscv_register()) -> binary(). +mul(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000001 + encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#01). diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 26ab6b4ecc..9bc1c8c78b 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -30,6 +30,7 @@ set(ERLANG_MODULES jit_aarch64_asm_tests jit_armv6m_tests jit_armv6m_asm_tests + jit_riscv32_asm_tests jit_x86_64_tests jit_x86_64_asm_tests ) diff --git a/tests/libs/jit/jit_riscv32_asm_tests.erl b/tests/libs/jit/jit_riscv32_asm_tests.erl new file mode 100644 index 0000000000..94e4942db5 --- /dev/null +++ b/tests/libs/jit/jit_riscv32_asm_tests.erl @@ -0,0 +1,553 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_asm_tests). + +-include_lib("eunit/include/eunit.hrl"). + +-define(_assertAsmEqual(Bin, Str, Value), + ?_assertEqual(jit_tests_common:asm(riscv32, Bin, Str), Value) +). + +%%----------------------------------------------------------------------------- +%% R-type arithmetic and logical instruction tests +%%----------------------------------------------------------------------------- + +add_test_() -> + [ + ?_assertAsmEqual( + <<16#00628533:32/little>>, "add a0, t0, t1", jit_riscv32_asm:add(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00a585b3:32/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0) + ), + ?_assertAsmEqual( + <<16#01e787b3:32/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5) + ) + ]. + +sub_test_() -> + [ + ?_assertAsmEqual( + <<16#40628533:32/little>>, "sub a0, t0, t1", jit_riscv32_asm:sub(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#40a585b3:32/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0) + ), + ?_assertAsmEqual( + <<16#41e787b3:32/little>>, "sub a5, a5, t5", jit_riscv32_asm:sub(a5, a5, t5) + ) + ]. + +and_test_() -> + [ + ?_assertAsmEqual( + <<16#0062f533:32/little>>, "and a0, t0, t1", jit_riscv32_asm:and_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5f5b3:32/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2) + ) + ]. + +or_test_() -> + [ + ?_assertAsmEqual( + <<16#0062e533:32/little>>, "or a0, t0, t1", jit_riscv32_asm:or_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5e5b3:32/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2) + ) + ]. + +xor_test_() -> + [ + ?_assertAsmEqual( + <<16#0062c533:32/little>>, "xor a0, t0, t1", jit_riscv32_asm:xor_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5c5b3:32/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2) + ) + ]. + +sll_test_() -> + [ + ?_assertAsmEqual( + <<16#00629533:32/little>>, "sll a0, t0, t1", jit_riscv32_asm:sll(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c59633:32/little>>, "sll a2, a1, a2", jit_riscv32_asm:sll(a2, a1, a2) + ) + ]. + +srl_test_() -> + [ + ?_assertAsmEqual( + <<16#0062d533:32/little>>, "srl a0, t0, t1", jit_riscv32_asm:srl(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5d633:32/little>>, "srl a2, a1, a2", jit_riscv32_asm:srl(a2, a1, a2) + ) + ]. + +sra_test_() -> + [ + ?_assertAsmEqual( + <<16#4062d533:32/little>>, "sra a0, t0, t1", jit_riscv32_asm:sra(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#40c5d633:32/little>>, "sra a2, a1, a2", jit_riscv32_asm:sra(a2, a1, a2) + ) + ]. + +slt_test_() -> + [ + ?_assertAsmEqual( + <<16#0062a533:32/little>>, "slt a0, t0, t1", jit_riscv32_asm:slt(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5a633:32/little>>, "slt a2, a1, a2", jit_riscv32_asm:slt(a2, a1, a2) + ) + ]. + +sltu_test_() -> + [ + ?_assertAsmEqual( + <<16#0062b533:32/little>>, "sltu a0, t0, t1", jit_riscv32_asm:sltu(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5b633:32/little>>, "sltu a2, a1, a2", jit_riscv32_asm:sltu(a2, a1, a2) + ) + ]. + +%%----------------------------------------------------------------------------- +%% I-type immediate instruction tests +%%----------------------------------------------------------------------------- + +addi_test_() -> + [ + ?_assertAsmEqual( + <<16#01428513:32/little>>, "addi a0, t0, 20", jit_riscv32_asm:addi(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#fff58593:32/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1) + ), + ?_assertAsmEqual( + <<16#7ff00513:32/little>>, "addi a0, zero, 2047", jit_riscv32_asm:addi(a0, zero, 2047) + ), + ?_assertAsmEqual( + <<16#80000593:32/little>>, "addi a1, zero, -2048", jit_riscv32_asm:addi(a1, zero, -2048) + ) + ]. + +andi_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2f513:32/little>>, "andi a0, t0, 255", jit_riscv32_asm:andi(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#00f5f593:32/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15) + ) + ]. + +ori_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2e513:32/little>>, "ori a0, t0, 255", jit_riscv32_asm:ori(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#00f5e593:32/little>>, "ori a1, a1, 15", jit_riscv32_asm:ori(a1, a1, 15) + ) + ]. + +xori_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2c513:32/little>>, "xori a0, t0, 255", jit_riscv32_asm:xori(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#fff5c593:32/little>>, "xori a1, a1, -1", jit_riscv32_asm:xori(a1, a1, -1) + ) + ]. + +slli_test_() -> + [ + ?_assertAsmEqual( + <<16#00329513:32/little>>, "slli a0, t0, 3", jit_riscv32_asm:slli(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#01f59593:32/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31) + ), + ?_assertAsmEqual( + <<16#00051513:32/little>>, "slli a0, a0, 0", jit_riscv32_asm:slli(a0, a0, 0) + ) + ]. + +srli_test_() -> + [ + ?_assertAsmEqual( + <<16#0032d513:32/little>>, "srli a0, t0, 3", jit_riscv32_asm:srli(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#01f5d593:32/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31) + ) + ]. + +srai_test_() -> + [ + ?_assertAsmEqual( + <<16#4032d513:32/little>>, "srai a0, t0, 3", jit_riscv32_asm:srai(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#41f5d593:32/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31) + ) + ]. + +slti_test_() -> + [ + ?_assertAsmEqual( + <<16#0142a513:32/little>>, "slti a0, t0, 20", jit_riscv32_asm:slti(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#fff5a593:32/little>>, "slti a1, a1, -1", jit_riscv32_asm:slti(a1, a1, -1) + ) + ]. + +sltiu_test_() -> + [ + ?_assertAsmEqual( + <<16#0142b513:32/little>>, "sltiu a0, t0, 20", jit_riscv32_asm:sltiu(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#00153513:32/little>>, "sltiu a0, a0, 1", jit_riscv32_asm:sltiu(a0, a0, 1) + ) + ]. + +%%----------------------------------------------------------------------------- +%% Load instruction tests +%%----------------------------------------------------------------------------- + +lw_test_() -> + [ + ?_assertAsmEqual(<<16#00052503:32/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)), + ?_assertAsmEqual(<<16#00052503:32/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)), + ?_assertAsmEqual(<<16#00452583:32/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)), + ?_assertAsmEqual( + <<16#ffc52503:32/little>>, "lw a0, -4(a0)", jit_riscv32_asm:lw(a0, a0, -4) + ), + ?_assertAsmEqual( + <<16#7ff52503:32/little>>, "lw a0, 2047(a0)", jit_riscv32_asm:lw(a0, a0, 2047) + ) + ]. + +lh_test_() -> + [ + ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0, 0)), + ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0)), + ?_assertAsmEqual(<<16#00251583:32/little>>, "lh a1, 2(a0)", jit_riscv32_asm:lh(a1, a0, 2)) + ]. + +lhu_test_() -> + [ + ?_assertAsmEqual( + <<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0, 0) + ), + ?_assertAsmEqual(<<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0)), + ?_assertAsmEqual(<<16#00255583:32/little>>, "lhu a1, 2(a0)", jit_riscv32_asm:lhu(a1, a0, 2)) + ]. + +lb_test_() -> + [ + ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0, 0)), + ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0)), + ?_assertAsmEqual(<<16#00150583:32/little>>, "lb a1, 1(a0)", jit_riscv32_asm:lb(a1, a0, 1)) + ]. + +lbu_test_() -> + [ + ?_assertAsmEqual( + <<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0, 0) + ), + ?_assertAsmEqual(<<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0)), + ?_assertAsmEqual(<<16#00154583:32/little>>, "lbu a1, 1(a0)", jit_riscv32_asm:lbu(a1, a0, 1)) + ]. + +%%----------------------------------------------------------------------------- +%% Store instruction tests +%%----------------------------------------------------------------------------- + +sw_test_() -> + [ + ?_assertAsmEqual(<<16#00b52023:32/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)), + ?_assertAsmEqual(<<16#00b52023:32/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)), + ?_assertAsmEqual(<<16#00b52223:32/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)), + ?_assertAsmEqual(<<16#feb52e23:32/little>>, "sw a1, -4(a0)", jit_riscv32_asm:sw(a0, a1, -4)) + ]. + +sh_test_() -> + [ + ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a0, a1, 0)), + ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a1, a0)), + ?_assertAsmEqual(<<16#00b51123:32/little>>, "sh a1, 2(a0)", jit_riscv32_asm:sh(a0, a1, 2)) + ]. + +sb_test_() -> + [ + ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a0, a1, 0)), + ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a1, a0)), + ?_assertAsmEqual(<<16#00b500a3:32/little>>, "sb a1, 1(a0)", jit_riscv32_asm:sb(a0, a1, 1)) + ]. + +%%----------------------------------------------------------------------------- +%% Branch instruction tests +%%----------------------------------------------------------------------------- + +beq_test_() -> + [ + ?_assertAsmEqual( + <<16#00628463:32/little>>, "beq t0, t1, .+8", jit_riscv32_asm:beq(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb50ee3:32/little>>, "beq a0, a1, .-4", jit_riscv32_asm:beq(a0, a1, -4) + ), + ?_assertAsmEqual( + <<16#00050063:32/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0) + ) + ]. + +bne_test_() -> + [ + ?_assertAsmEqual( + <<16#00629463:32/little>>, "bne t0, t1, .+8", jit_riscv32_asm:bne(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb51ee3:32/little>>, "bne a0, a1, .-4", jit_riscv32_asm:bne(a0, a1, -4) + ) + ]. + +blt_test_() -> + [ + ?_assertAsmEqual( + <<16#0062c463:32/little>>, "blt t0, t1, .+8", jit_riscv32_asm:blt(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb54ee3:32/little>>, "blt a0, a1, .-4", jit_riscv32_asm:blt(a0, a1, -4) + ) + ]. + +bge_test_() -> + [ + ?_assertAsmEqual( + <<16#0062d463:32/little>>, "bge t0, t1, .+8", jit_riscv32_asm:bge(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb55ee3:32/little>>, "bge a0, a1, .-4", jit_riscv32_asm:bge(a0, a1, -4) + ) + ]. + +bltu_test_() -> + [ + ?_assertAsmEqual( + <<16#0062e463:32/little>>, "bltu t0, t1, .+8", jit_riscv32_asm:bltu(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb56ee3:32/little>>, "bltu a0, a1, .-4", jit_riscv32_asm:bltu(a0, a1, -4) + ) + ]. + +bgeu_test_() -> + [ + ?_assertAsmEqual( + <<16#0062f463:32/little>>, "bgeu t0, t1, .+8", jit_riscv32_asm:bgeu(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb57ee3:32/little>>, "bgeu a0, a1, .-4", jit_riscv32_asm:bgeu(a0, a1, -4) + ) + ]. + +%%----------------------------------------------------------------------------- +%% Jump instruction tests +%%----------------------------------------------------------------------------- + +jal_test_() -> + [ + ?_assertAsmEqual( + <<16#008000ef:32/little>>, "jal .+8", jit_riscv32_asm:jal(ra, 8) + ), + ?_assertAsmEqual( + <<16#ffdff0ef:32/little>>, "jal .-4", jit_riscv32_asm:jal(ra, -4) + ), + ?_assertAsmEqual( + <<16#00000517:32/little, 16#000500e7:32/little>>, + "auipc a0, 0\njalr a0", + jit_riscv32_asm:call(a0, 0) + ), + ?_assertAsmEqual( + <<16#00002517:32/little, 16#800500e7:32/little>>, + "auipc a0, 0x2\njalr -2048(a0)", + jit_riscv32_asm:call(a0, 16#1800) + ) + ]. + +jalr_test_() -> + [ + ?_assertAsmEqual(<<16#000500e7:32/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0, 0)), + ?_assertAsmEqual(<<16#000500e7:32/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0)), + ?_assertAsmEqual(<<16#004500e7:32/little>>, "jalr 4(a0)", jit_riscv32_asm:jalr(ra, a0, 4)) + ]. + +%%----------------------------------------------------------------------------- +%% Upper immediate instruction tests +%%----------------------------------------------------------------------------- + +lui_test_() -> + [ + ?_assertAsmEqual(<<16#000125b7:32/little>>, "lui a1, 18", jit_riscv32_asm:lui(a1, 18)), + ?_assertAsmEqual(<<16#00001537:32/little>>, "lui a0, 1", jit_riscv32_asm:lui(a0, 1)), + ?_assertAsmEqual(<<16#fffff5b7:32/little>>, "lui a1, 0xfffff", jit_riscv32_asm:lui(a1, -1)) + ]. + +auipc_test_() -> + [ + ?_assertAsmEqual(<<16#00012597:32/little>>, "auipc a1, 18", jit_riscv32_asm:auipc(a1, 18)), + ?_assertAsmEqual(<<16#00001517:32/little>>, "auipc a0, 1", jit_riscv32_asm:auipc(a0, 1)) + ]. + +%%----------------------------------------------------------------------------- +%% Pseudo-instruction tests +%%----------------------------------------------------------------------------- + +nop_test_() -> + [ + ?_assertAsmEqual(<<16#00000013:32/little>>, "nop", jit_riscv32_asm:nop()) + ]. + +li_small_test_() -> + [ + ?_assertAsmEqual(<<16#00a00513:32/little>>, "li a0, 10", jit_riscv32_asm:li(a0, 10)), + ?_assertAsmEqual(<<16#fff00513:32/little>>, "li a0, -1", jit_riscv32_asm:li(a0, -1)), + ?_assertAsmEqual(<<16#7ff00513:32/little>>, "li a0, 2047", jit_riscv32_asm:li(a0, 2047)) + ]. + +li_large_test_() -> + [ + % 0x12345 = 74565 - requires lui + addi + ?_assertAsmEqual( + <<16#00012537:32/little, 16#34550513:32/little>>, + "lui a0, 0x12\naddi a0, a0, 0x345", + jit_riscv32_asm:li(a0, 16#12345) + ), + % 0x80000000 = -2147483648 (minimum 32-bit signed) + ?_assertAsmEqual( + <<16#800005b7:32/little, 16#00058593:32/little>>, + "lui a1, 0x80000\naddi a1, a1, 0", + jit_riscv32_asm:li(a1, -16#80000000) + ), + % 0x7FFFFFFF = 2147483647 (maximum 32-bit signed) + ?_assertAsmEqual( + <<16#80000537:32/little, 16#fff50513:32/little>>, + "lui a0, 0x80000\naddi a0, a0, -1", + jit_riscv32_asm:li(a0, 16#7FFFFFFF) + ) + ]. + +mv_test_() -> + [ + ?_assertAsmEqual(<<16#00050513:32/little>>, "mv a0, a0", jit_riscv32_asm:mv(a0, a0)), + ?_assertAsmEqual(<<16#00058593:32/little>>, "mv a1, a1", jit_riscv32_asm:mv(a1, a1)) + ]. + +not_test_() -> + [ + ?_assertAsmEqual(<<16#fff54513:32/little>>, "not a0, a0", jit_riscv32_asm:not_(a0, a0)), + ?_assertAsmEqual(<<16#fff5c593:32/little>>, "not a1, a1", jit_riscv32_asm:not_(a1, a1)) + ]. + +neg_test_() -> + [ + ?_assertAsmEqual(<<16#40a00533:32/little>>, "neg a0, a0", jit_riscv32_asm:neg(a0, a0)), + ?_assertAsmEqual(<<16#40b005b3:32/little>>, "neg a1, a1", jit_riscv32_asm:neg(a1, a1)) + ]. + +j_test_() -> + [ + ?_assertAsmEqual( + <<16#0080006f:32/little>>, "j .+8", jit_riscv32_asm:j(8) + ), + ?_assertAsmEqual( + <<16#ffdff06f:32/little>>, "j .-4", jit_riscv32_asm:j(-4) + ) + ]. + +jr_test_() -> + [ + ?_assertAsmEqual(<<16#00050067:32/little>>, "jr a0", jit_riscv32_asm:jr(a0)), + ?_assertAsmEqual(<<16#00028067:32/little>>, "jr t0", jit_riscv32_asm:jr(t0)) + ]. + +ret_test_() -> + [ + ?_assertAsmEqual(<<16#00008067:32/little>>, "ret", jit_riscv32_asm:ret()) + ]. + +%%----------------------------------------------------------------------------- +%% M Extension (Multiply/Divide) instruction tests +%%----------------------------------------------------------------------------- + +mul_test_() -> + [ + ?_assertAsmEqual( + <<16#02f50533:32/little>>, "mul a0, a0, a5", jit_riscv32_asm:mul(a0, a0, a5) + ), + ?_assertAsmEqual( + <<16#03f60633:32/little>>, "mul a2, a2, t6", jit_riscv32_asm:mul(a2, a2, t6) + ), + ?_assertAsmEqual( + <<16#026585b3:32/little>>, "mul a1, a1, t1", jit_riscv32_asm:mul(a1, a1, t1) + ), + ?_assertAsmEqual( + <<16#02d282b3:32/little>>, "mul t0, t0, a3", jit_riscv32_asm:mul(t0, t0, a3) + ) + ]. + +%%----------------------------------------------------------------------------- +%% System instruction tests +%%----------------------------------------------------------------------------- + +ebreak_test_() -> + [ + ?_assertAsmEqual( + <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:ebreak() + ) + ]. + +bkpt_test_() -> + [ + % bkpt is an ARM compatibility wrapper that generates ebreak + % The immediate parameter is ignored + ?_assertAsmEqual( + <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(0) + ), + ?_assertAsmEqual( + <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(42) + ), + ?_assertAsmEqual( + <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(255) + ) + ]. diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl index cfabfcf15f..7117ee5f69 100644 --- a/tests/libs/jit/jit_tests_common.erl +++ b/tests/libs/jit/jit_tests_common.erl @@ -77,6 +77,7 @@ asm(Arch, Bin, Str) -> find_binutils(Arch) -> ArchStr = atom_to_list(Arch), BinutilsList = [ + {ArchStr ++ "-esp-elf-as", ArchStr ++ "-esp-elf-objdump"}, {ArchStr ++ "-elf-as", ArchStr ++ "-elf-objdump"}, {ArchStr ++ "-none-eabi-as", ArchStr ++ "-none-eabi-objdump"}, {ArchStr ++ "-linux-gnu-as", ArchStr ++ "-linux-gnu-objdump"} @@ -104,6 +105,8 @@ get_asm_header(arm) -> get_asm_header(aarch64) -> ".text\n"; get_asm_header(x86_64) -> + ".text\n"; +get_asm_header(riscv32) -> ".text\n". %% Get architecture-specific assembler flags @@ -113,7 +116,9 @@ get_as_flags(arm) -> get_as_flags(aarch64) -> ""; get_as_flags(x86_64) -> - "--64". + "--64"; +get_as_flags(riscv32) -> + "-march=rv32ima". %% Parse objdump output lines and extract binary data -spec asm_lines([binary()], binary(), atom()) -> binary(). diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index ff272f6eac..a289a01a6f 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -31,6 +31,7 @@ start() -> jit_aarch64_asm_tests, jit_armv6m_tests, jit_armv6m_asm_tests, + jit_riscv32_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests ]). From 2bedbdc84ddecc70226d6b1227d7146bac0373a2 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 5 Oct 2025 09:28:20 +0200 Subject: [PATCH 83/97] riscv32: initial backend implementation Signed-off-by: Paul Guyot --- libs/jit/include/jit.hrl | 1 + libs/jit/src/CMakeLists.txt | 1 + libs/jit/src/jit_precompile.erl | 1 + libs/jit/src/jit_riscv32.erl | 3051 +++++++++++++++ src/libAtomVM/defaultatoms.def | 1 + src/libAtomVM/jit.c | 62 +- src/libAtomVM/jit.h | 6 + src/libAtomVM/module.c | 3 + src/libAtomVM/nifs.c | 2 + src/libAtomVM/opcodesswitch.h | 2 +- src/platforms/esp32/CMakeLists.txt | 18 +- .../esp32/components/avm_sys/CMakeLists.txt | 1 + .../components/avm_sys/jit_stream_flash.c | 34 + src/platforms/esp32/components/avm_sys/sys.c | 25 + .../esp32/components/libatomvm/CMakeLists.txt | 2 +- src/platforms/esp32/main/Kconfig.projbuild | 6 + src/platforms/esp32/test/CMakeLists.txt | 12 +- .../test/main/test_erl_sources/CMakeLists.txt | 87 +- tests/libs/jit/CMakeLists.txt | 1 + tests/libs/jit/jit_riscv32_tests.erl | 3419 +++++++++++++++++ tests/libs/jit/tests.erl | 1 + 21 files changed, 6671 insertions(+), 65 deletions(-) create mode 100644 libs/jit/src/jit_riscv32.erl create mode 100644 src/platforms/esp32/components/avm_sys/jit_stream_flash.c create mode 100644 tests/libs/jit/jit_riscv32_tests.erl diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index b006c5f34f..81ff1c42c2 100644 --- a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -23,6 +23,7 @@ -define(JIT_ARCH_X86_64, 1). -define(JIT_ARCH_AARCH64, 2). -define(JIT_ARCH_ARMV6M, 3). +-define(JIT_ARCH_RISCV32, 4). -define(JIT_VARIANT_PIC, 1). -define(JIT_VARIANT_FLOAT32, 2). diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 586223b4bc..ae62643c30 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -31,6 +31,7 @@ set(ERLANG_MODULES jit_aarch64_asm jit_armv6m jit_armv6m_asm + jit_riscv32 jit_riscv32_asm jit_x86_64 jit_x86_64_asm diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index cd9646790d..930b79dc37 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -93,6 +93,7 @@ compile(Target, Dir, Path) -> "x86_64" -> ?JIT_ARCH_X86_64; "aarch64" -> ?JIT_ARCH_AARCH64; "armv6m" -> ?JIT_ARCH_ARMV6M; + "riscv32" -> ?JIT_ARCH_RISCV32; _ -> error({unsupported_target, Target}) end, diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl new file mode 100644 index 0000000000..56887fb064 --- /dev/null +++ b/libs/jit/src/jit_riscv32.erl @@ -0,0 +1,3051 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32). + +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + flush/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/1, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + jump_to_continuation/2, + jump_to_offset/2, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + continuation_entry_point/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/2, + add_label/2, + add_label/3 +]). + +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2 +]). +-endif. + +-compile([warnings_as_errors]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). + +-define(ASSERT(Expr), true = Expr). + +%% RISC-V32 ILP32 ABI: a0-a7 are used for argument passing (8 registers). +%% a0-a1 are used for return values (a0 for 32-bit, a0-a1 for 64-bit returns). +%% s0-s11 are callee-saved registers (must be preserved across calls). +%% t0-t6 are caller-saved temporary registers. +%% sp is the stack pointer. +%% ra is the return address register. +%% zero (x0) is hardwired to constant 0. +%% This implementation uses RV32IMC (base + multiply/compressed extensions). +%% +%% See: RISC-V Calling Convention +%% https://riscv.org/wp-content/uploads/2024/12/riscv-calling.pdf +%% +%% Registers used by the JIT backend (RISC-V32): +%% - Argument/return: a0-a7 (up to 8 args in registers) +%% - Callee-saved: s0-s11 (must preserve) +%% - Temporaries: t0-t6 (caller-saved) +%% - Stack pointer: sp +%% - Return address: ra +%% - Zero register: zero (always 0) +%% - Available for JIT scratch: t0-t6 (7 temp registers) +%% +%% Note: RISC-V32 instructions are fixed 32-bit with uniform encoding, +%% allowing access to all 32 registers. +%% +%% For more details, refer to the RISC-V ILP32 Procedure Call Standard. + +-type riscv32_register() :: + a0 + | a1 + | a2 + | a3 + | a4 + | a5 + | a6 + | a7 + | t0 + | t1 + | t2 + | t3 + | t4 + | t5 + | t6 + | s0 + | s1 + | s2 + | s3 + | s4 + | s5 + | s6 + | s7 + | s8 + | s9 + | s10 + | s11 + | sp + | ra. + +-define(IS_GPR(Reg), + (Reg =:= a0 orelse Reg =:= a1 orelse Reg =:= a2 orelse Reg =:= a3 orelse Reg =:= a4 orelse + Reg =:= a5 orelse Reg =:= a6 orelse Reg =:= a7 orelse Reg =:= t0 orelse Reg =:= t1 orelse + Reg =:= t2 orelse Reg =:= t3 orelse Reg =:= t4 orelse Reg =:= t5 orelse Reg =:= t6 orelse + Reg =:= s0 orelse Reg =:= s1 orelse Reg =:= s2 orelse Reg =:= s3 orelse Reg =:= s4 orelse + Reg =:= s5 orelse Reg =:= s6 orelse Reg =:= s7 orelse Reg =:= s8 orelse Reg =:= s9 orelse + Reg =:= s10 orelse Reg =:= s11 orelse Reg =:= sp orelse Reg =:= ra) +). + +-type stream() :: any(). + +-record(state, { + stream_module :: module(), + stream :: stream(), + offset :: non_neg_integer(), + branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + available_regs :: [riscv32_register()], + used_regs :: [riscv32_register()], + labels :: [{integer() | reference(), integer()}], + variant :: non_neg_integer(), + literal_pool :: [{non_neg_integer(), riscv32_register(), non_neg_integer()}] +}). + +-type state() :: #state{}. +-type immediate() :: non_neg_integer(). +-type vm_register() :: + {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, riscv32_register()}. +-type value() :: immediate() | vm_register() | riscv32_register() | {ptr, riscv32_register()}. +-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}. + +-type maybe_free_riscv32_register() :: + {free, riscv32_register()} | riscv32_register(). + +-type condition() :: + {riscv32_register(), '<', integer()} + | {maybe_free_riscv32_register(), '<', riscv32_register()} + | {maybe_free_riscv32_register(), '==', integer()} + | {maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} + | {'(int)', maybe_free_riscv32_register(), '==', integer()} + | {'(int)', maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} + | {'(bool)', maybe_free_riscv32_register(), '==', false} + | {'(bool)', maybe_free_riscv32_register(), '!=', false} + | {maybe_free_riscv32_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, riscv32_register()}, '==', {free, riscv32_register()}}. + +% Context offsets (32-bit architecture) +% ctx->e is 0x14 +% ctx->x is 0x18 +-define(CTX_REG, a0). +-define(NATIVE_INTERFACE_REG, a2). +-define(Y_REGS, {?CTX_REG, 16#14}). +-define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). +-define(CP, {?CTX_REG, 16#5C}). +-define(FP_REGS, {?CTX_REG, 16#60}). +-define(BS, {?CTX_REG, 16#64}). +-define(BS_OFFSET, {?CTX_REG, 16#68}). +% JITSTATE is in a1 register (no prolog, following aarch64 model) +-define(JITSTATE_REG, a1). +% Return address register (like LR in AArch64) +-define(RA_REG, ra). +-define(JITSTATE_MODULE_OFFSET, 0). +-define(JITSTATE_CONTINUATION_OFFSET, 16#4). +-define(JITSTATE_REDUCTIONCOUNT_OFFSET, 16#8). +-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}). +-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}). + +-define(JUMP_TABLE_ENTRY_SIZE, 8). + +%% RISC-V32 register mappings + +%% Use t3 as temporary for some operations +-define(IP_REG, t3). + +-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). +-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). +-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). +-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). +-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X), + is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000 +). + +%% RISC-V32 ILP32 ABI register allocation: +%% - a0: context pointer (reserved, passed as first parameter) +%% - a1-a5: available for parameters to native functions (up to 6 params) +%% - a2: native interface pointer (reserved) +%% - t0-t6: temporaries, caller-saved, available for JIT use +%% - s0-s11: callee-saved (would need to be saved/restored) +-define(AVAILABLE_REGS, [t6, t5, t4, t3, t2, t1, t0]). +-define(PARAMETER_REGS, [a0, a1, a2, a3, a4, a5, a6, a7]). +-define(SCRATCH_REGS, [t6, t5, t4, t2, t1, t0]). + +%%----------------------------------------------------------------------------- +%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. +%% sizeof(uintptr_t) +%% +%% C code equivalent is: +%% #if UINTPTR_MAX == UINT32_MAX +%% #define TERM_BYTES 4 +%% #elif UINTPTR_MAX == UINT64_MAX +%% #define TERM_BYTES 8 +%% #else +%% #error "Term size must be either 32 bit or 64 bit." +%% #endif +%% +%% @end +%% @return Word size in bytes +%%----------------------------------------------------------------------------- +-spec word_size() -> 4 | 8. +word_size() -> 4. + +%%----------------------------------------------------------------------------- +%% @doc Create a new backend state for provided variant, module and stream. +%% @end +%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC) +%% @param StreamModule module to stream instructions +%% @param Stream stream state +%% @return New backend state +%%----------------------------------------------------------------------------- +-spec new(any(), module(), stream()) -> state(). +new(Variant, StreamModule, Stream) -> + #state{ + stream_module = StreamModule, + stream = Stream, + branches = [], + offset = StreamModule:offset(Stream), + available_regs = ?AVAILABLE_REGS, + used_regs = [], + labels = [], + variant = Variant, + literal_pool = [] + }. + +%%----------------------------------------------------------------------------- +%% @doc Access the stream object. +%% @end +%% @param State current backend state +%% @return The stream object +%%----------------------------------------------------------------------------- +-spec stream(state()) -> stream(). +stream(#state{stream = Stream}) -> + Stream. + +%%----------------------------------------------------------------------------- +%% @doc Get the current offset in the stream +%% @end +%% @param State current backend state +%% @return The current offset +%%----------------------------------------------------------------------------- +-spec offset(state()) -> non_neg_integer(). +offset(#state{stream_module = StreamModule, stream = Stream}) -> + StreamModule:offset(Stream). + +%%----------------------------------------------------------------------------- +%% @doc Flush the stream. +%% @end +%% @param State current backend state +%% @return The new state +%%----------------------------------------------------------------------------- +-spec flush(state()) -> stream(). +flush(#state{stream_module = StreamModule, stream = Stream0} = State) -> + Stream1 = StreamModule:flush(Stream0), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a debugger of breakpoint instruction. This is used for debugging +%% and not in production. +%% @end +%% @param State current backend state +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec debugger(state()) -> state(). +debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> + Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:bkpt(0)), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently used native registers. This is used for +%% debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of used registers +%%----------------------------------------------------------------------------- +-spec used_regs(state()) -> [riscv32_register()]. +used_regs(#state{used_regs = Used}) -> Used. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently available native scratch registers. This +%% is used for debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of available registers +%%----------------------------------------------------------------------------- +-spec available_regs(state()) -> [riscv32_register()]. +available_regs(#state{available_regs = Available}) -> Available. + +%%----------------------------------------------------------------------------- +%% @doc Free native registers. The passed list of registers can contain +%% registers, pointer to registers or other values that are ignored. +%% @end +%% @param State current backend state +%% @param Regs list of registers or other values +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec free_native_registers(state(), [value()]) -> state(). +free_native_registers(State, []) -> + State; +free_native_registers(State, [Reg | Rest]) -> + State1 = free_native_register(State, Reg), + free_native_registers(State1, Rest). + +-spec free_native_register(state(), value()) -> state(). +free_native_register( + #state{available_regs = Available0, used_regs = Used0} = State, + Reg +) when + is_atom(Reg) +-> + {Available1, Used1} = free_reg(Available0, Used0, Reg), + State#state{available_regs = Available1, used_regs = Used1}; +free_native_register(State, {ptr, Reg}) -> + free_native_register(State, Reg); +free_native_register(State, _Other) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Assert that all native scratch registers are available. This is used +%% for debugging and not in production. +%% @end +%% @param State current backend state +%% @return ok +%%----------------------------------------------------------------------------- +-spec assert_all_native_free(state()) -> ok. +assert_all_native_free(#state{ + available_regs = ?AVAILABLE_REGS, used_regs = [] +}) -> + ok. + +%%----------------------------------------------------------------------------- +%% @doc Emit the jump table at the beginning of the module. Branches will be +%% updated afterwards with update_branches/2. Emit branches for labels from +%% 0 (special entry for lines and labels information) to LabelsCount included +%% (special entry for OP_INT_CALL_END). +%% +%% On this platform, each jump table entry is 12 bytes. +%% ``` +%% ldr a3, pc+4 +%% push {a1, r4, r5, r6, r7, lr} +%% add pc, pc, a3 +%% nop() +%% offset_to_label0 +%% ``` +%% +%% @end +%% @param State current backend state +%% @param LabelsCount number of labels in the module. +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec jump_table(state(), pos_integer()) -> state(). +jump_table(State, LabelsCount) -> + jump_table0(State, 0, LabelsCount). + +jump_table0(State, N, LabelsCount) when N > LabelsCount -> + State; +jump_table0( + #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + N, + LabelsCount +) -> + % Create jump table entry: AUIPC + JALR (8 bytes total) + % This will be patched later in update_branches/2 + Offset = StreamModule:offset(Stream0), + % Placeholder: Load PC + upper20 bits + I1 = jit_riscv32_asm:auipc(a3, 0), + % Placeholder: Jump to a3 + lower12 bits + I2 = jit_riscv32_asm:jalr(zero, a3, 0), + + JumpEntry = <>, + Stream1 = StreamModule:append(Stream0, JumpEntry), + + % Record both AUIPC and JALR offsets for patching + Reloc = {N, Offset, jump_table_auipc_jalr}, + UpdatedState = State#state{stream = Stream1, branches = [Reloc | Branches]}, + + jump_table0(UpdatedState, N + 1, LabelsCount). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state()) -> state(). +update_branches(#state{branches = []} = State) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Rel = LabelOffset - Offset, + NewInstr = + case Type of + {adr, Reg} when Rel rem 4 =:= 0 -> pc_relative_address(Reg, Rel); + {adr, Reg} when Rel rem 4 =:= 2 -> pc_relative_address(Reg, Rel + 2); + {far_branch, Size, TempReg} -> + % Check if branch can now be optimized to near branch + if + Rel >= -1048576 andalso Rel =< 1048574 andalso (Rel rem 2) =:= 0 -> + % RISC-V jal has ±1MB range + % Optimize to near branch: jal + nops to fill original size + DirectBranch = jit_riscv32_asm:jal(zero, Rel), + % Fill remaining bytes with NOPs (RISC-V instructions are 4 bytes) + NopCount = (Size - 4) div 4, + Nops = << + <<(jit_riscv32_asm:nop())/binary>> + || _ <- lists:seq(1, NopCount) + >>, + <>; + true -> + % Keep far branch sequence: auipc + lw + jalr + data + % RISC-V far branch is always 16 bytes + case Size of + 16 -> + % 16-byte sequence: auipc + lw + jalr + data + I1 = jit_riscv32_asm:auipc(TempReg, 0), + I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8), + I3 = jit_riscv32_asm:jalr(zero, TempReg, 0), + % Calculate absolute target address + TargetAddress = LabelOffset, + I4 = <>, + <> + end + end; + jump_table_auipc_jalr -> + % Calculate PC-relative offset from AUIPC instruction to target + % AUIPC is at Offset, JALR is at Offset+4 + % Target is at LabelOffset + % Offset from AUIPC PC to target + PCRelOffset = LabelOffset - Offset, + + % Split into upper 20 bits and lower 12 bits + % RISC-V encodes: target = PC + (upper20 << 12) + sign_ext(lower12) + % If lower12 >= 0x800, it's negative when sign-extended, so add 1 to upper + Upper20 = (PCRelOffset + 16#800) bsr 12, + Lower12 = PCRelOffset band 16#FFF, + % Sign-extend lower 12 bits for JALR immediate + Lower12Signed = + if + Lower12 >= 16#800 -> Lower12 - 16#1000; + true -> Lower12 + end, + + % Encode AUIPC and JALR with computed offsets + I1 = jit_riscv32_asm:auipc(a3, Upper20), + I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed), + <> + end, + Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + update_branches(State#state{stream = Stream1, branches = BranchesT}). + +%%----------------------------------------------------------------------------- +%% @doc Generate code to load a primitive function pointer into a register +%% @param Primitive index to the primitive to call +%% @param TargetReg register to load the function pointer into +%% @return Binary instruction sequence +%%----------------------------------------------------------------------------- +-spec load_primitive_ptr(non_neg_integer(), riscv32_register()) -> binary(). +load_primitive_ptr(Primitive, TargetReg) -> + case Primitive of + 0 -> + jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, 0); + N when N * 4 =< 124 -> + jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, N * 4); + N when N * 4 < 256 -> + % Can encode N * 4 directly in li instruction + I1 = jit_riscv32_asm:li(TargetReg, N * 4), + I2 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG), + I3 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0), + <>; + N -> + % For very large primitive numbers, load N and shift left by 2 (multiply by 4) + I1 = jit_riscv32_asm:li(TargetReg, N), + I2 = jit_riscv32_asm:slli(TargetReg, TargetReg, 2), + I3 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG), + I4 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0), + <> + end. + +%%----------------------------------------------------------------------------- +%% @doc Emit a call (call with return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. It also saves scratch registers we need to preserve. +%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), riscv32_register()}. +call_primitive( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [TempReg | RestRegs], + used_regs = UsedRegs + } = State, + Primitive, + Args +) -> + % Use a low register for LDR since ARM Thumb LDR only works with low registers + PrepCall = load_primitive_ptr(Primitive, TempReg), + Stream1 = StreamModule:append(Stream0, PrepCall), + StateCall = State#state{ + stream = Stream1, + available_regs = RestRegs, + used_regs = [TempReg | UsedRegs] + }, + call_func_ptr(StateCall, {free, TempReg}, Args); +call_primitive( + #state{available_regs = []} = State, + Primitive, + Args +) -> + call_func_ptr(State, {primitive, Primitive}, Args). + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump (call without return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention. +%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +call_primitive_last( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + Primitive, + Args +) -> + % We need a register for the function pointer that should not be used as a parameter + % Since we're not returning, we can use all scratch registers except + % registers used for parameters + ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + ArgsRegs = args_regs(Args), + ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, + [Temp | AvailableRegs1] = ScratchRegs, + UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + PrepCall = load_primitive_ptr(Primitive, Temp), + Stream1 = StreamModule:append(Stream0, PrepCall), + + State1 = State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + + % Preprocess offset special arg + Args1 = lists:map( + fun(Arg) -> + case Arg of + offset -> StreamModule:offset(Stream1); + _ -> Arg + end + end, + Args + ), + + % In RISC-V, all up to 8 arguments fit in registers (a0-a7) + % Always use tail call when calling primitives in tail position + State4 = + case Args1 of + [FirstArg, jit_state | ArgsT] -> + % Use tail call + ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT], + State2 = set_registers_args(State1, ArgsForTailCall, 0), + tail_call_with_jit_state_registers_only(State2, Temp) + end, + State5 = State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}, + flush_literal_pool(State5). + +%%----------------------------------------------------------------------------- +%% @doc Tail call to address in register. +%% RA is preserved across regular calls (call_func_ptr saves/restores it), +%% so when the called C primitive returns, it returns to opcodesswitch.h. +%% @end +%% @param State current backend state +%% @param Reg register containing the target address +%% @return Updated backend state +%%----------------------------------------------------------------------------- +tail_call_with_jit_state_registers_only( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + Reg +) -> + % Jump to address in register (tail call) + I1 = jit_riscv32_asm:jr(Reg), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a return of a value if it's not equal to ctx. +%% This logic is used to break out to the scheduler, typically after signal +%% messages have been processed. +%% @end +%% @param State current backend state +%% @param Reg register to compare to (should be {free, Reg} as it's always freed) +%% @return Updated backend state +%%----------------------------------------------------------------------------- +return_if_not_equal_to_ctx( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State, + {free, Reg} +) -> + % RISC-V doesn't have a separate cmp instruction, use beq directly + I2 = + case Reg of + % Return value is already in a0 + a0 -> <<>>; + % Move to a0 (return register) + _ -> jit_riscv32_asm:mv(a0, Reg) + end, + I3 = jit_riscv32_asm:ret(), + % Branch if equal (skip the return) + I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, byte_size(I2) + byte_size(I3)), + Stream1 = StreamModule:append(Stream0, <>), + {AvailableRegs1, UsedRegs1} = free_reg( + AvailableRegs0, UsedRegs0, Reg + ), + State#state{ + stream = Stream1, + available_regs = AvailableRegs1, + used_regs = UsedRegs1 + }. + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump to a label. The offset of the relocation is saved and will +%% be updated with `update_branches/2`. +%% @end +%% @param State current backend state +%% @param Label to jump to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_label( + #state{stream_module = StreamModule, stream = Stream0, labels = Labels} = State0, Label +) -> + LabelLookupResult = lists:keyfind(Label, 1, Labels), + Offset = StreamModule:offset(Stream0), + {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), + Stream1 = StreamModule:append(Stream0, CodeBlock), + State2 = State1#state{stream = Stream1}, + flush_literal_pool(State2). + +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), + Stream1 = StreamModule:append(Stream0, CodeBlock), + State2 = State#state{stream = Stream1}, + flush_literal_pool(State2). + +%%----------------------------------------------------------------------------- +%% @doc Jump to address in continuation pointer register +%% Calculate absolute address and jump to it. +%% @end +%% @param State current backend state +%% @param {free, OffsetReg} register containing the offset value +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_continuation( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + offset = BaseOffset + } = State0, + {free, OffsetReg} +) -> + % Calculate absolute address: native_code_base + target_offset + % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) + CurrentStreamOffset = StreamModule:offset(Stream0), + NetOffset = BaseOffset - CurrentStreamOffset, + + % Get native code base address into temporary register + I1 = pc_relative_address(Temp, NetOffset), + % Add target offset to get final absolute address + I2 = jit_riscv32_asm:add(Temp, Temp, OffsetReg), + % Indirect branch to the calculated absolute address + I3 = jit_riscv32_asm:jr(Temp), + + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + % Free all registers since this is a tail jump + State0#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}. + +branch_to_offset_code(_State, Offset, TargetOffset) when + TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044 +-> + % Near branch: use direct J instruction + Rel = TargetOffset - Offset, + jit_riscv32_asm:j(Rel); +branch_to_offset_code( + #state{available_regs = [TempReg | _]}, _Offset, TargetOffset +) -> + % Far branch: use auipc + lw + jalr sequence (RISC-V) + % This creates a PC-relative load sequence - always 16 bytes (4-byte aligned) + + % TempReg = PC + I1 = jit_riscv32_asm:auipc(TempReg, 0), + % TempReg = *(PC+8) + I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8), + % Jump to TempReg + I3 = jit_riscv32_asm:jalr(zero, TempReg, 0), + % The literal value is the absolute target offset + I4 = <>, + <>. + +branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> + CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), + {State, CodeBlock}; +branch_to_label_code( + #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false +) -> + % RISC-V: Far branch sequence - always 16 bytes (4-byte aligned) + + % Load PC into temp + I1 = jit_riscv32_asm:auipc(TempReg, 0), + % Load offset from PC+8 + I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8), + % Jump to address + I3 = jit_riscv32_asm:jalr(zero, TempReg, 0), + % Placeholder offset + I4 = <<0:32/little>>, + CodeBlock = <>, + SequenceSize = byte_size(CodeBlock), + % Add relocation entry + Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}}, + State1 = State0#state{branches = [Reloc | Branches]}, + {State1, CodeBlock}; +branch_to_label_code( + #state{available_regs = [], branches = Branches} = State0, Offset, Label, false +) -> + % RISC-V: Use t6 as scratch (caller-saved, safe to clobber) + % Same sequence as when we have available regs - always 16 bytes (4-byte aligned) + + % Load PC into t6 + I1 = jit_riscv32_asm:auipc(t6, 0), + % Load offset from PC+8 + I2 = jit_riscv32_asm:lw(t6, t6, 8), + % Jump to address + I3 = jit_riscv32_asm:jalr(zero, t6, 0), + % Placeholder offset + I4 = <<0:32/little>>, + CodeBlock = <>, + SequenceSize = byte_size(CodeBlock), + % Add relocation entry + Reloc = {Label, Offset, {far_branch, SequenceSize, t6}}, + State1 = State0#state{branches = [Reloc | Branches]}, + {State1, CodeBlock}; +branch_to_label_code(#state{available_regs = []}, _Offset, _Label, _LabelLookup) -> + error({no_available_registers, _LabelLookup}). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if block, i.e. emit a test of a condition and conditionnally +%% execute a block. +%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockFn function to emit the block that may be executed +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state(). +if_block( + #state{stream_module = StreamModule} = State0, + {'and', CondList}, + BlockFn +) -> + {Replacements, State1} = lists:foldl( + fun(Cond, {AccReplacements, AccState}) -> + Offset = StreamModule:offset(AccState#state.stream), + {NewAccState, BranchInfo, ReplaceDelta} = if_block_cond(AccState, Cond), + {[{Offset + ReplaceDelta, BranchInfo} | AccReplacements], NewAccState} + end, + {[], State0}, + CondList + ), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + Stream3 = lists:foldl( + fun({ReplacementOffset, {BranchFunc, Reg, Operand}}, AccStream) -> + BranchOffset = OffsetAfter - ReplacementOffset, + NewBranchInstr = apply(jit_riscv32_asm, BranchFunc, [Reg, Operand, BranchOffset]), + StreamModule:replace(AccStream, ReplacementOffset, NewBranchInstr) + end, + Stream2, + Replacements + ), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs); +if_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, {BranchFunc, Reg, Operand}, BranchInstrDelta} = if_block_cond(State0, Cond), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + %% Patch the conditional branch instruction to jump to the end of the block + BranchInstrOffset = Offset + BranchInstrDelta, + BranchOffset = OffsetAfter - BranchInstrOffset, + NewBranchInstr = apply(jit_riscv32_asm, BranchFunc, [Reg, Operand, BranchOffset]), + Stream3 = StreamModule:replace(Stream2, BranchInstrOffset, NewBranchInstr), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if else block, i.e. emit a test of a condition and +%% conditionnally execute a block or another block. +%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockTrueFn function to emit the block that is executed if condition is true +%% @param BlockFalseFn function to emit the block that is executed if condition is false +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) -> + state(). +if_else_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockTrueFn, + BlockFalseFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, {BranchFunc, Reg, Operand}, BranchInstrDelta} = if_block_cond(State0, Cond), + BranchInstrOffset = Offset + BranchInstrDelta, + State2 = BlockTrueFn(State1), + Stream2 = State2#state.stream, + %% Emit unconditional branch to skip the else block (will be replaced) + ElseJumpOffset = StreamModule:offset(Stream2), + ElseJumpInstr = jit_riscv32_asm:j(0), + Stream3 = StreamModule:append(Stream2, ElseJumpInstr), + %% Else block starts here. + OffsetAfter = StreamModule:offset(Stream3), + %% Patch the conditional branch to jump to the else block + ElseBranchOffset = OffsetAfter - BranchInstrOffset, + NewBranchInstr = apply(jit_riscv32_asm, BranchFunc, [Reg, Operand, ElseBranchOffset]), + Stream4 = StreamModule:replace(Stream3, BranchInstrOffset, NewBranchInstr), + %% Build the else block + StateElse = State2#state{ + stream = Stream4, + used_regs = State1#state.used_regs, + available_regs = State1#state.available_regs + }, + State3 = BlockFalseFn(StateElse), + Stream5 = State3#state.stream, + OffsetFinal = StreamModule:offset(Stream5), + %% Patch the unconditional branch to jump to the end + FinalJumpOffset = OffsetFinal - ElseJumpOffset, + NewElseJumpInstr = jit_riscv32_asm:j(FinalJumpOffset), + Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), + merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs). + +-spec if_block_cond(state(), condition()) -> + { + state(), + {beq | bne | blt | bge, atom(), atom() | integer()}, + non_neg_integer() + }. +if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> + %% RISC-V: bge Reg, zero, offset (branch if Reg >= 0, i.e., NOT negative/NOT less than 0) + BranchInstr = jit_riscv32_asm:bge(Reg, zero, 0), + Stream1 = StreamModule:append(Stream0, BranchInstr), + State1 = State0#state{stream = Stream1}, + {State1, {bge, Reg, zero}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {Reg, '<', Val} +) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 -> + % RISC-V: bge Reg, Val, offset (branch if Reg >= Val, i.e., NOT less than) + % Load immediate into a temp register for comparison + [Temp | _] = State0#state.available_regs, + OffsetBefore = StreamModule:offset(Stream0), + State1 = mov_immediate(State0, Temp, Val), + Stream1 = State1#state.stream, + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + BranchInstr = jit_riscv32_asm:bge(Reg, Temp, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State2 = State1#state{stream = Stream2}, + {State2, {bge, Reg, Temp}, BranchDelta}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + {Reg, '<', Val} +) when is_atom(Reg), is_integer(Val) -> + % RISC-V: bge Reg, Temp, offset (branch if Reg >= Temp, i.e., NOT less than) + OffsetBefore = StreamModule:offset(Stream0), + State1 = mov_immediate(State0, Temp, Val), + Stream1 = State1#state.stream, + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + BranchInstr = jit_riscv32_asm:bge(Reg, Temp, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State2 = State1#state{stream = Stream2}, + {State2, {bge, Reg, Temp}, BranchDelta}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '<', RegB} +) when is_atom(RegB) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + % RISC-V: bge Reg, RegB, offset (branch if Reg >= RegB, i.e., NOT less than) + BranchInstr = jit_riscv32_asm:bge(Reg, RegB, 0), + Stream1 = StreamModule:append(Stream0, BranchInstr), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {bge, Reg, RegB}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + %% RISC-V: bne Reg, zero, offset (branch if Reg != 0, i.e., NOT equal to 0) + BranchInstr = jit_riscv32_asm:bne(Reg, zero, 0), + Stream1 = StreamModule:append(Stream0, BranchInstr), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {bne, Reg, zero}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '==', RegB} +) when is_atom(RegB) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + %% RISC-V: bne Reg, RegB, offset (branch if Reg != RegB, i.e., NOT equal) + BranchInstr = jit_riscv32_asm:bne(Reg, RegB, 0), + Stream1 = StreamModule:append(Stream0, BranchInstr), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {bne, Reg, RegB}, 0}; +%% Delegate (int) forms to regular forms since we only have 32-bit words +if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) -> + if_block_cond(State, {RegOrTuple, '==', 0}); +if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) -> + if_block_cond(State, {RegOrTuple, '==', Val}); +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + {RegOrTuple, '!=', Val} +) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + %% RISC-V: Load immediate into temp, then beq Reg, Temp, offset + OffsetBefore = StreamModule:offset(Stream0), + State1 = mov_immediate(State0, Temp, Val), + Stream1 = State1#state.stream, + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + BranchInstr = jit_riscv32_asm:beq(Reg, Temp, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream2}, + {State3, {beq, Reg, Temp}, BranchDelta}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {RegOrTuple, '!=', Val} +) when ?IS_GPR(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + %% RISC-V: beq Reg, Val, offset (branch if Reg == Val, i.e., NOT not-equal) + BranchInstr = jit_riscv32_asm:beq(Reg, Val, 0), + Stream1 = StreamModule:append(Stream0, BranchInstr), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream1}, + {State2, {beq, Reg, Val}, 0}; +if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) -> + if_block_cond(State, {RegOrTuple, '!=', Val}); +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + {RegOrTuple, '==', Val} +) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + %% RISC-V: Load immediate into temp, then bne Reg, Temp, offset + OffsetBefore = StreamModule:offset(Stream0), + State1 = mov_immediate(State0, Temp, Val), + Stream1 = State1#state.stream, + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + BranchInstr = jit_riscv32_asm:bne(Reg, Temp, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream2}, + {State3, {bne, Reg, Temp}, BranchDelta}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {{free, RegA}, '==', {free, RegB}} +) -> + %% RISC-V: bne RegA, RegB, offset (branch if RegA != RegB, i.e., NOT equal) + BranchInstr = jit_riscv32_asm:bne(RegA, RegB, 0), + Stream1 = StreamModule:append(Stream0, BranchInstr), + State1 = State0#state{stream = Stream1}, + State2 = if_block_free_reg({free, RegA}, State1), + State3 = if_block_free_reg({free, RegB}, State2), + {State3, {bne, RegA, RegB}, 0}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + {RegOrTuple, '==', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + OffsetBefore = StreamModule:offset(Stream0), + State1 = mov_immediate(State0, Temp, Val), + Stream1 = State1#state.stream, + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + %% RISC-V: bne Reg, Temp, offset (branch if Reg != Temp, i.e., NOT equal) + BranchInstr = jit_riscv32_asm:bne(Reg, Temp, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream2}, + {State3, {bne, Reg, Temp}, BranchDelta}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + {RegOrTuple, '!=', Val} +) when is_integer(Val) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + OffsetBefore = StreamModule:offset(Stream0), + State1 = mov_immediate(State0, Temp, Val), + Stream1 = State1#state.stream, + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + %% RISC-V: beq Reg, Temp, offset (branch if Reg == Temp, i.e., NOT not-equal) + BranchInstr = jit_riscv32_asm:beq(Reg, Temp, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State2 = if_block_free_reg(RegOrTuple, State1), + State3 = State2#state{stream = Stream2}, + {State3, {beq, Reg, Temp}, BranchDelta}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {'(bool)', RegOrTuple, '==', false} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + %% RISC-V: Test bit 0 by shifting to MSB, then branch if negative (bit was 1, NOT false) + I1 = jit_riscv32_asm:slli(Temp, Reg, 31), + Stream1 = StreamModule:append(Stream0, I1), + BranchInstr = jit_riscv32_asm:blt(Temp, zero, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream2}, + {State2, {blt, Temp, zero}, byte_size(I1)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {'(bool)', RegOrTuple, '!=', false} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + %% RISC-V: Test bit 0 by shifting to MSB, then branch if non-negative (bit was 0, NOT true) + I1 = jit_riscv32_asm:slli(Temp, Reg, 31), + Stream1 = StreamModule:append(Stream0, I1), + BranchInstr = jit_riscv32_asm:bge(Temp, zero, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream2}, + {State2, {bge, Temp, zero}, byte_size(I1)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {RegOrTuple, '&', Val, '!=', 0} +) -> + Reg = + case RegOrTuple of + {free, Reg0} -> Reg0; + RegOrTuple -> RegOrTuple + end, + %% RISC-V: Test bits using ANDI or li+and + TestCode = + if + Val >= -2048 andalso Val =< 2047 -> + %% Can use ANDI instruction directly + jit_riscv32_asm:andi(Temp, Reg, Val); + true -> + %% Need to load immediate into temp register first + TestCode0 = jit_riscv32_asm:li(Temp, Val), + TestCode1 = jit_riscv32_asm:and_(Temp, Reg, Temp), + <> + end, + OffsetBefore = StreamModule:offset(Stream0), + Stream1 = StreamModule:append(Stream0, TestCode), + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + %% Branch if result is zero (no bits set, NOT != 0) + BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State1 = if_block_free_reg(RegOrTuple, State0), + State2 = State1#state{stream = Stream2}, + {State2, {beq, Temp, zero}, BranchDelta}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + {Reg, '&', 16#F, '!=', 16#F} +) when ?IS_GPR(Reg) -> + %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG + I1 = jit_riscv32_asm:not_(Temp, Reg), + I2 = jit_riscv32_asm:slli(Temp, Temp, 28), + Stream1 = StreamModule:append(Stream0, <>), + BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State1 = State0#state{stream = Stream2}, + {State1, {beq, Temp, zero}, byte_size(I1) + byte_size(I2)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F} +) when ?IS_GPR(Reg) -> + %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG + I1 = jit_riscv32_asm:not_(Reg, Reg), + I2 = jit_riscv32_asm:slli(Reg, Reg, 28), + Stream1 = StreamModule:append(Stream0, <>), + BranchInstr = jit_riscv32_asm:beq(Reg, zero, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State1 = State0#state{stream = Stream2}, + State2 = if_block_free_reg(RegTuple, State1), + {State2, {beq, Reg, zero}, byte_size(I1) + byte_size(I2)}; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | AT] + } = State0, + {Reg, '&', Mask, '!=', Val} +) when ?IS_GPR(Reg) -> + %% RISC-V: AND with mask, then compare with value + OffsetBefore = StreamModule:offset(Stream0), + I1 = jit_riscv32_asm:mv(Temp, Reg), + Stream1 = StreamModule:append(Stream0, I1), + State1 = State0#state{stream = Stream1}, + State2 = and_(State1#state{available_regs = AT}, Temp, Mask), + Stream2 = State2#state.stream, + %% Compare Temp with Val and branch if equal (NOT != Val) + case Val of + 0 -> + %% Optimize comparison with zero + BranchDelta = StreamModule:offset(Stream2) - OffsetBefore, + BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0), + Stream3 = StreamModule:append(Stream2, BranchInstr), + State3 = State2#state{ + stream = Stream3, available_regs = [Temp | State2#state.available_regs] + }, + {State3, {beq, Temp, zero}, BranchDelta}; + _ when ?IS_GPR(Val) -> + %% Val is a register + BranchDelta = StreamModule:offset(Stream2) - OffsetBefore, + BranchInstr = jit_riscv32_asm:beq(Temp, Val, 0), + Stream3 = StreamModule:append(Stream2, BranchInstr), + State3 = State2#state{ + stream = Stream3, available_regs = [Temp | State2#state.available_regs] + }, + {State3, {beq, Temp, Val}, BranchDelta}; + _ -> + %% Val is an immediate - need second temp register + %% Reuse the mask register for the comparison value + [MaskReg | AT2] = AT, + State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val), + Stream3 = State3#state.stream, + BranchDelta = StreamModule:offset(Stream3) - OffsetBefore, + BranchInstr = jit_riscv32_asm:beq(Temp, MaskReg, 0), + Stream4 = StreamModule:append(Stream3, BranchInstr), + State4 = State3#state{ + stream = Stream4, available_regs = [Temp, MaskReg | State3#state.available_regs] + }, + {State4, {beq, Temp, MaskReg}, BranchDelta} + end; +if_block_cond( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailRegs + } = State0, + {{free, Reg} = RegTuple, '&', Mask, '!=', Val} +) when ?IS_GPR(Reg) -> + %% RISC-V: AND with mask, then compare with value + OffsetBefore = StreamModule:offset(Stream0), + State1 = and_(State0, Reg, Mask), + Stream1 = State1#state.stream, + %% Compare Reg with Val and branch if equal (NOT != Val) + case Val of + 0 -> + %% Optimize comparison with zero + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + BranchInstr = jit_riscv32_asm:beq(Reg, zero, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State2 = State1#state{stream = Stream2}, + State3 = if_block_free_reg(RegTuple, State2), + {State3, {beq, Reg, zero}, BranchDelta}; + _ when ?IS_GPR(Val) -> + %% Val is a register + BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, + BranchInstr = jit_riscv32_asm:beq(Reg, Val, 0), + Stream2 = StreamModule:append(Stream1, BranchInstr), + State2 = State1#state{stream = Stream2}, + State3 = if_block_free_reg(RegTuple, State2), + {State3, {beq, Reg, Val}, BranchDelta}; + _ -> + %% Val is an immediate - need temp register + %% Reuse the mask register for the comparison value + [MaskReg | AT] = State1#state.available_regs, + State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val), + Stream2 = State2#state.stream, + BranchDelta = StreamModule:offset(Stream2) - OffsetBefore, + BranchInstr = jit_riscv32_asm:beq(Reg, MaskReg, 0), + Stream3 = StreamModule:append(Stream2, BranchInstr), + State3 = State2#state{stream = Stream3, available_regs = AvailRegs}, + State4 = if_block_free_reg(RegTuple, State3), + {State4, {beq, Reg, MaskReg}, BranchDelta} + end. + +-spec if_block_free_reg(riscv32_register() | {free, riscv32_register()}, state()) -> state(). +if_block_free_reg({free, Reg}, State0) -> + #state{available_regs = AvR0, used_regs = UR0} = State0, + {AvR1, UR1} = free_reg(AvR0, UR0, Reg), + State0#state{ + available_regs = AvR1, + used_regs = UR1 + }; +if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) -> + State0. + +-spec merge_used_regs(state(), [riscv32_register()]) -> state(). +merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [ + Reg | T +]) -> + case lists:member(Reg, UR0) of + true -> + merge_used_regs(State, T); + false -> + AvR1 = lists:delete(Reg, AvR0), + UR1 = [Reg | UR0], + merge_used_regs( + State#state{used_regs = UR1, available_regs = AvR1}, T + ) + end; +merge_used_regs(State, []) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Emit a shift register right by a fixed number of bits, effectively +%% dividing it by 2^Shift +%% @param State current state +%% @param Reg register to shift +%% @param Shift number of bits to shift +%% @return new state +%%----------------------------------------------------------------------------- +-spec shift_right(#state{}, maybe_free_riscv32_register(), non_neg_integer()) -> + {#state{}, riscv32_register()}. +shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I = jit_riscv32_asm:srli(Reg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + {State#state{stream = Stream1}, Reg}; +shift_right( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [ResultReg | T], + used_regs = UR + } = State, + Reg, + Shift +) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I = jit_riscv32_asm:srli(ResultReg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a shift register left by a fixed number of bits, effectively +%% multiplying it by 2^Shift +%% @param State current state +%% @param Reg register to shift +%% @param Shift number of bits to shift +%% @return new state +%%----------------------------------------------------------------------------- +shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when + is_atom(Reg) +-> + I = jit_riscv32_asm:slli(Reg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a call to a function pointer with arguments. This function converts +%% arguments and passes them following the backend ABI convention. +%% @end +%% @param State current backend state +%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex} +%% @param Args arguments to pass to the function +%% @return Updated backend state and return register +%%----------------------------------------------------------------------------- +-spec call_func_ptr(state(), {free, riscv32_register()} | {primitive, non_neg_integer()}, [arg()]) -> + {state(), riscv32_register()}. +call_func_ptr( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State0, + FuncPtrTuple, + Args +) -> + FreeRegs = lists:flatmap( + fun + ({free, {ptr, Reg}}) -> [Reg]; + ({free, Reg}) when is_atom(Reg) -> [Reg]; + (_) -> [] + end, + [FuncPtrTuple | Args] + ), + UsedRegs1 = UsedRegs0 -- FreeRegs, + % Save RA (like AArch64 saves LR) so it's preserved across jalr calls + SavedRegs = [?RA_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1], + + % Calculate available registers + FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS), + AvailableRegs1 = FreeGPRegs ++ AvailableRegs0, + + % Calculate stack space: round up to 16-byte boundary for RISC-V ABI + NumRegs = length(SavedRegs), + StackBytes = NumRegs * 4, + AlignedStackBytes = ((StackBytes + 15) div 16) * 16, + + Stream1 = push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0), + + % Set up arguments following RISC-V ILP32 calling convention + % Arguments are passed in a0-a7 (up to 8 register arguments) + Args1 = lists:map( + fun(Arg) -> + case Arg of + offset -> StreamModule:offset(Stream1); + _ -> Arg + end + end, + Args + ), + + RegArgs0 = Args1, + RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0), + + % We pushed registers to stack, so we can use these registers we saved + % and the currently available registers + SetArgsRegsOnlyAvailableArgs = (UsedRegs1 -- RegArgsRegs) ++ AvailableRegs0, + State1 = State0#state{ + available_regs = SetArgsRegsOnlyAvailableArgs, + used_regs = ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs, + stream = Stream1 + }, + + ParameterRegs = parameter_regs(RegArgs0), + {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} = + case FuncPtrTuple of + {free, FuncPtrReg0} -> + % If FuncPtrReg is in parameter regs, we must swap it with a free reg. + case lists:member(FuncPtrReg0, ParameterRegs) of + true -> + case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of + [] -> + % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0 + % that is not in ParameterRegs + [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs, + [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs, + MovInstr1 = jit_riscv32_asm:mv(NewArgReg, FuncPtrReg1), + MovInstr2 = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0), + SetArgsAvailableArgs1 = + (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ + [FuncPtrReg0], + RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg), + { + StreamModule:append( + State1#state.stream, <> + ), + SetArgsAvailableArgs1, + FuncPtrReg1, + RegArgs1 + }; + [FuncPtrReg1 | _] -> + MovInstr = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0), + SetArgsAvailableArgs1 = + (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++ + [FuncPtrReg0], + { + StreamModule:append(State1#state.stream, MovInstr), + SetArgsAvailableArgs1, + FuncPtrReg1, + RegArgs0 + } + end; + false -> + SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], + {State1#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0} + end; + {primitive, Primitive} -> + [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs, + SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0], + PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0), + Stream2 = StreamModule:append(State1#state.stream, PrepCall), + {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0} + end, + + State3 = State1#state{ + available_regs = SetArgsAvailableRegs, + used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs, + stream = Stream3 + }, + + StackOffset = AlignedStackBytes, + State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset), + Stream4 = State4#state.stream, + + % Call the function pointer (using JALR for call with return) + Call = jit_riscv32_asm:jalr(ra, FuncPtrReg, 0), + Stream5 = StreamModule:append(Stream4, Call), + + % For result, we need a free register (including FuncPtrReg). + % If none are available (all registers were pushed to the stack), + % we write the result to the stack position of FuncPtrReg + {Stream6, UsedRegs2} = + case length(SavedRegs) of + N when N >= 7 andalso element(1, FuncPtrTuple) =:= free -> + % We use original FuncPtrReg then as we know it's available. + % Calculate stack offset: find register index in SavedRegs * 4 bytes + ResultReg = element(2, FuncPtrTuple), + RegIndex = index_of(ResultReg, SavedRegs), + StoreResultStackOffset = RegIndex * 4, + StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset), + {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]}; + _ -> + % Use any free that is not in SavedRegs + [ResultReg | _] = AvailableRegs1 -- SavedRegs, + MoveResult = jit_riscv32_asm:mv(ResultReg, a0), + {StreamModule:append(Stream5, MoveResult), [ResultReg | UsedRegs1]} + end, + + Stream8 = pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream6), + + AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1), + AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2), + { + State4#state{ + stream = Stream8, + available_regs = AvailableRegs3, + used_regs = UsedRegs2 + }, + ResultReg + }. + +arg_to_reg_list({free, {ptr, Reg}}) -> [Reg]; +arg_to_reg_list({free, Reg}) when is_atom(Reg) -> [Reg]; +arg_to_reg_list(Reg) when is_atom(Reg) -> [Reg]; +arg_to_reg_list(_) -> []. + +index_of(Item, List) -> index_of(Item, List, 0). + +index_of(_, [], _) -> -1; +index_of(Item, [Item | _], Index) -> Index; +index_of(Item, [_ | Rest], Index) -> index_of(Item, Rest, Index + 1). + +push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0) when length(SavedRegs) > 0 -> + % RISC-V: addi sp, sp, -AlignedStackBytes then sw reg, offset(sp) for each reg + StackAdjust = jit_riscv32_asm:addi(sp, sp, -AlignedStackBytes), + Stream1 = StreamModule:append(Stream0, StackAdjust), + {Stream2, _} = lists:foldl( + fun(Reg, {StreamAcc, Offset}) -> + Store = jit_riscv32_asm:sw(sp, Reg, Offset), + {StreamModule:append(StreamAcc, Store), Offset + 4} + end, + {Stream1, 0}, + SavedRegs + ), + Stream2; +push_registers([], _AlignedStackBytes, _StreamModule, Stream0) -> + Stream0. + +pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0) when length(SavedRegs) > 0 -> + % RISC-V: lw reg, offset(sp) for each reg then addi sp, sp, AlignedStackBytes + {Stream1, _} = lists:foldl( + fun(Reg, {StreamAcc, Offset}) -> + Load = jit_riscv32_asm:lw(Reg, sp, Offset), + {StreamModule:append(StreamAcc, Load), Offset + 4} + end, + {Stream0, 0}, + SavedRegs + ), + StackAdjust = jit_riscv32_asm:addi(sp, sp, AlignedStackBytes), + StreamModule:append(Stream1, StackAdjust); +pop_registers([], _AlignedStackBytes, _StreamModule, Stream0) -> + Stream0. + +set_registers_args(State0, Args, StackOffset) -> + ParamRegs = parameter_regs(Args), + set_registers_args(State0, Args, ParamRegs, StackOffset). + +set_registers_args( + #state{used_regs = UsedRegs} = State0, + Args, + ParamRegs, + StackOffset +) -> + ArgsRegs = args_regs(Args), + AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs, + State1 = set_registers_args0( + State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset + ), + Stream1 = State1#state.stream, + NewUsedRegs = lists:foldl( + fun + ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed); + ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed); + (_, AccUsed) -> AccUsed + end, + UsedRegs, + Args + ), + State1#state{ + stream = Stream1, + available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, + used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) + }. + +parameter_regs(Args) -> + parameter_regs0(Args, ?PARAMETER_REGS, []). + +% ILP32: 64-bit arguments require double-word alignment (even register number) +parameter_regs0([], _, Acc) -> + lists:reverse(Acc); +parameter_regs0([{avm_int64_t, _} | T], [a0, a1 | Rest], Acc) -> + parameter_regs0(T, Rest, [a1, a0 | Acc]); +parameter_regs0([{avm_int64_t, _} | T], [a1, a2, a3 | Rest], Acc) -> + parameter_regs0(T, Rest, [a3, a2 | Acc]); +parameter_regs0([{avm_int64_t, _} | T], [a2, a3 | Rest], Acc) -> + parameter_regs0(T, Rest, [a3, a2 | Acc]); +parameter_regs0([_Other | T], [Reg | Rest], Acc) -> + parameter_regs0(T, Rest, [Reg | Acc]). + +replace_reg(Args, Reg1, Reg2) -> + replace_reg0(Args, Reg1, Reg2, []). + +replace_reg0([Reg | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([Other | T], Reg, Replacement, Acc) -> + replace_reg0(T, Reg, Replacement, [Other | Acc]). + +set_registers_args0(State, [], [], [], _AvailGP, _StackOffset) -> + State; +set_registers_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset) -> + set_registers_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset); +set_registers_args0( + State, [ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset +) -> + set_registers_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); +% Handle 64-bit arguments that need two registers according to ILP32 +set_registers_args0( + State, + [{avm_int64_t, Value} | ArgsT], + ArgsRegs, + ParamRegs, + AvailGP, + StackOffset +) when is_integer(Value) -> + LowPartUnsigned = Value band 16#FFFFFFFF, + HighPartUnsigned = (Value bsr 32) band 16#FFFFFFFF, + % Convert to signed 32-bit values for RISC-V li instruction + LowPart = + if + LowPartUnsigned > 16#7FFFFFFF -> LowPartUnsigned - 16#100000000; + true -> LowPartUnsigned + end, + HighPart = + if + HighPartUnsigned > 16#7FFFFFFF -> HighPartUnsigned - 16#100000000; + true -> HighPartUnsigned + end, + set_registers_args0( + State, [LowPart, HighPart | ArgsT], [imm | ArgsRegs], ParamRegs, AvailGP, StackOffset + ); +% ctx is special as we need it to access x_reg/y_reg/fp_reg and we don't +% want to replace it +set_registers_args0( + State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset +) -> + false = lists:member(?CTX_REG, ArgsRegs), + State1 = set_registers_args1(State, Arg, ?CTX_REG, StackOffset), + set_registers_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); +set_registers_args0( + #state{stream_module = StreamModule} = State0, + [Arg | ArgsT], + [_ArgReg | ArgsRegsT], + [ParamReg | ParamRegsT], + AvailGP, + StackOffset +) -> + case lists:member(ParamReg, ArgsRegsT) of + false -> + State1 = set_registers_args1(State0, Arg, ParamReg, StackOffset), + set_registers_args0(State1, ArgsT, ArgsRegsT, ParamRegsT, AvailGP, StackOffset); + true -> + [Avail | AvailGPT] = AvailGP, + I = jit_riscv32_asm:mv(Avail, ParamReg), + Stream1 = StreamModule:append(State0#state.stream, I), + State1 = set_registers_args1( + State0#state{stream = Stream1}, Arg, ParamReg, StackOffset + ), + NewArgsT = replace_reg(ArgsT, ParamReg, Avail), + set_registers_args0( + State1, NewArgsT, ArgsRegsT, ParamRegsT, AvailGPT, StackOffset + ) + end. + +set_registers_args1(State, Reg, Reg, _Offset) -> + State; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, + jit_state, + ParamReg, + _StackOffset +) -> + % jit_state is always in a1, so we only need to move it if the param reg is different + case ParamReg of + a1 -> + State; + _ -> + I = jit_riscv32_asm:mv(ParamReg, a1), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1} + end; +% For tail calls, jit_state is already in a1 +set_registers_args1(State, jit_state_tail_call, a1, _StackOffset) -> + State; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, + {x_reg, extra}, + Reg, + _StackOffset +) -> + {BaseReg, Off} = ?X_REG(?MAX_REG), + I = jit_riscv32_asm:lw(Reg, BaseReg, Off), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, Reg, _StackOffset +) -> + {XReg, X_REGOffset} = ?X_REG(X), + I = jit_riscv32_asm:lw(Reg, XReg, X_REGOffset), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Source}, Reg, _StackOffset +) -> + I = jit_riscv32_asm:lw(Reg, Source, 0), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State, + {y_reg, X}, + Reg, + _StackOffset +) -> + Code = ldr_y_reg(Reg, X, AvailRegs), + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg, _StackOffset +) when + ?IS_GPR(ArgReg) +-> + I = jit_riscv32_asm:mv(Reg, ArgReg), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1(State, Value, Reg, _StackOffset) when ?IS_SIGNED_OR_UNSIGNED_INT32_T(Value) -> + mov_immediate(State, Reg, Value). + +%%----------------------------------------------------------------------------- +%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg) +%% from an immediate, a native register or another vm register. +%% @end +%% @param State current backend state +%% @param Src value to move to vm register +%% @param Dest vm register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) -> + state(). +% Native register to VM register +move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) -> + {BaseReg, Off} = ?X_REG(?MAX_REG), + I1 = jit_riscv32_asm:sw(BaseReg, Src, Off), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) -> + {BaseReg, Off} = ?X_REG(X), + I1 = jit_riscv32_asm:sw(BaseReg, Src, Off), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) -> + I1 = jit_riscv32_asm:sw(Reg, Src, 0), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when + is_atom(Src) +-> + Code = str_y_reg(Src, Y, Temp1, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), + State0#state{stream = Stream1}; +% Source is an integer to y_reg (optimized: ldr first, then movs) +move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when + is_integer(N), N >= 0, N =< 255 +-> + I1 = jit_riscv32_asm:li(Temp2, N), + YCode = str_y_reg(Temp2, Y, Temp1, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), + State0#state{stream = Stream1}; +% Source is an integer (0-255 for movs, negative values need different handling) +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when + is_integer(N), N >= 0, N =< 255 +-> + I1 = jit_riscv32_asm:li(Temp, N), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +%% Handle large values using simple literal pool (branch-over pattern) +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when + is_integer(N) +-> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), + State2 = move_to_vm_register(State1, Temp, Dest), + State2#state{available_regs = AR0}; +% Source is a VM register +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) -> + {BaseReg, Off} = ?X_REG(?MAX_REG), + I1 = jit_riscv32_asm:lw(Temp, BaseReg, Off), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) -> + {XReg, X_REGOffset} = ?X_REG(X), + I1 = jit_riscv32_asm:lw(Temp, XReg, X_REGOffset), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) -> + I1 = jit_riscv32_asm:lw(Temp, Reg, 0), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) -> + Code = ldr_y_reg(Temp, Y, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +% term_to_float +move_to_vm_register( + #state{ + stream_module = StreamModule, + available_regs = [Temp1, Temp2 | _], + stream = Stream0, + variant = Variant + } = + State0, + {free, {ptr, Reg, 1}}, + {fp_reg, F} +) -> + {BaseReg, Off} = ?FP_REGS, + I1 = jit_riscv32_asm:lw(Temp1, BaseReg, Off), + I2 = jit_riscv32_asm:lw(Temp2, Reg, 4), + case Variant band ?JIT_VARIANT_FLOAT32 of + 0 -> + % Double precision: write both 32-bit parts + I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8), + I4 = jit_riscv32_asm:lw(Temp2, Reg, 8), + I5 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8 + 4), + Code = <>; + _ -> + % Single precision: write only first 32-bit part + I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 4), + Code = <> + end, + Stream1 = StreamModule:append(Stream0, Code), + State1 = free_native_register(State0, Reg), + State1#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a move of an array element (reg[x]) to a vm or a native register. +%% @end +%% @param State current backend state +%% @param Reg base register of the array +%% @param Index index in the array, as an integer or a native register +%% @param Dest vm or native register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_array_element( + state(), + riscv32_register(), + non_neg_integer() | riscv32_register(), + vm_register() | riscv32_register() +) -> state(). +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + Reg, + Index, + {x_reg, X} +) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) -> + I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4), + {BaseReg, Off} = ?X_REG(X), + I2 = jit_riscv32_asm:sw(BaseReg, Temp, Off), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + Reg, + Index, + {ptr, Dest} +) when is_atom(Reg) andalso is_integer(Index) -> + I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4), + I2 = jit_riscv32_asm:sw(Dest, Temp, 0), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} = + State, + Reg, + Index, + {y_reg, Y} +) when is_atom(Reg) andalso is_integer(Index) -> + I1 = jit_riscv32_asm:lw(Temp2, Reg, Index * 4), + YCode = str_y_reg(Temp2, Y, Temp1, AT), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} = + State, + {free, Reg}, + Index, + {y_reg, Y} +) when is_integer(Index) -> + I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4), + YCode = str_y_reg(Reg, Y, Temp, AT), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +move_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest +) when is_atom(Dest) andalso is_integer(Index) -> + I1 = jit_riscv32_asm:lw(Dest, Reg, Index * 4), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State, + Reg, + {free, IndexReg}, + {x_reg, X} +) when X < ?MAX_REG andalso is_atom(IndexReg) -> + I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2), + I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg), + I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0), + {BaseReg, Off} = ?X_REG(X), + I4 = jit_riscv32_asm:sw(BaseReg, IndexReg, Off), + {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg), + Stream1 = StreamModule:append(Stream0, <>), + State#state{ + available_regs = AvailableRegs1, + used_regs = UsedRegs1, + stream = Stream1 + }; +move_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State, + Reg, + {free, IndexReg}, + {ptr, PtrReg} +) when is_atom(IndexReg) -> + I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2), + I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg), + I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0), + I4 = jit_riscv32_asm:sw(PtrReg, IndexReg, 0), + {AvailableRegs1, UsedRegs1} = free_reg( + AvailableRegs0, UsedRegs0, IndexReg + ), + Stream1 = StreamModule:append(Stream0, <>), + State#state{ + available_regs = AvailableRegs1, + used_regs = UsedRegs1, + stream = Stream1 + }; +move_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | AT] = AvailableRegs0, + used_regs = UsedRegs0 + } = State, + Reg, + {free, IndexReg}, + {y_reg, Y} +) when is_atom(IndexReg) -> + I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2), + I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg), + I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0), + Code = str_y_reg(IndexReg, Y, Temp, AT), + I4 = Code, + {AvailableRegs1, UsedRegs1} = free_reg( + AvailableRegs0, UsedRegs0, IndexReg + ), + Stream1 = StreamModule:append( + Stream0, <> + ), + State#state{ + available_regs = AvailableRegs1, + used_regs = UsedRegs1, + stream = Stream1 + }. + +%% @doc move reg[x] to a vm or native register +-spec get_array_element( + state(), riscv32_register() | {free, riscv32_register()}, non_neg_integer() +) -> + {state(), riscv32_register()}. +get_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + {free, Reg}, + Index +) -> + I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4), + Stream1 = StreamModule:append(Stream0, <>), + {State#state{stream = Stream1}, Reg}; +get_array_element( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [ElemReg | AvailableT], + used_regs = UsedRegs0 + } = State, + Reg, + Index +) -> + I1 = jit_riscv32_asm:lw(ElemReg, Reg, Index * 4), + Stream1 = StreamModule:append(Stream0, <>), + { + State#state{ + stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0] + }, + ElemReg + }. + +%% @doc move an integer, a vm or native register to reg[x] +-spec move_to_array_element( + state(), integer() | vm_register() | riscv32_register(), riscv32_register(), non_neg_integer() +) -> state(). +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0} = State0, + ValueReg, + Reg, + Index +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) -> + I1 = jit_riscv32_asm:sw(Reg, ValueReg, Index * 4), + Stream1 = StreamModule:append(Stream0, I1), + State0#state{stream = Stream1}; +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + ValueReg, + Reg, + IndexReg +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) -> + I1 = jit_riscv32_asm:mv(Temp, IndexReg), + I2 = jit_riscv32_asm:slli(Temp, Temp, 2), + I3 = jit_riscv32_asm:add(Temp, Reg, Temp), + I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0), + Stream1 = StreamModule:append(Stream0, <>), + State0#state{stream = Stream1}; +move_to_array_element( + State0, + Value, + Reg, + Index +) -> + {State1, Temp} = copy_to_native_register(State0, Value), + State2 = move_to_array_element(State1, Temp, Reg, Index), + free_native_register(State2, Temp). + +move_to_array_element( + State, + Value, + BaseReg, + IndexReg, + Offset +) when is_integer(IndexReg) andalso is_integer(Offset) andalso Offset div 8 =:= 0 -> + move_to_array_element(State, Value, BaseReg, IndexReg + (Offset div 8)); +move_to_array_element( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State, + ValueReg, + BaseReg, + IndexReg, + Offset +) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) -> + I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset), + I2 = jit_riscv32_asm:slli(Temp, Temp, 2), + I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp), + I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1}; +move_to_array_element( + State0, + Value, + BaseReg, + IndexReg, + Offset +) -> + {State1, ValueReg} = copy_to_native_register(State0, Value), + [Temp | _] = State1#state.available_regs, + I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset), + I2 = jit_riscv32_asm:slli(Temp, Temp, 2), + I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp), + I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0), + Stream1 = (State1#state.stream_module):append( + State1#state.stream, <> + ), + State2 = State1#state{stream = Stream1}, + free_native_register(State2, ValueReg). + +-spec move_to_native_register(state(), value() | cp) -> {state(), riscv32_register()}. +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + cp +) -> + {BaseReg, Off} = ?CP, + I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register(State, Reg) when is_atom(Reg) -> + {State, Reg}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg} +) when is_atom(Reg) -> + I1 = jit_riscv32_asm:lw(Reg, Reg, 0), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1}, Reg}; +move_to_native_register( + #state{ + available_regs = [Reg | AvailT], + used_regs = Used + } = State0, + Imm +) when + is_integer(Imm) +-> + State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT}, + {move_to_native_register(State1, Imm, Reg), Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {x_reg, extra} +) -> + {BaseReg, Off} = ?X_REG(?MAX_REG), + I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {x_reg, X} +) when + X < ?MAX_REG +-> + {BaseReg, Offset} = ?X_REG(X), + I1 = jit_riscv32_asm:lw(Reg, BaseReg, Offset), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + {y_reg, Y} +) -> + Code = ldr_y_reg(Reg, Y, AvailT), + Stream1 = StreamModule:append(Stream0, Code), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [RegA, RegB | AvailT], + used_regs = Used + } = State, + {fp_reg, F} +) -> + {BaseReg, Off} = ?FP_REGS, + I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off), + I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8), + I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + { + State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]}, + {fp, RegA, RegB} + }. + +-spec move_to_native_register(state(), value(), riscv32_register()) -> state(). +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst +) when is_atom(RegSrc) -> + I = jit_riscv32_asm:mv(RegDst, RegSrc), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) -> + mov_immediate(State, RegDst, ValSrc); +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst +) when ?IS_GPR(Reg) -> + I1 = jit_riscv32_asm:lw(RegDst, Reg, 0), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst +) -> + {BaseReg, Off} = ?X_REG(?MAX_REG), + I1 = jit_riscv32_asm:lw(RegDst, BaseReg, Off), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst +) when + X < ?MAX_REG +-> + {XReg, X_REGOffset} = ?X_REG(X), + I1 = jit_riscv32_asm:lw(RegDst, XReg, X_REGOffset), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +move_to_native_register( + #state{stream_module = StreamModule, stream = Stream0, available_regs = AT} = State, + {y_reg, Y}, + RegDst +) -> + Code = ldr_y_reg(RegDst, Y, AT), + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + {fp_reg, F}, + {fp, RegA, RegB} +) -> + {BaseReg, Off} = ?FP_REGS, + I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off), + I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8), + I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +-spec copy_to_native_register(state(), value()) -> {state(), riscv32_register()}. +copy_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [SaveReg | AvailT], + used_regs = Used + } = State, + Reg +) when is_atom(Reg) -> + I1 = jit_riscv32_asm:mv(SaveReg, Reg), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; +copy_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [SaveReg | AvailT], + used_regs = Used + } = State, + {ptr, Reg} +) when is_atom(Reg) -> + I1 = jit_riscv32_asm:lw(SaveReg, Reg, 0), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg}; +copy_to_native_register(State, Reg) -> + move_to_native_register(State, Reg). + +move_to_cp( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | AvailT]} = State, + {y_reg, Y} +) -> + I1 = ldr_y_reg(Reg, Y, AvailT), + {BaseReg, Off} = ?CP, + I2 = jit_riscv32_asm:sw(BaseReg, Reg, Off), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +increment_sp( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State, + Offset +) -> + {BaseReg1, Off1} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(Reg, BaseReg1, Off1), + I2 = jit_riscv32_asm:addi(Reg, Reg, Offset * 4), + {BaseReg2, Off2} = ?Y_REGS, + I3 = jit_riscv32_asm:sw(BaseReg2, Reg, Off2), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}. + +set_continuation_to_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + branches = Branches + } = State, + Label +) -> + % Similar to AArch64: use pc_relative_address with a relocation that will be + % resolved to point directly to the label's actual address (not the jump table entry) + Offset = StreamModule:offset(Stream0), + % Emit placeholder for pc_relative_address (auipc + addi) + % The relocation will replace these with the correct offset + I1 = pc_relative_address(Temp, 4), + Reloc = {Label, Offset, {adr, Temp}}, + % Store continuation (jit_state is in a1) + I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]}. + +%% @doc Set the contination to a given offset +%% Return a reference so the offset will be updated with update_branches +%% This is only used with OP_WAIT_TIMEOUT and the offset is after the current +%% code and not too far, so on Thumb we can use adr instruction. +set_continuation_to_offset( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + branches = Branches + } = State +) -> + OffsetRef = make_ref(), + Offset = StreamModule:offset(Stream0), + I1 = pc_relative_address(Temp, 4), + Reloc = {OffsetRef, Offset, {adr, Temp}}, + % Store continuation (jit_state is in a1) + I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}. + +%% @doc Implement a continuation entry point. +-spec continuation_entry_point(#state{}) -> #state{}. +continuation_entry_point(State) -> + State. + +get_module_index( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailableT], + used_regs = UsedRegs0 + } = State +) -> + % Load module from jit_state (which is in a1) + I1 = jit_riscv32_asm:lw(Reg, ?JITSTATE_REG, ?JITSTATE_MODULE_OFFSET), + I2 = jit_riscv32_asm:lw(Reg, Reg, 0), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + { + State#state{ + stream = Stream1, + available_regs = AvailableT, + used_regs = [Reg | UsedRegs0] + }, + Reg + }. + +%% @doc Perform an AND of a register with an immediate. +%% JIT currentl calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to +%% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool +%% by using BICS for -4. +and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) -> + I1 = jit_riscv32_asm:slli(Reg, Reg, 8), + I2 = jit_riscv32_asm:srli(Reg, Reg, 8), + Stream1 = StreamModule:append(Stream0, <>), + State0#state{stream = Stream1}; +and_( + #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + Reg, + Val +) when Val < 0 andalso Val >= -256 -> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)), + Stream1 = State1#state.stream, + % RISC-V doesn't have bics, use not + and + I1 = jit_riscv32_asm:not_(Temp, Temp), + I2 = jit_riscv32_asm:and_(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, <>), + State1#state{available_regs = [Temp | AT], stream = Stream2}; +and_( + #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + Reg, + Val +) -> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_riscv32_asm:and_(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{available_regs = [Temp | AT], stream = Stream2}; +and_( + #state{stream_module = StreamModule, available_regs = []} = State0, + Reg, + Val +) when Val < 0 andalso Val >= -256 -> + % No available registers, use a0 as temp and save it to t3 + Stream0 = State0#state.stream, + % Save a0 to t3 + Save = jit_riscv32_asm:mv(?IP_REG, a0), + Stream1 = StreamModule:append(Stream0, Save), + % Load immediate value into a0 + State1 = mov_immediate(State0#state{stream = Stream1}, a0, bnot (Val)), + Stream2 = State1#state.stream, + % Perform BICS operation (RISC-V: not + and) + I1 = jit_riscv32_asm:not_(a0, a0), + I2 = jit_riscv32_asm:and_(Reg, Reg, a0), + Stream3 = StreamModule:append(Stream2, <>), + % Restore a0 from t3 + Restore = jit_riscv32_asm:mv(a0, ?IP_REG), + Stream4 = StreamModule:append(Stream3, Restore), + State0#state{stream = Stream4}; +and_( + #state{stream_module = StreamModule, available_regs = []} = State0, + Reg, + Val +) -> + % No available registers, use a0 as temp and save it to t3 + Stream0 = State0#state.stream, + % Save a0 to t3 + Save = jit_riscv32_asm:mv(?IP_REG, a0), + Stream1 = StreamModule:append(Stream0, Save), + % Load immediate value into a0 + State1 = mov_immediate(State0#state{stream = Stream1}, a0, Val), + Stream2 = State1#state.stream, + % Perform ANDS operation + I = jit_riscv32_asm:and_(Reg, Reg, a0), + Stream3 = StreamModule:append(Stream2, I), + % Restore a0 from t3 + Restore = jit_riscv32_asm:mv(a0, ?IP_REG), + Stream4 = StreamModule:append(Stream3, Restore), + State0#state{stream = Stream4}. + +or_( + #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + Reg, + Val +) -> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_riscv32_asm:or_(Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{available_regs = [Temp | AT], stream = Stream2}. + +add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when + Val >= 0 andalso Val =< 255 +-> + I = jit_riscv32_asm:addi(Reg, Reg, Val), + Stream1 = StreamModule:append(Stream0, I), + State0#state{stream = Stream1}; +add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when + is_atom(Val) +-> + I = jit_riscv32_asm:add(Reg, Reg, Val), + Stream1 = StreamModule:append(Stream0, I), + State0#state{stream = Stream1}; +add(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) -> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_riscv32_asm:add(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{available_regs = [Temp | AT], stream = Stream2}. + +mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when + Val >= -16#800, Val =< 16#7FF +-> + % RISC-V li can handle 12-bit signed immediates in a single instruction (addi) + I = jit_riscv32_asm:li(Reg, Val), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> + % For values outside 12-bit range, li will use lui + addi (2 instructions) + % which is efficient enough, no need for literal pool + I = jit_riscv32_asm:li(Reg, Val), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}. + +flush_literal_pool(#state{literal_pool = []} = State) -> + State; +flush_literal_pool( + #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State +) -> + % Align + Offset = StreamModule:offset(Stream0), + Stream1 = + if + Offset rem 4 =:= 0 -> Stream0; + true -> StreamModule:append(Stream0, <<0:16>>) + end, + % Lay all values and update ldr instructions + Stream2 = lists:foldl( + fun({LdrInstructionAddr, Reg, Val}, AccStream) -> + LiteralPosition = StreamModule:offset(AccStream), + LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, + LiteralOffset = LiteralPosition - LdrPC, + LdrInstruction = jit_riscv32_asm:lw(Reg, pc, LiteralOffset), + AccStream1 = StreamModule:append(AccStream, <>), + StreamModule:replace( + AccStream1, LdrInstructionAddr, LdrInstruction + ) + end, + Stream1, + lists:reverse(LP) + ), + State#state{stream = Stream2, literal_pool = []}. + +sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when + Val >= 0 andalso Val =< 255 +-> + I1 = jit_riscv32_asm:addi(Reg, Reg, -Val), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}; +sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when + is_atom(Val) +-> + I = jit_riscv32_asm:sub(Reg, Reg, Val), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +sub(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) -> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_riscv32_asm:sub(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{available_regs = [Temp | AT], stream = Stream2}. + +mul(State, _Reg, 1) -> + State; +mul(State, Reg, 2) -> + shift_left(State, Reg, 1); +mul(#state{available_regs = [Temp | _]} = State, Reg, 3) -> + I1 = jit_riscv32_asm:slli(Temp, Reg, 1), + I2 = jit_riscv32_asm:add(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State, Reg, 4) -> + shift_left(State, Reg, 2); +mul(#state{available_regs = [Temp | _]} = State, Reg, 5) -> + I1 = jit_riscv32_asm:slli(Temp, Reg, 2), + I2 = jit_riscv32_asm:add(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State0, Reg, 6) -> + State1 = mul(State0, Reg, 3), + mul(State1, Reg, 2); +mul(#state{available_regs = [Temp | _]} = State, Reg, 7) -> + I1 = jit_riscv32_asm:slli(Temp, Reg, 3), + I2 = jit_riscv32_asm:sub(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State, Reg, 8) -> + shift_left(State, Reg, 3); +mul(#state{available_regs = [Temp | _]} = State, Reg, 9) -> + I1 = jit_riscv32_asm:slli(Temp, Reg, 3), + I2 = jit_riscv32_asm:add(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State0, Reg, 10) -> + State1 = mul(State0, Reg, 5), + mul(State1, Reg, 2); +mul(#state{available_regs = [Temp | _]} = State, Reg, 15) -> + I1 = jit_riscv32_asm:slli(Temp, Reg, 4), + I2 = jit_riscv32_asm:sub(Reg, Temp, Reg), + Stream1 = (State#state.stream_module):append(State#state.stream, <>), + State#state{stream = Stream1}; +mul(State, Reg, 16) -> + shift_left(State, Reg, 4); +mul(State, Reg, 32) -> + shift_left(State, Reg, 5); +mul(State, Reg, 64) -> + shift_left(State, Reg, 6); +mul( + #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, + Reg, + Val +) -> + % multiply by decomposing by power of 2 + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), + Stream1 = State1#state.stream, + I = jit_riscv32_asm:mul(Reg, Reg, Temp), + Stream2 = StreamModule:append(Stream1, I), + State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}. + +%% +%% Analysis of AArch64 pattern and RISC-V32 implementation: +%% +%% AArch64 layout (from call_ext_only_test): +%% 0x0-0x8: Decrement reductions, store back +%% 0xc: b.ne 0x20 ; Branch if reductions != 0 to continuation +%% 0x10-0x1c: adr/str/ldr/br sequence for scheduling next process +%% 0x20: [CONTINUATION POINT] - Actual function starts here +%% +%% RISC-V32 implementation (no prolog/epilog needed due to 32 registers): +%% 0x0-0x8: Decrement reductions, store back +%% 0xc: bne continuation ; Branch if reductions != 0 to continuation +%% 0x10-0x?: adr/sw/ldr/jalr sequence for scheduling next process +%% continuation: [actual function body] +%% +%% Key insight: With 32 registers, RISC-V32 doesn't need prolog/epilog like ARM Thumb. +%% When reductions != 0, we branch directly to continue execution. +%% When reductions == 0, we schedule the next process, and resume at the continuation point. +%% +-spec decrement_reductions_and_maybe_schedule_next(state()) -> state(). +decrement_reductions_and_maybe_schedule_next( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0 +) -> + % Load reduction count + I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET), + % Decrement reduction count + I2 = jit_riscv32_asm:addi(Temp, Temp, -1), + % Store back the decremented value + I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET), + Stream1 = StreamModule:append(Stream0, <>), + BNEOffset = StreamModule:offset(Stream1), + % Branch if reduction count is not zero + I4 = jit_riscv32_asm:bne(Temp, zero, 0), + % Set continuation to the next instruction + ADROffset = BNEOffset + byte_size(I4), + I5 = pc_relative_address(Temp, 0), + I6 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), + % Append the instructions to the stream + Stream2 = StreamModule:append(Stream1, <>), + State1 = State0#state{stream = Stream2}, + State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]), + % Rewrite the branch and adr instructions + #state{stream = Stream3} = State2, + NewOffset = StreamModule:offset(Stream3), + NewI4 = jit_riscv32_asm:bne(Temp, zero, NewOffset - BNEOffset), + NewI5 = pc_relative_address(Temp, NewOffset - ADROffset), + Stream4 = StreamModule:replace( + Stream3, BNEOffset, <> + ), + merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs). + +-spec call_or_schedule_next(state(), non_neg_integer()) -> state(). +call_or_schedule_next(State0, Label) -> + {State1, RewriteOffset, TempReg} = set_cp(State0), + State2 = call_only_or_schedule_next(State1, Label), + rewrite_cp_offset(State2, RewriteOffset, TempReg). + +call_only_or_schedule_next( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _] + } = State0, + Label +) -> + % Load reduction count (jit_state is in a1) + I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET), + % Decrement reduction count + I2 = jit_riscv32_asm:addi(Temp, Temp, -1), + % Store back the decremented value + I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET), + Stream1 = StreamModule:append(Stream0, <>), + % Use trampoline technique: branch if zero (eq) to skip over the long branch + % If not zero, we want to continue execution at Label + % If zero, we want to fall through to scheduling code + + % Look up label once to avoid duplicate lookup in helper + LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels), + + BccOffset = StreamModule:offset(Stream1), + + State4 = + case LabelLookupResult of + {Label, LabelOffset} -> + % Label is known, check if we can optimize the conditional branch + % After branch instruction + Rel = LabelOffset - BccOffset, + + if + Rel >= -4096 andalso Rel =< 4094 andalso (Rel rem 2) =:= 0 -> + % Near branch: use direct conditional branch (RISC-V has ±4KB range) + + % Branch if NOT zero (temp != 0) + I4 = jit_riscv32_asm:bne(Temp, zero, Rel), + Stream2 = StreamModule:append(Stream1, I4), + State0#state{stream = Stream2}; + true -> + % Far branch: use trampoline with helper + % Get the code block size for the far branch sequence that will follow + + % RISC-V branch is 4 bytes + FarSeqOffset = BccOffset + 4, + {State1, FarCodeBlock} = branch_to_label_code( + State0, FarSeqOffset, Label, LabelLookupResult + ), + FarSeqSize = byte_size(FarCodeBlock), + % Skip over the far branch sequence if zero (temp == 0) + I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4), + Stream2 = StreamModule:append(Stream1, I4), + Stream3 = StreamModule:append(Stream2, FarCodeBlock), + State1#state{stream = Stream3} + end; + false -> + % Label not known, get the far branch size for the skip + + % RISC-V branch is 4 bytes + FarSeqOffset = BccOffset + 4, + {State1, FarCodeBlock} = branch_to_label_code(State0, FarSeqOffset, Label, false), + FarSeqSize = byte_size(FarCodeBlock), + I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4), + Stream2 = StreamModule:append(Stream1, I4), + Stream3 = StreamModule:append(Stream2, FarCodeBlock), + State1#state{stream = Stream3} + end, + State5 = set_continuation_to_label(State4, Label), + call_primitive_last(State5, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). + +call_primitive_with_cp(State0, Primitive, Args) -> + {State1, RewriteOffset, TempReg} = set_cp(State0), + State2 = call_primitive_last(State1, Primitive, Args), + rewrite_cp_offset(State2, RewriteOffset, TempReg). + +-spec set_cp(state()) -> {state(), non_neg_integer(), riscv32_register()}. +set_cp(#state{available_regs = [TempReg | AvailT], used_regs = UsedRegs} = State0) -> + % Reserve a temporary register for the offset BEFORE calling get_module_index + % to avoid running out of available registers + State0b = State0#state{available_regs = AvailT, used_regs = [TempReg | UsedRegs]}, + % get module index (dynamically) + { + #state{stream_module = StreamModule, stream = Stream0} = State1, + Reg + } = get_module_index( + State0b + ), + + Offset = StreamModule:offset(Stream0), + % build cp with module_index << 24 + I1 = jit_riscv32_asm:slli(Reg, Reg, 24), + % Reserve space for offset load instruction + % li can generate 1 instruction (4 bytes) for small immediates (< 2048) + % or 2 instructions (8 bytes) for large immediates + % Since we use (offset bsl 2), threshold is when offset >= 512 bytes + % To be safe, use same threshold as AArch64 relative to instruction encoding limits + {I2, I3} = + if + Offset >= 512 -> + % Need 2 instructions (lui + addi) for large offsets + {jit_riscv32_asm:nop(), jit_riscv32_asm:nop()}; + true -> + % Need 1 instruction (addi) for small offsets + {jit_riscv32_asm:nop(), <<>>} + end, + MOVOffset = Offset + byte_size(I1), + % OR the module index with the offset (loaded in temp register) + I4 = jit_riscv32_asm:or_(Reg, TempReg), + {BaseReg, Off} = ?CP, + I5 = jit_riscv32_asm:sw(BaseReg, Reg, Off), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State2 = State1#state{stream = Stream1}, + State3 = free_native_register(State2, Reg), + State4 = free_native_register(State3, TempReg), + {State4, MOVOffset, TempReg}. + +-spec rewrite_cp_offset(state(), non_neg_integer(), riscv32_register()) -> state(). +rewrite_cp_offset( + #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0, + RewriteOffset, + TempReg +) -> + NewOffset = StreamModule:offset(Stream0) - CodeOffset, + NewMoveInstr = jit_riscv32_asm:li(TempReg, NewOffset bsl 2), + Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr), + State0#state{stream = Stream1}. + +set_bs( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + TermReg +) -> + {BaseReg1, Off1} = ?BS, + I1 = jit_riscv32_asm:sw(BaseReg1, TermReg, Off1), + I2 = jit_riscv32_asm:li(Temp, 0), + {BaseReg2, Off2} = ?BS_OFFSET, + I3 = jit_riscv32_asm:sw(BaseReg2, Temp, Off2), + Stream1 = StreamModule:append(Stream0, <>), + State0#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @param State current state +%% @param SortedLines line information, sorted by offset +%% @doc Build labels and line tables and encode a function that returns it. +%% In this case, the function returns the effective address of what immediately +%% follows. +%% @end +%% @return New state +%%----------------------------------------------------------------------------- +return_labels_and_lines( + #state{ + stream_module = StreamModule, + stream = Stream0, + labels = Labels + } = State, + SortedLines +) -> + SortedLabels = lists:keysort(2, [ + {Label, LabelOffset} + || {Label, LabelOffset} <- Labels, is_integer(Label) + ]), + + I1 = pc_relative_address(a0, 12), + I2 = jit_riscv32_asm:ret(), + LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, + LinesTable = <<<> || {Line, Offset} <- SortedLines>>, + Stream1 = StreamModule:append( + Stream0, + <> + ), + State#state{stream = Stream1}. + +%% @doc Generate PC-relative address calculation using AUIPC + ADDI +%% This replaces the ARM-style 'adr' pseudo-instruction with native RISC-V instructions +-spec pc_relative_address(riscv32_register(), integer()) -> binary(). +pc_relative_address(Rd, 0) -> + % Simple case: just get current PC + jit_riscv32_asm:auipc(Rd, 0); +pc_relative_address(Rd, Offset) -> + % PC-relative address calculation + % Split offset into upper 20 bits and lower 12 bits + % AUIPC can represent offsets in range: (-524288 << 12) to (524287 << 12) + % Combined with ADDI: (-524288 << 12) - 2048 to (524287 << 12) + 2047 + Lower = Offset band 16#FFF, + % Sign extend lower 12 bits + LowerSigned = + if + Lower >= 16#800 -> Lower - 16#1000; + true -> Lower + end, + % Compute upper 20 bits, adjusting if lower is negative + % Use arithmetic right shift (bsr) which preserves sign in Erlang + Upper = + if + LowerSigned < 0 -> + (Offset bsr 12) + 1; + true -> + Offset bsr 12 + end, + % Validate that Upper is in valid range for AUIPC + if + Upper < -16#80000; Upper > 16#7FFFF -> + error({offset_out_of_range, Offset, Upper, -16#80000, 16#7FFFF}); + true -> + ok + end, + case {Upper, LowerSigned} of + {0, 0} -> + % Zero offset + jit_riscv32_asm:auipc(Rd, 0); + {0, _} -> + % Only lower bits needed: auipc + addi + AuipcInstr = jit_riscv32_asm:auipc(Rd, 0), + AddiInstr = jit_riscv32_asm:addi(Rd, Rd, LowerSigned), + <>; + {_, 0} -> + % Only upper bits needed + jit_riscv32_asm:auipc(Rd, Upper); + {_, _} -> + % Both upper and lower bits + AuipcInstr = jit_riscv32_asm:auipc(Rd, Upper), + AddiInstr = jit_riscv32_asm:addi(Rd, Rd, LowerSigned), + <> + end. + +%% Helper function to generate str instruction with y_reg offset, handling large offsets +str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 -> + % Small offset - use immediate addressing + {BaseReg, Off} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off), + I2 = jit_riscv32_asm:sw(TempReg, SrcReg, Y * 4), + <>; +str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) -> + % Large offset - use register arithmetic with second available register + Offset = Y * 4, + {BaseReg, Off} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off), + I2 = jit_riscv32_asm:li(TempReg2, Offset), + I3 = jit_riscv32_asm:add(TempReg2, TempReg2, TempReg1), + I4 = jit_riscv32_asm:sw(TempReg2, SrcReg, 0), + <>; +str_y_reg(SrcReg, Y, TempReg1, []) -> + % Large offset - no additional registers available, use IP_REG as second temp + Offset = Y * 4, + {BaseReg, Off} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off), + I2 = jit_riscv32_asm:mv(?IP_REG, TempReg1), + I3 = jit_riscv32_asm:li(TempReg1, Offset), + I4 = jit_riscv32_asm:add(TempReg1, TempReg1, ?IP_REG), + I5 = jit_riscv32_asm:sw(TempReg1, SrcReg, 0), + <>. + +%% Helper function to generate ldr instruction with y_reg offset, handling large offsets +ldr_y_reg(DstReg, Y, [TempReg | _]) when Y * 4 =< 124 -> + % Small offset - use immediate addressing + {BaseReg, Off} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off), + I2 = jit_riscv32_asm:lw(DstReg, TempReg, Y * 4), + <>; +ldr_y_reg(DstReg, Y, [TempReg | _]) -> + % Large offset - use DstReg as second temp register for arithmetic + Offset = Y * 4, + {BaseReg, Off} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off), + I2 = jit_riscv32_asm:li(DstReg, Offset), + I3 = jit_riscv32_asm:add(DstReg, DstReg, TempReg), + I4 = jit_riscv32_asm:lw(DstReg, DstReg, 0), + <>; +ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 -> + % Small offset, no registers available - use DstReg as temp + {BaseReg, Off} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off), + I2 = jit_riscv32_asm:lw(DstReg, DstReg, Y * 4), + <>; +ldr_y_reg(DstReg, Y, []) -> + % Large offset, no registers available - use IP_REG as temp register + % Note: IP_REG (t3) can only be used with mov, not ldr directly + Offset = Y * 4, + {BaseReg, Off} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off), + I2 = jit_riscv32_asm:mv(?IP_REG, DstReg), + I3 = jit_riscv32_asm:li(DstReg, Offset), + I4 = jit_riscv32_asm:add(DstReg, DstReg, ?IP_REG), + I5 = jit_riscv32_asm:lw(DstReg, DstReg, 0), + <>. + +free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> + AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), + true = lists:member(Reg, UsedRegs0), + UsedRegs1 = lists:delete(Reg, UsedRegs0), + {AvailableRegs1, UsedRegs1}. + +free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) -> + lists:reverse(Acc, [Reg | PrevRegs0]); +free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) -> + free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]); +free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) -> + free_reg0(SortedT, PrevRegs, Reg, Acc). + +args_regs(Args) -> + lists:map( + fun + ({free, {ptr, Reg}}) -> Reg; + ({free, Reg}) when is_atom(Reg) -> Reg; + ({free, Imm}) when is_integer(Imm) -> imm; + (offset) -> imm; + (ctx) -> ?CTX_REG; + (jit_state) -> jit_state; + (jit_state_tail_call) -> jit_state; + (stack) -> stack; + (Reg) when is_atom(Reg) -> Reg; + (Imm) when is_integer(Imm) -> imm; + ({ptr, Reg}) -> Reg; + ({x_reg, _}) -> ?CTX_REG; + ({y_reg, _}) -> ?CTX_REG; + ({fp_reg, _}) -> ?CTX_REG; + ({free, {x_reg, _}}) -> ?CTX_REG; + ({free, {y_reg, _}}) -> ?CTX_REG; + ({free, {fp_reg, _}}) -> ?CTX_REG; + ({avm_int64_t, _}) -> imm + end, + Args + ). + +%%----------------------------------------------------------------------------- +%% @doc Add a label at the current offset. Eventually align it with a nop. +%% @end +%% @param State current backend state +%% @param Label the label number or reference +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec add_label(state(), integer() | reference()) -> state(). +add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label) -> + Offset0 = StreamModule:offset(Stream0), + {State1, Offset1} = + if + Offset0 rem 4 =:= 0 -> + {State0, Offset0}; + true -> + Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:nop()), + {State0#state{stream = Stream1}, Offset0 + 2} + end, + add_label(State1, Label, Offset1). + +%%----------------------------------------------------------------------------- +%% @doc Add a label at a specific offset +%% @end +%% @param State current backend state +%% @param Label the label number or reference +%% @param Offset the explicit offset for this label +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec add_label(state(), integer() | reference(), integer()) -> state(). +add_label(#state{labels = Labels} = State, Label, Offset) -> + State#state{labels = [{Label, Offset} | Labels]}. diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 35330fdecc..db7914438b 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -209,3 +209,4 @@ X(LOAD_ATOM, "\x4", "load") X(JIT_X86_64_ATOM, "\xA", "jit_x86_64") X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64") X(JIT_ARMV6M_ATOM, "\xA", "jit_armv6m") +X(JIT_RISCV32_ATOM, "\xB", "jit_riscv32") diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 88b73e6bb5..8704bd6db1 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -97,6 +97,20 @@ _Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->rema _Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 +_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in jit/src/jit_riscv32.erl"); + +_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_riscv32.erl"); + +_Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); + #else #error Unknown jit target #endif @@ -144,7 +158,7 @@ static void destroy_extended_registers(Context *ctx, unsigned int live) static void jit_trim_live_regs(Context *ctx, uint32_t live) { - TRACE("jit_trim_live_regs: ctx->process_id = %d, live = %d\n", ctx->process_id, live); + TRACE("jit_trim_live_regs: ctx->process_id = %" PRId32 ", live = %" PRIu32 "\n", ctx->process_id, live); if (UNLIKELY(!list_is_empty(&ctx->extended_x_regs))) { destroy_extended_registers(ctx, live); } @@ -184,8 +198,8 @@ static Context *jit_return(Context *ctx, JITState *jit_state) static Context *jit_terminate_context(Context *ctx, JITState *jit_state) { - TRACE("jit_terminate_context: ctx->process_id = %d\n", ctx->process_id); - TRACE("-- Code execution finished for %i--\n", ctx->process_id); + TRACE("jit_terminate_context: ctx->process_id = %" PRId32 "\n", ctx->process_id); + TRACE("-- Code execution finished for %" PRId32 "--\n", ctx->process_id); GlobalContext *global = ctx->global; if (ctx->leader) { scheduler_stop_all(global); @@ -197,7 +211,7 @@ static Context *jit_terminate_context(Context *ctx, JITState *jit_state) static Context *jit_handle_error(Context *ctx, JITState *jit_state, int offset) { - TRACE("jit_terminate_context: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_terminate_context: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); if (offset || term_is_invalid_term(ctx->x[2])) { ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, ctx->x[0]); } @@ -264,14 +278,14 @@ static void set_error(Context *ctx, JITState *jit_state, int offset, term error_ static Context *jit_raise_error(Context *ctx, JITState *jit_state, int offset, term error_type_atom) { - TRACE("jit_raise_error: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise_error: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); set_error(ctx, jit_state, offset, error_type_atom); return jit_handle_error(ctx, jit_state, 0); } static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int offset, term error_atom, term arg1) { - TRACE("jit_raise_error_tuple: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise_error_tuple: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); // We can gc as we are raising if (UNLIKELY(memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, &arg1, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { set_error(ctx, jit_state, offset, OUT_OF_MEMORY_ATOM); @@ -288,7 +302,7 @@ static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int off static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term stacktrace, term exc_value) { - TRACE("jit_raise: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); ctx->x[0] = stacktrace_exception_class(stacktrace); ctx->x[1] = exc_value; ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, stacktrace); @@ -297,7 +311,7 @@ static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term st static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state) { - TRACE("jit_schedule_next_cp: ctx->process_id = %d\n", ctx->process_id); + TRACE("jit_schedule_next_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id); ctx->saved_function_ptr = jit_state->continuation; ctx->saved_module = jit_state->module; jit_state->remaining_reductions = 0; @@ -306,7 +320,7 @@ static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state) static Context *jit_schedule_wait_cp(Context *ctx, JITState *jit_state) { - TRACE("jit_schedule_wait_cp: ctx->process_id = %d\n", ctx->process_id); + TRACE("jit_schedule_wait_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id); ctx->saved_function_ptr = jit_state->continuation; ctx->saved_module = jit_state->module; jit_state->remaining_reductions = 0; @@ -444,7 +458,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int return_value = bif->bif2_ptr(ctx, 0, ctx->x[0], ctx->x[1]); break; default: - fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity); AVM_ABORT(); } PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset); @@ -474,7 +488,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int return_value = gcbif->gcbif2_ptr(ctx, 0, 0, ctx->x[0], ctx->x[1]); break; default: - fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity); AVM_ABORT(); } PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset); @@ -497,7 +511,7 @@ static term jit_module_get_atom_term_by_id(JITState *jit_state, int atom_index) static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, uint32_t heap_need, uint32_t live) { - TRACE("jit_allocate: stack_need=%u heap_need=%u live=%u\n", stack_need, heap_need, live); + TRACE("jit_allocate: ENTRY ctx=%p jit_state=%p stack_need=%" PRIu32 " heap_need=%" PRIu32 " live=%" PRIu32 "\n", (void*)ctx, (void*)jit_state, stack_need, heap_need, live); if (ctx->heap.root->next || ((ctx->heap.heap_ptr + heap_need > ctx->e - (stack_need + 1)))) { TRIM_LIVE_REGS(live); if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need + stack_need + 1, live, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { @@ -512,7 +526,7 @@ static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif) { - TRACE("jit_get_imported_bif: bif=%u\n", bif); + TRACE("jit_get_imported_bif: bif=%" PRIu32 "\n", bif); const struct ExportedFunction *exported_bif = jit_state->module->imported_funcs[bif]; const BifImpl0 result = EXPORTED_FUNCTION_TO_BIF(exported_bif)->bif0_ptr; return result; @@ -520,7 +534,7 @@ static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif) static bool jit_deallocate(Context *ctx, JITState *jit_state, uint32_t n_words) { - TRACE("jit_deallocate: n_words=%u\n", n_words); + TRACE("jit_deallocate: n_words=%" PRIu32 "\n", n_words); ctx->cp = ctx->e[n_words]; ctx->e += n_words + 1; // Hopefully, we only need x[0] @@ -545,7 +559,7 @@ static TermCompareResult jit_term_compare(Context *ctx, JITState *jit_state, ter static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, uint32_t live_registers) { - TRACE("jit_test_heap: heap_need=%u live_registers=%u\n", heap_need, live_registers); + TRACE("jit_test_heap: heap_need=%" PRIu32 " live_registers=%" PRIu32 "\n", heap_need, live_registers); size_t heap_free = context_avail_free_memory(ctx); // if we need more heap space than is currently free, then try to GC the needed space if (heap_free < heap_need) { @@ -559,7 +573,7 @@ static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, } else if (heap_free > heap_need * HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF) { TRIM_LIVE_REGS(live_registers); if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need * (HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF / 2), live_registers, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { - TRACE("Unable to ensure free memory. heap_need=%i\n", heap_need); + TRACE("Unable to ensure free memory. heap_need=%" PRIu32 "\n", heap_need); set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM); return false; } @@ -640,13 +654,13 @@ static term maybe_alloc_boxed_integer_fragment(Context *ctx, avm_int64_t value) static term jit_term_alloc_tuple(Context *ctx, uint32_t size) { - TRACE("jit_term_alloc_tuple: size=%u\n", size); + TRACE("jit_term_alloc_tuple: size=%" PRIu32 "\n", size); return term_alloc_tuple(size, &ctx->heap); } static term jit_term_alloc_fun(Context *ctx, JITState *jit_state, uint32_t fun_index, uint32_t numfree) { - TRACE("jit_term_alloc_fun: fun_index=%u numfree=%u\n", fun_index, numfree); + TRACE("jit_term_alloc_fun: fun_index=%" PRIu32 " numfree=%" PRIu32 "\n", fun_index, numfree); size_t size = numfree + BOXED_FUN_SIZE; term *boxed_func = memory_heap_alloc(&ctx->heap, size); @@ -852,7 +866,7 @@ static Context *jit_process_signal_messages(Context *ctx, JITState *jit_state) static term jit_mailbox_peek(Context *ctx) { - TRACE("jit_mailbox_peek: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_peek: ctx->process_id=%" PRId32 "\n", ctx->process_id); term out = term_invalid_term(); mailbox_peek(ctx, &out); return out; @@ -860,26 +874,26 @@ static term jit_mailbox_peek(Context *ctx) static void jit_mailbox_remove_message(Context *ctx) { - TRACE("jit_mailbox_remove_message: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_remove_message: ctx->process_id=%" PRId32 "\n", ctx->process_id); mailbox_remove_message(&ctx->mailbox, &ctx->heap); } static void jit_timeout(Context *ctx) { - TRACE("jit_timeout: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id); context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags); mailbox_reset(&ctx->mailbox); } static void jit_mailbox_next(Context *ctx) { - TRACE("jit_mailbox_next: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_next: ctx->process_id=%" PRId32 "\n", ctx->process_id); mailbox_next(&ctx->mailbox); } static void jit_cancel_timeout(Context *ctx) { - TRACE("jit_cancel_timeout: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_cancel_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id); if (context_get_flags(ctx, WaitingTimeout | WaitingTimeoutExpired)) { scheduler_cancel_timeout(ctx); } @@ -887,7 +901,7 @@ static void jit_cancel_timeout(Context *ctx) static void jit_clear_timeout_flag(Context *ctx) { - TRACE("jit_clear_timeout_flag: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_clear_timeout_flag: ctx->process_id=%" PRId32 "\n", ctx->process_id); context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags); } diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index ee53259886..c7764166dd 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -173,6 +173,7 @@ enum TrapAndLoadResult #define JIT_ARCH_X86_64 1 #define JIT_ARCH_AARCH64 2 #define JIT_ARCH_ARMV6M 3 +#define JIT_ARCH_RISCV32 4 #define JIT_VARIANT_PIC 1 #define JIT_VARIANT_FLOAT32 2 @@ -194,6 +195,11 @@ enum TrapAndLoadResult #define JIT_JUMPTABLE_ENTRY_SIZE 12 #endif +#if defined(__riscv) && (__riscv_xlen == 32) +#define JIT_ARCH_TARGET JIT_ARCH_RISCV32 +#define JIT_JUMPTABLE_ENTRY_SIZE 8 +#endif + #ifndef JIT_ARCH_TARGET #error Unknown JIT target #endif diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index 108d5027d8..3500992fb1 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -38,6 +38,9 @@ #include #include +// #define ENABLE_TRACE +#include "trace.h" + #ifdef WITH_ZLIB #include #endif diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index e3d76eba41..17fdce2a20 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -5688,6 +5688,8 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) return JIT_AARCH64_ATOM; #elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M return JIT_ARMV6M_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 + return JIT_RISCV32_ATOM; #else #error Unknown JIT target #endif diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index d8fc4106b0..e9d49698a0 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -7444,7 +7444,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) } terminate_context: - TRACE("-- Code execution finished for %i--\n", ctx->process_id); + TRACE("-- Code execution finished for %i--\n", (int) ctx->process_id); GlobalContext *global = ctx->global; if (ctx->leader) { scheduler_stop_all(global); diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index 9dec6ec5f3..1212a15fe9 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -51,11 +51,23 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON) - project(atomvm-esp32) +# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +if(CONFIG_JIT_ENABLED) + if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") + else() + message(WARNING "JIT is not supported on ${IDF_TARGET} (Xtensa architecture)") + set(AVM_DISABLE_JIT ON) + endif() +else() + set(AVM_DISABLE_JIT ON) + message(STATUS "JIT compilation disabled") +endif() + # esp-idf does not use compile_feature but instead sets version in # c_compile_options # Ensure project is compiled with at least C11 diff --git a/src/platforms/esp32/components/avm_sys/CMakeLists.txt b/src/platforms/esp32/components/avm_sys/CMakeLists.txt index ebcedd3b57..8156bb2ac8 100644 --- a/src/platforms/esp32/components/avm_sys/CMakeLists.txt +++ b/src/platforms/esp32/components/avm_sys/CMakeLists.txt @@ -25,6 +25,7 @@ set(AVM_SYS_COMPONENT_SRCS "sys.c" "platform_nifs.c" "platform_defaultatoms.c" + "jit_stream_flash.c" "../../../../libAtomVM/inet.c" "../../../../libAtomVM/otp_crypto.c" "../../../../libAtomVM/otp_net.c" diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash.c new file mode 100644 index 0000000000..77dfcca908 --- /dev/null +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash.c @@ -0,0 +1,34 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "context.h" +#include "jit.h" +#include "term.h" + +ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) +{ + UNUSED(ctx); + UNUSED(jit_stream); + return NULL; +} + +#endif diff --git a/src/platforms/esp32/components/avm_sys/sys.c b/src/platforms/esp32/components/avm_sys/sys.c index 8318ae759a..ec229d70b2 100644 --- a/src/platforms/esp32/components/avm_sys/sys.c +++ b/src/platforms/esp32/components/avm_sys/sys.c @@ -807,3 +807,28 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) UNUSED(global); #endif } + +#ifndef AVM_NO_JIT +#include + +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ + UNUSED(size); + uintptr_t addr = (uintptr_t) (native_code + offset); + +#if defined(CONFIG_IDF_TARGET_ARCH_RISCV) + // On RISC-V ESP32 targets, native code in flash needs to be accessed + // through the instruction cache (IROM) not data cache (DROM) +#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C2) + // ESP32-C3 and C2 have separate DROM and IROM regions + if (addr >= SOC_DROM_LOW && addr < SOC_DROM_HIGH) { + // Convert from data cache address to instruction cache address + addr = addr - SOC_DROM_LOW + SOC_IROM_LOW; + } +#endif + // ESP32-C6, H2, and P4 have unified DROM/IROM, no conversion needed +#endif + + return (ModuleNativeEntryPoint) addr; +} +#endif diff --git a/src/platforms/esp32/components/libatomvm/CMakeLists.txt b/src/platforms/esp32/components/libatomvm/CMakeLists.txt index 97580dbfea..c8e3ede411 100644 --- a/src/platforms/esp32/components/libatomvm/CMakeLists.txt +++ b/src/platforms/esp32/components/libatomvm/CMakeLists.txt @@ -32,6 +32,6 @@ if (HAVE_PLATFORM_ATOMIC_H) endif() target_link_libraries(${COMPONENT_LIB} - INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init") + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init" "-u jit_stream_entry_point" "-u sys_map_native_code") target_compile_features(${COMPONENT_LIB} INTERFACE c_std_11) diff --git a/src/platforms/esp32/main/Kconfig.projbuild b/src/platforms/esp32/main/Kconfig.projbuild index 88bf92aa1a..1eba944ed7 100755 --- a/src/platforms/esp32/main/Kconfig.projbuild +++ b/src/platforms/esp32/main/Kconfig.projbuild @@ -39,5 +39,11 @@ menu "AtomVM configuration" depends on USE_USB_SERIAL help Enable TinyUSB CDC functionality if USE_USB_SERIAL is enabled. + + config JIT_ENABLED + bool "Enable just in time compilation" + default n + help + Enable Just in time compilation, or just execution of precompiled native code endmenu diff --git a/src/platforms/esp32/test/CMakeLists.txt b/src/platforms/esp32/test/CMakeLists.txt index 2d97d91345..cee138d34c 100644 --- a/src/platforms/esp32/test/CMakeLists.txt +++ b/src/platforms/esp32/test/CMakeLists.txt @@ -57,8 +57,16 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON) +# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +# This must be set before project() so libAtomVM is configured correctly +if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") +else() + message(STATUS "JIT is not supported on ${IDF_TARGET} (Xtensa architecture) - using interpreter") + set(AVM_DISABLE_JIT ON) +endif() project(atomvm-esp32-test) diff --git a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt index e2d67269e8..dc4789f374 100644 --- a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt +++ b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt @@ -20,11 +20,31 @@ add_library(esp32_test_modules) +include(ExternalProject) +if(NOT AVM_DISABLE_JIT) +set(host_atomvm_jit_target "--target=jit") +else() +set(host_atomvm_jit_target "") +endif() ExternalProject_Add(HostAtomVM SOURCE_DIR ../../../../../../../../ INSTALL_COMMAND cmake -E echo "Skipping install step." + BUILD_COMMAND cmake --build . --target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM ) +macro(jit_precompile module_name) + if(NOT AVM_DISABLE_JIT) + add_custom_command( + OUTPUT ${AVM_JIT_TARGET_ARCH}/${module_name}.beam + COMMAND mkdir -p ${AVM_JIT_TARGET_ARCH} + && erl -pa HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/ ${module_name}.beam + DEPENDS ${module_name}.beam HostAtomVM + COMMENT "Compiling ${module_name}.beam to ${AVM_JIT_TARGET_ARCH}" + VERBATIM + ) + endif() +endmacro() + function(compile_erlang module_name) add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam" @@ -33,6 +53,7 @@ function(compile_erlang module_name) COMMENT "Compiling ${module_name}.erl" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) + jit_precompile(${module_name}) set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam") endfunction() @@ -55,46 +76,44 @@ compile_erlang(test_time_and_processes) compile_erlang(test_twdt) compile_erlang(test_tz) +set(erlang_test_beams + test_esp_partition.beam + test_file.beam + test_wifi_example.beam + test_list_to_atom.beam + test_list_to_binary.beam + test_md5.beam + test_crypto.beam + test_monotonic_time.beam + test_mount.beam + test_net.beam + test_rtc_slow.beam + test_select.beam + test_socket.beam + test_ssl.beam + test_time_and_processes.beam + test_twdt.beam + test_tz.beam +) + +if(NOT AVM_DISABLE_JIT) + set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams}) + list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/) + set(erlang_test_beams_to_package ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) + set(erlang_test_beams_depends ${erlang_test_beams} ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) +else() + set(erlang_test_beams_to_package ${erlang_test_beams}) + set(erlang_test_beams_depends ${erlang_test_beams}) +endif() + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/esp32_test_modules.avm" COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i esp32_test_modules.avm HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm - test_esp_partition.beam - test_file.beam - test_wifi_example.beam - test_list_to_atom.beam - test_list_to_binary.beam - test_md5.beam - test_crypto.beam - test_monotonic_time.beam - test_mount.beam - test_net.beam - test_rtc_slow.beam - test_select.beam - test_socket.beam - test_ssl.beam - test_time_and_processes.beam - test_twdt.beam - test_tz.beam + ${erlang_test_beams_to_package} DEPENDS HostAtomVM - "${CMAKE_CURRENT_BINARY_DIR}/test_esp_partition.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_wifi_example.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_file.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_atom.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_binary.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_md5.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_crypto.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_monotonic_time.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_mount.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_net.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_rtc_slow.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_select.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_socket.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_ssl.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_time_and_processes.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_twdt.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_tz.beam" + ${erlang_test_beams_depends} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} VERBATIM ) diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 9bc1c8c78b..45473d9f10 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -30,6 +30,7 @@ set(ERLANG_MODULES jit_aarch64_asm_tests jit_armv6m_tests jit_armv6m_asm_tests + jit_riscv32_tests jit_riscv32_asm_tests jit_x86_64_tests jit_x86_64_asm_tests diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl new file mode 100644 index 0000000000..4a4fba5593 --- /dev/null +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -0,0 +1,3419 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-include("jit/include/jit.hrl"). +-include("jit/src/term.hrl"). +-include("jit/src/default_atoms.hrl"). +-include("jit/src/primitives.hrl"). + +-define(BACKEND, jit_riscv32). + +% disassembly obtained with: +% arm-elf-objdump -b binary -D dump.bin -M arm + +call_primitive_0_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 0, [ctx, jit_state]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 00062f83 lw t6,0(a2)\n" + " 4: ff010113 addi sp,sp,-16\n" + " 8: 00112023 sw ra,0(sp)\n" + " c: 00a12223 sw a0,4(sp)\n" + " 10: 00b12423 sw a1,8(sp)\n" + " 14: 00c12623 sw a2,12(sp)\n" + " 18: 000f80e7 jalr t6\n" + " 1c: 00050f93 mv t6,a0\n" + " 20: 00012083 lw ra,0(sp)\n" + " 24: 00412503 lw a0,4(sp)\n" + " 28: 00812583 lw a1,8(sp)\n" + " 2c: 00c12603 lw a2,12(sp)\n" + " 30: 01010113 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_1_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 1, [ctx, jit_state]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 00462f83 lw t6,4(a2)\n" + " 4: ff010113 addi sp,sp,-16\n" + " 8: 00112023 sw ra,0(sp)\n" + " c: 00a12223 sw a0,4(sp)\n" + " 10: 00b12423 sw a1,8(sp)\n" + " 14: 00c12623 sw a2,12(sp)\n" + " 18: 000f80e7 jalr t6\n" + " 1c: 00050f93 mv t6,a0\n" + " 20: 00012083 lw ra,0(sp)\n" + " 24: 00412503 lw a0,4(sp)\n" + " 28: 00812583 lw a1,8(sp)\n" + " 2c: 00c12603 lw a2,12(sp)\n" + " 30: 01010113 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_2_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 2, [ctx, 42, 43, 44]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 00862f83 lw t6,8(a2)\n" + " 4: ff010113 addi sp,sp,-16\n" + " 8: 00112023 sw ra,0(sp)\n" + " c: 00a12223 sw a0,4(sp)\n" + " 10: 00b12423 sw a1,8(sp)\n" + " 14: 00c12623 sw a2,12(sp)\n" + " 18: 02a00593 li a1,42\n" + " 1c: 02b00613 li a2,43\n" + " 20: 02c00693 li a3,44\n" + " 24: 000f80e7 jalr t6\n" + " 28: 00050f93 mv t6,a0\n" + " 2c: 00012083 lw ra,0(sp)\n" + " 30: 00412503 lw a0,4(sp)\n" + " 34: 00812583 lw a1,8(sp)\n" + " 38: 00c12603 lw a2,12(sp)\n" + " 3c: 01010113 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 01462f83 lw t6,20(a2)\n" + " 4: 01000613 li a2,16\n" + " 8: 02000693 li a3,32\n" + " c: 00200713 li a4,2\n" + " 10: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_6_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get bin_ptr from x_reg 0 (similar to get_list_test pattern) + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK), + % Get another register for the last parameter to test {free, Reg} handling + {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}), + % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments + {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [ + ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg} + ]), + Stream = ?BACKEND:stream(State4), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 00300f13 li t5,3\n" + " 8: ffff4f13 not t5,t5\n" + " c: 01efffb3 and t6,t6,t5\n" + " 10: 01c52f03 lw t5,28(a0)\n" + " 14: 0b800e93 li t4,184\n" + " 18: 00ce8eb3 add t4,t4,a2\n" + " 1c: 000eae83 lw t4,0(t4)\n" + " 20: ff010113 addi sp,sp,-16\n" + " 24: 00112023 sw ra,0(sp)\n" + " 28: 00a12223 sw a0,4(sp)\n" + " 2c: 00b12423 sw a1,8(sp)\n" + " 30: 00c12623 sw a2,12(sp)\n" + " 34: 000f8613 mv a2,t6\n" + " 38: 04000693 li a3,64\n" + " 3c: 00800713 li a4,8\n" + " 40: 000f0793 mv a5,t5\n" + " 44: 000e80e7 jalr t4\n" + " 48: 00050e93 mv t4,a0\n" + " 4c: 00012083 lw ra,0(sp)\n" + " 50: 00412503 lw a0,4(sp)\n" + " 54: 00812583 lw a1,8(sp)\n" + " 58: 00c12603 lw a2,12(sp)\n" + " 5c: 01010113 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_extended_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), + {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]), + {State3, RegC} = ?BACKEND:call_primitive(State2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), + {State4, ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_PUT_LIST, [ + ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}} + ]), + State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {ptr, RegC}), + State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]), + ?BACKEND:assert_all_native_free(State6), + Stream = ?BACKEND:stream(State6), + Dump = << + " 0: 04862f83 lw t6,72(a2)\n" + " 4: ff010113 addi sp,sp,-16\n" + " 8: 00112023 sw ra,0(sp)\n" + " c: 00a12223 sw a0,4(sp)\n" + " 10: 00b12423 sw a1,8(sp)\n" + " 14: 00c12623 sw a2,12(sp)\n" + " 18: 01300593 li a1,19\n" + " 1c: 000f80e7 jalr t6\n" + " 20: 00050f93 mv t6,a0\n" + " 24: 00012083 lw ra,0(sp)\n" + " 28: 00412503 lw a0,4(sp)\n" + " 2c: 00812583 lw a1,8(sp)\n" + " 30: 00c12603 lw a2,12(sp)\n" + " 34: 01010113 addi sp,sp,16\n" + " 38: 04862f03 lw t5,72(a2)\n" + " 3c: fe010113 addi sp,sp,-32\n" + " 40: 00112023 sw ra,0(sp)\n" + " 44: 00a12223 sw a0,4(sp)\n" + " 48: 00b12423 sw a1,8(sp)\n" + " 4c: 00c12623 sw a2,12(sp)\n" + " 50: 01f12823 sw t6,16(sp)\n" + " 54: 01400593 li a1,20\n" + " 58: 000f00e7 jalr t5\n" + " 5c: 00050f13 mv t5,a0\n" + " 60: 00012083 lw ra,0(sp)\n" + " 64: 00412503 lw a0,4(sp)\n" + " 68: 00812583 lw a1,8(sp)\n" + " 6c: 00c12603 lw a2,12(sp)\n" + " 70: 01012f83 lw t6,16(sp)\n" + " 74: 02010113 addi sp,sp,32\n" + " 78: 04862e83 lw t4,72(a2)\n" + " 7c: fe010113 addi sp,sp,-32\n" + " 80: 00112023 sw ra,0(sp)\n" + " 84: 00a12223 sw a0,4(sp)\n" + " 88: 00b12423 sw a1,8(sp)\n" + " 8c: 00c12623 sw a2,12(sp)\n" + " 90: 01e12823 sw t5,16(sp)\n" + " 94: 01f12a23 sw t6,20(sp)\n" + " 98: 01300593 li a1,19\n" + " 9c: 000e80e7 jalr t4\n" + " a0: 00050e93 mv t4,a0\n" + " a4: 00012083 lw ra,0(sp)\n" + " a8: 00412503 lw a0,4(sp)\n" + " ac: 00812583 lw a1,8(sp)\n" + " b0: 00c12603 lw a2,12(sp)\n" + " b4: 01012f03 lw t5,16(sp)\n" + " b8: 01412f83 lw t6,20(sp)\n" + " bc: 02010113 addi sp,sp,32\n" + " c0: 03462e03 lw t3,52(a2)\n" + " c4: fe010113 addi sp,sp,-32\n" + " c8: 00112023 sw ra,0(sp)\n" + " cc: 00a12223 sw a0,4(sp)\n" + " d0: 00b12423 sw a1,8(sp)\n" + " d4: 00c12623 sw a2,12(sp)\n" + " d8: 01d12823 sw t4,16(sp)\n" + " dc: 000fa583 lw a1,0(t6)\n" + " e0: 000f2603 lw a2,0(t5)\n" + " e4: 000e00e7 jalr t3\n" + " e8: 00050e13 mv t3,a0\n" + " ec: 00012083 lw ra,0(sp)\n" + " f0: 00412503 lw a0,4(sp)\n" + " f4: 00812583 lw a1,8(sp)\n" + " f8: 00c12603 lw a2,12(sp)\n" + " fc: 01012e83 lw t4,16(sp)\n" + " 100: 02010113 addi sp,sp,32\n" + " 104: 01cea023 sw t3,0(t4)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_few_free_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, t6} = ?BACKEND:move_to_native_register(State0, 1), + {State2, t5} = ?BACKEND:move_to_native_register(State1, 2), + {State3, t4} = ?BACKEND:move_to_native_register(State2, 3), + {State4, t3} = ?BACKEND:move_to_native_register(State3, 4), + {State5, t2} = ?BACKEND:move_to_native_register(State4, 5), + {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [ + t5, t6, {free, t3}, t4, {free, t2} + ]), + State7 = ?BACKEND:free_native_registers(State6, [ResultReg, t5, t6, t4]), + ?BACKEND:assert_all_native_free(State7), + Stream = ?BACKEND:stream(State7), + Dump = << + " 0: 00100f93 li t6,1\n" + " 4: 00200f13 li t5,2\n" + " 8: 00300e93 li t4,3\n" + " c: 00400e13 li t3,4\n" + " 10: 00500393 li t2,5\n" + " 14: 0e400313 li t1,228\n" + " 18: 00c30333 add t1,t1,a2\n" + " 1c: 00032303 lw t1,0(t1)\n" + " 20: fe010113 addi sp,sp,-32\n" + " 24: 00112023 sw ra,0(sp)\n" + " 28: 00a12223 sw a0,4(sp)\n" + " 2c: 00b12423 sw a1,8(sp)\n" + " 30: 00c12623 sw a2,12(sp)\n" + " 34: 01d12823 sw t4,16(sp)\n" + " 38: 01e12a23 sw t5,20(sp)\n" + " 3c: 01f12c23 sw t6,24(sp)\n" + " 40: 000f0513 mv a0,t5\n" + " 44: 000f8593 mv a1,t6\n" + " 48: 000e0613 mv a2,t3\n" + " 4c: 000e8693 mv a3,t4\n" + " 50: 00038713 mv a4,t2\n" + " 54: 000300e7 jalr t1\n" + " 58: fea12e23 sw a0,-4(sp)\n" + " 5c: 00012083 lw ra,0(sp)\n" + " 60: 00412503 lw a0,4(sp)\n" + " 64: 00812583 lw a1,8(sp)\n" + " 68: 00c12603 lw a2,12(sp)\n" + " 6c: 01012e83 lw t4,16(sp)\n" + " 70: 01412f03 lw t5,20(sp)\n" + " 74: 01812f83 lw t6,24(sp)\n" + " 78: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_ext_only_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: ffff8f93 addi t6,t6,-1\n" + " 8: 01f5a423 sw t6,8(a1)\n" + " c: 000f9a63 bnez t6,0x20\n" + " 10: 00000f97 auipc t6,0x0\n" + " 14: 010f8f93 addi t6,t6,16\n" + " 18: 00862f83 lw t6,8(a2)\n" + " 1c: 000f8067 jr t6\n" + " 20: 01062f83 lw t6,16(a2)\n" + " 24: 02400613 li a2,36\n" + " 28: 00200693 li a3,2\n" + " 2c: 00200713 li a4,2\n" + " 30: fff00793 li a5,-1\n" + " 34: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_last_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA} + ]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04c62f03 lw t5,76(a2)\n" + " 8: 00800613 li a2,8\n" + " c: 2cb00693 li a3,715\n" + " 10: 000f8713 mv a4,t6\n" + " 14: 000f0067 jr t5" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_ext_last_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: ffff8f93 addi t6,t6,-1\n" + " 8: 01f5a423 sw t6,8(a1)\n" + " c: 000f9a63 bnez t6,0x20\n" + " 10: 00000f97 auipc t6,0x0\n" + " 14: 010f8f93 addi t6,t6,16\n" + " 18: 00862f83 lw t6,8(a2)\n" + " 1c: 000f8067 jr t6\n" + " 20: 01062f83 lw t6,16(a2)\n" + " 24: 02400613 li a2,36\n" + " 28: 00200693 li a3,2\n" + " 2c: 00200713 li a4,2\n" + " 30: 00a00793 li a5,10\n" + " 34: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_last_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:call_primitive_last(State0, 0, [ctx, jit_state, 42]), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 00062f83 lw t6,0(a2)\n" + " 4: 02a00613 li a2,42\n" + " 8: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +return_if_not_equal_to_ctx_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + {State1, ResultReg} = ?BACKEND:call_primitive( + State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ] + ), + ?assertEqual(t6, ResultReg), + State2 = ?BACKEND:return_if_not_equal_to_ctx(State1, {free, ResultReg}), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 05462f83 lw t6,84(a2)\n" + " 4: ff010113 addi sp,sp,-16\n" + " 8: 00112023 sw ra,0(sp)\n" + " c: 00a12223 sw a0,4(sp)\n" + " 10: 00b12423 sw a1,8(sp)\n" + " 14: 00c12623 sw a2,12(sp)\n" + " 18: 000f80e7 jalr t6\n" + " 1c: 00050f93 mv t6,a0\n" + " 20: 00012083 lw ra,0(sp)\n" + " 24: 00412503 lw a0,4(sp)\n" + " 28: 00812583 lw a1,8(sp)\n" + " 2c: 00c12603 lw a2,12(sp)\n" + " 30: 01010113 addi sp,sp,16\n" + " 34: 00af8463 beq t6,a0,0x3c\n" + " 38: 000f8513 mv a0,t6\n" + " 3c: 00008067 ret" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State1, ResultReg} = ?BACKEND:call_primitive( + State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ] + ), + ?assertEqual(t6, ResultReg), + {State2, OtherReg} = ?BACKEND:copy_to_native_register(State1, ResultReg), + ?assertEqual(t5, OtherReg), + State3 = ?BACKEND:return_if_not_equal_to_ctx(State2, {free, OtherReg}), + Stream = ?BACKEND:stream(State3), + Dump = + << + " 0: 05462f83 lw t6,84(a2)\n" + " 4: ff010113 addi sp,sp,-16\n" + " 8: 00112023 sw ra,0(sp)\n" + " c: 00a12223 sw a0,4(sp)\n" + " 10: 00b12423 sw a1,8(sp)\n" + " 14: 00c12623 sw a2,12(sp)\n" + " 18: 000f80e7 jalr t6\n" + " 1c: 00050f93 mv t6,a0\n" + " 20: 00012083 lw ra,0(sp)\n" + " 24: 00412503 lw a0,4(sp)\n" + " 28: 00812583 lw a1,8(sp)\n" + " 2c: 00c12603 lw a2,12(sp)\n" + " 30: 01010113 addi sp,sp,16\n" + " 34: 000f8f13 mv t5,t6\n" + " 38: 00af0463 beq t5,a0,0x40\n" + " 3c: 000f0513 mv a0,t5\n" + " 40: 00008067 ret" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +move_to_cp_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 000f2f83 lw t6,0(t5)\n" + " 8: 05f52e23 sw t6,92(a0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +increment_sp_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:increment_sp(State0, 7), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 01cf8f93 addi t6,t6,28\n" + " 8: 01f52a23 sw t6,20(a0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +if_block_test_() -> + {setup, + fun() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State2, RegA, RegB} + end, + fun({State0, RegA, RegB}) -> + [ + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000fd463 bgez t6,0x10\n" + " c: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', RegB}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01efd463 bge t6,t5,0x10\n" + " c: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01dfd463 bge t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '<', 1024}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 40000e93 li t4,1024\n" + " c: 01dfd463 bge t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2\n" + " 14: 0ec0006f j 0x100" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9463 bnez t6,0x10\n" + " c: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9463 bnez t6,0x10\n" + " c: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', -1}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: fff00e93 li t4,-1\n" + " c: 01df9463 bne t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9463 bnez t6,0x10\n" + " c: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '==', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9463 bnez t6,0x10\n" + " c: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '!=', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df8463 beq t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '!=', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df8463 beq t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '!=', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df8463 beq t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + % Test large immediate (1995) that requires temporary register + State1 = ?BACKEND:if_block( + State0, + {RegA, '!=', 1995}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 1) + end + ), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 7cb00e93 li t4,1995\n" + " c: 01df8463 beq t6,t4,0x14\n" + " 10: 001f0f13 addi t5,t5,1\n" + " 14: 0ec0006f j 0x100" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '!=', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df8463 beq t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df9463 bne t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df9463 bne t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', RegA, '==', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df9463 bne t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(int)', {free, RegA}, '==', 42}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df9463 bne t6,t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', RegA, '==', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ec463 bltz t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', {free, RegA}, '==', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ec463 bltz t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', RegA, '!=', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ed463 bgez t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {'(bool)', {free, RegA}, '!=', false}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ed463 bgez t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', 16#7, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 007ffe93 andi t4,t6,7\n" + " c: 000e8463 beqz t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', 16#5, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 005ffe93 andi t4,t6,5\n" + " c: 000e8463 beqz t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '&', 16#7, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 007ffe93 andi t4,t6,7\n" + " c: 000e8463 beqz t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: ffffce93 not t4,t6\n" + " c: 01ce9e93 slli t4,t4,0x1c\n" + " 10: 000e8463 beqz t4,0x18\n" + " 14: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: ffffcf93 not t6,t6\n" + " c: 01cf9f93 slli t6,t6,0x1c\n" + " 10: 000f8463 beqz t6,0x18\n" + " 14: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f8e93 mv t4,t6\n" + " c: 03f00e13 li t3,63\n" + " 10: 01cefeb3 and t4,t4,t3\n" + " 14: 00800e13 li t3,8\n" + " 18: 01ce8463 beq t4,t3,0x20\n" + " 1c: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {{free, RegA}, '<', RegB}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01efd463 bge t6,t5,0x10\n" + " c: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + { + {free, RegA}, + '&', + ?TERM_BOXED_TAG_MASK, + '!=', + ?TERM_BOXED_POSITIVE_INTEGER + }, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03f00e93 li t4,63\n" + " c: 01dfffb3 and t6,t6,t4\n" + " 10: 00800e93 li t4,8\n" + " 14: 01df8463 beq t6,t4,0x1c\n" + " 18: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB], ?BACKEND:used_regs(State1)) + end), + %% Test {RegA, '&', 16#3, '!=', 0} using ANDI instruction + ?_test(begin + State1 = ?BACKEND:if_block( + State0, + {RegA, '&', 16#3, '!=', 0}, + fun(BSt0) -> + ?BACKEND:add(BSt0, RegB, 2) + end + ), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 003ffe93 andi t4,t6,3\n" + " c: 000e8463 beqz t4,0x14\n" + " 10: 002f0f13 addi t5,t5,2" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) + end) + ] + end}. + +if_else_block_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg1} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg2} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + State3 = ?BACKEND:if_else_block( + State2, + {Reg1, '==', ?TERM_NIL}, + fun(BSt0) -> + ?BACKEND:add(BSt0, Reg2, 2) + end, + fun(BSt0) -> + ?BACKEND:add(BSt0, Reg2, 4) + end + ), + Stream = ?BACKEND:stream(State3), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df9663 bne t6,t4,0x18\n" + " 10: 002f0f13 addi t5,t5,2\n" + " 14: 0080006f j 0x1c\n" + " 18: 004f0f13 addi t5,t5,4" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +shift_right_test_() -> + [ + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg} = ?BACKEND:shift_right(State1, {free, Reg}, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 003fdf93 srli t6,t6,0x3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, OtherReg} = ?BACKEND:shift_right(State1, Reg, 3), + ?assertNotEqual(OtherReg, Reg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 003fdf13 srli t5,t6,0x3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. + +shift_left_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:shift_left(State1, Reg, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 003f9f93 slli t6,t6,0x3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_only_or_schedule_next_and_label_relocation_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + State4 = ?BACKEND:add_label(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: 00000697 auipc a3,0x0\n" + " 4: 05468067 jr 84(a3)\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01068067 jr 16(a3)\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 03c68067 jr 60(a3)\n" + " 18: 0085af83 lw t6,8(a1)\n" + " 1c: ffff8f93 addi t6,t6,-1\n" + " 20: 01f5a423 sw t6,8(a1)\n" + " 24: 000f8a63 beqz t6,0x38\n" + " 28: 0240006f j 0x4c\n" + " 2c: 00000013 nop\n" + " 30: 00000013 nop\n" + " 34: 00000013 nop\n" + " 38: 00000f97 auipc t6,0x0\n" + " 3c: 014f8f93 addi t6,t6,20\n" + " 40: 01f5a223 sw t6,4(a1)\n" + " 44: 00862f83 lw t6,8(a2)\n" + " 48: 000f8067 jr t6\n" + " 4c: 00062f83 lw t6,0(a2)\n" + " 50: 000f8067 jr t6\n" + " 54: 00462f83 lw t6,4(a2)\n" + " 58: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test with large gap (256+ bytes) to force mov_immediate path +call_only_or_schedule_next_and_label_relocation_large_gap_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + % Add large padding by emitting many move_to_native_register operations + % This creates a large gap between the jump table and the actual function bodies + % Each operation emits ~2 bytes, so 128 operations = ~256 bytes + StatePadded = lists:foldl( + fun(_, S) -> + ?BACKEND:move_to_native_register(S, {x_reg, 2}, a3) + end, + State1, + lists:seq(1, 128) + ), + State2 = ?BACKEND:add_label(StatePadded, 1), + State3 = ?BACKEND:call_only_or_schedule_next(State2, 2), + State4 = ?BACKEND:add_label(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + % Extract the final section starting at 0x218 (after jump table 24 bytes + 128 loads 512 bytes) + % RISC-V: Jump table is 3×8=24 bytes, loads are 4 bytes each + Dump = << + " 218: 0085af83 lw t6,8(a1)\n" + " 21c: ffff8f93 addi t6,t6,-1\n" + " 220: 01f5a423 sw t6,8(a1)\n" + " 224: 000f8a63 beqz t6,0x238\n" + " 228: 0240006f j 0x24c\n" + " 22c: 00000013 nop\n" + " 230: 00000013 nop\n" + " 234: 00000013 nop\n" + " 238: 00000f97 auipc t6,0x0\n" + " 23c: 014f8f93 addi t6,t6,20\n" + " 240: 01f5a223 sw t6,4(a1)\n" + " 244: 00862f83 lw t6,8(a2)\n" + " 248: 000f8067 jr t6\n" + " 24c: 00062f83 lw t6,0(a2)\n" + " 250: 000f8067 jr t6\n" + " 254: 00462f83 lw t6,4(a2)\n" + " 258: 000f8067 jr t6" + >>, + {_, RelevantBinary} = split_binary(Stream, 16#218), + ?assertEqual(dump_to_bin(Dump), RelevantBinary). + +call_bif_with_large_literal_integer_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), + {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 998238357]), + {State3, ResultReg} = ?BACKEND:call_func_ptr(State2, {free, FuncPtr}, [ + ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg} + ]), + State4 = ?BACKEND:if_block(State3, {ResultReg, '==', 0}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {x_reg, 0}), + State6 = ?BACKEND:free_native_registers(State5, [ResultReg]), + ?BACKEND:assert_all_native_free(State6), + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: 02062f83 lw t6,32(a2)\n" + " 4: ff010113 addi sp,sp,-16\n" + " 8: 00112023 sw ra,0(sp)\n" + " c: 00a12223 sw a0,4(sp)\n" + " 10: 00b12423 sw a1,8(sp)\n" + " 14: 00c12623 sw a2,12(sp)\n" + " 18: 00058513 mv a0,a1\n" + " 1c: 00200593 li a1,2\n" + " 20: 000f80e7 jalr t6\n" + " 24: 00050f93 mv t6,a0\n" + " 28: 00012083 lw ra,0(sp)\n" + " 2c: 00412503 lw a0,4(sp)\n" + " 30: 00812583 lw a1,8(sp)\n" + " 34: 00c12603 lw a2,12(sp)\n" + " 38: 01010113 addi sp,sp,16\n" + " 3c: 03c62f03 lw t5,60(a2)\n" + " 40: fe010113 addi sp,sp,-32\n" + " 44: 00112023 sw ra,0(sp)\n" + " 48: 00a12223 sw a0,4(sp)\n" + " 4c: 00b12423 sw a1,8(sp)\n" + " 50: 00c12623 sw a2,12(sp)\n" + " 54: 01f12823 sw t6,16(sp)\n" + " 58: 3b7ff5b7 lui a1,0x3b7ff\n" + " 5c: 89558593 addi a1,a1,-1899 # 0x3b7fe895\n" + " 60: 000f00e7 jalr t5\n" + " 64: 00050f13 mv t5,a0\n" + " 68: 00012083 lw ra,0(sp)\n" + " 6c: 00412503 lw a0,4(sp)\n" + " 70: 00812583 lw a1,8(sp)\n" + " 74: 00c12603 lw a2,12(sp)\n" + " 78: 01012f83 lw t6,16(sp)\n" + " 7c: 02010113 addi sp,sp,32\n" + " 80: ff010113 addi sp,sp,-16\n" + " 84: 00112023 sw ra,0(sp)\n" + " 88: 00a12223 sw a0,4(sp)\n" + " 8c: 00b12423 sw a1,8(sp)\n" + " 90: 00c12623 sw a2,12(sp)\n" + " 94: 00000593 li a1,0\n" + " 98: 00100613 li a2,1\n" + " 9c: 01852683 lw a3,24(a0)\n" + " a0: 000f0713 mv a4,t5\n" + " a4: 000f80e7 jalr t6\n" + " a8: 00050f93 mv t6,a0\n" + " ac: 00012083 lw ra,0(sp)\n" + " b0: 00412503 lw a0,4(sp)\n" + " b4: 00812583 lw a1,8(sp)\n" + " b8: 00c12603 lw a2,12(sp)\n" + " bc: 01010113 addi sp,sp,16\n" + " c0: 000f9863 bnez t6,0xd0\n" + " c4: 01862f83 lw t6,24(a2)\n" + " c8: 0c800613 li a2,200\n" + " cc: 000f8067 jr t6\n" + " d0: 01f52c23 sw t6,24(a0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +get_list_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:and_(State1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), + State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 00300f13 li t5,3\n" + " 8: ffff4f13 not t5,t5\n" + " c: 01efffb3 and t6,t6,t5\n" + " 10: 004fae83 lw t4,4(t6)\n" + " 14: 01452f03 lw t5,20(a0)\n" + " 18: 01df2223 sw t4,4(t5)\n" + " 1c: 000fae83 lw t4,0(t6)\n" + " 20: 01452f03 lw t5,20(a0)\n" + " 24: 01df2023 sw t4,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_integer_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + Arg1 = {x_reg, 0}, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), + State2 = ?BACKEND:if_block( + State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + MSt1 = ?BACKEND:if_block( + MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ), + MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), + ?BACKEND:if_block( + MSt3, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + fun(BSt0) -> + ?BACKEND:jump_to_label(BSt0, Label) + end + ) + end + ), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + State4 = ?BACKEND:add_label(State3, Label, 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: ffffcf13 not t5,t6\n" + " 8: 01cf1f13 slli t5,t5,0x1c\n" + " c: 040f0c63 beqz t5,0x64\n" + " 10: 000f8f13 mv t5,t6\n" + " 14: 00300e93 li t4,3\n" + " 18: 01df7f33 and t5,t5,t4\n" + " 1c: 00200e93 li t4,2\n" + " 20: 01df0a63 beq t5,t4,0x34\n" + " 24: 0dc0006f j 0x100\n" + " 28: 00000013 nop\n" + " 2c: 00000013 nop\n" + " 30: 00000013 nop\n" + " 34: 00300f13 li t5,3\n" + " 38: ffff4f13 not t5,t5\n" + " 3c: 01efffb3 and t6,t6,t5\n" + " 40: 000faf83 lw t6,0(t6)\n" + " 44: 03f00f13 li t5,63\n" + " 48: 01efffb3 and t6,t6,t5\n" + " 4c: 00800f13 li t5,8\n" + " 50: 01ef8a63 beq t6,t5,0x64\n" + " 54: 0ac0006f j 0x100\n" + " 58: 00000013 nop\n" + " 5c: 00000013 nop\n" + " 60: 00000013 nop" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +cond_jump_to_label(Cond, Label, MMod, MSt0) -> + MMod:if_block(MSt0, Cond, fun(BSt0) -> + MMod:jump_to_label(BSt0, Label) + end). + +%% Keep the unoptimized version to test the and case. +is_number_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + Arg1 = {x_reg, 0}, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), + State2 = ?BACKEND:if_block( + State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + BSt1 = cond_jump_to_label( + {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 + ), + BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), + cond_jump_to_label( + {'and', [ + {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER}, + {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT} + ]}, + Label, + ?BACKEND, + BSt3 + ) + end + ), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + State4 = ?BACKEND:add_label(State3, Label, 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: ffffcf13 not t5,t6\n" + " 8: 01cf1f13 slli t5,t5,0x1c\n" + " c: 060f0663 beqz t5,0x78\n" + " 10: 000f8f13 mv t5,t6\n" + " 14: 00300e93 li t4,3\n" + " 18: 01df7f33 and t5,t5,t4\n" + " 1c: 00200e93 li t4,2\n" + " 20: 01df0a63 beq t5,t4,0x34\n" + " 24: 0dc0006f j 0x100\n" + " 28: 00000013 nop\n" + " 2c: 00000013 nop\n" + " 30: 00000013 nop\n" + " 34: 00300f13 li t5,3\n" + " 38: ffff4f13 not t5,t5\n" + " 3c: 01efffb3 and t6,t6,t5\n" + " 40: 000faf83 lw t6,0(t6)\n" + " 44: 000f8f13 mv t5,t6\n" + " 48: 03f00e93 li t4,63\n" + " 4c: 01df7f33 and t5,t5,t4\n" + " 50: 00800e93 li t4,8\n" + " 54: 03df0263 beq t5,t4,0x78\n" + " 58: 03f00f13 li t5,63\n" + " 5c: 01efffb3 and t6,t6,t5\n" + " 60: 01800f13 li t5,24\n" + " 64: 01ef8a63 beq t6,t5,0x78\n" + " 68: 0980006f j 0x100\n" + " 6c: 00000013 nop\n" + " 70: 00000013 nop\n" + " 74: 00000013 nop" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_boolean_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + State4 = ?BACKEND:add_label(State3, Label, 16#100), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8e63 beq t6,t5,0x24\n" + " c: 00b00f13 li t5,11\n" + " 10: 01ef8a63 beq t6,t5,0x24\n" + " 14: 0ec0006f j 0x100\n" + " 18: 00000013 nop\n" + " 1c: 00000013 nop\n" + " 20: 00000013 nop" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_boolean_far_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State3 = ?BACKEND:free_native_registers(State2, [Reg]), + ?BACKEND:assert_all_native_free(State3), + State4 = ?BACKEND:add_label(State3, Label, 16#1000), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8e63 beq t6,t5,0x24\n" + " c: 00b00f13 li t5,11\n" + " 10: 01ef8a63 beq t6,t5,0x24\n" + " 14: 7ed0006f j 0x1000\n" + " 18: 00000013 nop\n" + " 1c: 00000013 nop\n" + " 20: 00000013 nop" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +is_boolean_far_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + Label = 1, + State1 = ?BACKEND:add_label(State0, Label, 16#1000), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> + ?BACKEND:jump_to_label(BSt1, Label) + end) + end), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8e63 beq t6,t5,0x24\n" + " c: 00b00f13 li t5,11\n" + " 10: 01ef8a63 beq t6,t5,0x24\n" + " 14: 00000f17 auipc t5,0x0\n" + " 18: 008f2f03 lw t5,8(t5)\n" + " 1c: 000f0067 jr t5\n" + " 20: 00001000 .word 0x00001000" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test OP_WAIT_TIMEOUT pattern that uses set_continuation_to_offset and continuation_entry_point +wait_timeout_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + Label = 42, + {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0), + {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000), + State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [ + ctx, jit_state, {free, TimeoutReg}, Label + ]), + State4 = ?BACKEND:add_label(State3, OffsetRef0), + State5 = ?BACKEND:continuation_entry_point(State4), + {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ]), + State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}), + % ?WAITING_TIMEOUT_EXPIRED + {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]), + State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) -> + ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [ + ctx, jit_state, Label + ]) + end), + State10 = ?BACKEND:update_branches(State9), + + Stream = ?BACKEND:stream(State10), + Dump = << + " 0: 00000f97 auipc t6,0x0\n" + " 4: 024f8f93 addi t6,t6,36\n" + " 8: 01f5a223 sw t6,4(a1)\n" + " c: 00001fb7 lui t6,0x1\n" + " 10: 388f8f93 addi t6,t6,904\n" + " 14: 07862f03 lw t5,120(a2)\n" + " 18: 000f8613 mv a2,t6\n" + " 1c: 02a00693 li a3,42\n" + " 20: 000f0067 jr t5\n" + " 24: 05462f83 lw t6,84(a2)\n" + " 28: ff010113 addi sp,sp,-16\n" + " 2c: 00112023 sw ra,0(sp)\n" + " 30: 00a12223 sw a0,4(sp)\n" + " 34: 00b12423 sw a1,8(sp)\n" + " 38: 00c12623 sw a2,12(sp)\n" + " 3c: 000f80e7 jalr t6\n" + " 40: 00050f93 mv t6,a0\n" + " 44: 00012083 lw ra,0(sp)\n" + " 48: 00412503 lw a0,4(sp)\n" + " 4c: 00812583 lw a1,8(sp)\n" + " 50: 00c12603 lw a2,12(sp)\n" + " 54: 01010113 addi sp,sp,16\n" + " 58: 00af8463 beq t6,a0,0x60\n" + " 5c: 000f8513 mv a0,t6\n" + " 60: 00008067 ret\n" + " 64: 08400f93 li t6,132\n" + " 68: 00cf8fb3 add t6,t6,a2\n" + " 6c: 000faf83 lw t6,0(t6)\n" + " 70: ff010113 addi sp,sp,-16\n" + " 74: 00112023 sw ra,0(sp)\n" + " 78: 00a12223 sw a0,4(sp)\n" + " 7c: 00b12423 sw a1,8(sp)\n" + " 80: 00c12623 sw a2,12(sp)\n" + " 84: 00200593 li a1,2\n" + " 88: 000f80e7 jalr t6\n" + " 8c: 00050f93 mv t6,a0\n" + " 90: 00012083 lw ra,0(sp)\n" + " 94: 00412503 lw a0,4(sp)\n" + " 98: 00812583 lw a1,8(sp)\n" + " 9c: 00c12603 lw a2,12(sp)\n" + " a0: 01010113 addi sp,sp,16\n" + " a4: 000f9863 bnez t6,0xb4\n" + " a8: 07c62f83 lw t6,124(a2)\n" + " ac: 02a00613 li a2,42\n" + " b0: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test OP_WAIT pattern that uses set_continuation_to_label +wait_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:set_continuation_to_label(State2, Label), + State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 00000697 auipc a3,0x0\n" + " 4: 00068067 jr a3\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 00068067 jr a3\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 00068067 jr a3\n" + " 18: 00000697 auipc a3,0x0\n" + " 1c: 00068067 jr a3\n" + " 20: 00000697 auipc a3,0x0\n" + " 24: 00068067 jr a3\n" + " 28: 00000697 auipc a3,0x0\n" + " 2c: 00068067 jr a3\n" + " 30: 00000f97 auipc t6,0x0\n" + " 34: 004f8f93 addi t6,t6,4\n" + " 38: 01f5a223 sw t6,4(a1)\n" + " 3c: 07462f83 lw t6,116(a2)\n" + " 40: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test return_labels_and_lines/2 function +return_labels_and_lines_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Test return_labels_and_lines with some sample labels and lines + State1 = ?BACKEND:add_label(State0, 2, 32), + State2 = ?BACKEND:add_label(State1, 1, 16), + + % {Line, Offset} pairs + SortedLines = [{10, 16}, {20, 32}], + + State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), + Stream = ?BACKEND:stream(State3), + + % Should have generated auipc + addi + ret + labels table + lines table + % auipc = 4 bytes, addi = 4 bytes, ret = 4 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes + % Total minimum: 36 bytes + ?assert(byte_size(Stream) >= 36), + + % Expected: auipc a0, 0 + addi a0, a0, 12 + ret + labels table + lines table + % The data tables start at offset 0xC (12), so we load PC + 12 into a0 + Dump = << + " 0: 00000517 auipc a0,0x0\n" + " 4: 00c50513 addi a0,a0,12\n" + " 8: 00008067 ret\n" + " c: 0200 addi s0,sp,256\n" + " e: 0100 addi s0,sp,128\n" + " 10: 0000 unimp\n" + " 12: 1000 addi s0,sp,32\n" + " 14: 0200 addi s0,sp,256\n" + " 16: 0000 unimp\n" + " 18: 2000 fld fs0,0(s0)\n" + " 1a: 0200 addi s0,sp,256\n" + " 1c: 0a00 addi s0,sp,272\n" + " 1e: 0000 unimp\n" + " 20: 1000 addi s0,sp,32\n" + " 22: 1400 addi s0,sp,544\n" + " 24: 0000 unimp\n" + " 26: 2000 fld fs0,0(s0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test call_primitive with {free, {x_reg, X}} +gc_bif2_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]), + {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [ + ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}} + ]), + + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 02062f83 lw t6,32(a2)\n" + " 4: ff010113 addi sp,sp,-16\n" + " 8: 00112023 sw ra,0(sp)\n" + " c: 00a12223 sw a0,4(sp)\n" + " 10: 00b12423 sw a1,8(sp)\n" + " 14: 00c12623 sw a2,12(sp)\n" + " 18: 00058513 mv a0,a1\n" + " 1c: 02a00593 li a1,42\n" + " 20: 000f80e7 jalr t6\n" + " 24: 00050f93 mv t6,a0\n" + " 28: 00012083 lw ra,0(sp)\n" + " 2c: 00412503 lw a0,4(sp)\n" + " 30: 00812583 lw a1,8(sp)\n" + " 34: 00c12603 lw a2,12(sp)\n" + " 38: 01010113 addi sp,sp,16\n" + " 3c: ff010113 addi sp,sp,-16\n" + " 40: 00112023 sw ra,0(sp)\n" + " 44: 00a12223 sw a0,4(sp)\n" + " 48: 00b12423 sw a1,8(sp)\n" + " 4c: 00c12623 sw a2,12(sp)\n" + " 50: 00000593 li a1,0\n" + " 54: 00300613 li a2,3\n" + " 58: 01452f03 lw t5,20(a0)\n" + " 5c: 000f2683 lw a3,0(t5)\n" + " 60: 01852703 lw a4,24(a0)\n" + " 64: 000f80e7 jalr t6\n" + " 68: 00050f93 mv t6,a0\n" + " 6c: 00012083 lw ra,0(sp)\n" + " 70: 00412503 lw a0,4(sp)\n" + " 74: 00812583 lw a1,8(sp)\n" + " 78: 00c12603 lw a2,12(sp)\n" + " 7c: 01010113 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test case where parameter value is in a1 +memory_ensure_free_with_roots_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [ + ctx, jit_state, {free, a1}, 4, 1 + ]), + + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 0b000f93 li t6,176\n" + " 4: 00cf8fb3 add t6,t6,a2\n" + " 8: 000faf83 lw t6,0(t6)\n" + " c: ff010113 addi sp,sp,-16\n" + " 10: 00112023 sw ra,0(sp)\n" + " 14: 00a12223 sw a0,4(sp)\n" + " 18: 00b12423 sw a1,8(sp)\n" + " 1c: 00c12623 sw a2,12(sp)\n" + " 20: 00058f13 mv t5,a1\n" + " 24: 000f0613 mv a2,t5\n" + " 28: 00400693 li a3,4\n" + " 2c: 00100713 li a4,1\n" + " 30: 000f80e7 jalr t6\n" + " 34: 00050f93 mv t6,a0\n" + " 38: 00012083 lw ra,0(sp)\n" + " 3c: 00412503 lw a0,4(sp)\n" + " 40: 00812583 lw a1,8(sp)\n" + " 44: 00c12603 lw a2,12(sp)\n" + " 48: 01010113 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_ext_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]), + ?BACKEND:assert_all_native_free(State2), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: ffff8f93 addi t6,t6,-1\n" + " 8: 01f5a423 sw t6,8(a1)\n" + " c: 000f9a63 bnez t6,0x20\n" + " 10: 00000f97 auipc t6,0x0\n" + " 14: 010f8f93 addi t6,t6,16\n" + " 18: 00862f83 lw t6,8(a2)\n" + " 1c: 000f8067 jr t6\n" + " 20: 0005af03 lw t5,0(a1)\n" + " 24: 000f2f03 lw t5,0(t5)\n" + " 28: 018f1f13 slli t5,t5,0x18\n" + " 2c: 13000f93 li t6,304\n" + " 30: 01ff6f33 or t5,t5,t6\n" + " 34: 05e52e23 sw t5,92(a0)\n" + " 38: 01062f83 lw t6,16(a2)\n" + " 3c: 00200613 li a2,2\n" + " 40: 00500693 li a3,5\n" + " 44: fff00713 li a4,-1\n" + " 48: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_fun_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + FuncReg = {x_reg, 0}, + ArgsCount = 0, + {State2, Reg} = ?BACKEND:move_to_native_register(State1, FuncReg), + {State3, RegCopy} = ?BACKEND:copy_to_native_register(State2, Reg), + State4 = ?BACKEND:if_block( + State3, {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy + ]) + end + ), + State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), + State7 = ?BACKEND:if_block( + State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy + ]) + end + ), + State8 = ?BACKEND:free_native_registers(State7, [RegCopy]), + State9 = ?BACKEND:call_primitive_with_cp(State8, ?PRIM_CALL_FUN, [ + ctx, jit_state, Reg, ArgsCount + ]), + ?BACKEND:assert_all_native_free(State9), + Stream = ?BACKEND:stream(State9), + Dump = << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: ffff8f93 addi t6,t6,-1\n" + " 8: 01f5a423 sw t6,8(a1)\n" + " c: 000f9a63 bnez t6,0x20\n" + " 10: 00000f97 auipc t6,0x0\n" + " 14: 010f8f93 addi t6,t6,16\n" + " 18: 00862f83 lw t6,8(a2)\n" + " 1c: 000f8067 jr t6\n" + " 20: 01852f83 lw t6,24(a0)\n" + " 24: 000f8f13 mv t5,t6\n" + " 28: 000f0e93 mv t4,t5\n" + " 2c: 00300e13 li t3,3\n" + " 30: 01cefeb3 and t4,t4,t3\n" + " 34: 00200e13 li t3,2\n" + " 38: 01ce8c63 beq t4,t3,0x50\n" + " 3c: 04c62f83 lw t6,76(a2)\n" + " 40: 04000613 li a2,64\n" + " 44: 18b00693 li a3,395\n" + " 48: 000f0713 mv a4,t5\n" + " 4c: 000f8067 jr t6\n" + " 50: 00300e93 li t4,3\n" + " 54: fffece93 not t4,t4\n" + " 58: 01df7f33 and t5,t5,t4\n" + " 5c: 000f2f03 lw t5,0(t5)\n" + " 60: 000f0e93 mv t4,t5\n" + " 64: 03f00e13 li t3,63\n" + " 68: 01cefeb3 and t4,t4,t3\n" + " 6c: 01400e13 li t3,20\n" + " 70: 01ce8c63 beq t4,t3,0x88\n" + " 74: 04c62f83 lw t6,76(a2)\n" + " 78: 07800613 li a2,120\n" + " 7c: 18b00693 li a3,395\n" + " 80: 000f0713 mv a4,t5\n" + " 84: 000f8067 jr t6\n" + " 88: 0005ae83 lw t4,0(a1)\n" + " 8c: 000eae83 lw t4,0(t4)\n" + " 90: 018e9e93 slli t4,t4,0x18\n" + " 94: 2e000f13 li t5,736\n" + " 98: 01eeeeb3 or t4,t4,t5\n" + " 9c: 05d52e23 sw t4,92(a0)\n" + " a0: 08000f13 li t5,128\n" + " a4: 00cf0f33 add t5,t5,a2\n" + " a8: 000f2f03 lw t5,0(t5)\n" + " ac: 000f8613 mv a2,t6\n" + " b0: 00000693 li a3,0\n" + " b4: 000f0067 jr t5" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +move_to_vm_register_test0(State, Source, Dest, Dump) -> + State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + ?assertEqual(dump_to_bin(Dump), Stream). + +move_to_vm_register_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + move_to_vm_register_test0(State0, 0, {x_reg, 0}, << + " 0: 00000f93 li t6,0\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: 0f80006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {x_reg, extra}, << + " 0: 00000f93 li t6,0\n" + " 4: 05f52c23 sw t6,88(a0)\n" + " 8: 0f80006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {ptr, t5}, << + " 0: 00000f93 li t6,0\n" + " 4: 01ff2023 sw t6,0(t5)\n" + " 8: 0f80006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {y_reg, 2}, << + " 0: 00000f13 li t5,0\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 01efa423 sw t5,8(t6)\n" + " c: 0f40006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {y_reg, 20}, << + " 0: 00000f13 li t5,0\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 05efa823 sw t5,80(t6)\n" + " c: 0f40006f j 0x100" + >>) + end), + %% Test: Immediate to x_reg + ?_test(begin + move_to_vm_register_test0(State0, 42, {x_reg, 0}, << + " 0: 02a00f93 li t6,42\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: 0f80006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {x_reg, extra}, << + " 0: 02a00f93 li t6,42\n" + " 4: 05f52c23 sw t6,88(a0)\n" + " 8: 0f80006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 2}, << + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 01efa423 sw t5,8(t6)\n" + " c: 0f40006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 20}, << + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 05efa823 sw t5,80(t6)\n" + " c: 0f40006f j 0x100" + >>) + end), + %% Test: Immediate to ptr + ?_test(begin + move_to_vm_register_test0(State0, 99, {ptr, a3}, << + " 0: 06300f93 li t6,99\n" + " 4: 01f6a023 sw t6,0(a3)\n" + " 8: 0f80006f j 0x100" + >>) + end), + %% Test: x_reg to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << + " 0: 01c52f83 lw t6,28(a0)\n" + " 4: 03f52023 sw t6,32(a0)\n" + " 8: 0f80006f j 0x100" + >>) + end), + %% Test: x_reg to ptr + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, a1}, << + " 0: 01c52f83 lw t6,28(a0)\n" + " 4: 01f5a023 sw t6,0(a1)\n" + " 8: 0f80006f j 0x100" + >>) + end), + %% Test: ptr to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {ptr, t3}, {x_reg, 3}, << + " 0: 000e2f83 lw t6,0(t3)\n" + " 4: 03f52223 sw t6,36(a0)\n" + " 8: 0f80006f j 0x100" + >>) + end), + %% Test: x_reg to y_reg + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01452f03 lw t5,20(a0)\n" + " 8: 01ff2223 sw t6,4(t5)\n" + " c: 0f40006f j 0x100" + >>) + end), + %% Test: y_reg to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 000f2f83 lw t6,0(t5)\n" + " 8: 03f52223 sw t6,36(a0)\n" + " c: 0f40006f j 0x100" + >>) + end), + %% Test: y_reg to y_reg + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 004f2f83 lw t6,4(t5)\n" + " 8: 03f52223 sw t6,36(a0)\n" + " c: 0f40006f j 0x100" + >>) + end), + %% Test: Native register to x_reg + ?_test(begin + move_to_vm_register_test0(State0, t4, {x_reg, 0}, << + " 0: 01d52c23 sw t4,24(a0)\n" + " 4: 0fc0006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, t5, {x_reg, extra}, << + " 0: 05e52c23 sw t5,88(a0)\n" + " 4: 0fc0006f j 0x100" + >>) + end), + %% Test: Native register to ptr + ?_test(begin + move_to_vm_register_test0(State0, t3, {ptr, a3}, << + " 0: 01c6a023 sw t3,0(a3)\n" + " 4: 0fc0006f j 0x100" + >>) + end), + %% Test: Native register to y_reg + ?_test(begin + move_to_vm_register_test0(State0, a1, {y_reg, 0}, << + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 00bfa023 sw a1,0(t6)\n" + " 8: 0f80006f j 0x100" + >>) + end), + %% Test: Large immediate to x_reg (uses lui + addi in RISC-V) + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01f52c23 sw t6,24(a0)\n" + " c: 0f40006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, << + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 05f52c23 sw t6,88(a0)\n" + " c: 0f40006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01452f03 lw t5,20(a0)\n" + " c: 01ff2423 sw t6,8(t5)\n" + " 10: 0f00006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01452f03 lw t5,20(a0)\n" + " c: 05ff2823 sw t6,80(t5)\n" + " 10: 0f00006f j 0x100" + >>) + end), + %% Test: Large immediate to ptr + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {ptr, a3}, << + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01f6a023 sw t6,0(a3)\n" + " c: 0f40006f j 0x100" + >>) + end), + %% Test: x_reg to y_reg (high index) + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << + " 0: 05452f83 lw t6,84(a0)\n" + " 4: 01452f03 lw t5,20(a0)\n" + " 8: 07ff2e23 sw t6,124(t5)\n" + " c: 0f40006f j 0x100" + >>) + end), + %% Test: y_reg to x_reg (high index) + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 07cf2f83 lw t6,124(t5)\n" + " 8: 05f52a23 sw t6,84(a0)\n" + " c: 0f40006f j 0x100" + >>) + end), + %% Test: Large y_reg index (32) that exceeds str immediate offset limit + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 32}, << + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 08000e93 li t4,128\n" + " c: 01fe8eb3 add t4,t4,t6\n" + " 10: 01eea023 sw t5,0(t4)\n" + " 14: 0ec0006f j 0x100" + >>) + end), + %% Test: Negative immediate to x_reg + ?_test(begin + move_to_vm_register_test0(State0, -1, {x_reg, 0}, << + " 0: fff00f93 li t6,-1\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: 0f80006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, -100, {x_reg, 0}, << + " 0: f9c00f93 li t6,-100\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: 0f80006f j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, -1000, {x_reg, 0}, << + " 0: c1800f93 li t6,-1000\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: 0f80006f j 0x100" + >>) + end) + ] + end}. + +move_array_element_test0(State, Reg, Index, Dest, Dump) -> + State1 = ?BACKEND:move_array_element(State, Reg, Index, Dest), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +move_array_element_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_array_element: reg[x] to x_reg + ?_test(begin + move_array_element_test0(State0, a3, 2, {x_reg, 0}, << + " 0: 0086af83 lw t6,8(a3)\n" + " 4: 01f52c23 sw t6,24(a0)" + >>) + end), + %% move_array_element: reg[x] to ptr + ?_test(begin + move_array_element_test0(State0, a3, 3, {ptr, t4}, << + " 0: 00c6af83 lw t6,12(a3)\n" + " 4: 01fea023 sw t6,0(t4)" + >>) + end), + %% move_array_element: reg[x] to y_reg + ?_test(begin + move_array_element_test0(State0, a3, 1, {y_reg, 2}, << + " 0: 0046af03 lw t5,4(a3)\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 01efa423 sw t5,8(t6)" + >>) + end), + %% move_array_element: reg[x] to native reg (t4) + ?_test(begin + move_array_element_test0(State0, a3, 1, t4, << + " 0: 0046ae83 lw t4,4(a3)" + >>) + end), + %% move_array_element: reg[x] to y_reg + ?_test(begin + move_array_element_test0(State0, a3, 7, {y_reg, 31}, << + " 0: 01c6af03 lw t5,28(a3)\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 07efae23 sw t5,124(t6)" + >>) + end), + %% move_array_element: reg[x] to x_reg + ?_test(begin + move_array_element_test0(State0, a3, 7, {x_reg, 15}, << + " 0: 01c6af83 lw t6,28(a3)\n" + " 4: 05f52a23 sw t6,84(a0)" + >>) + end), + %% move_array_element: reg_x[reg_y] to x_reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), + move_array_element_test0(State1, a3, {free, Reg}, {x_reg, 2}, << + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 002f9f93 slli t6,t6,0x2\n" + " 8: 01f68fb3 add t6,a3,t6\n" + " c: 000faf83 lw t6,0(t6)\n" + " 10: 03f52023 sw t6,32(a0)" + >>) + end), + %% move_array_element: reg_x[reg_y] to pointer (large x reg) + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), + move_array_element_test0(State1, a3, {free, Reg}, {ptr, t4}, << + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 002f9f93 slli t6,t6,0x2\n" + " 8: 01f68fb3 add t6,a3,t6\n" + " c: 000faf83 lw t6,0(t6)\n" + " 10: 01fea023 sw t6,0(t4)" + >>) + end), + %% move_array_element: reg_x[reg_y] to y_reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), + move_array_element_test0(State1, a3, {free, Reg}, {y_reg, 31}, << + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 002f9f93 slli t6,t6,0x2\n" + " 8: 01f68fb3 add t6,a3,t6\n" + " c: 000faf83 lw t6,0(t6)\n" + " 10: 01452f03 lw t5,20(a0)\n" + " 14: 07ff2e23 sw t6,124(t5)" + >>) + end), + %% move_array_element with integer index and x_reg destination + ?_test(begin + {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 008faf03 lw t5,8(t6)\n" + " 8: 03e52623 sw t5,44(a0)" + >>) + end) + ] + end}. + +get_array_element_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% get_array_element: reg[x] to new native reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, t3, 4), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 010e2f83 lw t6,16(t3)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t6, Reg) + end) + ] + end}. + +move_to_array_element_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_to_array_element/4: x_reg to reg[x] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01f6a423 sw t6,8(a3)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: x_reg to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, t3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 000e0f13 mv t5,t3\n" + " 8: 002f1f13 slli t5,t5,0x2\n" + " c: 01e68f33 add t5,a3,t5\n" + " 10: 01ff2023 sw t6,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: ptr to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {ptr, t6}, a3, t3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 000faf83 lw t6,0(t6)\n" + " 4: 000e0f13 mv t5,t3\n" + " 8: 002f1f13 slli t5,t5,0x2\n" + " c: 01e68f33 add t5,a3,t5\n" + " 10: 01ff2023 sw t6,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: y_reg to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, a3, t3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 008f2f83 lw t6,8(t5)\n" + " 8: 000e0f13 mv t5,t3\n" + " c: 002f1f13 slli t5,t5,0x2\n" + " 10: 01e68f33 add t5,a3,t5\n" + " 14: 01ff2023 sw t6,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: x_reg to reg[x+offset] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2, 1), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01f6a423 sw t6,8(a3)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: x_reg to reg[x+offset] + ?_test(begin + State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), + State2 = setelement(7, State1, [a3, t3]), + [a3, t3] = ?BACKEND:used_regs(State2), + State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 001e0f13 addi t5,t3,1\n" + " 8: 002f1f13 slli t5,t5,0x2\n" + " c: 01e68f33 add t5,a3,t5\n" + " 10: 01ff2023 sw t6,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: imm to reg[x+offset] + ?_test(begin + State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), + State2 = setelement(7, State1, [a3, t3]), + [a3, t3] = ?BACKEND:used_regs(State2), + State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 02a00f93 li t6,42\n" + " 4: 001e0f13 addi t5,t3,1\n" + " 8: 002f1f13 slli t5,t5,0x2\n" + " c: 01e68f33 add t5,a3,t5\n" + " 10: 01ff2023 sw t6,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +move_to_native_register_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_to_native_register/2: imm + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, 42), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t6, Reg), + Dump = << + " 0: 02a00f93 li t6,42" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: negative value + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, -42), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t6, Reg), + Dump = << + " 0: fd600f93 li t6,-42" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: -255 (boundary case) + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, -255), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t6, Reg), + Dump = << + " 0: f0100f93 li t6,-255" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: -256 (boundary case, fits in immediate for RISC-V) + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + ?assertEqual(t6, Reg), + Dump = << + " 0: f0000f93 li t6,-256\n" + " 4: 0fc0006f j 0x100" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {ptr, reg} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, t5}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t5, Reg), + Dump = << + " 0: 000f2f03 lw t5,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {x_reg, N} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t6, Reg), + Dump = << + " 0: 02c52f83 lw t6,44(a0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {y_reg, N} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 3}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t6, Reg), + Dump = << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 00cf2f83 lw t6,12(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: imm to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, 42, t5), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 02a00f13 li t5,42" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: reg to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, t6, t4), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 000f8e93 mv t4,t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {ptr, reg} to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {ptr, t6}, t3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 000fae03 lw t3,0(t6)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {x_reg, x} to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, a3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 02052683 lw a3,32(a0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {y_reg, y} to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, a1), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 008fa583 lw a1,8(t6)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% Test: ptr with offset to fp_reg (term_to_float) + ?_test(begin + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:move_to_vm_register( + State1, {free, {ptr, RegA, 1}}, {fp_reg, 3} + ), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 06052f03 lw t5,96(a0)\n" + " 8: 004fae83 lw t4,4(t6)\n" + " c: 01df2c23 sw t4,24(t5)\n" + " 10: 008fae83 lw t4,8(t6)\n" + " 14: 01df2e23 sw t4,28(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +add_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:add(State0, Reg, Imm), + % Force emission of literal pool + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + ?assertEqual(dump_to_bin(Dump), Stream). + +add_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + add_test0(State0, a2, 2, << + " 0: 00260613 addi a2,a2,2\n" + " 4: 0fc0006f j 0x100" + >>) + end), + ?_test(begin + add_test0(State0, a2, 256, << + " 0: 10000f93 li t6,256\n" + " 4: 01f60633 add a2,a2,t6\n" + " 8: 0f80006f j 0x100" + >>) + end), + ?_test(begin + add_test0(State0, a2, a3, << + " 0: 00d60633 add a2,a2,a3\n" + " 4: 0fc0006f j 0x100" + >>) + end) + ] + end}. + +sub_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:sub(State0, Reg, Imm), + % Force emission of literal pool + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + ?assertEqual(dump_to_bin(Dump), Stream). + +sub_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + sub_test0(State0, a2, 2, << + " 0: ffe60613 addi a2,a2,-2\n" + " 4: 0fc0006f j 0x100" + >>) + end), + ?_test(begin + sub_test0(State0, a2, 256, << + " 0: 10000f93 li t6,256\n" + " 4: 41f60633 sub a2,a2,t6\n" + " 8: 0f80006f j 0x100" + >>) + end), + ?_test(begin + sub_test0(State0, a2, a3, << + " 0: 40d60633 sub a2,a2,a3\n" + " 4: 0fc0006f j 0x100" + >>) + end) + ] + end}. + +mul_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:mul(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +mul_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + mul_test0(State0, a2, 2, << + " 0: 00161613 slli a2,a2,0x1" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 3, << + " 0: 00161f93 slli t6,a2,0x1\n" + " 4: 00cf8633 add a2,t6,a2" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 4, << + " 0: 00261613 slli a2,a2,0x2" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 5, << + " 0: 00261f93 slli t6,a2,0x2\n" + " 4: 00cf8633 add a2,t6,a2" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 6, << + " 0: 00161f93 slli t6,a2,0x1\n" + " 4: 00cf8633 add a2,t6,a2\n" + " 8: 00161613 slli a2,a2,0x1" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 7, << + " 0: 00361f93 slli t6,a2,0x3\n" + " 4: 40cf8633 sub a2,t6,a2" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 8, << + " 0: 00361613 slli a2,a2,0x3" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 9, << + " 0: 00361f93 slli t6,a2,0x3\n" + " 4: 00cf8633 add a2,t6,a2" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 10, << + " 0: 00261f93 slli t6,a2,0x2\n" + " 4: 00cf8633 add a2,t6,a2\n" + " 8: 00161613 slli a2,a2,0x1" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 11, << + " 0: 00b00f93 li t6,11\n" + " 4: 03f60633 mul a2,a2,t6" + >>) + end) + ] + end}. + +%% Test set_args1 with y_reg pattern +set_args1_y_reg_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Call primitive with y_reg argument to trigger {y_reg, X} pattern in set_args1 + % This mirrors: {MSt2, Value} = MMod:call_primitive(MSt1, ?PRIM_BITSTRING_GET_UTF8, [{free, Src}]) + % but with {y_reg, 5} instead of {free, Src} + {State1, _ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_BITSTRING_GET_UTF8, [ + {y_reg, 5} + ]), + + Stream = ?BACKEND:stream(State1), + % Expected disassembly for loading from y_reg and calling primitive + Dump = << + " 0: 04300f93 li t6,67\n" + " 4: 002f9f93 slli t6,t6,0x2\n" + " 8: 00cf8fb3 add t6,t6,a2\n" + " c: 000faf83 lw t6,0(t6)\n" + " 10: ff010113 addi sp,sp,-16\n" + " 14: 00112023 sw ra,0(sp)\n" + " 18: 00a12223 sw a0,4(sp)\n" + " 1c: 00b12423 sw a1,8(sp)\n" + " 20: 00c12623 sw a2,12(sp)\n" + " 24: 01452f03 lw t5,20(a0)\n" + " 28: 014f2503 lw a0,20(t5)\n" + " 2c: 000f80e7 jalr t6\n" + " 30: 00050f93 mv t6,a0\n" + " 34: 00012083 lw ra,0(sp)\n" + " 38: 00412503 lw a0,4(sp)\n" + " 3c: 00812583 lw a1,8(sp)\n" + " 40: 00c12603 lw a2,12(sp)\n" + " 44: 01010113 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test large Y register read (Y=123, offset=492, exceeds immediate limit) +large_y_reg_read_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Move from a large Y register (123 * 4 = 492 bytes, exceeds immediate limit) + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 123}), + Stream = ?BACKEND:stream(State1), + % Expected: uses helper with temp register for large offset + Dump = << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 1ec00f93 li t6,492\n" + " 8: 01ef8fb3 add t6,t6,t5\n" + " c: 000faf83 lw t6,0(t6)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t6, Reg). + +%% Test large Y register write with immediate value +large_y_reg_write_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Move immediate to a large Y register (123 * 4 = 492 bytes) + State1 = ?BACKEND:move_to_vm_register(State0, 42, {y_reg, 123}), + Stream = ?BACKEND:stream(State1), + % Expected: uses helper with temp registers for large offset + Dump = << + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 1ec00e93 li t4,492\n" + " c: 01fe8eb3 add t4,t4,t6\n" + " 10: 01eea023 sw t5,0(t4)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test large Y register read with limited registers (uses IP_REG fallback) +large_y_reg_read_register_exhaustion_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate most available registers to simulate near-exhaustion (leave 1 for the y_reg helper) + {State1, _} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, _} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, _} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, _} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + % Leave one register available so the y_reg helper can work, but it will need IP_REG fallback + {StateFinal, ResultReg} = ?BACKEND:move_to_native_register(State5, {y_reg, 35}), + Stream = ?BACKEND:stream(StateFinal), + % Expected: uses t0+t1 fallback sequence when temps are exhausted + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 01452283 lw t0,20(a0)\n" + " 18: 08c00313 li t1,140\n" + " 1c: 00530333 add t1,t1,t0\n" + " 20: 00032303 lw t1,0(t1)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t1, ResultReg). + +%% Test large Y register write with register exhaustion (uses t1/t0 fallback) +large_y_reg_write_register_exhaustion_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get a source register first + {State1, SrcReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + % Allocate most remaining registers to simulate exhaustion + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + % Try to write to large Y register when only one temp register is available + StateFinal = ?BACKEND:move_to_vm_register(State5, SrcReg, {y_reg, 50}), + Stream = ?BACKEND:stream(StateFinal), + % Expected: uses t1/t0 fallback sequence + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 01452303 lw t1,20(a0)\n" + " 18: 0c800293 li t0,200\n" + " 1c: 006282b3 add t0,t0,t1\n" + " 20: 01f2a023 sw t6,0(t0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test boundary case: Y=31 (124 bytes, exactly at limit, should use direct addressing) +y_reg_boundary_direct_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 31}), + Stream = ?BACKEND:stream(State1), + % Expected: uses direct addressing since 31 * 4 = 124 < 2048 + Dump = << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 07cf2f83 lw t6,124(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t6, Reg). + +%% Test debugger function +debugger_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:debugger(State0), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 00100073 ebreak" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +and_register_exhaustion_negative_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate all available registers to simulate register exhaustion + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + % Test negative immediate (-4) which should use NOT+AND with t0 as temp + StateResult = ?BACKEND:and_(StateNoRegs, t6, -4), + Stream = ?BACKEND:stream(StateResult), + ExpectedDump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 00300293 li t0,3\n" + " 1c: fff2c293 not t0,t0\n" + " 20: 005fffb3 and t6,t6,t0" + >>, + ?assertEqual(dump_to_bin(ExpectedDump), Stream). + +and_register_exhaustion_positive_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate all available registers to simulate register exhaustion + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + % Test positive immediate (0x3F) which should use AND with t0 as temp + StateResult = ?BACKEND:and_(StateNoRegs, t6, 16#3F), + Stream = ?BACKEND:stream(StateResult), + ExpectedDump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 03f00293 li t0,63\n" + " 1c: 005fffb3 and t6,t6,t0" + >>, + ?assertEqual(dump_to_bin(ExpectedDump), Stream). + +jump_table_large_labels_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 512), + Stream = ?BACKEND:stream(State1), + % RISC-V: Each jump table entry is 8 bytes (AUIPC + JALR) + ?assertEqual((512 + 1) * 8, byte_size(Stream)). + +alloc_boxed_integer_fragment_small_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ + ctx, {avm_int64_t, 42} + ]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: ff010113 addi sp,sp,-16\n" + " 8: 00112023 sw ra,0(sp)\n" + " c: 00a12223 sw a0,4(sp)\n" + " 10: 00b12423 sw a1,8(sp)\n" + " 14: 00c12623 sw a2,12(sp)\n" + " 18: 02a00613 li a2,42\n" + " 1c: 00000693 li a3,0\n" + " 20: 000f80e7 jalr t6\n" + " 24: 00050f93 mv t6,a0\n" + " 28: 00012083 lw ra,0(sp)\n" + " 2c: 00412503 lw a0,4(sp)\n" + " 30: 00812583 lw a1,8(sp)\n" + " 34: 00c12603 lw a2,12(sp)\n" + " 38: 01010113 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +alloc_boxed_integer_fragment_large_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ + ctx, {avm_int64_t, 16#123456789ABCDEF0} + ]), + % Add a call primitive last to emit literal pool + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg} + ]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: ff010113 addi sp,sp,-16\n" + " 8: 00112023 sw ra,0(sp)\n" + " c: 00a12223 sw a0,4(sp)\n" + " 10: 00b12423 sw a1,8(sp)\n" + " 14: 00c12623 sw a2,12(sp)\n" + " 18: 9abce637 lui a2,0x9abce\n" + " 1c: ef060613 addi a2,a2,-272\n" + " 20: 123456b7 lui a3,0x12345\n" + " 24: 67868693 addi a3,a3,1656\n" + " 28: 000f80e7 jalr t6\n" + " 2c: 00050f93 mv t6,a0\n" + " 30: 00012083 lw ra,0(sp)\n" + " 34: 00412503 lw a0,4(sp)\n" + " 38: 00812583 lw a1,8(sp)\n" + " 3c: 00c12603 lw a2,12(sp)\n" + " 40: 01010113 addi sp,sp,16\n" + " 44: 04c62f03 lw t5,76(a2)\n" + " 48: 04800613 li a2,72\n" + " 4c: 28b00693 li a3,651\n" + " 50: 000f8713 mv a4,t6\n" + " 54: 000f0067 jr t5" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test for stack alignment issue in call_func_ptr +%% RISC-V maintains 16-byte stack alignment (RISC-V calling convention) +call_func_ptr_stack_alignment_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, _ResultReg} = ?BACKEND:call_func_ptr(State4, {free, t3}, [42]), + Stream = ?BACKEND:stream(State5), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: fe010113 addi sp,sp,-32\n" + " 14: 00112023 sw ra,0(sp)\n" + " 18: 00a12223 sw a0,4(sp)\n" + " 1c: 00b12423 sw a1,8(sp)\n" + " 20: 00c12623 sw a2,12(sp)\n" + " 24: 01d12823 sw t4,16(sp)\n" + " 28: 01e12a23 sw t5,20(sp)\n" + " 2c: 01f12c23 sw t6,24(sp)\n" + " 30: 02a00513 li a0,42\n" + " 34: 000e00e7 jalr t3\n" + " 38: fea12e23 sw a0,-4(sp)\n" + " 3c: 00012083 lw ra,0(sp)\n" + " 40: 00412503 lw a0,4(sp)\n" + " 44: 00812583 lw a1,8(sp)\n" + " 48: 00c12603 lw a2,12(sp)\n" + " 4c: 01012e83 lw t4,16(sp)\n" + " 50: 01412f03 lw t5,20(sp)\n" + " 54: 01812f83 lw t6,24(sp)\n" + " 58: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test for register exhaustion issue in call_func_ptr with 5+ arguments +%% When all registers are used and we call a function with 5+ args, +%% set_args needs temporary registers but none are available +call_func_ptr_register_exhaustion_test_() -> + {setup, + fun() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Allocate all available registers to simulate register pressure + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {State6, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + State6 + end, + fun(State6) -> + [ + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, 3, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fe010113 addi sp,sp,-32\n" + " 1c: 00112023 sw ra,0(sp)\n" + " 20: 00a12223 sw a0,4(sp)\n" + " 24: 00b12423 sw a1,8(sp)\n" + " 28: 00c12623 sw a2,12(sp)\n" + " 2c: 00612823 sw t1,16(sp)\n" + " 30: 01c12a23 sw t3,20(sp)\n" + " 34: 01d12c23 sw t4,24(sp)\n" + " 38: 01f12e23 sw t6,28(sp)\n" + " 3c: 00038613 mv a2,t2\n" + " 40: 00300693 li a3,3\n" + " 44: 00100713 li a4,1\n" + " 48: 000f00e7 jalr t5\n" + " 4c: fea12e23 sw a0,-4(sp)\n" + " 50: 00012083 lw ra,0(sp)\n" + " 54: 00412503 lw a0,4(sp)\n" + " 58: 00812583 lw a1,8(sp)\n" + " 5c: 00c12603 lw a2,12(sp)\n" + " 60: 01012303 lw t1,16(sp)\n" + " 64: 01412e03 lw t3,20(sp)\n" + " 68: 01812e83 lw t4,24(sp)\n" + " 6c: 01c12f83 lw t6,28(sp)\n" + " 70: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, 1, t1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fe010113 addi sp,sp,-32\n" + " 1c: 00112023 sw ra,0(sp)\n" + " 20: 00a12223 sw a0,4(sp)\n" + " 24: 00b12423 sw a1,8(sp)\n" + " 28: 00c12623 sw a2,12(sp)\n" + " 2c: 00612823 sw t1,16(sp)\n" + " 30: 01c12a23 sw t3,20(sp)\n" + " 34: 01d12c23 sw t4,24(sp)\n" + " 38: 01f12e23 sw t6,28(sp)\n" + " 3c: 00038613 mv a2,t2\n" + " 40: 00100693 li a3,1\n" + " 44: 00030713 mv a4,t1\n" + " 48: 000f00e7 jalr t5\n" + " 4c: fea12e23 sw a0,-4(sp)\n" + " 50: 00012083 lw ra,0(sp)\n" + " 54: 00412503 lw a0,4(sp)\n" + " 58: 00812583 lw a1,8(sp)\n" + " 5c: 00c12603 lw a2,12(sp)\n" + " 60: 01012303 lw t1,16(sp)\n" + " 64: 01412e03 lw t3,20(sp)\n" + " 68: 01812e83 lw t4,24(sp)\n" + " 6c: 01c12f83 lw t6,28(sp)\n" + " 70: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, t1, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fe010113 addi sp,sp,-32\n" + " 1c: 00112023 sw ra,0(sp)\n" + " 20: 00a12223 sw a0,4(sp)\n" + " 24: 00b12423 sw a1,8(sp)\n" + " 28: 00c12623 sw a2,12(sp)\n" + " 2c: 00612823 sw t1,16(sp)\n" + " 30: 01c12a23 sw t3,20(sp)\n" + " 34: 01d12c23 sw t4,24(sp)\n" + " 38: 01f12e23 sw t6,28(sp)\n" + " 3c: 00038613 mv a2,t2\n" + " 40: 00030693 mv a3,t1\n" + " 44: 00100713 li a4,1\n" + " 48: 000f00e7 jalr t5\n" + " 4c: fea12e23 sw a0,-4(sp)\n" + " 50: 00012083 lw ra,0(sp)\n" + " 54: 00412503 lw a0,4(sp)\n" + " 58: 00812583 lw a1,8(sp)\n" + " 5c: 00c12603 lw a2,12(sp)\n" + " 60: 01012303 lw t1,16(sp)\n" + " 64: 01412e03 lw t3,20(sp)\n" + " 68: 01812e83 lw t4,24(sp)\n" + " 6c: 01c12f83 lw t6,28(sp)\n" + " 70: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t5, ResultReg) + end), + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, a1}, + [t5, a3] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: 00112023 sw ra,0(sp)\n" + " 20: 00a12223 sw a0,4(sp)\n" + " 24: 00b12423 sw a1,8(sp)\n" + " 28: 00c12623 sw a2,12(sp)\n" + " 2c: 00612823 sw t1,16(sp)\n" + " 30: 00712a23 sw t2,20(sp)\n" + " 34: 01c12c23 sw t3,24(sp)\n" + " 38: 01d12e23 sw t4,28(sp)\n" + " 3c: 03e12023 sw t5,32(sp)\n" + " 40: 03f12223 sw t6,36(sp)\n" + " 44: 00058313 mv t1,a1\n" + " 48: 000f0513 mv a0,t5\n" + " 4c: 00068593 mv a1,a3\n" + " 50: 000300e7 jalr t1\n" + " 54: 00a12423 sw a0,8(sp)\n" + " 58: 00012083 lw ra,0(sp)\n" + " 5c: 00412503 lw a0,4(sp)\n" + " 60: 00812583 lw a1,8(sp)\n" + " 64: 00c12603 lw a2,12(sp)\n" + " 68: 01012303 lw t1,16(sp)\n" + " 6c: 01412383 lw t2,20(sp)\n" + " 70: 01812e03 lw t3,24(sp)\n" + " 74: 01c12e83 lw t4,28(sp)\n" + " 78: 02012f03 lw t5,32(sp)\n" + " 7c: 02412f83 lw t6,36(sp)\n" + " 80: 03010113 addi sp,sp,48" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {primitive, 2}, + [{free, t5}, a3] + ), + ?assertEqual(ResultReg, t5), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: 00112023 sw ra,0(sp)\n" + " 20: 00a12223 sw a0,4(sp)\n" + " 24: 00b12423 sw a1,8(sp)\n" + " 28: 00c12623 sw a2,12(sp)\n" + " 2c: 00612823 sw t1,16(sp)\n" + " 30: 00712a23 sw t2,20(sp)\n" + " 34: 01c12c23 sw t3,24(sp)\n" + " 38: 01d12e23 sw t4,28(sp)\n" + " 3c: 03f12023 sw t6,32(sp)\n" + " 40: 00862303 lw t1,8(a2)\n" + " 44: 000f0513 mv a0,t5\n" + " 48: 00068593 mv a1,a3\n" + " 4c: 000300e7 jalr t1\n" + " 50: 00050f13 mv t5,a0\n" + " 54: 00012083 lw ra,0(sp)\n" + " 58: 00412503 lw a0,4(sp)\n" + " 5c: 00812583 lw a1,8(sp)\n" + " 60: 00c12603 lw a2,12(sp)\n" + " 64: 01012303 lw t1,16(sp)\n" + " 68: 01412383 lw t2,20(sp)\n" + " 6c: 01812e03 lw t3,24(sp)\n" + " 70: 01c12e83 lw t4,28(sp)\n" + " 74: 02012f83 lw t6,32(sp)\n" + " 78: 03010113 addi sp,sp,48" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test_() -> + [ + ?_test(begin + % Test 1: jump_to_continuation at offset 0 + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, a0}), + Stream = ?BACKEND:stream(State1), + % Expected: riscv32 PIC sequence - simpler than ARM, no prolog/epilog needed + Dump = + << + " 0: 00000f97 auipc t6,0x0\n" + " 4: 00af8fb3 add t6,t6,a0\n" + " 8: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + % Test 2: jump_to_continuation after jump table (non-zero relative address) + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Generate a jump table for 3 labels (4 entries * 8 bytes = 32 bytes) + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:jump_to_continuation(State1, {free, a0}), + Stream = ?BACKEND:stream(State2), + % Expected: jump table (32 bytes) + jump_to_continuation + % NetOffset = 0 - 32 = -32 (0xFFFFFFE0) + Dump = + << + " 0: 00000697 auipc a3,0x0\n" + " 4: 00068067 jr a3\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 00068067 jr a3\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 00068067 jr a3\n" + " 18: 00000697 auipc a3,0x0\n" + " 1c: 00068067 jr a3\n" + " 20: 00000f97 auipc t6,0x0\n" + " 24: fe0f8f93 addi t6,t6,-32\n" + " 28: 00af8fb3 add t6,t6,a0\n" + " 2c: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. + +%% Mimic part of add.beam +add_beam_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:move_to_vm_register(State2, 16#9f, {x_reg, 1}), + State4 = ?BACKEND:move_to_vm_register(State3, 16#8f, {x_reg, 0}), + State5 = ?BACKEND:call_only_or_schedule_next(State4, 2), + State6 = ?BACKEND:add_label(State5, 2), + {State7, ResultReg} = ?BACKEND:call_primitive(State6, ?PRIM_ALLOCATE, [ + ctx, jit_state, 1, 0, 1 + ]), + State8 = ?BACKEND:if_block(State7, {'(bool)', {free, ResultReg}, '==', false}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State9 = ?BACKEND:move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + State10 = ?BACKEND:call_or_schedule_next(State9, 3), + State11 = ?BACKEND:add_label(State10, 3), + State12 = ?BACKEND:call_primitive_last(State11, ?PRIM_RETURN, [ + ctx, jit_state + ]), + % OP_INT_CALL_END + State13 = ?BACKEND:add_label(State12, 0), + State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]), + State15 = ?BACKEND:update_branches(State14), + Stream = ?BACKEND:stream(State15), + riscv32_helper:disassemble(Stream), + Dump = + << + % jump table (new 8-byte format) + " 0: 00000697 auipc a3,0x0\n" + " 4: 11868067 jr 280(a3)\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01868067 jr 24(a3)\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 05468067 jr 84(a3)\n" + " 18: 00000697 auipc a3,0x0\n" + " 1c: 0f868067 jr 248(a3)\n" + % label 1 + % {move,{integer,9},{x,1}}. + " 20: 09f00f93 li t6,159\n" + " 24: 01f52e23 sw t6,28(a0)\n" + % {move,{integer,8},{x,0}} + " 28: 08f00f93 li t6,143\n" + " 2c: 01f52c23 sw t6,24(a0)\n" + % {call_only,2,{f,2}}. + " 30: 0085af83 lw t6,8(a1)\n" + " 34: ffff8f93 addi t6,t6,-1\n" + " 38: 01f5a423 sw t6,8(a1)\n" + " 3c: 000f8a63 beqz t6,0x50\n" + " 40: 0240006f j 0x64\n" + " 44: 00000013 nop\n" + " 48: 00000013 nop\n" + " 4c: 00000013 nop\n" + " 50: 00000f97 auipc t6,0x0\n" + " 54: 014f8f93 addi t6,t6,20\n" + " 58: 01f5a223 sw t6,4(a1)\n" + " 5c: 00862f83 lw t6,8(a2)\n" + " 60: 000f8067 jr t6\n" + % label 2 + % {allocate,1,1}. + " 64: 01462f83 lw t6,20(a2)\n" + " 68: ff010113 addi sp,sp,-16\n" + " 6c: 00112023 sw ra,0(sp)\n" + " 70: 00a12223 sw a0,4(sp)\n" + " 74: 00b12423 sw a1,8(sp)\n" + " 78: 00c12623 sw a2,12(sp)\n" + " 7c: 00100613 li a2,1\n" + " 80: 00000693 li a3,0\n" + " 84: 00100713 li a4,1\n" + " 88: 000f80e7 jalr t6\n" + " 8c: 00050f93 mv t6,a0\n" + " 90: 00012083 lw ra,0(sp)\n" + " 94: 00412503 lw a0,4(sp)\n" + " 98: 00812583 lw a1,8(sp)\n" + " 9c: 00c12603 lw a2,12(sp)\n" + " a0: 01010113 addi sp,sp,16\n" + " a4: 01ff9f13 slli t5,t6,0x1f\n" + " a8: 000f4863 bltz t5,0xb8\n" + " ac: 01862f83 lw t6,24(a2)\n" + " b0: 0b000613 li a2,176\n" + " b4: 000f8067 jr t6\n" + % {init_yregs,{list,[{y,0}]}}. + %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + " b8: 03b00f13 li t5,59\n" + " bc: 01452f83 lw t6,20(a0)\n" + " c0: 01efa023 sw t5,0(t6)\n" + % {call,1,{f,3}} + %% call_or_schedule_next(State9, 3), + " c4: 0005af03 lw t5,0(a1)\n" + " c8: 000f2f03 lw t5,0(t5)\n" + " cc: 018f1f13 slli t5,t5,0x18\n" + " d0: 44000f93 li t6,1088\n" + " d4: 01ff6f33 or t5,t5,t6\n" + " d8: 05e52e23 sw t5,92(a0)\n" + " dc: 0085af83 lw t6,8(a1)\n" + " e0: ffff8f93 addi t6,t6,-1\n" + " e4: 01f5a423 sw t6,8(a1)\n" + " e8: 000f8a63 beqz t6,0xfc\n" + " ec: 0240006f j 0x110\n" + " f0: 00000013 nop\n" + " f4: 00000013 nop\n" + " f8: 00000013 nop\n" + " fc: 00000f97 auipc t6,0x0\n" + " 100: 014f8f93 addi t6,t6,20\n" + " 104: 01f5a223 sw t6,4(a1)\n" + " 108: 00862f83 lw t6,8(a2)\n" + " 10c: 000f8067 jr t6\n" + %% (continuation) + % label 3 + " 110: 00462f83 lw t6,4(a2)\n" + " 114: 000f8067 jr t6\n" + % label 0 + " 118: 00462f83 lw t6,4(a2)\n" + " 11c: 000f8067 jr t6\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +dump_to_bin(Dump) -> + dump_to_bin0(Dump, addr, []). + +-define(IS_HEX_DIGIT(C), + ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) +). + +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$ , Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<<$ , Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +%% Handle RISC-V 32-bit instructions (8 consecutive hex digits) +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $ orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + %% RISC-V instructions are 32-bit little-endian + Instr = list_to_integer([H1, H2, H3, H4, H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<> | Acc]); +%% Handle 32-bits undefined instruction (ARM format with space: "1234 5678") +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $ orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + InstrA = list_to_integer([H1, H2, H3, H4], 16), + InstrB = list_to_integer([H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<>, <> | Acc]); +%% Handle 16-bit ARM32 Thumb instructions (4 hex digits) +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $ orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) +-> + %% Parse 4 hex digits (ARM32 Thumb 16-bit instruction) + Instr = list_to_integer([H1, H2, H3, H4], 16), + dump_to_bin0(Rest, instr, [<> | Acc]); +dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, instr, Acc); +dump_to_bin0(<<>>, _, Acc) -> + list_to_binary(lists:reverse(Acc)). diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index a289a01a6f..2d130cad03 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -31,6 +31,7 @@ start() -> jit_aarch64_asm_tests, jit_armv6m_tests, jit_armv6m_asm_tests, + jit_riscv32_tests, jit_riscv32_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests From cbe7b410392dc23ab8c44f78d4cf389b400baab0 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 15 Oct 2025 22:25:39 +0200 Subject: [PATCH 84/97] riscv32: add to workflow using temporary files Signed-off-by: Paul Guyot --- .github/workflows/build-and-test.yaml | 82 ++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 9ece61a36f..5a983dc3e2 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -366,6 +366,19 @@ jobs: arch: "s390x" library-arch: s390x-linux-gnu + # riscv32-ilp32 build + - os: "ubuntu-24.04" + cc: "riscv32-unknown-linux-gnu-gcc" + cxx: "riscv32-unknown-linux-gnu-g++" + cflags: "-O2" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: "-DAVM_WARNINGS_ARE_ERRORS=ON -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake" + compiler_pkgs: "qemu-user qemu-user-binfmt binfmt-support" + arch: "riscv32" + library-arch: riscv32-linux-gnu-ilp32 + env: ImageOS: ${{ matrix.container == 'ubuntu:20.04' && 'ubuntu20' || matrix.os == 'ubuntu-20.04' && 'ubuntu20' || matrix.os == 'ubuntu-22.04' && 'ubuntu22' || matrix.os == 'ubuntu-24.04' && 'ubuntu24' || 'ubuntu24' }} CC: ${{ matrix.cc }} @@ -386,7 +399,7 @@ jobs: run: sudo dpkg --add-architecture i386 - name: "Setup cross compilation architecture" - if: matrix.library-arch != '' + if: matrix.library-arch != '' && matrix.library-arch != 'riscv32-linux-gnu-ilp32' run: | sudo dpkg --add-architecture ${{ matrix.arch }} cat > ${RUNNER_TEMP}/cross-compile-sources.list <> $GITHUB_PATH + + # Install the libs + sudo dpkg -i libc6-ilp32_2.39-0ubuntu1_riscv32.deb + sudo dpkg -i libc6-dev-ilp32_2.39-0ubuntu1_riscv32.deb + sudo dpkg -i libc6-dbg-ilp32_2.39-0ubuntu1_riscv32.deb + + sudo dpkg -i zlib1g-ilp32_1.3.1-0ubuntu1_riscv32.deb + sudo dpkg -i zlib1g-dev-ilp32_1.3.1-0ubuntu1_riscv32.deb + + sudo dpkg -i libmbedcrypto7-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedtls-dev-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedtls14-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedx509-1-ilp32_2.28.8-0ubuntu1_riscv32.deb + + sudo sed -i '/Types: deb/a Architectures: amd64' /etc/apt/sources.list.d/ubuntu.sources + + cat > ${RUNNER_TEMP}/${{ matrix.arch }}_toolchain.cmake < Date: Sat, 18 Oct 2025 19:09:59 +0200 Subject: [PATCH 85/97] riscv32: add to precompiled targets and test infrastructure - Add riscv32 to AVM_PRECOMPILED_TARGETS - Enable ENABLE_TRACE for debugging - Add riscv32 directory handling in test.c Signed-off-by: Paul Guyot --- CMakeLists.txt | 2 +- src/libAtomVM/jit.c | 2 +- src/libAtomVM/opcodesswitch.h | 2 +- tests/test.c | 5 +++++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 32484ee851..307917422f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,7 +64,7 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH) endif() endif() -set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") +set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32;riscv32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 8704bd6db1..9cebc6cffd 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -39,7 +39,7 @@ #include #include -// #define ENABLE_TRACE +#define ENABLE_TRACE #include "trace.h" // Verify matching atom index in default_atoms.hrl diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index e9d49698a0..f4c94787af 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -43,7 +43,7 @@ #include "stacktrace.h" #endif -//#define ENABLE_TRACE +#define ENABLE_TRACE #include "trace.h" // These constants can be used to reduce the size of the VM for a specific diff --git a/tests/test.c b/tests/test.c index 577572c967..5db01bfff0 100644 --- a/tests/test.c +++ b/tests/test.c @@ -713,6 +713,11 @@ int test_modules_execution(bool beam, bool skip, int count, char **item) perror("Error: cannot find armv6m directory"); return EXIT_FAILURE; } +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 + if (chdir("riscv32") != 0) { + perror("Error: cannot find riscv32 directory"); + return EXIT_FAILURE; + } #else #error Unknown JIT target #endif From de2e4d370b371e6ad6aa054b9320af24c5560320 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 19 Oct 2025 15:40:01 +0200 Subject: [PATCH 86/97] JIT: Do not precompile pico libs if armv6m+float32 is not in precompiled targets Signed-off-by: Paul Guyot --- CMakeModules/BuildErlang.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeModules/BuildErlang.cmake b/CMakeModules/BuildErlang.cmake index 3b10565cc7..2bc6754d87 100644 --- a/CMakeModules/BuildErlang.cmake +++ b/CMakeModules/BuildErlang.cmake @@ -196,7 +196,7 @@ macro(pack_lib avm_name) ) set(target_deps ${target_deps} ${avm_name}-pico.uf2 ${avm_name}-pico2.uf2) - if(NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) + if((NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) AND ("armv6m" IN_LIST AVM_PRECOMPILED_TARGETS OR "armv6m+float32" IN_LIST AVM_PRECOMPILED_TARGETS)) add_custom_command( OUTPUT ${avm_name}-armv6m-pico.uf2 DEPENDS ${avm_name}-armv6m.avm UF2Tool From f58107a685b8321b8ab55e706db31c07fd2be352 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 19 Oct 2025 15:40:59 +0200 Subject: [PATCH 87/97] riscv32: remove unused literal pool logic Signed-off-by: Paul Guyot --- libs/jit/src/jit_riscv32.erl | 44 ++++-------------------------------- 1 file changed, 5 insertions(+), 39 deletions(-) diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 56887fb064..b0f3ba0b62 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -166,8 +166,7 @@ available_regs :: [riscv32_register()], used_regs :: [riscv32_register()], labels :: [{integer() | reference(), integer()}], - variant :: non_neg_integer(), - literal_pool :: [{non_neg_integer(), riscv32_register(), non_neg_integer()}] + variant :: non_neg_integer() }). -type state() :: #state{}. @@ -275,8 +274,7 @@ new(Variant, StreamModule, Stream) -> available_regs = ?AVAILABLE_REGS, used_regs = [], labels = [], - variant = Variant, - literal_pool = [] + variant = Variant }. %%----------------------------------------------------------------------------- @@ -630,8 +628,7 @@ call_primitive_last( State2 = set_registers_args(State1, ArgsForTailCall, 0), tail_call_with_jit_state_registers_only(State2, Temp) end, - State5 = State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}, - flush_literal_pool(State5). + State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. %%----------------------------------------------------------------------------- %% @doc Tail call to address in register. @@ -708,15 +705,13 @@ jump_to_label( Offset = StreamModule:offset(Stream0), {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), Stream1 = StreamModule:append(Stream0, CodeBlock), - State2 = State1#state{stream = Stream1}, - flush_literal_pool(State2). + State1#state{stream = Stream1}. jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> Offset = StreamModule:offset(Stream0), CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), Stream1 = StreamModule:append(Stream0, CodeBlock), - State2 = State#state{stream = Stream1}, - flush_literal_pool(State2). + State#state{stream = Stream1}. %%----------------------------------------------------------------------------- %% @doc Jump to address in continuation pointer register @@ -2531,35 +2526,6 @@ mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Re Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}. -flush_literal_pool(#state{literal_pool = []} = State) -> - State; -flush_literal_pool( - #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State -) -> - % Align - Offset = StreamModule:offset(Stream0), - Stream1 = - if - Offset rem 4 =:= 0 -> Stream0; - true -> StreamModule:append(Stream0, <<0:16>>) - end, - % Lay all values and update ldr instructions - Stream2 = lists:foldl( - fun({LdrInstructionAddr, Reg, Val}, AccStream) -> - LiteralPosition = StreamModule:offset(AccStream), - LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, - LiteralOffset = LiteralPosition - LdrPC, - LdrInstruction = jit_riscv32_asm:lw(Reg, pc, LiteralOffset), - AccStream1 = StreamModule:append(AccStream, <>), - StreamModule:replace( - AccStream1, LdrInstructionAddr, LdrInstruction - ) - end, - Stream1, - lists:reverse(LP) - ), - State#state{stream = Stream2, literal_pool = []}. - sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when Val >= 0 andalso Val =< 255 -> From a315bb7a09f03cea09044625a60d2e3dd1f64c29 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 19 Oct 2025 15:41:33 +0200 Subject: [PATCH 88/97] Aarch64: add tests for better coverage Signed-off-by: Paul Guyot --- tests/libs/jit/jit_aarch64_tests.erl | 333 +++++++++++++++++++++++++++ 1 file changed, 333 insertions(+) diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 23291a400c..247728dd29 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -88,6 +88,51 @@ call_primitive_2_args_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_primitive_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9401447 ldr x7, [x2, #40]\n" + " 4: d2800202 mov x2, #0x10 // #16\n" + " 8: d2800403 mov x3, #0x20 // #32\n" + " c: d2800044 mov x4, #0x2 // #2\n" + " 10: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_6_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get bin_ptr from x_reg 0 (similar to get_list_test pattern) + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK), + % Get another register for the last parameter to test {free, Reg} handling + {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}), + % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments + {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [ + ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg} + ]), + Stream = ?BACKEND:stream(State4), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 8: f9401c08 ldr x8, [x0, #56]\n" + " c: f940b850 ldr x16, [x2, #368]\n" + " 10: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 14: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 18: aa0703e2 mov x2, x7\n" + " 1c: d2800803 mov x3, #0x40 // #64\n" + " 20: d2800104 mov x4, #0x8 // #8\n" + " 24: aa0803e5 mov x5, x8\n" + " 28: d63f0200 blr x16\n" + " 2c: aa0003e7 mov x7, x0\n" + " 30: a8c10be1 ldp x1, x2, [sp], #16\n" + " 34: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_primitive_extended_regs_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), @@ -146,6 +191,44 @@ call_primitive_extended_regs_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_primitive_few_free_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg1} = ?BACKEND:move_to_native_register(State0, 1), + {State2, Reg2} = ?BACKEND:move_to_native_register(State1, 2), + {State3, Reg3} = ?BACKEND:move_to_native_register(State2, 3), + {State4, Reg4} = ?BACKEND:move_to_native_register(State3, 4), + {State5, Reg5} = ?BACKEND:move_to_native_register(State4, 5), + {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [ + Reg2, Reg1, {free, Reg4}, Reg3, {free, Reg5} + ]), + State7 = ?BACKEND:free_native_registers(State6, [ResultReg, Reg2, Reg1, Reg3]), + ?BACKEND:assert_all_native_free(State7), + Stream = ?BACKEND:stream(State7), + Dump = << + " 0: d2800027 mov x7, #0x1 // #1\n" + " 4: d2800048 mov x8, #0x2 // #2\n" + " 8: d2800069 mov x9, #0x3 // #3\n" + " c: d280008a mov x10, #0x4 // #4\n" + " 10: d28000ab mov x11, #0x5 // #5\n" + " 14: f940e450 ldr x16, [x2, #456]\n" + " 18: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 1c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 20: a9bf23e9 stp x9, x8, [sp, #-16]!\n" + " 24: f81f0fe7 str x7, [sp, #-16]!\n" + " 28: aa0803e0 mov x0, x8\n" + " 2c: aa0703e1 mov x1, x7\n" + " 30: aa0a03e2 mov x2, x10\n" + " 34: aa0903e3 mov x3, x9\n" + " 38: aa0b03e4 mov x4, x11\n" + " 3c: d63f0200 blr x16\n" + " 40: aa0003ea mov x10, x0\n" + " 44: f84107e7 ldr x7, [sp], #16\n" + " 48: a8c123e9 ldp x9, x8, [sp], #16\n" + " 4c: a8c10be1 ldp x1, x2, [sp], #16\n" + " 50: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_only_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -168,6 +251,23 @@ call_ext_only_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_primitive_last_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA} + ]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9404c48 ldr x8, [x2, #152]\n" + " 8: d2800102 mov x2, #0x8 // #8\n" + " c: d2805963 mov x3, #0x2cb // #715\n" + " 10: aa0703e4 mov x4, x7\n" + " 14: d61f0100 br x8" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_last_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -1038,6 +1138,179 @@ is_boolean_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test OP_WAIT_TIMEOUT pattern +wait_timeout_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + Label = 42, + {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0), + {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000), + State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [ + ctx, jit_state, {free, TimeoutReg}, Label + ]), + State4 = ?BACKEND:add_label(State3, OffsetRef0), + State5 = ?BACKEND:continuation_entry_point(State4), + {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ]), + State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}), + % ?WAITING_TIMEOUT_EXPIRED + {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]), + State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) -> + ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [ + ctx, jit_state, Label + ]) + end), + State10 = ?BACKEND:update_branches(State9), + + Stream = ?BACKEND:stream(State10), + Dump = << + " 0: 100000e7 adr x7, 0x1c\n" + " 4: f9000427 str x7, [x1, #8]\n" + " 8: d2827107 mov x7, #0x1388 // #5000\n" + " c: f9407848 ldr x8, [x2, #240]\n" + " 10: aa0703e2 mov x2, x7\n" + " 14: d2800543 mov x3, #0x2a // #42\n" + " 18: d61f0100 br x8\n" + " 1c: f9405450 ldr x16, [x2, #168]\n" + " 20: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 24: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 28: d63f0200 blr x16\n" + " 2c: aa0003e7 mov x7, x0\n" + " 30: a8c10be1 ldp x1, x2, [sp], #16\n" + " 34: a8c103fe ldp x30, x0, [sp], #16\n" + " 38: eb0000ff cmp x7, x0\n" + " 3c: 54000060 b.eq 0x48 // b.none\n" + " 40: aa0703e0 mov x0, x7\n" + " 44: d65f03c0 ret\n" + " 48: f9408450 ldr x16, [x2, #264]\n" + " 4c: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 50: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 54: d2800041 mov x1, #0x2 // #2\n" + " 58: d63f0200 blr x16\n" + " 5c: aa0003e7 mov x7, x0\n" + " 60: a8c10be1 ldp x1, x2, [sp], #16\n" + " 64: a8c103fe ldp x30, x0, [sp], #16\n" + " 68: b5000087 cbnz x7, 0x78\n" + " 6c: f9407c47 ldr x7, [x2, #248]\n" + " 70: d2800542 mov x2, #0x2a // #42\n" + " 74: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test OP_WAIT pattern that uses set_continuation_to_label +wait_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:set_continuation_to_label(State2, Label), + State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 14000000 b 0x0\n" + " 4: 14000000 b 0x4\n" + " 8: 14000000 b 0x8\n" + " c: 14000000 b 0xc\n" + " 10: 14000000 b 0x10\n" + " 14: 14000000 b 0x14\n" + " 18: 10000007 adr x7, 0x18\n" + " 1c: f9000427 str x7, [x1, #8]\n" + " 20: f9407447 ldr x7, [x2, #232]\n" + " 24: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +return_labels_and_lines_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Test return_labels_and_lines with some sample labels and lines + State1 = ?BACKEND:add_label(State0, 2, 32), + State2 = ?BACKEND:add_label(State1, 1, 16), + + % {Line, Offset} pairs + SortedLines = [{10, 16}, {20, 32}], + + State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), + Stream = ?BACKEND:stream(State3), + + % Should have generated adr + ret + labels table + lines table + % adr = 4 bytes, ret = 4 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes + % Total minimum: 36 bytes + ?assert(byte_size(Stream) >= 36), + + % Expected: adr x0, #8 + ret + labels table + lines table + % The data tables start at offset 0x8, so we load PC + 8 into x0 + Dump = << + " 0: 10000040 adr x0, 0x8\n" + " 4: d65f03c0 ret\n" + " 8: 01000200 .word 0x01000200\n" + " c: 10000000 adr x0, 0xc\n" + " 10: 00000200 .word 0x00000200\n" + " 14: 02002000 .word 0x02002000\n" + " 18: 00000a00 .word 0x00000a00\n" + " 1c: 14001000 .word 0x14001000\n" + " 20: 20000000 .word 0x20000000" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test call_primitive with {free, {x_reg, X}} +gc_bif2_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]), + {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [ + ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}} + ]), + + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9402050 ldr x16, [x2, #64]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: aa0103e0 mov x0, x1\n" + " 10: d2800541 mov x1, #0x2a // #42\n" + " 14: d63f0200 blr x16\n" + " 18: aa0003e7 mov x7, x0\n" + " 1c: a8c10be1 ldp x1, x2, [sp], #16\n" + " 20: a8c103fe ldp x30, x0, [sp], #16\n" + " 24: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 28: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 2c: d2800001 mov x1, #0x0 // #0\n" + " 30: d2800062 mov x2, #0x3 // #3\n" + " 34: f9401403 ldr x3, [x0, #40]\n" + " 38: f9400063 ldr x3, [x3]\n" + " 3c: f9401804 ldr x4, [x0, #48]\n" + " 40: d63f00e0 blr x7\n" + " 44: aa0003e7 mov x7, x0\n" + " 48: a8c10be1 ldp x1, x2, [sp], #16\n" + " 4c: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test case where parameter value is in r1 +memory_ensure_free_with_roots_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [ + ctx, jit_state, {free, r1}, 4, 1 + ]), + + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f940b050 ldr x16, [x2, #352]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: aa0103e2 mov x2, x1\n" + " 10: d2800083 mov x3, #0x4 // #4\n" + " 14: d2800024 mov x4, #0x1 // #1\n" + " 18: d63f0200 blr x16\n" + " 1c: aa0003e7 mov x7, x0\n" + " 20: a8c10be1 ldp x1, x2, [sp], #16\n" + " 24: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -1662,6 +1935,66 @@ move_to_native_register_test_() -> ] end}. +add_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:add(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +add_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + add_test0(State0, r2, 2, << + " 0: 91000842 add x2, x2, #0x2" + >>) + end), + ?_test(begin + add_test0(State0, r2, 256, << + " 0: 91040042 add x2, x2, #0x100" + >>) + end), + ?_test(begin + add_test0(State0, r2, r3, << + " 0: 8b030042 add x2, x2, x3" + >>) + end) + ] + end}. + +sub_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:sub(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +sub_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + sub_test0(State0, r2, 2, << + " 0: d1000842 sub x2, x2, #0x2" + >>) + end), + ?_test(begin + sub_test0(State0, r2, 256, << + " 0: d1040042 sub x2, x2, #0x100" + >>) + end), + ?_test(begin + sub_test0(State0, r2, r3, << + " 0: cb030042 sub x2, x2, x3" + >>) + end) + ] + end}. + mul_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:mul(State0, Reg, Imm), Stream = ?BACKEND:stream(State1), From 88c22fe194b57813fcdd7a2078ee635f02723a78 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 19 Oct 2025 17:08:52 +0200 Subject: [PATCH 89/97] riscv32: fix several backend bugs Signed-off-by: Paul Guyot --- libs/jit/src/jit_riscv32.erl | 148 +++++---- src/libAtomVM/jit.c | 2 +- src/libAtomVM/opcodesswitch.h | 2 +- tests/libs/jit/jit_riscv32_tests.erl | 464 +++++++++++++-------------- 4 files changed, 310 insertions(+), 306 deletions(-) diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index b0f3ba0b62..83fed2585c 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -468,19 +468,13 @@ update_branches( >>, <>; true -> - % Keep far branch sequence: auipc + lw + jalr + data - % RISC-V far branch is always 16 bytes - case Size of - 16 -> - % 16-byte sequence: auipc + lw + jalr + data - I1 = jit_riscv32_asm:auipc(TempReg, 0), - I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8), - I3 = jit_riscv32_asm:jalr(zero, TempReg, 0), - % Calculate absolute target address - TargetAddress = LabelOffset, - I4 = <>, - <> - end + % Keep far branch sequence: auipc + jalr (PC-relative, 8 bytes) + % Split the relative offset into upper 20 bits and lower 12 bits + Hi20 = (Rel + 16#800) bsr 12, + Lo12 = Rel - (Hi20 bsl 12), + I1 = jit_riscv32_asm:auipc(TempReg, Hi20), + I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12), + <> end; jump_table_auipc_jalr -> % Calculate PC-relative offset from AUIPC instruction to target @@ -679,7 +673,8 @@ return_if_not_equal_to_ctx( end, I3 = jit_riscv32_asm:ret(), % Branch if equal (skip the return) - I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, byte_size(I2) + byte_size(I3)), + % Offset must account for the beq instruction itself (4 bytes) plus I2 and I3 + I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, 4 + byte_size(I2) + byte_size(I3)), Stream1 = StreamModule:append(Stream0, <>), {AvailableRegs1, UsedRegs1} = free_reg( AvailableRegs0, UsedRegs0, Reg @@ -754,20 +749,30 @@ branch_to_offset_code(_State, Offset, TargetOffset) when Rel = TargetOffset - Offset, jit_riscv32_asm:j(Rel); branch_to_offset_code( - #state{available_regs = [TempReg | _]}, _Offset, TargetOffset + #state{available_regs = [TempReg | _]}, Offset, TargetOffset ) -> - % Far branch: use auipc + lw + jalr sequence (RISC-V) - % This creates a PC-relative load sequence - always 16 bytes (4-byte aligned) + % Far branch: use auipc + jalr sequence for PC-relative addressing + % This computes: PC + Immediate and jumps to it - % TempReg = PC - I1 = jit_riscv32_asm:auipc(TempReg, 0), - % TempReg = *(PC+8) - I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8), - % Jump to TempReg - I3 = jit_riscv32_asm:jalr(zero, TempReg, 0), - % The literal value is the absolute target offset - I4 = <>, - <>. + Rel = TargetOffset - Offset, + % Split the relative offset into upper 20 bits and lower 12 bits + % RISC-V PC-relative addressing: target = PC + (imm20 << 12) + sign_extend(imm12) + % Since jalr's imm12 is sign-extended, if bit 11 of Rel is set, + % we need to add 0x800 before splitting to compensate + Hi20 = (Rel + 16#800) bsr 12, + Lo12Unsigned = Rel band 16#FFF, + % Convert to signed 12-bit value: if bit 11 is set, subtract 4096 + Lo12 = + if + Lo12Unsigned >= 16#800 -> Lo12Unsigned - 16#1000; + true -> Lo12Unsigned + end, + + % TempReg = PC + (Hi20 << 12) + I1 = jit_riscv32_asm:auipc(TempReg, Hi20), + % Jump to TempReg + sign_extend(Lo12) + I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12), + <>. branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), @@ -775,17 +780,13 @@ branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> branch_to_label_code( #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false ) -> - % RISC-V: Far branch sequence - always 16 bytes (4-byte aligned) + % RISC-V: Far branch sequence using PC-relative auipc + jalr (8 bytes) - % Load PC into temp + % Placeholder: auipc TempReg, 0 I1 = jit_riscv32_asm:auipc(TempReg, 0), - % Load offset from PC+8 - I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8), - % Jump to address - I3 = jit_riscv32_asm:jalr(zero, TempReg, 0), - % Placeholder offset - I4 = <<0:32/little>>, - CodeBlock = <>, + % Placeholder: jalr zero, TempReg, 0 + I2 = jit_riscv32_asm:jalr(zero, TempReg, 0), + CodeBlock = <>, SequenceSize = byte_size(CodeBlock), % Add relocation entry Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}}, @@ -795,17 +796,13 @@ branch_to_label_code( #state{available_regs = [], branches = Branches} = State0, Offset, Label, false ) -> % RISC-V: Use t6 as scratch (caller-saved, safe to clobber) - % Same sequence as when we have available regs - always 16 bytes (4-byte aligned) + % Far branch sequence using PC-relative auipc + jalr (8 bytes) - % Load PC into t6 + % Placeholder: auipc t6, 0 I1 = jit_riscv32_asm:auipc(t6, 0), - % Load offset from PC+8 - I2 = jit_riscv32_asm:lw(t6, t6, 8), - % Jump to address - I3 = jit_riscv32_asm:jalr(zero, t6, 0), - % Placeholder offset - I4 = <<0:32/little>>, - CodeBlock = <>, + % Placeholder: jalr zero, t6, 0 + I2 = jit_riscv32_asm:jalr(zero, t6, 0), + CodeBlock = <>, SequenceSize = byte_size(CodeBlock), % Add relocation entry Reloc = {Label, Offset, {far_branch, SequenceSize, t6}}, @@ -1528,9 +1525,17 @@ call_func_ptr( % Calculate stack offset: find register index in SavedRegs * 4 bytes ResultReg = element(2, FuncPtrTuple), RegIndex = index_of(ResultReg, SavedRegs), - StoreResultStackOffset = RegIndex * 4, - StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset), - {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]}; + case RegIndex >= 0 of + true -> + StoreResultStackOffset = RegIndex * 4, + StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset), + {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]}; + false -> + % FuncPtrReg was not in SavedRegs, use an available register + [ResultReg1 | _] = AvailableRegs1 -- SavedRegs, + MoveResult = jit_riscv32_asm:mv(ResultReg1, a0), + {StreamModule:append(Stream5, MoveResult), [ResultReg1 | UsedRegs1]} + end; _ -> % Use any free that is not in SavedRegs [ResultReg | _] = AvailableRegs1 -- SavedRegs, @@ -1632,8 +1637,8 @@ parameter_regs0([], _, Acc) -> lists:reverse(Acc); parameter_regs0([{avm_int64_t, _} | T], [a0, a1 | Rest], Acc) -> parameter_regs0(T, Rest, [a1, a0 | Acc]); -parameter_regs0([{avm_int64_t, _} | T], [a1, a2, a3 | Rest], Acc) -> - parameter_regs0(T, Rest, [a3, a2 | Acc]); +parameter_regs0([{avm_int64_t, _} | T], [a1, a2 | Rest], Acc) -> + parameter_regs0(T, Rest, [a2, a1 | Acc]); parameter_regs0([{avm_int64_t, _} | T], [a2, a3 | Rest], Acc) -> parameter_regs0(T, Rest, [a3, a2 | Acc]); parameter_regs0([_Other | T], [Reg | Rest], Acc) -> @@ -2637,7 +2642,9 @@ decrement_reductions_and_maybe_schedule_next( I4 = jit_riscv32_asm:bne(Temp, zero, 0), % Set continuation to the next instruction ADROffset = BNEOffset + byte_size(I4), - I5 = pc_relative_address(Temp, 0), + % Use 8-byte placeholder (2 words of 0xFFFFFFFF) for pc_relative_address + % This ensures we can always rewrite with either auipc alone (4 bytes) or auipc+addi (8 bytes) + I5 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, I6 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), % Append the instructions to the stream Stream2 = StreamModule:append(Stream1, <>), @@ -2647,7 +2654,17 @@ decrement_reductions_and_maybe_schedule_next( #state{stream = Stream3} = State2, NewOffset = StreamModule:offset(Stream3), NewI4 = jit_riscv32_asm:bne(Temp, zero, NewOffset - BNEOffset), - NewI5 = pc_relative_address(Temp, NewOffset - ADROffset), + NewI5Offset = NewOffset - ADROffset, + % Generate the new pc_relative_address instruction, padding with NOP if needed + NewI5 = + case pc_relative_address(Temp, NewI5Offset) of + I when byte_size(I) =:= 4 -> + % Only auipc, pad with NOP + <>; + I when byte_size(I) =:= 8 -> + % auipc + addi, no padding needed + I + end, Stream4 = StreamModule:replace( Stream3, BNEOffset, <> ), @@ -2753,17 +2770,12 @@ set_cp(#state{available_regs = [TempReg | AvailT], used_regs = UsedRegs} = State % Reserve space for offset load instruction % li can generate 1 instruction (4 bytes) for small immediates (< 2048) % or 2 instructions (8 bytes) for large immediates - % Since we use (offset bsl 2), threshold is when offset >= 512 bytes - % To be safe, use same threshold as AArch64 relative to instruction encoding limits - {I2, I3} = - if - Offset >= 512 -> - % Need 2 instructions (lui + addi) for large offsets - {jit_riscv32_asm:nop(), jit_riscv32_asm:nop()}; - true -> - % Need 1 instruction (addi) for small offsets - {jit_riscv32_asm:nop(), <<>>} - end, + % Since we don't know the final CP value yet (it depends on code size), + % we must always reserve 2 instructions (8 bytes) to be safe + % The final CP value is (final_offset << 2), and final_offset is unknown + % Use 0xFFFFFFFF placeholders for flash compatibility (can only flip 1->0) + I2 = <<16#FFFFFFFF:32/little>>, + I3 = <<16#FFFFFFFF:32/little>>, MOVOffset = Offset + byte_size(I1), % OR the module index with the offset (loaded in temp register) I4 = jit_riscv32_asm:or_(Reg, TempReg), @@ -2783,8 +2795,16 @@ rewrite_cp_offset( TempReg ) -> NewOffset = StreamModule:offset(Stream0) - CodeOffset, - NewMoveInstr = jit_riscv32_asm:li(TempReg, NewOffset bsl 2), - Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr), + CPValue = NewOffset bsl 2, + NewMoveInstr = jit_riscv32_asm:li(TempReg, CPValue), + % We reserved 8 bytes (2 instructions) for the CP value + % If li generates only 4 bytes, pad with a NOP to maintain alignment + PaddedInstr = + case byte_size(NewMoveInstr) of + 4 -> <>; + 8 -> NewMoveInstr + end, + Stream1 = StreamModule:replace(Stream0, RewriteOffset, PaddedInstr), State0#state{stream = Stream1}. set_bs( diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 9cebc6cffd..90c97ba8c9 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -39,7 +39,7 @@ #include #include -#define ENABLE_TRACE +//#define ENABLE_TRACE #include "trace.h" // Verify matching atom index in default_atoms.hrl diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index f4c94787af..e9d49698a0 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -43,7 +43,7 @@ #include "stacktrace.h" #endif -#define ENABLE_TRACE +//#define ENABLE_TRACE #include "trace.h" // These constants can be used to reduce the size of the VM for a specific diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index 4a4fba5593..475e96bd5d 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -279,7 +279,7 @@ call_primitive_few_free_regs_test() -> " 4c: 000e8693 mv a3,t4\n" " 50: 00038713 mv a4,t2\n" " 54: 000300e7 jalr t1\n" - " 58: fea12e23 sw a0,-4(sp)\n" + " 58: 00050313 mv t1,a0\n" " 5c: 00012083 lw ra,0(sp)\n" " 60: 00412503 lw a0,4(sp)\n" " 64: 00812583 lw a1,8(sp)\n" @@ -300,17 +300,18 @@ call_ext_only_test() -> " 0: 0085af83 lw t6,8(a1)\n" " 4: ffff8f93 addi t6,t6,-1\n" " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9a63 bnez t6,0x20\n" + " c: 000f9c63 bnez t6,0x24\n" " 10: 00000f97 auipc t6,0x0\n" - " 14: 010f8f93 addi t6,t6,16\n" - " 18: 00862f83 lw t6,8(a2)\n" - " 1c: 000f8067 jr t6\n" - " 20: 01062f83 lw t6,16(a2)\n" - " 24: 02400613 li a2,36\n" - " 28: 00200693 li a3,2\n" - " 2c: 00200713 li a4,2\n" - " 30: fff00793 li a5,-1\n" - " 34: 000f8067 jr t6" + " 14: 014f8f93 addi t6,t6,20 # 0x24\n" + " 18: 01f5a223 sw t6,4(a1)\n" + " 1c: 00862f83 lw t6,8(a2)\n" + " 20: 000f8067 jr t6\n" + " 24: 01062f83 lw t6,16(a2)\n" + " 28: 02800613 li a2,40\n" + " 2c: 00200693 li a3,2\n" + " 30: 00200713 li a4,2\n" + " 34: fff00793 li a5,-1\n" + " 38: 000f8067 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -340,17 +341,18 @@ call_ext_last_test() -> " 0: 0085af83 lw t6,8(a1)\n" " 4: ffff8f93 addi t6,t6,-1\n" " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9a63 bnez t6,0x20\n" + " c: 000f9c63 bnez t6,0x24\n" " 10: 00000f97 auipc t6,0x0\n" - " 14: 010f8f93 addi t6,t6,16\n" - " 18: 00862f83 lw t6,8(a2)\n" - " 1c: 000f8067 jr t6\n" - " 20: 01062f83 lw t6,16(a2)\n" - " 24: 02400613 li a2,36\n" - " 28: 00200693 li a3,2\n" - " 2c: 00200713 li a4,2\n" - " 30: 00a00793 li a5,10\n" - " 34: 000f8067 jr t6" + " 14: 014f8f93 addi t6,t6,20 # 0x24\n" + " 18: 01f5a223 sw t6,4(a1)\n" + " 1c: 00862f83 lw t6,8(a2)\n" + " 20: 000f8067 jr t6\n" + " 24: 01062f83 lw t6,16(a2)\n" + " 28: 02800613 li a2,40\n" + " 2c: 00200693 li a3,2\n" + " 30: 00200713 li a4,2\n" + " 34: 00a00793 li a5,10\n" + " 38: 000f8067 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -397,7 +399,7 @@ return_if_not_equal_to_ctx_test_() -> " 28: 00812583 lw a1,8(sp)\n" " 2c: 00c12603 lw a2,12(sp)\n" " 30: 01010113 addi sp,sp,16\n" - " 34: 00af8463 beq t6,a0,0x3c\n" + " 34: 00af8663 beq t6,a0,0x40\n" " 38: 000f8513 mv a0,t6\n" " 3c: 00008067 ret" >>, @@ -430,7 +432,7 @@ return_if_not_equal_to_ctx_test_() -> " 2c: 00c12603 lw a2,12(sp)\n" " 30: 01010113 addi sp,sp,16\n" " 34: 000f8f13 mv t5,t6\n" - " 38: 00af0463 beq t5,a0,0x40\n" + " 38: 00af0663 beq t5,a0,0x44\n" " 3c: 000f0513 mv a0,t5\n" " 40: 00008067 ret" >>, @@ -1159,28 +1161,26 @@ call_only_or_schedule_next_and_label_relocation_test() -> Dump = << " 0: 00000697 auipc a3,0x0\n" - " 4: 05468067 jr 84(a3)\n" + " 4: 04c68067 jr 76(a3) # 0x4c\n" " 8: 00000697 auipc a3,0x0\n" - " c: 01068067 jr 16(a3)\n" + " c: 01068067 jr 16(a3) # 0x18\n" " 10: 00000697 auipc a3,0x0\n" - " 14: 03c68067 jr 60(a3)\n" + " 14: 03468067 jr 52(a3) # 0x44\n" " 18: 0085af83 lw t6,8(a1)\n" " 1c: ffff8f93 addi t6,t6,-1\n" " 20: 01f5a423 sw t6,8(a1)\n" - " 24: 000f8a63 beqz t6,0x38\n" - " 28: 0240006f j 0x4c\n" + " 24: 000f8663 beqz t6,0x30\n" + " 28: 01c0006f j 0x44\n" " 2c: 00000013 nop\n" - " 30: 00000013 nop\n" - " 34: 00000013 nop\n" - " 38: 00000f97 auipc t6,0x0\n" - " 3c: 014f8f93 addi t6,t6,20\n" - " 40: 01f5a223 sw t6,4(a1)\n" - " 44: 00862f83 lw t6,8(a2)\n" + " 30: 00000f97 auipc t6,0x0\n" + " 34: 014f8f93 addi t6,t6,20 # 0x44\n" + " 38: 01f5a223 sw t6,4(a1)\n" + " 3c: 00862f83 lw t6,8(a2)\n" + " 40: 000f8067 jr t6\n" + " 44: 00062f83 lw t6,0(a2)\n" " 48: 000f8067 jr t6\n" - " 4c: 00062f83 lw t6,0(a2)\n" - " 50: 000f8067 jr t6\n" - " 54: 00462f83 lw t6,4(a2)\n" - " 58: 000f8067 jr t6" + " 4c: 00462f83 lw t6,4(a2)\n" + " 50: 000f8067 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1213,20 +1213,18 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> " 218: 0085af83 lw t6,8(a1)\n" " 21c: ffff8f93 addi t6,t6,-1\n" " 220: 01f5a423 sw t6,8(a1)\n" - " 224: 000f8a63 beqz t6,0x238\n" - " 228: 0240006f j 0x24c\n" + " 224: 000f8663 beqz t6,0x230\n" + " 228: 01c0006f j 0x244\n" " 22c: 00000013 nop\n" - " 230: 00000013 nop\n" - " 234: 00000013 nop\n" - " 238: 00000f97 auipc t6,0x0\n" - " 23c: 014f8f93 addi t6,t6,20\n" - " 240: 01f5a223 sw t6,4(a1)\n" - " 244: 00862f83 lw t6,8(a2)\n" + " 230: 00000f97 auipc t6,0x0\n" + " 234: 014f8f93 addi t6,t6,20 # 0x244\n" + " 238: 01f5a223 sw t6,4(a1)\n" + " 23c: 00862f83 lw t6,8(a2)\n" + " 240: 000f8067 jr t6\n" + " 244: 00062f83 lw t6,0(a2)\n" " 248: 000f8067 jr t6\n" - " 24c: 00062f83 lw t6,0(a2)\n" - " 250: 000f8067 jr t6\n" - " 254: 00462f83 lw t6,4(a2)\n" - " 258: 000f8067 jr t6" + " 24c: 00462f83 lw t6,4(a2)\n" + " 250: 000f8067 jr t6" >>, {_, RelevantBinary} = split_binary(Stream, 16#218), ?assertEqual(dump_to_bin(Dump), RelevantBinary). @@ -1358,28 +1356,24 @@ is_integer_test() -> " 0: 01852f83 lw t6,24(a0)\n" " 4: ffffcf13 not t5,t6\n" " 8: 01cf1f13 slli t5,t5,0x1c\n" - " c: 040f0c63 beqz t5,0x64\n" + " c: 040f0463 beqz t5,0x54\n" " 10: 000f8f13 mv t5,t6\n" " 14: 00300e93 li t4,3\n" " 18: 01df7f33 and t5,t5,t4\n" " 1c: 00200e93 li t4,2\n" - " 20: 01df0a63 beq t5,t4,0x34\n" + " 20: 01df0663 beq t5,t4,0x2c\n" " 24: 0dc0006f j 0x100\n" " 28: 00000013 nop\n" - " 2c: 00000013 nop\n" - " 30: 00000013 nop\n" - " 34: 00300f13 li t5,3\n" - " 38: ffff4f13 not t5,t5\n" - " 3c: 01efffb3 and t6,t6,t5\n" - " 40: 000faf83 lw t6,0(t6)\n" - " 44: 03f00f13 li t5,63\n" - " 48: 01efffb3 and t6,t6,t5\n" - " 4c: 00800f13 li t5,8\n" - " 50: 01ef8a63 beq t6,t5,0x64\n" - " 54: 0ac0006f j 0x100\n" - " 58: 00000013 nop\n" - " 5c: 00000013 nop\n" - " 60: 00000013 nop" + " 2c: 00300f13 li t5,3\n" + " 30: ffff4f13 not t5,t5\n" + " 34: 01efffb3 and t6,t6,t5\n" + " 38: 000faf83 lw t6,0(t6)\n" + " 3c: 03f00f13 li t5,63\n" + " 40: 01efffb3 and t6,t6,t5\n" + " 44: 00800f13 li t5,8\n" + " 48: 01ef8663 beq t6,t5,0x54\n" + " 4c: 0b40006f j 0x100\n" + " 50: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1421,33 +1415,29 @@ is_number_test() -> " 0: 01852f83 lw t6,24(a0)\n" " 4: ffffcf13 not t5,t6\n" " 8: 01cf1f13 slli t5,t5,0x1c\n" - " c: 060f0663 beqz t5,0x78\n" + " c: 040f0e63 beqz t5,0x68\n" " 10: 000f8f13 mv t5,t6\n" " 14: 00300e93 li t4,3\n" " 18: 01df7f33 and t5,t5,t4\n" " 1c: 00200e93 li t4,2\n" - " 20: 01df0a63 beq t5,t4,0x34\n" + " 20: 01df0663 beq t5,t4,0x2c\n" " 24: 0dc0006f j 0x100\n" " 28: 00000013 nop\n" - " 2c: 00000013 nop\n" - " 30: 00000013 nop\n" - " 34: 00300f13 li t5,3\n" - " 38: ffff4f13 not t5,t5\n" - " 3c: 01efffb3 and t6,t6,t5\n" - " 40: 000faf83 lw t6,0(t6)\n" - " 44: 000f8f13 mv t5,t6\n" - " 48: 03f00e93 li t4,63\n" - " 4c: 01df7f33 and t5,t5,t4\n" - " 50: 00800e93 li t4,8\n" - " 54: 03df0263 beq t5,t4,0x78\n" - " 58: 03f00f13 li t5,63\n" - " 5c: 01efffb3 and t6,t6,t5\n" - " 60: 01800f13 li t5,24\n" - " 64: 01ef8a63 beq t6,t5,0x78\n" - " 68: 0980006f j 0x100\n" - " 6c: 00000013 nop\n" - " 70: 00000013 nop\n" - " 74: 00000013 nop" + " 2c: 00300f13 li t5,3\n" + " 30: ffff4f13 not t5,t5\n" + " 34: 01efffb3 and t6,t6,t5\n" + " 38: 000faf83 lw t6,0(t6)\n" + " 3c: 000f8f13 mv t5,t6\n" + " 40: 03f00e93 li t4,63\n" + " 44: 01df7f33 and t5,t5,t4\n" + " 48: 00800e93 li t4,8\n" + " 4c: 01df0e63 beq t5,t4,0x68\n" + " 50: 03f00f13 li t5,63\n" + " 54: 01efffb3 and t6,t6,t5\n" + " 58: 01800f13 li t5,24\n" + " 5c: 01ef8663 beq t6,t5,0x68\n" + " 60: 0a00006f j 0x100\n" + " 64: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1466,15 +1456,13 @@ is_boolean_test() -> State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8e63 beq t6,t5,0x24\n" - " c: 00b00f13 li t5,11\n" - " 10: 01ef8a63 beq t6,t5,0x24\n" - " 14: 0ec0006f j 0x100\n" - " 18: 00000013 nop\n" - " 1c: 00000013 nop\n" - " 20: 00000013 nop" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8a63 beq t6,t5,0x1c\n" + " c: 00b00f13 li t5,11\n" + " 10: 01ef8663 beq t6,t5,0x1c\n" + " 14: 0ec0006f j 0x100\n" + " 18: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1493,15 +1481,13 @@ is_boolean_far_test() -> State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8e63 beq t6,t5,0x24\n" - " c: 00b00f13 li t5,11\n" - " 10: 01ef8a63 beq t6,t5,0x24\n" - " 14: 7ed0006f j 0x1000\n" - " 18: 00000013 nop\n" - " 1c: 00000013 nop\n" - " 20: 00000013 nop" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8a63 beq t6,t5,0x1c\n" + " c: 00b00f13 li t5,11\n" + " 10: 01ef8663 beq t6,t5,0x1c\n" + " 14: 7ed0006f j 0x1000\n" + " 18: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1522,13 +1508,11 @@ is_boolean_far_known_test() -> Dump = << " 0: 01852f83 lw t6,24(a0)\n" " 4: 04b00f13 li t5,75\n" - " 8: 01ef8e63 beq t6,t5,0x24\n" + " 8: 01ef8a63 beq t6,t5,0x1c\n" " c: 00b00f13 li t5,11\n" - " 10: 01ef8a63 beq t6,t5,0x24\n" - " 14: 00000f17 auipc t5,0x0\n" - " 18: 008f2f03 lw t5,8(t5)\n" - " 1c: 000f0067 jr t5\n" - " 20: 00001000 .word 0x00001000" + " 10: 01ef8663 beq t6,t5,0x1c\n" + " 14: 00001f17 auipc t5,0x1\n" + " 18: fecf0067 jr -20(t5) # 0x1000" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1581,7 +1565,7 @@ wait_timeout_test() -> " 4c: 00812583 lw a1,8(sp)\n" " 50: 00c12603 lw a2,12(sp)\n" " 54: 01010113 addi sp,sp,16\n" - " 58: 00af8463 beq t6,a0,0x60\n" + " 58: 00af8663 beq t6,a0,0x60\n" " 5c: 000f8513 mv a0,t6\n" " 60: 00008067 ret\n" " 64: 08400f93 li t6,132\n" @@ -1767,22 +1751,24 @@ call_ext_test() -> " 0: 0085af83 lw t6,8(a1)\n" " 4: ffff8f93 addi t6,t6,-1\n" " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9a63 bnez t6,0x20\n" + " c: 000f9c63 bnez t6,0x24\n" " 10: 00000f97 auipc t6,0x0\n" - " 14: 010f8f93 addi t6,t6,16\n" - " 18: 00862f83 lw t6,8(a2)\n" - " 1c: 000f8067 jr t6\n" - " 20: 0005af03 lw t5,0(a1)\n" - " 24: 000f2f03 lw t5,0(t5)\n" - " 28: 018f1f13 slli t5,t5,0x18\n" - " 2c: 13000f93 li t6,304\n" - " 30: 01ff6f33 or t5,t5,t6\n" - " 34: 05e52e23 sw t5,92(a0)\n" - " 38: 01062f83 lw t6,16(a2)\n" - " 3c: 00200613 li a2,2\n" - " 40: 00500693 li a3,5\n" - " 44: fff00713 li a4,-1\n" - " 48: 000f8067 jr t6" + " 14: 014f8f93 addi t6,t6,20 # 0x24\n" + " 18: 01f5a223 sw t6,4(a1)\n" + " 1c: 00862f83 lw t6,8(a2)\n" + " 20: 000f8067 jr t6\n" + " 24: 0005af03 lw t5,0(a1)\n" + " 28: 000f2f03 lw t5,0(t5)\n" + " 2c: 018f1f13 slli t5,t5,0x18\n" + " 30: 15000f93 li t6,336\n" + " 34: 00000013 nop\n" + " 38: 01ff6f33 or t5,t5,t6\n" + " 3c: 05e52e23 sw t5,92(a0)\n" + " 40: 01062f83 lw t6,16(a2)\n" + " 44: 00200613 li a2,2\n" + " 48: 00500693 li a3,5\n" + " 4c: fff00713 li a4,-1\n" + " 50: 000f8067 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1819,49 +1805,51 @@ call_fun_test() -> " 0: 0085af83 lw t6,8(a1)\n" " 4: ffff8f93 addi t6,t6,-1\n" " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9a63 bnez t6,0x20\n" + " c: 000f9c63 bnez t6,0x24\n" " 10: 00000f97 auipc t6,0x0\n" - " 14: 010f8f93 addi t6,t6,16\n" - " 18: 00862f83 lw t6,8(a2)\n" - " 1c: 000f8067 jr t6\n" - " 20: 01852f83 lw t6,24(a0)\n" - " 24: 000f8f13 mv t5,t6\n" - " 28: 000f0e93 mv t4,t5\n" - " 2c: 00300e13 li t3,3\n" - " 30: 01cefeb3 and t4,t4,t3\n" - " 34: 00200e13 li t3,2\n" - " 38: 01ce8c63 beq t4,t3,0x50\n" - " 3c: 04c62f83 lw t6,76(a2)\n" - " 40: 04000613 li a2,64\n" - " 44: 18b00693 li a3,395\n" - " 48: 000f0713 mv a4,t5\n" - " 4c: 000f8067 jr t6\n" - " 50: 00300e93 li t4,3\n" - " 54: fffece93 not t4,t4\n" - " 58: 01df7f33 and t5,t5,t4\n" - " 5c: 000f2f03 lw t5,0(t5)\n" - " 60: 000f0e93 mv t4,t5\n" - " 64: 03f00e13 li t3,63\n" - " 68: 01cefeb3 and t4,t4,t3\n" - " 6c: 01400e13 li t3,20\n" - " 70: 01ce8c63 beq t4,t3,0x88\n" - " 74: 04c62f83 lw t6,76(a2)\n" - " 78: 07800613 li a2,120\n" - " 7c: 18b00693 li a3,395\n" - " 80: 000f0713 mv a4,t5\n" - " 84: 000f8067 jr t6\n" - " 88: 0005ae83 lw t4,0(a1)\n" - " 8c: 000eae83 lw t4,0(t4)\n" - " 90: 018e9e93 slli t4,t4,0x18\n" - " 94: 2e000f13 li t5,736\n" - " 98: 01eeeeb3 or t4,t4,t5\n" - " 9c: 05d52e23 sw t4,92(a0)\n" - " a0: 08000f13 li t5,128\n" - " a4: 00cf0f33 add t5,t5,a2\n" - " a8: 000f2f03 lw t5,0(t5)\n" - " ac: 000f8613 mv a2,t6\n" - " b0: 00000693 li a3,0\n" - " b4: 000f0067 jr t5" + " 14: 014f8f93 addi t6,t6,20 # 0x24\n" + " 18: 01f5a223 sw t6,4(a1)\n" + " 1c: 00862f83 lw t6,8(a2)\n" + " 20: 000f8067 jr t6\n" + " 24: 01852f83 lw t6,24(a0)\n" + " 28: 000f8f13 mv t5,t6\n" + " 2c: 000f0e93 mv t4,t5\n" + " 30: 00300e13 li t3,3\n" + " 34: 01cefeb3 and t4,t4,t3\n" + " 38: 00200e13 li t3,2\n" + " 3c: 01ce8c63 beq t4,t3,0x54\n" + " 40: 04c62f83 lw t6,76(a2)\n" + " 44: 04400613 li a2,68\n" + " 48: 18b00693 li a3,395\n" + " 4c: 000f0713 mv a4,t5\n" + " 50: 000f8067 jr t6\n" + " 54: 00300e93 li t4,3\n" + " 58: fffece93 not t4,t4\n" + " 5c: 01df7f33 and t5,t5,t4\n" + " 60: 000f2f03 lw t5,0(t5)\n" + " 64: 000f0e93 mv t4,t5\n" + " 68: 03f00e13 li t3,63\n" + " 6c: 01cefeb3 and t4,t4,t3\n" + " 70: 01400e13 li t3,20\n" + " 74: 01ce8c63 beq t4,t3,0x8c\n" + " 78: 04c62f83 lw t6,76(a2)\n" + " 7c: 07c00613 li a2,124\n" + " 80: 18b00693 li a3,395\n" + " 84: 000f0713 mv a4,t5\n" + " 88: 000f8067 jr t6\n" + " 8c: 0005ae83 lw t4,0(a1)\n" + " 90: 000eae83 lw t4,0(t4)\n" + " 94: 018e9e93 slli t4,t4,0x18\n" + " 98: 30000f13 li t5,768\n" + " 9c: 00000013 nop\n" + " a0: 01eeeeb3 or t4,t4,t5\n" + " a4: 05d52e23 sw t4,92(a0)\n" + " a8: 08000f13 li t5,128\n" + " ac: 00cf0f33 add t5,t5,a2\n" + " b0: 000f2f03 lw t5,0(t5)\n" + " b4: 000f8613 mv a2,t6\n" + " b8: 00000693 li a3,0\n" + " bc: 000f0067 jr t5" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2861,8 +2849,8 @@ alloc_boxed_integer_fragment_small_test() -> " c: 00a12223 sw a0,4(sp)\n" " 10: 00b12423 sw a1,8(sp)\n" " 14: 00c12623 sw a2,12(sp)\n" - " 18: 02a00613 li a2,42\n" - " 1c: 00000693 li a3,0\n" + " 18: 02a00593 li a1,42\n" + " 1c: 00000613 li a2,0\n" " 20: 000f80e7 jalr t6\n" " 24: 00050f93 mv t6,a0\n" " 28: 00012083 lw ra,0(sp)\n" @@ -2892,10 +2880,10 @@ alloc_boxed_integer_fragment_large_test() -> " c: 00a12223 sw a0,4(sp)\n" " 10: 00b12423 sw a1,8(sp)\n" " 14: 00c12623 sw a2,12(sp)\n" - " 18: 9abce637 lui a2,0x9abce\n" - " 1c: ef060613 addi a2,a2,-272\n" - " 20: 123456b7 lui a3,0x12345\n" - " 24: 67868693 addi a3,a3,1656\n" + " 18: 9abce5b7 lui a1,0x9abce\n" + " 1c: ef058593 addi a1,a1,-272 # 0x9abcdef0\n" + " 20: 12345637 lui a2,0x12345\n" + " 24: 67860613 addi a2,a2,1656 # 0x12345678\n" " 28: 000f80e7 jalr t6\n" " 2c: 00050f93 mv t6,a0\n" " 30: 00012083 lw ra,0(sp)\n" @@ -2937,7 +2925,7 @@ call_func_ptr_stack_alignment_test() -> " 2c: 01f12c23 sw t6,24(sp)\n" " 30: 02a00513 li a0,42\n" " 34: 000e00e7 jalr t3\n" - " 38: fea12e23 sw a0,-4(sp)\n" + " 38: 00050e13 mv t3,a0\n" " 3c: 00012083 lw ra,0(sp)\n" " 40: 00412503 lw a0,4(sp)\n" " 44: 00812583 lw a1,8(sp)\n" @@ -2996,7 +2984,7 @@ call_func_ptr_register_exhaustion_test_() -> " 40: 00300693 li a3,3\n" " 44: 00100713 li a4,1\n" " 48: 000f00e7 jalr t5\n" - " 4c: fea12e23 sw a0,-4(sp)\n" + " 4c: 00050f13 mv t5,a0\n" " 50: 00012083 lw ra,0(sp)\n" " 54: 00412503 lw a0,4(sp)\n" " 58: 00812583 lw a1,8(sp)\n" @@ -3037,7 +3025,7 @@ call_func_ptr_register_exhaustion_test_() -> " 40: 00100693 li a3,1\n" " 44: 00030713 mv a4,t1\n" " 48: 000f00e7 jalr t5\n" - " 4c: fea12e23 sw a0,-4(sp)\n" + " 4c: 00050f13 mv t5,a0\n" " 50: 00012083 lw ra,0(sp)\n" " 54: 00412503 lw a0,4(sp)\n" " 58: 00812583 lw a1,8(sp)\n" @@ -3078,7 +3066,7 @@ call_func_ptr_register_exhaustion_test_() -> " 40: 00030693 mv a3,t1\n" " 44: 00100713 li a4,1\n" " 48: 000f00e7 jalr t5\n" - " 4c: fea12e23 sw a0,-4(sp)\n" + " 4c: 00050f13 mv t5,a0\n" " 50: 00012083 lw ra,0(sp)\n" " 54: 00412503 lw a0,4(sp)\n" " 58: 00812583 lw a1,8(sp)\n" @@ -3255,18 +3243,17 @@ add_beam_test() -> State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]), State15 = ?BACKEND:update_branches(State14), Stream = ?BACKEND:stream(State15), - riscv32_helper:disassemble(Stream), Dump = << % jump table (new 8-byte format) " 0: 00000697 auipc a3,0x0\n" - " 4: 11868067 jr 280(a3)\n" + " 4: 10c68067 jr 268(a3) # 0x10c\n" " 8: 00000697 auipc a3,0x0\n" - " c: 01868067 jr 24(a3)\n" + " c: 01868067 jr 24(a3) # 0x20\n" " 10: 00000697 auipc a3,0x0\n" - " 14: 05468067 jr 84(a3)\n" + " 14: 04c68067 jr 76(a3) # 0x5c\n" " 18: 00000697 auipc a3,0x0\n" - " 1c: 0f868067 jr 248(a3)\n" + " 1c: 0ec68067 jr 236(a3) # 0x104\n" % label 1 % {move,{integer,9},{x,1}}. " 20: 09f00f93 li t6,159\n" @@ -3278,72 +3265,69 @@ add_beam_test() -> " 30: 0085af83 lw t6,8(a1)\n" " 34: ffff8f93 addi t6,t6,-1\n" " 38: 01f5a423 sw t6,8(a1)\n" - " 3c: 000f8a63 beqz t6,0x50\n" - " 40: 0240006f j 0x64\n" + " 3c: 000f8663 beqz t6,0x48\n" + " 40: 01c0006f j 0x5c\n" " 44: 00000013 nop\n" - " 48: 00000013 nop\n" - " 4c: 00000013 nop\n" - " 50: 00000f97 auipc t6,0x0\n" - " 54: 014f8f93 addi t6,t6,20\n" - " 58: 01f5a223 sw t6,4(a1)\n" - " 5c: 00862f83 lw t6,8(a2)\n" - " 60: 000f8067 jr t6\n" + " 48: 00000f97 auipc t6,0x0\n" + " 4c: 014f8f93 addi t6,t6,20 # 0x5c\n" + " 50: 01f5a223 sw t6,4(a1)\n" + " 54: 00862f83 lw t6,8(a2)\n" + " 58: 000f8067 jr t6\n" % label 2 % {allocate,1,1}. - " 64: 01462f83 lw t6,20(a2)\n" - " 68: ff010113 addi sp,sp,-16\n" - " 6c: 00112023 sw ra,0(sp)\n" - " 70: 00a12223 sw a0,4(sp)\n" - " 74: 00b12423 sw a1,8(sp)\n" - " 78: 00c12623 sw a2,12(sp)\n" - " 7c: 00100613 li a2,1\n" - " 80: 00000693 li a3,0\n" - " 84: 00100713 li a4,1\n" - " 88: 000f80e7 jalr t6\n" - " 8c: 00050f93 mv t6,a0\n" - " 90: 00012083 lw ra,0(sp)\n" - " 94: 00412503 lw a0,4(sp)\n" - " 98: 00812583 lw a1,8(sp)\n" - " 9c: 00c12603 lw a2,12(sp)\n" - " a0: 01010113 addi sp,sp,16\n" - " a4: 01ff9f13 slli t5,t6,0x1f\n" - " a8: 000f4863 bltz t5,0xb8\n" - " ac: 01862f83 lw t6,24(a2)\n" - " b0: 0b000613 li a2,176\n" - " b4: 000f8067 jr t6\n" + " 5c: 01462f83 lw t6,20(a2)\n" + " 60: ff010113 addi sp,sp,-16\n" + " 64: 00112023 sw ra,0(sp)\n" + " 68: 00a12223 sw a0,4(sp)\n" + " 6c: 00b12423 sw a1,8(sp)\n" + " 70: 00c12623 sw a2,12(sp)\n" + " 74: 00100613 li a2,1\n" + " 78: 00000693 li a3,0\n" + " 7c: 00100713 li a4,1\n" + " 80: 000f80e7 jalr t6\n" + " 84: 00050f93 mv t6,a0\n" + " 88: 00012083 lw ra,0(sp)\n" + " 8c: 00412503 lw a0,4(sp)\n" + " 90: 00812583 lw a1,8(sp)\n" + " 94: 00c12603 lw a2,12(sp)\n" + " 98: 01010113 addi sp,sp,16\n" + " 9c: 01ff9f13 slli t5,t6,0x1f\n" + " a0: 000f4863 bltz t5,0xb0\n" + " a4: 01862f83 lw t6,24(a2)\n" + " a8: 0a800613 li a2,168\n" + " ac: 000f8067 jr t6\n" % {init_yregs,{list,[{y,0}]}}. %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), - " b8: 03b00f13 li t5,59\n" - " bc: 01452f83 lw t6,20(a0)\n" - " c0: 01efa023 sw t5,0(t6)\n" + " b0: 03b00f13 li t5,59\n" + " b4: 01452f83 lw t6,20(a0)\n" + " b8: 01efa023 sw t5,0(t6)\n" % {call,1,{f,3}} %% call_or_schedule_next(State9, 3), - " c4: 0005af03 lw t5,0(a1)\n" - " c8: 000f2f03 lw t5,0(t5)\n" - " cc: 018f1f13 slli t5,t5,0x18\n" - " d0: 44000f93 li t6,1088\n" - " d4: 01ff6f33 or t5,t5,t6\n" - " d8: 05e52e23 sw t5,92(a0)\n" - " dc: 0085af83 lw t6,8(a1)\n" - " e0: ffff8f93 addi t6,t6,-1\n" - " e4: 01f5a423 sw t6,8(a1)\n" - " e8: 000f8a63 beqz t6,0xfc\n" - " ec: 0240006f j 0x110\n" - " f0: 00000013 nop\n" - " f4: 00000013 nop\n" - " f8: 00000013 nop\n" - " fc: 00000f97 auipc t6,0x0\n" - " 100: 014f8f93 addi t6,t6,20\n" - " 104: 01f5a223 sw t6,4(a1)\n" - " 108: 00862f83 lw t6,8(a2)\n" - " 10c: 000f8067 jr t6\n" + " bc: 0005af03 lw t5,0(a1)\n" + " c0: 000f2f03 lw t5,0(t5)\n" + " c4: 018f1f13 slli t5,t5,0x18\n" + " c8: 41000f93 li t6,1040\n" + " cc: 00000013 nop\n" + " d0: 01ff6f33 or t5,t5,t6\n" + " d4: 05e52e23 sw t5,92(a0)\n" + " d8: 0085af83 lw t6,8(a1)\n" + " dc: ffff8f93 addi t6,t6,-1\n" + " e0: 01f5a423 sw t6,8(a1)\n" + " e4: 000f8663 beqz t6,0xf0\n" + " e8: 01c0006f j 0x104\n" + " ec: 00000013 nop\n" + " f0: 00000f97 auipc t6,0x0\n" + " f4: 014f8f93 addi t6,t6,20 # 0x104\n" + " f8: 01f5a223 sw t6,4(a1)\n" + " fc: 00862f83 lw t6,8(a2)\n" + " 100: 000f8067 jr t6\n" %% (continuation) % label 3 - " 110: 00462f83 lw t6,4(a2)\n" - " 114: 000f8067 jr t6\n" + " 104: 00462f83 lw t6,4(a2)\n" + " 108: 000f8067 jr t6\n" % label 0 - " 118: 00462f83 lw t6,4(a2)\n" - " 11c: 000f8067 jr t6\n" + " 10c: 00462f83 lw t6,4(a2)\n" + " 110: 000f8067 jr t6\n" >>, ?assertEqual(dump_to_bin(Dump), Stream). From 3f8f9407c5286dbb06243028639d5075e1e300a5 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 19 Oct 2025 22:07:29 +0200 Subject: [PATCH 90/97] riscv32: fix compilation with JIT disabled on esp32 Signed-off-by: Paul Guyot --- src/platforms/esp32/CMakeLists.txt | 5 +++-- src/platforms/esp32/components/libatomvm/CMakeLists.txt | 9 +++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index 1212a15fe9..4ddc362924 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -51,9 +51,8 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -project(atomvm-esp32) - # JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +# Configuration comes from idf.py menuconfig (KConfig), not CMake options if(CONFIG_JIT_ENABLED) if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") set(AVM_DISABLE_JIT OFF) @@ -68,6 +67,8 @@ else() message(STATUS "JIT compilation disabled") endif() +project(atomvm-esp32) + # esp-idf does not use compile_feature but instead sets version in # c_compile_options # Ensure project is compiled with at least C11 diff --git a/src/platforms/esp32/components/libatomvm/CMakeLists.txt b/src/platforms/esp32/components/libatomvm/CMakeLists.txt index c8e3ede411..00595afeef 100644 --- a/src/platforms/esp32/components/libatomvm/CMakeLists.txt +++ b/src/platforms/esp32/components/libatomvm/CMakeLists.txt @@ -31,7 +31,12 @@ if (HAVE_PLATFORM_ATOMIC_H) target_include_directories(libAtomVM PUBLIC ../avm_sys/) endif() -target_link_libraries(${COMPONENT_LIB} - INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init" "-u jit_stream_entry_point" "-u sys_map_native_code") +if (AVM_DISABLE_JIT) + target_link_libraries(${COMPONENT_LIB} + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init") +else() + target_link_libraries(${COMPONENT_LIB} + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init" "-u jit_stream_entry_point" "-u sys_map_native_code") +endif() target_compile_features(${COMPONENT_LIB} INTERFACE c_std_11) From 49c26b4c80ce04c6cf57535fced8e2e58a126b8a Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 21 Oct 2025 08:00:11 +0200 Subject: [PATCH 91/97] riscv32: enable JIT for pico2, add it to CI Signed-off-by: Paul Guyot --- .github/workflows/pico-build.yaml | 34 ++++++++++++++++++++++++------- src/platforms/rp2/CMakeLists.txt | 7 ++++++- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pico-build.yaml b/.github/workflows/pico-build.yaml index 9cf01d045a..2e9de47109 100644 --- a/.github/workflows/pico-build.yaml +++ b/.github/workflows/pico-build.yaml @@ -41,7 +41,17 @@ jobs: strategy: matrix: board: ["pico", "pico_w", "pico2"] + platform: [""] language: ["cpp"] + jit: ["", "-DAVM_DISABLE_JIT=OFF"] + include: + - board: "pico2" + platform: "-DPICO_PLATFORM=rp2350-riscv" + jit: "" + + - board: "pico2" + platform: "-DPICO_PLATFORM=rp2350-riscv" + jit: "-DAVM_DISABLE_JIT=OFF" steps: - name: Checkout repo @@ -57,6 +67,16 @@ jobs: libnewlib-arm-none-eabi libstdc++-arm-none-eabi-newlib \ erlang-base erlang-dev erlang-dialyzer erlang-eunit rebar3 + - name: Install riscv32 toolchain + if: matrix.platform == "-DPICO_PLATFORM=rp2350-riscv" + run: | + sudo mkdir -p /opt + cd /opt + sudo wget https://github.com/raspberrypi/pico-sdk-tools/releases/download/v2.2.0-3/riscv-toolchain-15-x86_64-lin.tar.gz + sudo tar xzf riscv-toolchain-15-x86_64-lin.tar.gz + ls /opt + echo "/opt/riscv-toolchain-15-x86_64-lin/bin" >> $GITHUB_PATH + - name: "Git config safe.directory for codeql" run: git config --global --add safe.directory /__w/AtomVM/AtomVM @@ -74,7 +94,7 @@ jobs: set -euo pipefail mkdir build cd build - cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} + cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.platform }} ${{ matrix.jit }} ninja - name: "Perform CodeQL Analysis" @@ -97,7 +117,7 @@ jobs: mkdir build.nosmp cd build.nosmp # TODO: fix all warnings and enable -DAVM_WARNINGS_ARE_ERRORS=ON - cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} -DAVM_DISABLE_SMP=1 + cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.jit }} -DAVM_DISABLE_SMP=1 cmake --build . --target=rp2_tests - name: Run tests with rp2040js @@ -112,7 +132,7 @@ jobs: npx tsx run-tests.ts ../build.nosmp/tests/rp2_tests.uf2 ../build.nosmp/tests/test_erl_sources/rp2_test_modules.uf2 - name: Build atomvmlib.uf2 - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' shell: bash run: | set -euo pipefail @@ -122,7 +142,7 @@ jobs: make atomvmlib-${{ matrix.board }}.uf2 - name: Rename AtomVM and write sha256sum - if: startsWith(github.ref, 'refs/tags/') + if: startsWith(github.ref, 'refs/tags/') && matrix.platform == '' && matrix.jit == '' shell: bash run: | pushd src/platforms/rp2/build @@ -137,7 +157,7 @@ jobs: popd - name: Rename atomvmlib and write sha256sum - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' shell: bash run: | pushd build/libs @@ -148,7 +168,7 @@ jobs: - name: Release (Pico & Pico2) uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' with: draft: true fail_on_unmatched_files: true @@ -160,7 +180,7 @@ jobs: - name: Release (PicoW) uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w' && matrix.platform == '' && matrix.jit == '' with: draft: true fail_on_unmatched_files: true diff --git a/src/platforms/rp2/CMakeLists.txt b/src/platforms/rp2/CMakeLists.txt index 6dbcdf7bb8..0732f6e490 100644 --- a/src/platforms/rp2/CMakeLists.txt +++ b/src/platforms/rp2/CMakeLists.txt @@ -69,8 +69,13 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$") if (NOT AVM_DISABLE_JIT) set(AVM_JIT_TARGET_ARCH "armv6m") endif() +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^hazard3$") + # Pico2 RISC-V processor (Hazard3) + if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "riscv32") + endif() else() - # Typically riscv is not supported yet + # Other processors not supported yet if (NOT AVM_DISABLE_JIT) message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") set(AVM_DISABLE_JIT ON CACHE BOOL "Disable just in time compilation." FORCE) From 03d4c834129a3f891e316146ec17f53128be92cb Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 21 Oct 2025 08:00:39 +0200 Subject: [PATCH 92/97] Add missing define for HAVE_GETCWD for rp2 Signed-off-by: Paul Guyot --- src/platforms/rp2/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/platforms/rp2/CMakeLists.txt b/src/platforms/rp2/CMakeLists.txt index 0732f6e490..86e5e6683a 100644 --- a/src/platforms/rp2/CMakeLists.txt +++ b/src/platforms/rp2/CMakeLists.txt @@ -54,6 +54,8 @@ set(HAVE_MKFIFO "" CACHE INTERNAL "Have symbol mkfifo" FORCE) set(HAVE_UNLINK "" CACHE INTERNAL "Have symbol unlink" FORCE) # Likewise with EXECVE set(HAVE_EXECVE "" CACHE INTERNAL "Have symbol execve" FORCE) +# getcwd is defined in newlib header but not implemented +set(HAVE_GETCWD "" CACHE INTERNAL "Have symbol getcwd" FORCE) # Options that make sense for this platform option(AVM_DISABLE_SMP "Disable SMP support." OFF) From caff9a52b6af5ef0f0de5394c9fc0bcce9e65942 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 20 Oct 2025 09:06:59 +0200 Subject: [PATCH 93/97] riscv32: Implement and use C extension Signed-off-by: Paul Guyot --- libs/jit/src/jit_riscv32.erl | 110 +- libs/jit/src/jit_riscv32_asm.erl | 840 ++++++- tests/libs/jit/jit_riscv32_asm_tests.erl | 443 +++- tests/libs/jit/jit_riscv32_tests.erl | 2839 +++++++++++----------- tests/libs/jit/jit_tests_common.erl | 3 +- 5 files changed, 2717 insertions(+), 1518 deletions(-) diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 83fed2585c..191fc3cdc8 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -317,7 +317,7 @@ flush(#state{stream_module = StreamModule, stream = Stream0} = State) -> %%----------------------------------------------------------------------------- -spec debugger(state()) -> state(). debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> - Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:bkpt(0)), + Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:c_ebreak()), State#state{stream = Stream1}. %%----------------------------------------------------------------------------- @@ -416,12 +416,7 @@ jump_table0( % Create jump table entry: AUIPC + JALR (8 bytes total) % This will be patched later in update_branches/2 Offset = StreamModule:offset(Stream0), - % Placeholder: Load PC + upper20 bits - I1 = jit_riscv32_asm:auipc(a3, 0), - % Placeholder: Jump to a3 + lower12 bits - I2 = jit_riscv32_asm:jalr(zero, a3, 0), - - JumpEntry = <>, + JumpEntry = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, Stream1 = StreamModule:append(Stream0, JumpEntry), % Record both AUIPC and JALR offsets for patching @@ -451,22 +446,37 @@ update_branches( Rel = LabelOffset - Offset, NewInstr = case Type of - {adr, Reg} when Rel rem 4 =:= 0 -> pc_relative_address(Reg, Rel); - {adr, Reg} when Rel rem 4 =:= 2 -> pc_relative_address(Reg, Rel + 2); - {far_branch, Size, TempReg} -> + {adr, Reg} when Rel rem 4 =:= 0 -> + % Generate pc_relative_address and pad to 8 bytes with NOP + I = pc_relative_address(Reg, Rel), + case byte_size(I) of + 4 -> <>; + 6 -> <>; + 8 -> I + end; + {adr, Reg} when Rel rem 4 =:= 2; Rel rem 4 =:= -2 -> + % Handle 2-byte aligned offsets and pad to 8 bytes + % Handle both positive and negative offsets (Erlang rem can be negative) + I = pc_relative_address(Reg, Rel), + case byte_size(I) of + 4 -> <>; + 6 -> <>; + 8 -> I + end; + {far_branch, TempReg} -> % Check if branch can now be optimized to near branch if Rel >= -1048576 andalso Rel =< 1048574 andalso (Rel rem 2) =:= 0 -> % RISC-V jal has ±1MB range % Optimize to near branch: jal + nops to fill original size DirectBranch = jit_riscv32_asm:jal(zero, Rel), - % Fill remaining bytes with NOPs (RISC-V instructions are 4 bytes) - NopCount = (Size - 4) div 4, - Nops = << - <<(jit_riscv32_asm:nop())/binary>> - || _ <- lists:seq(1, NopCount) - >>, - <>; + case byte_size(DirectBranch) of + 2 -> + <>; + 4 -> + <> + end; true -> % Keep far branch sequence: auipc + jalr (PC-relative, 8 bytes) % Split the relative offset into upper 20 bits and lower 12 bits @@ -474,7 +484,11 @@ update_branches( Lo12 = Rel - (Hi20 bsl 12), I1 = jit_riscv32_asm:auipc(TempReg, Hi20), I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12), - <> + Entry = <>, + case byte_size(Entry) of + 6 -> <>; + 8 -> Entry + end end; jump_table_auipc_jalr -> % Calculate PC-relative offset from AUIPC instruction to target @@ -498,7 +512,12 @@ update_branches( % Encode AUIPC and JALR with computed offsets I1 = jit_riscv32_asm:auipc(a3, Upper20), I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed), - <> + % Map to 8 bytes + JumpTableEntry = <>, + case byte_size(JumpTableEntry) of + 6 -> <>; + 8 -> JumpTableEntry + end end, Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), update_branches(State#state{stream = Stream1, branches = BranchesT}). @@ -783,13 +802,10 @@ branch_to_label_code( % RISC-V: Far branch sequence using PC-relative auipc + jalr (8 bytes) % Placeholder: auipc TempReg, 0 - I1 = jit_riscv32_asm:auipc(TempReg, 0), % Placeholder: jalr zero, TempReg, 0 - I2 = jit_riscv32_asm:jalr(zero, TempReg, 0), - CodeBlock = <>, - SequenceSize = byte_size(CodeBlock), + CodeBlock = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, % Add relocation entry - Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}}, + Reloc = {Label, Offset, {far_branch, TempReg}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; branch_to_label_code( @@ -799,13 +815,10 @@ branch_to_label_code( % Far branch sequence using PC-relative auipc + jalr (8 bytes) % Placeholder: auipc t6, 0 - I1 = jit_riscv32_asm:auipc(t6, 0), % Placeholder: jalr zero, t6, 0 - I2 = jit_riscv32_asm:jalr(zero, t6, 0), - CodeBlock = <>, - SequenceSize = byte_size(CodeBlock), + CodeBlock = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, % Add relocation entry - Reloc = {Label, Offset, {far_branch, SequenceSize, t6}}, + Reloc = {Label, Offset, {far_branch, t6}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; branch_to_label_code(#state{available_regs = []}, _Offset, _Label, _LabelLookup) -> @@ -2356,8 +2369,9 @@ set_continuation_to_label( % resolved to point directly to the label's actual address (not the jump table entry) Offset = StreamModule:offset(Stream0), % Emit placeholder for pc_relative_address (auipc + addi) + % Reserve 8 bytes (2 x 32-bit instructions) with all-1s placeholder for flash programming % The relocation will replace these with the correct offset - I1 = pc_relative_address(Temp, 4), + I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, Reloc = {Label, Offset, {adr, Temp}}, % Store continuation (jit_state is in a1) I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), @@ -2379,7 +2393,8 @@ set_continuation_to_offset( ) -> OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), - I1 = pc_relative_address(Temp, 4), + % Reserve 8 bytes with all-1s placeholder for flash programming + I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, Reloc = {OffsetRef, Offset, {adr, Temp}}, % Store continuation (jit_state is in a1) I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), @@ -2659,8 +2674,11 @@ decrement_reductions_and_maybe_schedule_next( NewI5 = case pc_relative_address(Temp, NewI5Offset) of I when byte_size(I) =:= 4 -> - % Only auipc, pad with NOP + % Only auipc, pad with NOP (4 bytes) <>; + I when byte_size(I) =:= 6 -> + % auipc + c.addi, pad with c.nop (2 bytes) + <>; I when byte_size(I) =:= 8 -> % auipc + addi, no padding needed I @@ -2798,10 +2816,11 @@ rewrite_cp_offset( CPValue = NewOffset bsl 2, NewMoveInstr = jit_riscv32_asm:li(TempReg, CPValue), % We reserved 8 bytes (2 instructions) for the CP value - % If li generates only 4 bytes, pad with a NOP to maintain alignment + % Pad with NOP if needed to maintain alignment PaddedInstr = case byte_size(NewMoveInstr) of 4 -> <>; + 6 -> <>; 8 -> NewMoveInstr end, Stream1 = StreamModule:replace(Stream0, RewriteOffset, PaddedInstr), @@ -2841,13 +2860,22 @@ return_labels_and_lines( || {Label, LabelOffset} <- Labels, is_integer(Label) ]), - I1 = pc_relative_address(a0, 12), I2 = jit_riscv32_asm:ret(), + % Assume total size is 10 bytes (8-byte I1 + 2-byte c.ret) + % If actual is 8 bytes (6-byte I1 + 2-byte c.ret), we'll pad with 2 bytes + I1 = pc_relative_address(a0, 10), + Prologue = <>, + ProloguePadded = + case byte_size(Prologue) of + 10 -> Prologue; + % 2-byte padding + 8 -> <> + end, LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, LinesTable = <<<> || {Line, Offset} <- SortedLines>>, Stream1 = StreamModule:append( Stream0, - <> ), State#state{stream = Stream1}. @@ -3005,7 +3033,7 @@ args_regs(Args) -> ). %%----------------------------------------------------------------------------- -%% @doc Add a label at the current offset. Eventually align it with a nop. +%% @doc Add a label at the current offset. %% @end %% @param State current backend state %% @param Label the label number or reference @@ -3014,15 +3042,7 @@ args_regs(Args) -> -spec add_label(state(), integer() | reference()) -> state(). add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label) -> Offset0 = StreamModule:offset(Stream0), - {State1, Offset1} = - if - Offset0 rem 4 =:= 0 -> - {State0, Offset0}; - true -> - Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:nop()), - {State0#state{stream = Stream1}, Offset0 + 2} - end, - add_label(State1, Label, Offset1). + add_label(State0, Label, Offset0). %%----------------------------------------------------------------------------- %% @doc Add a label at a specific offset diff --git a/libs/jit/src/jit_riscv32_asm.erl b/libs/jit/src/jit_riscv32_asm.erl index 64d42c10ad..25bf1ff574 100644 --- a/libs/jit/src/jit_riscv32_asm.erl +++ b/libs/jit/src/jit_riscv32_asm.erl @@ -87,9 +87,40 @@ call/2, % M extension (multiply/divide) mul/3, - % System instructions - bkpt/1, - ebreak/0 + % C extension (compressed) - arithmetic/logical + c_add/2, + c_sub/2, + c_and/2, + c_or/2, + c_xor/2, + c_mv/2, + % C extension - immediate instructions + c_addi/2, + c_andi/2, + c_li/2, + c_lui/2, + c_addi16sp/1, + c_addi4spn/2, + % C extension - shift instructions + c_slli/2, + c_srli/2, + c_srai/2, + % C extension - load/store + c_lw/2, + c_sw/2, + c_lwsp/2, + c_swsp/2, + % C extension - branches and jumps + c_beqz/2, + c_bnez/2, + c_j/1, + c_jal/1, + c_jr/1, + c_jalr/1, + % C extension - system instructions + c_ebreak/0, + % C extension - pseudo-instructions + c_nop/0 ]). -export_type([ @@ -264,6 +295,9 @@ encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7) -> %% ADD - Add %% rd = rs1 + rs2 -spec add(riscv_register(), riscv_register(), riscv_register()) -> binary(). +add(Rd, Rs1, Rs2) when Rd =:= Rs1, Rd =/= zero, Rs2 =/= zero -> + % Use c.add when rd == rs1 and neither register is zero + c_add(Rd, Rs2); add(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000000 encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#00). @@ -271,6 +305,11 @@ add(Rd, Rs1, Rs2) -> %% SUB - Subtract %% rd = rs1 - rs2 -spec sub(riscv_register(), riscv_register(), riscv_register()) -> binary(). +sub(Rd, Rs1, Rs2) when Rd =:= Rs1 -> + case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of + true -> c_sub(Rd, Rs2); + false -> encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20) + end; sub(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0100000 encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20). @@ -278,6 +317,11 @@ sub(Rd, Rs1, Rs2) -> %% AND - Bitwise AND %% rd = rs1 & rs2 -spec and_(riscv_register(), riscv_register(), riscv_register()) -> binary(). +and_(Rd, Rs1, Rs2) when Rd =:= Rs1 -> + case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of + true -> c_and(Rd, Rs2); + false -> encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00) + end; and_(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 111, Funct7: 0000000 encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00). @@ -285,6 +329,11 @@ and_(Rd, Rs1, Rs2) -> %% OR - Bitwise OR %% rd = rs1 | rs2 -spec or_(riscv_register(), riscv_register(), riscv_register()) -> binary(). +or_(Rd, Rs1, Rs2) when Rd =:= Rs1 -> + case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of + true -> c_or(Rd, Rs2); + false -> encode_r_type(16#33, Rd, 16#6, Rs1, Rs2, 16#00) + end; or_(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 110, Funct7: 0000000 encode_r_type(16#33, Rd, 16#6, Rs1, Rs2, 16#00). @@ -298,6 +347,11 @@ or_(Rd, Rs) -> %% XOR - Bitwise XOR %% rd = rs1 ^ rs2 -spec xor_(riscv_register(), riscv_register(), riscv_register()) -> binary(). +xor_(Rd, Rs1, Rs2) when Rd =:= Rs1 -> + case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of + true -> c_xor(Rd, Rs2); + false -> encode_r_type(16#33, Rd, 16#4, Rs1, Rs2, 16#00) + end; xor_(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 100, Funct7: 0000000 encode_r_type(16#33, Rd, 16#4, Rs1, Rs2, 16#00). @@ -372,6 +426,9 @@ encode_i_type(Opcode, Rd, Funct3, Rs1, Imm) -> %% ADDI - Add Immediate %% rd = rs1 + imm -spec addi(riscv_register(), riscv_register(), integer()) -> binary(). +addi(Rd, Rs1, Imm) when Rd =:= Rs1, Rd =/= zero, Imm >= -32, Imm =< 31 -> + % Use c.addi when rd == rs1, rd != zero, and imm fits in 6 bits (signed) + c_addi(Rd, Imm); addi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 -> % Opcode: 0010011 (0x13), Funct3: 000 encode_i_type(16#13, Rd, 16#0, Rs1, Imm); @@ -381,6 +438,11 @@ addi(_Rd, _Rs1, Imm) -> %% ANDI - AND Immediate %% rd = rs1 & imm -spec andi(riscv_register(), riscv_register(), integer()) -> binary(). +andi(Rd, Rs1, Imm) when Rd =:= Rs1, Imm >= -32, Imm =< 31 -> + case is_compressed_reg(Rd) of + true -> c_andi(Rd, Imm); + false -> encode_i_type(16#13, Rd, 16#7, Rs1, Imm) + end; andi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 -> % Opcode: 0010011 (0x13), Funct3: 111 encode_i_type(16#13, Rd, 16#7, Rs1, Imm); @@ -430,6 +492,9 @@ sltiu(_Rd, _Rs1, Imm) -> %% SLLI - Shift Left Logical Immediate %% rd = rs1 << shamt -spec slli(riscv_register(), riscv_register(), 0..31) -> binary(). +slli(Rd, Rs1, Shamt) when Rd =:= Rs1, Rd =/= zero, Shamt >= 1, Shamt =< 31 -> + % Use c.slli when rd == rs1, rd != zero, and shamt != 0 (c.slli with shamt=0 is reserved) + c_slli(Rd, Shamt); slli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 -> % Opcode: 0010011 (0x13), Funct3: 001, Imm[11:5] = 0000000 encode_i_type(16#13, Rd, 16#1, Rs1, Shamt); @@ -439,6 +504,11 @@ slli(_Rd, _Rs1, Shamt) -> %% SRLI - Shift Right Logical Immediate %% rd = rs1 >> shamt (zero-extend) -spec srli(riscv_register(), riscv_register(), 0..31) -> binary(). +srli(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 0, Shamt =< 31 -> + case is_compressed_reg(Rd) of + true -> c_srli(Rd, Shamt); + false -> encode_i_type(16#13, Rd, 16#5, Rs1, Shamt) + end; srli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 -> % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0000000 encode_i_type(16#13, Rd, 16#5, Rs1, Shamt); @@ -448,6 +518,14 @@ srli(_Rd, _Rs1, Shamt) -> %% SRAI - Shift Right Arithmetic Immediate %% rd = rs1 >> shamt (sign-extend) -spec srai(riscv_register(), riscv_register(), 0..31) -> binary(). +srai(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 0, Shamt =< 31 -> + case is_compressed_reg(Rd) of + true -> + c_srai(Rd, Shamt); + false -> + ImmWithBit30 = Shamt bor (1 bsl 10), + encode_i_type(16#13, Rd, 16#5, Rs1, ImmWithBit30) + end; srai(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 -> % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0100000 % The encoding uses bit 30 (Imm[10]) to distinguish SRAI from SRLI @@ -470,6 +548,15 @@ lw(Rd, Rs1) when is_atom(Rs1) -> lw(Rd, Rs1, 0). -spec lw(riscv_register(), riscv_register(), integer()) -> binary(). +lw(Rd, sp, Offset) when Rd =/= zero, Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 -> + % Use c.lwsp for loads from sp with aligned offset in range + c_lwsp(Rd, Offset); +lw(Rd, Rs1, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 -> + % Use c.lw when both registers are in compressed set and offset is aligned + case is_compressed_reg(Rd) andalso is_compressed_reg(Rs1) of + true -> c_lw(Rd, {Rs1, Offset}); + false -> encode_i_type(16#03, Rd, 16#2, Rs1, Offset) + end; lw(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 -> % Opcode: 0000011 (0x03), Funct3: 010 encode_i_type(16#03, Rd, 16#2, Rs1, Offset); @@ -585,6 +672,15 @@ sw(Rs2, Rs1) when is_atom(Rs1) -> sw(Rs1, Rs2, 0). -spec sw(riscv_register(), riscv_register(), integer()) -> binary(). +sw(sp, Rs2, Offset) when Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 -> + % Use c.swsp for stores to sp with aligned offset in range + c_swsp(Rs2, Offset); +sw(Rs1, Rs2, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 -> + % Use c.sw when both registers are in compressed set and offset is aligned + case is_compressed_reg(Rs1) andalso is_compressed_reg(Rs2) of + true -> c_sw(Rs2, {Rs1, Offset}); + false -> encode_s_type(16#23, 16#2, Rs1, Rs2, Offset) + end; sw(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 -> % Opcode: 0100011 (0x23), Funct3: 010 encode_s_type(16#23, 16#2, Rs1, Rs2, Offset); @@ -674,6 +770,12 @@ encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset) -> %% BEQ - Branch if Equal %% if (rs1 == rs2) pc += offset -spec beq(riscv_register(), riscv_register(), integer()) -> binary(). +beq(Rs1, zero, Offset) when Offset >= -256, Offset =< 254, (Offset rem 2) =:= 0 -> + % Use c.beqz when comparing with zero and offset fits + case is_compressed_reg(Rs1) of + true -> c_beqz(Rs1, Offset); + false -> encode_b_type(16#63, 16#0, Rs1, zero, Offset) + end; beq(Rs1, Rs2, Offset) when Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 -> @@ -687,6 +789,12 @@ beq(_Rs1, _Rs2, Offset) -> %% BNE - Branch if Not Equal %% if (rs1 != rs2) pc += offset -spec bne(riscv_register(), riscv_register(), integer()) -> binary(). +bne(Rs1, zero, Offset) when Offset >= -256, Offset =< 254, (Offset rem 2) =:= 0 -> + % Use c.bnez when comparing with zero and offset fits + case is_compressed_reg(Rs1) of + true -> c_bnez(Rs1, Offset); + false -> encode_b_type(16#63, 16#1, Rs1, zero, Offset) + end; bne(Rs1, Rs2, Offset) when Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 -> @@ -809,6 +917,12 @@ encode_u_type(Opcode, Rd, Imm) -> %% JAL - Jump and Link %% rd = pc + 4; pc += offset -spec jal(riscv_register(), integer()) -> binary(). +jal(zero, Offset) when Offset >= -2048, Offset =< 2046, (Offset rem 2) =:= 0 -> + % Use c.j when rd is zero (no link) and offset fits + c_j(Offset); +jal(ra, Offset) when Offset >= -2048, Offset =< 2046, (Offset rem 2) =:= 0 -> + % Use c.jal when rd is ra and offset fits (RV32C only) + c_jal(Offset); jal(Rd, Offset) when Offset >= -1048576, Offset =< 1048574, (Offset rem 2) =:= 0 -> @@ -822,6 +936,12 @@ jal(_Rd, Offset) -> %% JALR - Jump and Link Register %% rd = pc + 4; pc = (rs1 + offset) & ~1 -spec jalr(riscv_register(), riscv_register(), integer()) -> binary(). +jalr(zero, Rs1, 0) when Rs1 =/= zero -> + % Use c.jr for jump to register without link (rd=zero, offset=0) + c_jr(Rs1); +jalr(ra, Rs1, 0) when Rs1 =/= zero -> + % Use c.jalr for jump to register with link (rd=ra, offset=0) + c_jalr(Rs1); jalr(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 -> % Opcode: 1100111 (0x67), Funct3: 000 encode_i_type(16#67, Rd, 16#0, Rs1, Offset); @@ -841,6 +961,9 @@ jalr(Rd, Rs1) -> %% LUI - Load Upper Immediate %% rd = imm << 12 -spec lui(riscv_register(), integer()) -> binary(). +lui(Rd, Imm) when Rd =/= zero, Imm >= -32, Imm =< 31, Imm =/= 0 -> + % Use c.lui when rd != zero and imm fits in 6 bits (signed) and imm != 0 + c_lui(Rd, Imm); lui(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF -> % Opcode: 0110111 (0x37) encode_u_type(16#37, Rd, Imm bsl 12); @@ -872,6 +995,9 @@ nop() -> %% For small immediates (-2048 to 2047): addi rd, x0, imm %% For larger immediates: lui + addi sequence -spec li(riscv_register(), integer()) -> binary(). +li(Rd, Imm) when Rd =/= zero, Imm >= -32, Imm =< 31 -> + % Use c.li when rd != zero and imm fits in 6 bits (signed) + c_li(Rd, Imm); li(Rd, Imm) when Imm >= -2048, Imm =< 2047 -> % Small immediate: addi rd, x0, imm addi(Rd, zero, Imm); @@ -913,8 +1039,11 @@ li(_Rd, Imm) -> error({immediate_out_of_range, Imm, -16#80000000, 16#7FFFFFFF}). %% MV - Move (copy register) -%% Expands to: addi rd, rs, 0 +%% Expands to: addi rd, rs, 0 or c.mv rd, rs -spec mv(riscv_register(), riscv_register()) -> binary(). +mv(Rd, Rs) when Rd =/= zero, Rs =/= zero -> + % Use c.mv when both rd and rs are not zero + c_mv(Rd, Rs); mv(Rd, Rs) -> addi(Rd, Rs, 0). @@ -975,21 +1104,6 @@ call(Rd, Offset) when Offset >= -16#80000000, Offset =< 16#7FFFFFFF -> call(_Rd, Offset) -> error({offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}). -%% EBREAK - Environment Breakpoint -%% Causes a breakpoint exception to be raised. -%% This is the RISC-V equivalent of ARM's BKPT instruction. -%% Encoding: 0x00100073 --spec ebreak() -> binary(). -ebreak() -> - <<16#73, 16#00, 16#10, 16#00>>. - -%% BKPT - Breakpoint (for ARM compatibility) -%% In RISC-V, this is implemented as EBREAK. -%% The immediate parameter is ignored for compatibility with ARM. --spec bkpt(integer()) -> binary(). -bkpt(_Imm) -> - ebreak(). - %% MUL - Multiply (RV32M extension) %% Multiplies rs1 by rs2 and places the lower 32 bits in rd %% Format: mul rd, rs1, rs2 @@ -998,3 +1112,691 @@ bkpt(_Imm) -> mul(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000001 encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#01). + +%%----------------------------------------------------------------------------- +%% C Extension (RV32C) - Compressed Instructions +%%----------------------------------------------------------------------------- +%% The C extension adds 16-bit compressed instructions to reduce code size. +%% All compressed instructions are 16 bits (2 bytes) and use a different +%% encoding format from the base 32-bit instructions. +%% +%% Register encoding for compressed instructions: +%% - Some instructions use the full 5-bit register encoding (x0-x31) +%% - Others use 3-bit encoding for registers x8-x15 (s0, s1, a0-a5) +%% This is called the "compressed register set" or "C register set" +%% +%% Instruction formats: +%% - CR (Register): funct4 | rd/rs1 | rs2 | op +%% - CI (Immediate): funct3 | imm | rd/rs1 | imm | op +%% - CSS (Stack Store): funct3 | imm | rs2 | op +%% - CIW (Wide Immediate): funct3 | imm | rd' | op +%% - CL (Load): funct3 | imm | rs1' | imm | rd' | op +%% - CS (Store): funct3 | imm | rs1' | imm | rs2' | op +%% - CA (Arithmetic): funct6 | rd'/rs1' | funct2 | rs2' | op +%% - CB (Branch): funct3 | offset | rs1' | offset | op +%% - CJ (Jump): funct3 | jump target | op +%% +%% See: RISC-V Instruction Set Manual, Volume I, Chapter 16 +%%----------------------------------------------------------------------------- + +%% Convert register to compressed register encoding (3 bits for x8-x15) +%% Returns the 3-bit encoding (0-7 maps to x8-x15) +-spec reg_to_c_num(riscv_register()) -> 0..7. +reg_to_c_num(s0) -> 0; +reg_to_c_num(fp) -> 0; +reg_to_c_num(s1) -> 1; +reg_to_c_num(a0) -> 2; +reg_to_c_num(a1) -> 3; +reg_to_c_num(a2) -> 4; +reg_to_c_num(a3) -> 5; +reg_to_c_num(a4) -> 6; +reg_to_c_num(a5) -> 7; +reg_to_c_num(Reg) -> error({register_not_in_compressed_set, Reg, 's0/fp, s1, a0-a5'}). + +%% Check if a register is in the compressed register set (s0/fp, s1, a0-a5) +-spec is_compressed_reg(riscv_register()) -> boolean(). +is_compressed_reg(s0) -> true; +is_compressed_reg(fp) -> true; +is_compressed_reg(s1) -> true; +is_compressed_reg(a0) -> true; +is_compressed_reg(a1) -> true; +is_compressed_reg(a2) -> true; +is_compressed_reg(a3) -> true; +is_compressed_reg(a4) -> true; +is_compressed_reg(a5) -> true; +is_compressed_reg(_) -> false. + +%%----------------------------------------------------------------------------- +%% CR-type instruction encoding (Compressed Register format) +%%----------------------------------------------------------------------------- +%% CR format: funct4 (4) | rd/rs1 (5) | rs2 (5) | op (2) +%% Bits: 15-12 11-7 6-2 1-0 + +-spec encode_cr_type( + Opcode :: 0..3, + Rd :: riscv_register(), + Rs2 :: riscv_register(), + Funct4 :: 0..15 +) -> binary(). +encode_cr_type(Opcode, Rd, Rs2, Funct4) -> + RdNum = reg_to_num(Rd), + Rs2Num = reg_to_num(Rs2), + Instr = + (Funct4 bsl 12) bor + (RdNum bsl 7) bor + (Rs2Num bsl 2) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% CI-type instruction encoding (Compressed Immediate format) +%%----------------------------------------------------------------------------- +%% CI format: funct3 (3) | imm[5] (1) | rd/rs1 (5) | imm[4:0] (5) | op (2) +%% Bits: 15-13 12 11-7 6-2 1-0 + +-spec encode_ci_type( + Opcode :: 0..3, + Rd :: riscv_register(), + Imm :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_ci_type(Opcode, Rd, Imm, Funct3) -> + RdNum = reg_to_num(Rd), + % Extract immediate bits + ImmMasked = Imm band 16#3F, + Imm5 = (ImmMasked bsr 5) band 1, + Imm4_0 = ImmMasked band 16#1F, + Instr = + (Funct3 bsl 13) bor + (Imm5 bsl 12) bor + (RdNum bsl 7) bor + (Imm4_0 bsl 2) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% CSS-type instruction encoding (Compressed Stack Store format) +%%----------------------------------------------------------------------------- +%% CSS format: funct3 (3) | imm[5:0] (6) | rs2 (5) | op (2) +%% Bits: 15-13 12-7 6-2 1-0 + +-spec encode_css_type( + Opcode :: 0..3, + Rs2 :: riscv_register(), + Imm :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_css_type(Opcode, Rs2, Imm, Funct3) -> + Rs2Num = reg_to_num(Rs2), + % Extract immediate bits (typically scaled for word access) + ImmMasked = Imm band 16#3F, + Instr = + (Funct3 bsl 13) bor + (ImmMasked bsl 7) bor + (Rs2Num bsl 2) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% CIW-type instruction encoding (Compressed Wide Immediate format) +%%----------------------------------------------------------------------------- +%% CIW format: funct3 (3) | imm[7:0] (8) | rd' (3) | op (2) +%% Bits: 15-13 12-5 4-2 1-0 + +-spec encode_ciw_type( + Opcode :: 0..3, + Rd :: riscv_register(), + Imm :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_ciw_type(Opcode, Rd, Imm, Funct3) -> + RdNum = reg_to_c_num(Rd), + ImmMasked = Imm band 16#FF, + Instr = + (Funct3 bsl 13) bor + (ImmMasked bsl 5) bor + (RdNum bsl 2) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% CL-type instruction encoding (Compressed Load format) +%%----------------------------------------------------------------------------- +%% CL format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rd' (3) | op (2) +%% Bits: 15-13 12-10 9-7 6-5 4-2 1-0 + +-spec encode_cl_type( + Opcode :: 0..3, + Rd :: riscv_register(), + Rs1 :: riscv_register(), + Imm :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_cl_type(Opcode, Rd, Rs1, Imm, Funct3) -> + RdNum = reg_to_c_num(Rd), + Rs1Num = reg_to_c_num(Rs1), + % For LW: imm[5:3] goes to bits 12-10, imm[2] goes to bit 6, imm[6] goes to bit 5 + ImmMasked = Imm band 16#7F, + Imm5_3 = (ImmMasked bsr 3) band 7, + Imm2 = (ImmMasked bsr 2) band 1, + Imm6 = (ImmMasked bsr 6) band 1, + Instr = + (Funct3 bsl 13) bor + (Imm5_3 bsl 10) bor + (Rs1Num bsl 7) bor + (Imm2 bsl 6) bor + (Imm6 bsl 5) bor + (RdNum bsl 2) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% CS-type instruction encoding (Compressed Store format) +%%----------------------------------------------------------------------------- +%% CS format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rs2' (3) | op (2) +%% Bits: 15-13 12-10 9-7 6-5 4-2 1-0 + +-spec encode_cs_type( + Opcode :: 0..3, + Rs1 :: riscv_register(), + Rs2 :: riscv_register(), + Imm :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_cs_type(Opcode, Rs1, Rs2, Imm, Funct3) -> + Rs1Num = reg_to_c_num(Rs1), + Rs2Num = reg_to_c_num(Rs2), + % For SW: imm[5:3] goes to bits 12-10, imm[2] goes to bit 6, imm[6] goes to bit 5 + ImmMasked = Imm band 16#7F, + Imm5_3 = (ImmMasked bsr 3) band 7, + Imm2 = (ImmMasked bsr 2) band 1, + Imm6 = (ImmMasked bsr 6) band 1, + Instr = + (Funct3 bsl 13) bor + (Imm5_3 bsl 10) bor + (Rs1Num bsl 7) bor + (Imm2 bsl 6) bor + (Imm6 bsl 5) bor + (Rs2Num bsl 2) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% CA-type instruction encoding (Compressed Arithmetic format) +%%----------------------------------------------------------------------------- +%% CA format: funct6 (6) | rd'/rs1' (3) | funct2 (2) | rs2' (3) | op (2) +%% Bits: 15-10 9-7 6-5 4-2 1-0 + +-spec encode_ca_type( + Opcode :: 0..3, + Rd :: riscv_register(), + Rs2 :: riscv_register(), + Funct2 :: 0..3, + Funct6 :: 0..63 +) -> binary(). +encode_ca_type(Opcode, Rd, Rs2, Funct2, Funct6) -> + RdNum = reg_to_c_num(Rd), + Rs2Num = reg_to_c_num(Rs2), + Instr = + (Funct6 bsl 10) bor + (RdNum bsl 7) bor + (Funct2 bsl 5) bor + (Rs2Num bsl 2) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% CB-type instruction encoding (Compressed Branch format) +%%----------------------------------------------------------------------------- +%% CB format: funct3 (3) | offset (8) | rs1' (3) | op (2) +%% Bits: 15-13 12-5 4-2 1-0 +%% Offset encoding: offset[8|4:3|7:6|2:1|5] -> bits [12|11:10|6:5|4:3|2] + +-spec encode_cb_type( + Opcode :: 0..3, + Rs1 :: riscv_register(), + Offset :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_cb_type(Opcode, Rs1, Offset, Funct3) -> + Rs1Num = reg_to_c_num(Rs1), + % Extract offset bits: offset[8|4:3|7:6|2:1|5] -> bits [12|11:10|6:5|4:3|2] + OffsetMasked = Offset band 16#1FF, + Offset8 = (OffsetMasked bsr 8) band 1, + Offset4_3 = (OffsetMasked bsr 3) band 3, + Offset7_6 = (OffsetMasked bsr 6) band 3, + Offset2_1 = (OffsetMasked bsr 1) band 3, + Offset5 = (OffsetMasked bsr 5) band 1, + Instr = + (Funct3 bsl 13) bor + (Offset8 bsl 12) bor + (Offset4_3 bsl 10) bor + (Rs1Num bsl 7) bor + (Offset7_6 bsl 5) bor + (Offset2_1 bsl 3) bor + (Offset5 bsl 2) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% CJ-type instruction encoding (Compressed Jump format) +%%----------------------------------------------------------------------------- +%% CJ format: funct3 (3) | jump target (11) | op (2) +%% Bits: 15-13 12-2 1-0 +%% Target encoding: target[11|4|9:8|10|6|7|3:1|5] -> bits [12|11|10:9|8|7|6|5:3|2] + +-spec encode_cj_type(Opcode :: 0..3, Offset :: integer(), Funct3 :: 0..7) -> binary(). +encode_cj_type(Opcode, Offset, Funct3) -> + % Extract offset bits: offset[11|4|9:8|10|6|7|3:1|5] + OffsetMasked = Offset band 16#FFF, + Offset11 = (OffsetMasked bsr 11) band 1, + Offset4 = (OffsetMasked bsr 4) band 1, + Offset9_8 = (OffsetMasked bsr 8) band 3, + Offset10 = (OffsetMasked bsr 10) band 1, + Offset6 = (OffsetMasked bsr 6) band 1, + Offset7 = (OffsetMasked bsr 7) band 1, + Offset3_1 = (OffsetMasked bsr 1) band 7, + Offset5 = (OffsetMasked bsr 5) band 1, + OffsetBits = + (Offset11 bsl 10) bor + (Offset4 bsl 9) bor + (Offset9_8 bsl 7) bor + (Offset10 bsl 6) bor + (Offset6 bsl 5) bor + (Offset7 bsl 4) bor + (Offset3_1 bsl 1) bor + Offset5, + Instr = + (Funct3 bsl 13) bor + (OffsetBits bsl 2) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% C Extension - Arithmetic and Logical Instructions +%%----------------------------------------------------------------------------- + +%% C.ADD - Compressed Add +%% rd = rd + rs2 (both rd and rs2 are full 5-bit registers) +%% Format: CR-type +%% Encoding: funct4=1001 (0x9), op=10 (0x2) +-spec c_add(riscv_register(), riscv_register()) -> binary(). +c_add(Rd, Rs2) -> + encode_cr_type(16#2, Rd, Rs2, 16#9). + +%% C.MV - Compressed Move (copy register) +%% rd = rs2 (both are full 5-bit registers) +%% Format: CR-type +%% Encoding: funct4=1000 (0x8), op=10 (0x2) +-spec c_mv(riscv_register(), riscv_register()) -> binary(). +c_mv(Rd, Rs2) -> + encode_cr_type(16#2, Rd, Rs2, 16#8). + +%% C.SUB - Compressed Subtract +%% rd' = rd' - rs2' (both use 3-bit compressed register encoding) +%% Format: CA-type +%% Encoding: funct6=100011 (0x23), funct2=00, op=01 (0x1) +-spec c_sub(riscv_register(), riscv_register()) -> binary(). +c_sub(Rd, Rs2) -> + encode_ca_type(16#1, Rd, Rs2, 16#0, 16#23). + +%% C.AND - Compressed Bitwise AND +%% rd' = rd' & rs2' +%% Format: CA-type +%% Encoding: funct6=100011 (0x23), funct2=11, op=01 (0x1) +-spec c_and(riscv_register(), riscv_register()) -> binary(). +c_and(Rd, Rs2) -> + encode_ca_type(16#1, Rd, Rs2, 16#3, 16#23). + +%% C.OR - Compressed Bitwise OR +%% rd' = rd' | rs2' +%% Format: CA-type +%% Encoding: funct6=100011 (0x23), funct2=10, op=01 (0x1) +-spec c_or(riscv_register(), riscv_register()) -> binary(). +c_or(Rd, Rs2) -> + encode_ca_type(16#1, Rd, Rs2, 16#2, 16#23). + +%% C.XOR - Compressed Bitwise XOR +%% rd' = rd' ^ rs2' +%% Format: CA-type +%% Encoding: funct6=100011 (0x23), funct2=01, op=01 (0x1) +-spec c_xor(riscv_register(), riscv_register()) -> binary(). +c_xor(Rd, Rs2) -> + encode_ca_type(16#1, Rd, Rs2, 16#1, 16#23). + +%%----------------------------------------------------------------------------- +%% C Extension - Immediate Instructions +%%----------------------------------------------------------------------------- + +%% C.ADDI - Compressed Add Immediate +%% rd = rd + imm (rd is full 5-bit register, imm is 6-bit signed) +%% Format: CI-type +%% Encoding: funct3=000, op=01 (0x1) +-spec c_addi(riscv_register(), integer()) -> binary(). +c_addi(Rd, Imm) when Imm >= -32, Imm =< 31, Rd =/= zero -> + encode_ci_type(16#1, Rd, Imm, 16#0); +c_addi(zero, _Imm) -> + error({invalid_compressed_instruction, c_addi, 'rd cannot be zero'}); +c_addi(_Rd, Imm) -> + error({immediate_out_of_range, Imm, -32, 31}). + +%% C.ANDI - Compressed AND Immediate +%% rd' = rd' & imm (rd' uses 3-bit encoding, imm is 6-bit signed) +%% Format: CB-type (with special encoding) +%% Encoding: funct3=100, imm[5]=bit12, funct2=10, imm[4:0]=bits 6:2, op=01 +-spec c_andi(riscv_register(), integer()) -> binary(). +c_andi(Rd, Imm) when Imm >= -32, Imm =< 31 -> + RdNum = reg_to_c_num(Rd), + ImmMasked = Imm band 16#3F, + Imm5 = (ImmMasked bsr 5) band 1, + Imm4_0 = ImmMasked band 16#1F, + Instr = + (16#4 bsl 13) bor + (Imm5 bsl 12) bor + (16#2 bsl 10) bor + (RdNum bsl 7) bor + (Imm4_0 bsl 2) bor + 16#1, + <>; +c_andi(_Rd, Imm) -> + error({immediate_out_of_range, Imm, -32, 31}). + +%% C.LI - Compressed Load Immediate +%% rd = imm (rd is full 5-bit register, imm is 6-bit signed) +%% Format: CI-type +%% Encoding: funct3=010, op=01 (0x1) +-spec c_li(riscv_register(), integer()) -> binary(). +c_li(Rd, Imm) when Imm >= -32, Imm =< 31 -> + encode_ci_type(16#1, Rd, Imm, 16#2); +c_li(_Rd, Imm) -> + error({immediate_out_of_range, Imm, -32, 31}). + +%% C.LUI - Compressed Load Upper Immediate +%% rd = imm << 12 (rd is full 5-bit register, imm is 6-bit signed non-zero) +%% Format: CI-type +%% Encoding: funct3=011, op=01 (0x1) +-spec c_lui(riscv_register(), integer()) -> binary(). +c_lui(Rd, Imm) when Imm >= -32, Imm =< 31, Imm =/= 0, Rd =/= zero, Rd =/= sp -> + encode_ci_type(16#1, Rd, Imm, 16#3); +c_lui(Rd, _Imm) when Rd =:= zero; Rd =:= sp -> + error({invalid_compressed_instruction, c_lui, 'rd cannot be zero or sp'}); +c_lui(_Rd, 0) -> + error({invalid_compressed_instruction, c_lui, 'immediate cannot be zero'}); +c_lui(_Rd, Imm) -> + error({immediate_out_of_range, Imm, -32, 31}). + +%% C.ADDI16SP - Compressed Add Immediate to SP (scaled by 16) +%% sp = sp + imm (imm is 10-bit signed, must be multiple of 16, non-zero) +%% Format: CI-type (special encoding) +%% Encoding: funct3=011, rd/rs1=sp (x2), op=01 +-spec c_addi16sp(integer()) -> binary(). +c_addi16sp(Imm) when + Imm >= -512, Imm =< 496, (Imm rem 16) =:= 0, Imm =/= 0 +-> + % Immediate encoding: nzimm[9|4|6|8:7|5] -> bits [12|6|5|4:3|2] + ImmMasked = Imm band 16#3FF, + Imm9 = (ImmMasked bsr 9) band 1, + Imm4 = (ImmMasked bsr 4) band 1, + Imm6 = (ImmMasked bsr 6) band 1, + Imm8_7 = (ImmMasked bsr 7) band 3, + Imm5 = (ImmMasked bsr 5) band 1, + ImmBits = (Imm9 bsl 5) bor (Imm4 bsl 4) bor (Imm6 bsl 3) bor (Imm8_7 bsl 1) bor Imm5, + encode_ci_type(16#1, sp, ImmBits, 16#3); +c_addi16sp(0) -> + error({invalid_compressed_instruction, c_addi16sp, 'immediate cannot be zero'}); +c_addi16sp(Imm) when (Imm rem 16) =/= 0 -> + error({immediate_not_aligned, Imm, 16}); +c_addi16sp(Imm) -> + error({immediate_out_of_range, Imm, -512, 496}). + +%% C.ADDI4SPN - Compressed Add Immediate (scaled by 4) to SP, store in rd' +%% rd' = sp + imm (imm is 10-bit unsigned, must be multiple of 4, non-zero) +%% Format: CIW-type +%% Encoding: funct3=000, op=00 (0x0) +-spec c_addi4spn(riscv_register(), integer()) -> binary(). +c_addi4spn(Rd, Imm) when + Imm >= 4, Imm =< 1020, (Imm rem 4) =:= 0 +-> + % Immediate encoding: nzuimm[5:4|9:6|2|3] -> bits [12:11|10:7|6|5] + ImmMasked = Imm band 16#3FF, + Imm5_4 = (ImmMasked bsr 4) band 3, + Imm9_6 = (ImmMasked bsr 6) band 15, + Imm2 = (ImmMasked bsr 2) band 1, + Imm3 = (ImmMasked bsr 3) band 1, + ImmBits = (Imm5_4 bsl 6) bor (Imm9_6 bsl 2) bor (Imm2 bsl 1) bor Imm3, + encode_ciw_type(16#0, Rd, ImmBits, 16#0); +c_addi4spn(_Rd, Imm) when Imm =:= 0 -> + error({invalid_compressed_instruction, c_addi4spn, 'immediate cannot be zero'}); +c_addi4spn(_Rd, Imm) when (Imm rem 4) =/= 0 -> + error({immediate_not_aligned, Imm, 4}); +c_addi4spn(_Rd, Imm) -> + error({immediate_out_of_range, Imm, 4, 1020}). + +%%----------------------------------------------------------------------------- +%% C Extension - Shift Instructions +%%----------------------------------------------------------------------------- + +%% C.SLLI - Compressed Shift Left Logical Immediate +%% rd = rd << shamt (rd is full 5-bit register, shamt is 6-bit unsigned) +%% Format: CI-type +%% Encoding: funct3=000, op=10 (0x2) +-spec c_slli(riscv_register(), 0..63) -> binary(). +c_slli(Rd, Shamt) when Shamt >= 0, Shamt =< 63, Rd =/= zero -> + encode_ci_type(16#2, Rd, Shamt, 16#0); +c_slli(zero, _Shamt) -> + error({invalid_compressed_instruction, c_slli, 'rd cannot be zero'}); +c_slli(_Rd, Shamt) -> + error({shift_amount_out_of_range, Shamt, 0, 63}). + +%% C.SRLI - Compressed Shift Right Logical Immediate +%% rd' = rd' >> shamt (rd' uses 3-bit encoding, shamt is 6-bit unsigned) +%% Format: CB-type (with special encoding) +%% Encoding: funct3=100, shamt[5]=bit12, funct2=00, shamt[4:0]=bits 6:2, op=01 +-spec c_srli(riscv_register(), 0..63) -> binary(). +c_srli(Rd, Shamt) when Shamt >= 0, Shamt =< 63 -> + RdNum = reg_to_c_num(Rd), + Shamt5 = (Shamt bsr 5) band 1, + Shamt4_0 = Shamt band 16#1F, + Instr = + (16#4 bsl 13) bor + (Shamt5 bsl 12) bor + (16#0 bsl 10) bor + (RdNum bsl 7) bor + (Shamt4_0 bsl 2) bor + 16#1, + <>; +c_srli(_Rd, Shamt) -> + error({shift_amount_out_of_range, Shamt, 0, 63}). + +%% C.SRAI - Compressed Shift Right Arithmetic Immediate +%% rd' = rd' >> shamt (sign-extend, rd' uses 3-bit encoding, shamt is 6-bit unsigned) +%% Format: CB-type (with special encoding) +%% Encoding: funct3=100, shamt[5]=bit12, funct2=01, shamt[4:0]=bits 6:2, op=01 +-spec c_srai(riscv_register(), 0..63) -> binary(). +c_srai(Rd, Shamt) when Shamt >= 0, Shamt =< 63 -> + RdNum = reg_to_c_num(Rd), + Shamt5 = (Shamt bsr 5) band 1, + Shamt4_0 = Shamt band 16#1F, + Instr = + (16#4 bsl 13) bor + (Shamt5 bsl 12) bor + (16#1 bsl 10) bor + (RdNum bsl 7) bor + (Shamt4_0 bsl 2) bor + 16#1, + <>; +c_srai(_Rd, Shamt) -> + error({shift_amount_out_of_range, Shamt, 0, 63}). + +%%----------------------------------------------------------------------------- +%% C Extension - Load/Store Instructions +%%----------------------------------------------------------------------------- + +%% C.LW - Compressed Load Word +%% rd' = mem[rs1' + offset] (both use 3-bit encoding, offset is 7-bit unsigned, multiple of 4) +%% Format: CL-type +%% Encoding: funct3=010, op=00 (0x0) +-spec c_lw(riscv_register(), {riscv_register(), integer()}) -> binary(). +c_lw(Rd, {Rs1, Offset}) when + Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0 +-> + encode_cl_type(16#0, Rd, Rs1, Offset, 16#2); +c_lw(_Rd, {_Rs1, Offset}) when (Offset rem 4) =/= 0 -> + error({offset_not_aligned, Offset, 4}); +c_lw(_Rd, {_Rs1, Offset}) -> + error({offset_out_of_range, Offset, 0, 124}). + +%% C.SW - Compressed Store Word +%% mem[rs1' + offset] = rs2' (both use 3-bit encoding, offset is 7-bit unsigned, multiple of 4) +%% Format: CS-type +%% Encoding: funct3=110, op=00 (0x0) +-spec c_sw(riscv_register(), {riscv_register(), integer()}) -> binary(). +c_sw(Rs2, {Rs1, Offset}) when + Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0 +-> + encode_cs_type(16#0, Rs1, Rs2, Offset, 16#6); +c_sw(_Rs2, {_Rs1, Offset}) when (Offset rem 4) =/= 0 -> + error({offset_not_aligned, Offset, 4}); +c_sw(_Rs2, {_Rs1, Offset}) -> + error({offset_out_of_range, Offset, 0, 124}). + +%% C.LWSP - Compressed Load Word from Stack Pointer +%% rd = mem[sp + offset] (rd is full 5-bit register, offset is 8-bit unsigned, multiple of 4) +%% Format: CI-type (special encoding) +%% Encoding: funct3=010, op=10 (0x2) +-spec c_lwsp(riscv_register(), integer()) -> binary(). +c_lwsp(Rd, Offset) when + Offset >= 0, Offset =< 252, (Offset rem 4) =:= 0, Rd =/= zero +-> + % Offset encoding: offset[5|4:2|7:6] -> bits [12|6:4|3:2] + OffsetMasked = Offset band 16#FF, + Offset5 = (OffsetMasked bsr 5) band 1, + Offset4_2 = (OffsetMasked bsr 2) band 7, + Offset7_6 = (OffsetMasked bsr 6) band 3, + ImmBits = (Offset5 bsl 5) bor (Offset4_2 bsl 2) bor Offset7_6, + encode_ci_type(16#2, Rd, ImmBits, 16#2); +c_lwsp(zero, _Offset) -> + error({invalid_compressed_instruction, c_lwsp, 'rd cannot be zero'}); +c_lwsp(_Rd, Offset) when (Offset rem 4) =/= 0 -> + error({offset_not_aligned, Offset, 4}); +c_lwsp(_Rd, Offset) -> + error({offset_out_of_range, Offset, 0, 252}). + +%% C.SWSP - Compressed Store Word to Stack Pointer +%% mem[sp + offset] = rs2 (rs2 is full 5-bit register, offset is 8-bit unsigned, multiple of 4) +%% Format: CSS-type +%% Encoding: funct3=110, op=10 (0x2) +-spec c_swsp(riscv_register(), integer()) -> binary(). +c_swsp(Rs2, Offset) when + Offset >= 0, Offset =< 252, (Offset rem 4) =:= 0 +-> + % Offset encoding: offset[5:2|7:6] -> bits [12:9|8:7] + OffsetMasked = Offset band 16#FF, + Offset5_2 = (OffsetMasked bsr 2) band 15, + Offset7_6 = (OffsetMasked bsr 6) band 3, + ImmBits = (Offset5_2 bsl 2) bor Offset7_6, + encode_css_type(16#2, Rs2, ImmBits, 16#6); +c_swsp(_Rs2, Offset) when (Offset rem 4) =/= 0 -> + error({offset_not_aligned, Offset, 4}); +c_swsp(_Rs2, Offset) -> + error({offset_out_of_range, Offset, 0, 252}). + +%%----------------------------------------------------------------------------- +%% C Extension - Branch and Jump Instructions +%%----------------------------------------------------------------------------- + +%% C.BEQZ - Compressed Branch if Equal to Zero +%% if (rs1' == 0) pc += offset (rs1' uses 3-bit encoding, offset is 9-bit signed, multiple of 2) +%% Format: CB-type +%% Encoding: funct3=110, op=01 (0x1) +-spec c_beqz(riscv_register(), integer()) -> binary(). +c_beqz(Rs1, Offset) when + Offset >= -256, Offset =< 254, (Offset rem 2) =:= 0 +-> + encode_cb_type(16#1, Rs1, Offset, 16#6); +c_beqz(_Rs1, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +c_beqz(_Rs1, Offset) -> + error({offset_out_of_range, Offset, -256, 254}). + +%% C.BNEZ - Compressed Branch if Not Equal to Zero +%% if (rs1' != 0) pc += offset (rs1' uses 3-bit encoding, offset is 9-bit signed, multiple of 2) +%% Format: CB-type +%% Encoding: funct3=111, op=01 (0x1) +-spec c_bnez(riscv_register(), integer()) -> binary(). +c_bnez(Rs1, Offset) when + Offset >= -256, Offset =< 254, (Offset rem 2) =:= 0 +-> + encode_cb_type(16#1, Rs1, Offset, 16#7); +c_bnez(_Rs1, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +c_bnez(_Rs1, Offset) -> + error({offset_out_of_range, Offset, -256, 254}). + +%% C.J - Compressed Unconditional Jump +%% pc += offset (offset is 12-bit signed, multiple of 2) +%% Format: CJ-type +%% Encoding: funct3=101, op=01 (0x1) +-spec c_j(integer()) -> binary(). +c_j(Offset) when + Offset >= -2048, Offset =< 2046, (Offset rem 2) =:= 0 +-> + encode_cj_type(16#1, Offset, 16#5); +c_j(Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +c_j(Offset) -> + error({offset_out_of_range, Offset, -2048, 2046}). + +%% C.JAL - Compressed Jump and Link (RV32C only, rd is implicitly ra) +%% ra = pc + 2; pc += offset (offset is 12-bit signed, multiple of 2) +%% Format: CJ-type +%% Encoding: funct3=001 (0x1), op=01 (0x1) +-spec c_jal(integer()) -> binary(). +c_jal(Offset) when + Offset >= -2048, Offset =< 2046, (Offset rem 2) =:= 0 +-> + encode_cj_type(16#1, Offset, 16#1); +c_jal(Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +c_jal(Offset) -> + error({offset_out_of_range, Offset, -2048, 2046}). + +%% C.JR - Compressed Jump Register +%% pc = rs1 (rs1 is full 5-bit register, must not be zero) +%% Format: CR-type +%% Encoding: funct4=1000 (0x8), rs2=x0, op=10 (0x2) +-spec c_jr(riscv_register()) -> binary(). +c_jr(Rs1) when Rs1 =/= zero -> + encode_cr_type(16#2, Rs1, zero, 16#8); +c_jr(zero) -> + error({invalid_compressed_instruction, c_jr, 'rs1 cannot be zero'}). + +%% C.JALR - Compressed Jump and Link Register +%% ra = pc + 2; pc = rs1 (rs1 is full 5-bit register, must not be zero) +%% Format: CR-type +%% Encoding: funct4=1001 (0x9), rs2=x0, op=10 (0x2) +-spec c_jalr(riscv_register()) -> binary(). +c_jalr(Rs1) when Rs1 =/= zero -> + encode_cr_type(16#2, Rs1, zero, 16#9); +c_jalr(zero) -> + error({invalid_compressed_instruction, c_jalr, 'rs1 cannot be zero'}). + +%% C.EBREAK - Compressed Environment Breakpoint +%% Causes a breakpoint exception to be raised +%% Format: CR-type +%% Encoding: funct4=1001 (0x9), rs1/rd=x0, rs2=x0, op=10 (0x2) +-spec c_ebreak() -> binary(). +c_ebreak() -> + encode_cr_type(16#2, zero, zero, 16#9). + +%%----------------------------------------------------------------------------- +%% C Extension - Pseudo-instructions +%%----------------------------------------------------------------------------- + +%% C.NOP - Compressed No Operation +%% Expands to: c.addi x0, 0 +%% Format: CI-type +%% Encoding: funct3=000, rd/rs1=x0, imm=0, op=01 (0x1) +-spec c_nop() -> binary(). +c_nop() -> + encode_ci_type(16#1, zero, 0, 16#0). diff --git a/tests/libs/jit/jit_riscv32_asm_tests.erl b/tests/libs/jit/jit_riscv32_asm_tests.erl index 94e4942db5..28a0f4fa58 100644 --- a/tests/libs/jit/jit_riscv32_asm_tests.erl +++ b/tests/libs/jit/jit_riscv32_asm_tests.erl @@ -36,10 +36,10 @@ add_test_() -> <<16#00628533:32/little>>, "add a0, t0, t1", jit_riscv32_asm:add(a0, t0, t1) ), ?_assertAsmEqual( - <<16#00a585b3:32/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0) + <<16#95aa:16/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0) ), ?_assertAsmEqual( - <<16#01e787b3:32/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5) + <<16#97fa:16/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5) ) ]. @@ -49,7 +49,7 @@ sub_test_() -> <<16#40628533:32/little>>, "sub a0, t0, t1", jit_riscv32_asm:sub(a0, t0, t1) ), ?_assertAsmEqual( - <<16#40a585b3:32/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0) + <<16#8d89:16/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0) ), ?_assertAsmEqual( <<16#41e787b3:32/little>>, "sub a5, a5, t5", jit_riscv32_asm:sub(a5, a5, t5) @@ -62,7 +62,7 @@ and_test_() -> <<16#0062f533:32/little>>, "and a0, t0, t1", jit_riscv32_asm:and_(a0, t0, t1) ), ?_assertAsmEqual( - <<16#00c5f5b3:32/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2) + <<16#8df1:16/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2) ) ]. @@ -72,7 +72,7 @@ or_test_() -> <<16#0062e533:32/little>>, "or a0, t0, t1", jit_riscv32_asm:or_(a0, t0, t1) ), ?_assertAsmEqual( - <<16#00c5e5b3:32/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2) + <<16#8dd1:16/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2) ) ]. @@ -82,7 +82,7 @@ xor_test_() -> <<16#0062c533:32/little>>, "xor a0, t0, t1", jit_riscv32_asm:xor_(a0, t0, t1) ), ?_assertAsmEqual( - <<16#00c5c5b3:32/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2) + <<16#8db1:16/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2) ) ]. @@ -146,7 +146,7 @@ addi_test_() -> <<16#01428513:32/little>>, "addi a0, t0, 20", jit_riscv32_asm:addi(a0, t0, 20) ), ?_assertAsmEqual( - <<16#fff58593:32/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1) + <<16#15fd:16/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1) ), ?_assertAsmEqual( <<16#7ff00513:32/little>>, "addi a0, zero, 2047", jit_riscv32_asm:addi(a0, zero, 2047) @@ -162,7 +162,7 @@ andi_test_() -> <<16#0ff2f513:32/little>>, "andi a0, t0, 255", jit_riscv32_asm:andi(a0, t0, 255) ), ?_assertAsmEqual( - <<16#00f5f593:32/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15) + <<16#89bd:16/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15) ) ]. @@ -192,7 +192,7 @@ slli_test_() -> <<16#00329513:32/little>>, "slli a0, t0, 3", jit_riscv32_asm:slli(a0, t0, 3) ), ?_assertAsmEqual( - <<16#01f59593:32/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31) + <<16#05fe:16/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31) ), ?_assertAsmEqual( <<16#00051513:32/little>>, "slli a0, a0, 0", jit_riscv32_asm:slli(a0, a0, 0) @@ -205,7 +205,7 @@ srli_test_() -> <<16#0032d513:32/little>>, "srli a0, t0, 3", jit_riscv32_asm:srli(a0, t0, 3) ), ?_assertAsmEqual( - <<16#01f5d593:32/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31) + <<16#81fd:16/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31) ) ]. @@ -215,7 +215,7 @@ srai_test_() -> <<16#4032d513:32/little>>, "srai a0, t0, 3", jit_riscv32_asm:srai(a0, t0, 3) ), ?_assertAsmEqual( - <<16#41f5d593:32/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31) + <<16#85fd:16/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31) ) ]. @@ -245,9 +245,9 @@ sltiu_test_() -> lw_test_() -> [ - ?_assertAsmEqual(<<16#00052503:32/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)), - ?_assertAsmEqual(<<16#00052503:32/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)), - ?_assertAsmEqual(<<16#00452583:32/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)), + ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)), + ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)), + ?_assertAsmEqual(<<16#414c:16/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)), ?_assertAsmEqual( <<16#ffc52503:32/little>>, "lw a0, -4(a0)", jit_riscv32_asm:lw(a0, a0, -4) ), @@ -294,9 +294,9 @@ lbu_test_() -> sw_test_() -> [ - ?_assertAsmEqual(<<16#00b52023:32/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)), - ?_assertAsmEqual(<<16#00b52023:32/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)), - ?_assertAsmEqual(<<16#00b52223:32/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)), + ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)), + ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)), + ?_assertAsmEqual(<<16#c14c:16/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)), ?_assertAsmEqual(<<16#feb52e23:32/little>>, "sw a1, -4(a0)", jit_riscv32_asm:sw(a0, a1, -4)) ]. @@ -327,7 +327,7 @@ beq_test_() -> <<16#feb50ee3:32/little>>, "beq a0, a1, .-4", jit_riscv32_asm:beq(a0, a1, -4) ), ?_assertAsmEqual( - <<16#00050063:32/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0) + <<16#c101:16/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0) ) ]. @@ -388,13 +388,13 @@ bgeu_test_() -> jal_test_() -> [ ?_assertAsmEqual( - <<16#008000ef:32/little>>, "jal .+8", jit_riscv32_asm:jal(ra, 8) + <<16#2021:16/little>>, "jal .+8", jit_riscv32_asm:jal(ra, 8) ), ?_assertAsmEqual( - <<16#ffdff0ef:32/little>>, "jal .-4", jit_riscv32_asm:jal(ra, -4) + <<16#3ff5:16/little>>, "jal .-4", jit_riscv32_asm:jal(ra, -4) ), ?_assertAsmEqual( - <<16#00000517:32/little, 16#000500e7:32/little>>, + <<16#00000517:32/little, 16#9502:16/little>>, "auipc a0, 0\njalr a0", jit_riscv32_asm:call(a0, 0) ), @@ -407,8 +407,8 @@ jal_test_() -> jalr_test_() -> [ - ?_assertAsmEqual(<<16#000500e7:32/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0, 0)), - ?_assertAsmEqual(<<16#000500e7:32/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0)), + ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0, 0)), + ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0)), ?_assertAsmEqual(<<16#004500e7:32/little>>, "jalr 4(a0)", jit_riscv32_asm:jalr(ra, a0, 4)) ]. @@ -418,9 +418,9 @@ jalr_test_() -> lui_test_() -> [ - ?_assertAsmEqual(<<16#000125b7:32/little>>, "lui a1, 18", jit_riscv32_asm:lui(a1, 18)), - ?_assertAsmEqual(<<16#00001537:32/little>>, "lui a0, 1", jit_riscv32_asm:lui(a0, 1)), - ?_assertAsmEqual(<<16#fffff5b7:32/little>>, "lui a1, 0xfffff", jit_riscv32_asm:lui(a1, -1)) + ?_assertAsmEqual(<<16#65c9:16/little>>, "lui a1, 18", jit_riscv32_asm:lui(a1, 18)), + ?_assertAsmEqual(<<16#6505:16/little>>, "lui a0, 1", jit_riscv32_asm:lui(a0, 1)), + ?_assertAsmEqual(<<16#75fd:16/little>>, "lui a1, 0xfffff", jit_riscv32_asm:lui(a1, -1)) ]. auipc_test_() -> @@ -435,13 +435,14 @@ auipc_test_() -> nop_test_() -> [ - ?_assertAsmEqual(<<16#00000013:32/little>>, "nop", jit_riscv32_asm:nop()) + % We want a 4-byte NOP for padding, so use .option norvc to force non-compressed + ?_assertAsmEqual(<<16#00000013:32/little>>, ".option norvc\nnop", jit_riscv32_asm:nop()) ]. li_small_test_() -> [ - ?_assertAsmEqual(<<16#00a00513:32/little>>, "li a0, 10", jit_riscv32_asm:li(a0, 10)), - ?_assertAsmEqual(<<16#fff00513:32/little>>, "li a0, -1", jit_riscv32_asm:li(a0, -1)), + ?_assertAsmEqual(<<16#4529:16/little>>, "li a0, 10", jit_riscv32_asm:li(a0, 10)), + ?_assertAsmEqual(<<16#557d:16/little>>, "li a0, -1", jit_riscv32_asm:li(a0, -1)), ?_assertAsmEqual(<<16#7ff00513:32/little>>, "li a0, 2047", jit_riscv32_asm:li(a0, 2047)) ]. @@ -449,19 +450,19 @@ li_large_test_() -> [ % 0x12345 = 74565 - requires lui + addi ?_assertAsmEqual( - <<16#00012537:32/little, 16#34550513:32/little>>, + <<16#6549:16/little, 16#34550513:32/little>>, "lui a0, 0x12\naddi a0, a0, 0x345", jit_riscv32_asm:li(a0, 16#12345) ), % 0x80000000 = -2147483648 (minimum 32-bit signed) ?_assertAsmEqual( - <<16#800005b7:32/little, 16#00058593:32/little>>, - "lui a1, 0x80000\naddi a1, a1, 0", + <<16#800005b7:32/little, 16#0581:16/little>>, + "lui a1, 0x80000\nc.addi a1, 0", jit_riscv32_asm:li(a1, -16#80000000) ), % 0x7FFFFFFF = 2147483647 (maximum 32-bit signed) ?_assertAsmEqual( - <<16#80000537:32/little, 16#fff50513:32/little>>, + <<16#80000537:32/little, 16#157d:16/little>>, "lui a0, 0x80000\naddi a0, a0, -1", jit_riscv32_asm:li(a0, 16#7FFFFFFF) ) @@ -469,8 +470,8 @@ li_large_test_() -> mv_test_() -> [ - ?_assertAsmEqual(<<16#00050513:32/little>>, "mv a0, a0", jit_riscv32_asm:mv(a0, a0)), - ?_assertAsmEqual(<<16#00058593:32/little>>, "mv a1, a1", jit_riscv32_asm:mv(a1, a1)) + ?_assertAsmEqual(<<16#852a:16/little>>, "mv a0, a0", jit_riscv32_asm:mv(a0, a0)), + ?_assertAsmEqual(<<16#85ae:16/little>>, "mv a1, a1", jit_riscv32_asm:mv(a1, a1)) ]. not_test_() -> @@ -488,22 +489,22 @@ neg_test_() -> j_test_() -> [ ?_assertAsmEqual( - <<16#0080006f:32/little>>, "j .+8", jit_riscv32_asm:j(8) + <<16#a021:16/little>>, "j .+8", jit_riscv32_asm:j(8) ), ?_assertAsmEqual( - <<16#ffdff06f:32/little>>, "j .-4", jit_riscv32_asm:j(-4) + <<16#bff5:16/little>>, "j .-4", jit_riscv32_asm:j(-4) ) ]. jr_test_() -> [ - ?_assertAsmEqual(<<16#00050067:32/little>>, "jr a0", jit_riscv32_asm:jr(a0)), - ?_assertAsmEqual(<<16#00028067:32/little>>, "jr t0", jit_riscv32_asm:jr(t0)) + ?_assertAsmEqual(<<16#8502:16/little>>, "jr a0", jit_riscv32_asm:jr(a0)), + ?_assertAsmEqual(<<16#8282:16/little>>, "jr t0", jit_riscv32_asm:jr(t0)) ]. ret_test_() -> [ - ?_assertAsmEqual(<<16#00008067:32/little>>, "ret", jit_riscv32_asm:ret()) + ?_assertAsmEqual(<<16#8082:16/little>>, "ret", jit_riscv32_asm:ret()) ]. %%----------------------------------------------------------------------------- @@ -530,24 +531,370 @@ mul_test_() -> %% System instruction tests %%----------------------------------------------------------------------------- -ebreak_test_() -> +c_ebreak_test_() -> [ ?_assertAsmEqual( - <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:ebreak() + <<16#9002:16/little>>, "c.ebreak", jit_riscv32_asm:c_ebreak() ) ]. -bkpt_test_() -> +%%----------------------------------------------------------------------------- +%% C Extension - Arithmetic and Logical instruction tests +%%----------------------------------------------------------------------------- + +c_add_test_() -> + [ + ?_assertAsmEqual( + <<16#9532:16/little>>, "c.add a0, a2", jit_riscv32_asm:c_add(a0, a2) + ), + ?_assertAsmEqual( + <<16#95be:16/little>>, "c.add a1, a5", jit_riscv32_asm:c_add(a1, a5) + ), + ?_assertAsmEqual( + <<16#9522:16/little>>, "c.add a0, s0", jit_riscv32_asm:c_add(a0, s0) + ) + ]. + +c_mv_test_() -> + [ + ?_assertAsmEqual( + <<16#8532:16/little>>, "c.mv a0, a2", jit_riscv32_asm:c_mv(a0, a2) + ), + ?_assertAsmEqual( + <<16#85be:16/little>>, "c.mv a1, a5", jit_riscv32_asm:c_mv(a1, a5) + ), + ?_assertAsmEqual( + <<16#842a:16/little>>, "c.mv s0, a0", jit_riscv32_asm:c_mv(s0, a0) + ) + ]. + +c_sub_test_() -> + [ + ?_assertAsmEqual( + <<16#8d09:16/little>>, "c.sub a0, a0", jit_riscv32_asm:c_sub(a0, a0) + ), + ?_assertAsmEqual( + <<16#8d8d:16/little>>, "c.sub a1, a1", jit_riscv32_asm:c_sub(a1, a1) + ), + ?_assertAsmEqual( + <<16#8c0d:16/little>>, "c.sub s0, a1", jit_riscv32_asm:c_sub(s0, a1) + ) + ]. + +c_and_test_() -> + [ + ?_assertAsmEqual( + <<16#8d6d:16/little>>, "c.and a0, a1", jit_riscv32_asm:c_and(a0, a1) + ), + ?_assertAsmEqual( + <<16#8fed:16/little>>, "c.and a5, a1", jit_riscv32_asm:c_and(a5, a1) + ), + ?_assertAsmEqual( + <<16#8c6d:16/little>>, "c.and s0, a1", jit_riscv32_asm:c_and(s0, a1) + ) + ]. + +c_or_test_() -> + [ + ?_assertAsmEqual( + <<16#8d4d:16/little>>, "c.or a0, a1", jit_riscv32_asm:c_or(a0, a1) + ), + ?_assertAsmEqual( + <<16#8fcd:16/little>>, "c.or a5, a1", jit_riscv32_asm:c_or(a5, a1) + ), + ?_assertAsmEqual( + <<16#8c4d:16/little>>, "c.or s0, a1", jit_riscv32_asm:c_or(s0, a1) + ) + ]. + +c_xor_test_() -> + [ + ?_assertAsmEqual( + <<16#8d2d:16/little>>, "c.xor a0, a1", jit_riscv32_asm:c_xor(a0, a1) + ), + ?_assertAsmEqual( + <<16#8fad:16/little>>, "c.xor a5, a1", jit_riscv32_asm:c_xor(a5, a1) + ), + ?_assertAsmEqual( + <<16#8c2d:16/little>>, "c.xor s0, a1", jit_riscv32_asm:c_xor(s0, a1) + ) + ]. + +%%----------------------------------------------------------------------------- +%% C Extension - Immediate instruction tests +%%----------------------------------------------------------------------------- + +c_addi_test_() -> + [ + ?_assertAsmEqual( + <<16#0511:16/little>>, "c.addi a0, 4", jit_riscv32_asm:c_addi(a0, 4) + ), + ?_assertAsmEqual( + <<16#15fd:16/little>>, "c.addi a1, -1", jit_riscv32_asm:c_addi(a1, -1) + ), + ?_assertAsmEqual( + <<16#0541:16/little>>, "c.addi a0, 16", jit_riscv32_asm:c_addi(a0, 16) + ), + ?_assertAsmEqual( + <<16#1561:16/little>>, "c.addi a0, -8", jit_riscv32_asm:c_addi(a0, -8) + ) + ]. + +c_andi_test_() -> + [ + ?_assertAsmEqual( + <<16#8929:16/little>>, "c.andi a0, 10", jit_riscv32_asm:c_andi(a0, 10) + ), + ?_assertAsmEqual( + <<16#99fd:16/little>>, "c.andi a1, -1", jit_riscv32_asm:c_andi(a1, -1) + ), + ?_assertAsmEqual( + <<16#8941:16/little>>, "c.andi a0, 16", jit_riscv32_asm:c_andi(a0, 16) + ) + ]. + +c_li_test_() -> + [ + ?_assertAsmEqual( + <<16#4529:16/little>>, "c.li a0, 10", jit_riscv32_asm:c_li(a0, 10) + ), + ?_assertAsmEqual( + <<16#55fd:16/little>>, "c.li a1, -1", jit_riscv32_asm:c_li(a1, -1) + ), + ?_assertAsmEqual( + <<16#4505:16/little>>, "c.li a0, 1", jit_riscv32_asm:c_li(a0, 1) + ), + ?_assertAsmEqual( + <<16#5501:16/little>>, "c.li a0, -32", jit_riscv32_asm:c_li(a0, -32) + ) + ]. + +c_lui_test_() -> + [ + ?_assertAsmEqual( + <<16#6529:16/little>>, "c.lui a0, 10", jit_riscv32_asm:c_lui(a0, 10) + ), + ?_assertAsmEqual( + <<16#75fd:16/little>>, "c.lui a1, 0xfffff", jit_riscv32_asm:c_lui(a1, -1) + ), + ?_assertAsmEqual( + <<16#6505:16/little>>, "c.lui a0, 1", jit_riscv32_asm:c_lui(a0, 1) + ) + ]. + +c_addi16sp_test_() -> + [ + ?_assertAsmEqual( + <<16#6141:16/little>>, "c.addi16sp sp, 16", jit_riscv32_asm:c_addi16sp(16) + ), + ?_assertAsmEqual( + <<16#7101:16/little>>, "c.addi16sp sp, -512", jit_riscv32_asm:c_addi16sp(-512) + ), + ?_assertAsmEqual( + <<16#6161:16/little>>, "c.addi16sp sp, 80", jit_riscv32_asm:c_addi16sp(80) + ) + ]. + +c_addi4spn_test_() -> + [ + ?_assertAsmEqual( + <<16#0048:16/little>>, "c.addi4spn a0, sp, 4", jit_riscv32_asm:c_addi4spn(a0, 4) + ), + ?_assertAsmEqual( + <<16#1010:16/little>>, "c.addi4spn a2, sp, 32", jit_riscv32_asm:c_addi4spn(a2, 32) + ), + ?_assertAsmEqual( + <<16#1ffc:16/little>>, + "c.addi4spn a5, sp, 1020", + jit_riscv32_asm:c_addi4spn(a5, 1020) + ) + ]. + +%%----------------------------------------------------------------------------- +%% C Extension - Shift instruction tests +%%----------------------------------------------------------------------------- + +c_slli_test_() -> [ - % bkpt is an ARM compatibility wrapper that generates ebreak - % The immediate parameter is ignored ?_assertAsmEqual( - <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(0) + <<16#050e:16/little>>, "c.slli a0, 3", jit_riscv32_asm:c_slli(a0, 3) ), ?_assertAsmEqual( - <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(42) + <<16#05fe:16/little>>, "c.slli a1, 31", jit_riscv32_asm:c_slli(a1, 31) ), ?_assertAsmEqual( - <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(255) + <<16#0542:16/little>>, "c.slli a0, 16", jit_riscv32_asm:c_slli(a0, 16) + ) + ]. + +c_srli_test_() -> + [ + ?_assertAsmEqual( + <<16#810d:16/little>>, "c.srli a0, 3", jit_riscv32_asm:c_srli(a0, 3) + ), + ?_assertAsmEqual( + <<16#81fd:16/little>>, "c.srli a1, 31", jit_riscv32_asm:c_srli(a1, 31) + ), + ?_assertAsmEqual( + <<16#8141:16/little>>, "c.srli a0, 16", jit_riscv32_asm:c_srli(a0, 16) + ) + ]. + +c_srai_test_() -> + [ + ?_assertAsmEqual( + <<16#850d:16/little>>, "c.srai a0, 3", jit_riscv32_asm:c_srai(a0, 3) + ), + ?_assertAsmEqual( + <<16#85fd:16/little>>, "c.srai a1, 31", jit_riscv32_asm:c_srai(a1, 31) + ), + ?_assertAsmEqual( + <<16#8541:16/little>>, "c.srai a0, 16", jit_riscv32_asm:c_srai(a0, 16) + ) + ]. + +%%----------------------------------------------------------------------------- +%% C Extension - Load/Store instruction tests +%%----------------------------------------------------------------------------- + +c_lw_test_() -> + [ + ?_assertAsmEqual( + <<16#4188:16/little>>, "c.lw a0, 0(a1)", jit_riscv32_asm:c_lw(a0, {a1, 0}) + ), + ?_assertAsmEqual( + <<16#41d8:16/little>>, "c.lw a4, 4(a1)", jit_riscv32_asm:c_lw(a4, {a1, 4}) + ), + ?_assertAsmEqual( + <<16#5ffc:16/little>>, "c.lw a5, 124(a5)", jit_riscv32_asm:c_lw(a5, {a5, 124}) + ) + ]. + +c_sw_test_() -> + [ + ?_assertAsmEqual( + <<16#c188:16/little>>, "c.sw a0, 0(a1)", jit_riscv32_asm:c_sw(a0, {a1, 0}) + ), + ?_assertAsmEqual( + <<16#c1d8:16/little>>, "c.sw a4, 4(a1)", jit_riscv32_asm:c_sw(a4, {a1, 4}) + ), + ?_assertAsmEqual( + <<16#dffc:16/little>>, "c.sw a5, 124(a5)", jit_riscv32_asm:c_sw(a5, {a5, 124}) + ) + ]. + +c_lwsp_test_() -> + [ + ?_assertAsmEqual( + <<16#4502:16/little>>, "c.lwsp a0, 0(sp)", jit_riscv32_asm:c_lwsp(a0, 0) + ), + ?_assertAsmEqual( + <<16#4512:16/little>>, "c.lwsp a0, 4(sp)", jit_riscv32_asm:c_lwsp(a0, 4) + ), + ?_assertAsmEqual( + <<16#50fe:16/little>>, "c.lwsp ra, 252(sp)", jit_riscv32_asm:c_lwsp(ra, 252) + ) + ]. + +c_swsp_test_() -> + [ + ?_assertAsmEqual( + <<16#c02a:16/little>>, "c.swsp a0, 0(sp)", jit_riscv32_asm:c_swsp(a0, 0) + ), + ?_assertAsmEqual( + <<16#c22a:16/little>>, "c.swsp a0, 4(sp)", jit_riscv32_asm:c_swsp(a0, 4) + ), + ?_assertAsmEqual( + <<16#dfe6:16/little>>, "c.swsp s9, 252(sp)", jit_riscv32_asm:c_swsp(s9, 252) + ) + ]. + +%%----------------------------------------------------------------------------- +%% C Extension - Branch and Jump instruction tests +%%----------------------------------------------------------------------------- + +c_beqz_test_() -> + [ + ?_assertAsmEqual( + <<16#c111:16/little>>, "c.beqz a0, .+4", jit_riscv32_asm:c_beqz(a0, 4) + ), + ?_assertAsmEqual( + <<16#dced:16/little>>, "c.beqz s1, .-6", jit_riscv32_asm:c_beqz(s1, -6) + ), + ?_assertAsmEqual( + <<16#c101:16/little>>, "c.beqz a0, .", jit_riscv32_asm:c_beqz(a0, 0) + ) + ]. + +c_bnez_test_() -> + [ + ?_assertAsmEqual( + <<16#e111:16/little>>, "c.bnez a0, .+4", jit_riscv32_asm:c_bnez(a0, 4) + ), + ?_assertAsmEqual( + <<16#fced:16/little>>, "c.bnez s1, .-6", jit_riscv32_asm:c_bnez(s1, -6) + ), + ?_assertAsmEqual( + <<16#e101:16/little>>, "c.bnez a0, .", jit_riscv32_asm:c_bnez(a0, 0) + ) + ]. + +c_j_test_() -> + [ + ?_assertAsmEqual( + <<16#a011:16/little>>, "c.j .+4", jit_riscv32_asm:c_j(4) + ), + ?_assertAsmEqual( + <<16#bfed:16/little>>, "c.j .-6", jit_riscv32_asm:c_j(-6) + ), + ?_assertAsmEqual( + <<16#a001:16/little>>, "c.j .", jit_riscv32_asm:c_j(0) + ) + ]. + +c_jal_test_() -> + [ + ?_assertAsmEqual( + <<16#2021:16/little>>, "c.jal .+8", jit_riscv32_asm:c_jal(8) + ), + ?_assertAsmEqual( + <<16#3ff5:16/little>>, "c.jal .-4", jit_riscv32_asm:c_jal(-4) + ), + ?_assertAsmEqual( + <<16#2001:16/little>>, "c.jal .", jit_riscv32_asm:c_jal(0) + ) + ]. + +c_jr_test_() -> + [ + ?_assertAsmEqual( + <<16#8502:16/little>>, "c.jr a0", jit_riscv32_asm:c_jr(a0) + ), + ?_assertAsmEqual( + <<16#8402:16/little>>, "c.jr s0", jit_riscv32_asm:c_jr(s0) + ), + ?_assertAsmEqual( + <<16#8082:16/little>>, "c.jr ra", jit_riscv32_asm:c_jr(ra) + ) + ]. + +c_jalr_test_() -> + [ + ?_assertAsmEqual( + <<16#9502:16/little>>, "c.jalr a0", jit_riscv32_asm:c_jalr(a0) + ), + ?_assertAsmEqual( + <<16#9402:16/little>>, "c.jalr s0", jit_riscv32_asm:c_jalr(s0) + ) + ]. + +%%----------------------------------------------------------------------------- +%% C Extension - Pseudo-instruction tests +%%----------------------------------------------------------------------------- + +c_nop_test_() -> + [ + ?_assertAsmEqual( + <<16#0001:16/little>>, "c.nop", jit_riscv32_asm:c_nop() ) ]. diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index 475e96bd5d..f398cb3f49 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -41,19 +41,19 @@ call_primitive_0_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 00062f83 lw t6,0(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 000f80e7 jalr t6\n" - " 1c: 00050f93 mv t6,a0\n" - " 20: 00012083 lw ra,0(sp)\n" - " 24: 00412503 lw a0,4(sp)\n" - " 28: 00812583 lw a1,8(sp)\n" - " 2c: 00c12603 lw a2,12(sp)\n" - " 30: 01010113 addi sp,sp,16" + " 0: 00062f83 lw t6,0(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9f82 jalr t6\n" + " 10: 8faa mv t6,a0\n" + " 12: 4082 lw ra,0(sp)\n" + " 14: 4512 lw a0,4(sp)\n" + " 16: 45a2 lw a1,8(sp)\n" + " 18: 4632 lw a2,12(sp)\n" + " 1a: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -64,19 +64,19 @@ call_primitive_1_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 00462f83 lw t6,4(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 000f80e7 jalr t6\n" - " 1c: 00050f93 mv t6,a0\n" - " 20: 00012083 lw ra,0(sp)\n" - " 24: 00412503 lw a0,4(sp)\n" - " 28: 00812583 lw a1,8(sp)\n" - " 2c: 00c12603 lw a2,12(sp)\n" - " 30: 01010113 addi sp,sp,16" + " 0: 00462f83 lw t6,4(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9f82 jalr t6\n" + " 10: 8faa mv t6,a0\n" + " 12: 4082 lw ra,0(sp)\n" + " 14: 4512 lw a0,4(sp)\n" + " 16: 45a2 lw a1,8(sp)\n" + " 18: 4632 lw a2,12(sp)\n" + " 1a: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -87,22 +87,22 @@ call_primitive_2_args_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 00862f83 lw t6,8(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 02a00593 li a1,42\n" - " 1c: 02b00613 li a2,43\n" - " 20: 02c00693 li a3,44\n" - " 24: 000f80e7 jalr t6\n" - " 28: 00050f93 mv t6,a0\n" - " 2c: 00012083 lw ra,0(sp)\n" - " 30: 00412503 lw a0,4(sp)\n" - " 34: 00812583 lw a1,8(sp)\n" - " 38: 00c12603 lw a2,12(sp)\n" - " 3c: 01010113 addi sp,sp,16" + " 0: 00862f83 lw t6,8(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 02a00593 li a1,42\n" + " 12: 02b00613 li a2,43\n" + " 16: 02c00693 li a3,44\n" + " 1a: 9f82 jalr t6\n" + " 1c: 8faa mv t6,a0\n" + " 1e: 4082 lw ra,0(sp)\n" + " 20: 4512 lw a0,4(sp)\n" + " 22: 45a2 lw a1,8(sp)\n" + " 24: 4632 lw a2,12(sp)\n" + " 26: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -112,11 +112,11 @@ call_primitive_5_args_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01462f83 lw t6,20(a2)\n" - " 4: 01000613 li a2,16\n" - " 8: 02000693 li a3,32\n" - " c: 00200713 li a4,2\n" - " 10: 000f8067 jr t6" + " 0: 01462f83 lw t6,20(a2)\n" + " 4: 4641 li a2,16\n" + " 6: 02000693 li a3,32\n" + " a: 4709 li a4,2\n" + " c: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -134,30 +134,30 @@ call_primitive_6_args_test() -> Stream = ?BACKEND:stream(State4), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 00300f13 li t5,3\n" - " 8: ffff4f13 not t5,t5\n" - " c: 01efffb3 and t6,t6,t5\n" - " 10: 01c52f03 lw t5,28(a0)\n" - " 14: 0b800e93 li t4,184\n" - " 18: 00ce8eb3 add t4,t4,a2\n" - " 1c: 000eae83 lw t4,0(t4)\n" - " 20: ff010113 addi sp,sp,-16\n" - " 24: 00112023 sw ra,0(sp)\n" - " 28: 00a12223 sw a0,4(sp)\n" - " 2c: 00b12423 sw a1,8(sp)\n" - " 30: 00c12623 sw a2,12(sp)\n" - " 34: 000f8613 mv a2,t6\n" - " 38: 04000693 li a3,64\n" - " 3c: 00800713 li a4,8\n" - " 40: 000f0793 mv a5,t5\n" - " 44: 000e80e7 jalr t4\n" - " 48: 00050e93 mv t4,a0\n" - " 4c: 00012083 lw ra,0(sp)\n" - " 50: 00412503 lw a0,4(sp)\n" - " 54: 00812583 lw a1,8(sp)\n" - " 58: 00c12603 lw a2,12(sp)\n" - " 5c: 01010113 addi sp,sp,16" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 4f0d li t5,3\n" + " 6: ffff4f13 not t5,t5\n" + " a: 01efffb3 and t6,t6,t5\n" + " e: 01c52f03 lw t5,28(a0)\n" + " 12: 0b800e93 li t4,184\n" + " 16: 9eb2 add t4,t4,a2\n" + " 18: 000eae83 lw t4,0(t4)\n" + " 1c: 1141 addi sp,sp,-16\n" + " 1e: c006 sw ra,0(sp)\n" + " 20: c22a sw a0,4(sp)\n" + " 22: c42e sw a1,8(sp)\n" + " 24: c632 sw a2,12(sp)\n" + " 26: 867e mv a2,t6\n" + " 28: 04000693 li a3,64\n" + " 2c: 4721 li a4,8\n" + " 2e: 87fa mv a5,t5\n" + " 30: 9e82 jalr t4\n" + " 32: 8eaa mv t4,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -174,72 +174,72 @@ call_primitive_extended_regs_test() -> ?BACKEND:assert_all_native_free(State6), Stream = ?BACKEND:stream(State6), Dump = << - " 0: 04862f83 lw t6,72(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 01300593 li a1,19\n" - " 1c: 000f80e7 jalr t6\n" - " 20: 00050f93 mv t6,a0\n" - " 24: 00012083 lw ra,0(sp)\n" - " 28: 00412503 lw a0,4(sp)\n" - " 2c: 00812583 lw a1,8(sp)\n" - " 30: 00c12603 lw a2,12(sp)\n" - " 34: 01010113 addi sp,sp,16\n" - " 38: 04862f03 lw t5,72(a2)\n" - " 3c: fe010113 addi sp,sp,-32\n" - " 40: 00112023 sw ra,0(sp)\n" - " 44: 00a12223 sw a0,4(sp)\n" - " 48: 00b12423 sw a1,8(sp)\n" - " 4c: 00c12623 sw a2,12(sp)\n" - " 50: 01f12823 sw t6,16(sp)\n" - " 54: 01400593 li a1,20\n" - " 58: 000f00e7 jalr t5\n" - " 5c: 00050f13 mv t5,a0\n" - " 60: 00012083 lw ra,0(sp)\n" - " 64: 00412503 lw a0,4(sp)\n" - " 68: 00812583 lw a1,8(sp)\n" - " 6c: 00c12603 lw a2,12(sp)\n" - " 70: 01012f83 lw t6,16(sp)\n" - " 74: 02010113 addi sp,sp,32\n" - " 78: 04862e83 lw t4,72(a2)\n" - " 7c: fe010113 addi sp,sp,-32\n" - " 80: 00112023 sw ra,0(sp)\n" - " 84: 00a12223 sw a0,4(sp)\n" - " 88: 00b12423 sw a1,8(sp)\n" - " 8c: 00c12623 sw a2,12(sp)\n" - " 90: 01e12823 sw t5,16(sp)\n" - " 94: 01f12a23 sw t6,20(sp)\n" - " 98: 01300593 li a1,19\n" - " 9c: 000e80e7 jalr t4\n" - " a0: 00050e93 mv t4,a0\n" - " a4: 00012083 lw ra,0(sp)\n" - " a8: 00412503 lw a0,4(sp)\n" - " ac: 00812583 lw a1,8(sp)\n" - " b0: 00c12603 lw a2,12(sp)\n" - " b4: 01012f03 lw t5,16(sp)\n" - " b8: 01412f83 lw t6,20(sp)\n" - " bc: 02010113 addi sp,sp,32\n" - " c0: 03462e03 lw t3,52(a2)\n" - " c4: fe010113 addi sp,sp,-32\n" - " c8: 00112023 sw ra,0(sp)\n" - " cc: 00a12223 sw a0,4(sp)\n" - " d0: 00b12423 sw a1,8(sp)\n" - " d4: 00c12623 sw a2,12(sp)\n" - " d8: 01d12823 sw t4,16(sp)\n" - " dc: 000fa583 lw a1,0(t6)\n" - " e0: 000f2603 lw a2,0(t5)\n" - " e4: 000e00e7 jalr t3\n" - " e8: 00050e13 mv t3,a0\n" - " ec: 00012083 lw ra,0(sp)\n" - " f0: 00412503 lw a0,4(sp)\n" - " f4: 00812583 lw a1,8(sp)\n" - " f8: 00c12603 lw a2,12(sp)\n" - " fc: 01012e83 lw t4,16(sp)\n" - " 100: 02010113 addi sp,sp,32\n" - " 104: 01cea023 sw t3,0(t4)" + " 0: 04862f83 lw t6,72(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 45cd li a1,19\n" + " 10: 9f82 jalr t6\n" + " 12: 8faa mv t6,a0\n" + " 14: 4082 lw ra,0(sp)\n" + " 16: 4512 lw a0,4(sp)\n" + " 18: 45a2 lw a1,8(sp)\n" + " 1a: 4632 lw a2,12(sp)\n" + " 1c: 0141 addi sp,sp,16\n" + " 1e: 04862f03 lw t5,72(a2)\n" + " 22: 1101 addi sp,sp,-32\n" + " 24: c006 sw ra,0(sp)\n" + " 26: c22a sw a0,4(sp)\n" + " 28: c42e sw a1,8(sp)\n" + " 2a: c632 sw a2,12(sp)\n" + " 2c: c87e sw t6,16(sp)\n" + " 2e: 45d1 li a1,20\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4fc2 lw t6,16(sp)\n" + " 3e: 02010113 addi sp,sp,32\n" + " 42: 04862e83 lw t4,72(a2)\n" + " 46: 1101 addi sp,sp,-32\n" + " 48: c006 sw ra,0(sp)\n" + " 4a: c22a sw a0,4(sp)\n" + " 4c: c42e sw a1,8(sp)\n" + " 4e: c632 sw a2,12(sp)\n" + " 50: c87a sw t5,16(sp)\n" + " 52: ca7e sw t6,20(sp)\n" + " 54: 45cd li a1,19\n" + " 56: 9e82 jalr t4\n" + " 58: 8eaa mv t4,a0\n" + " 5a: 4082 lw ra,0(sp)\n" + " 5c: 4512 lw a0,4(sp)\n" + " 5e: 45a2 lw a1,8(sp)\n" + " 60: 4632 lw a2,12(sp)\n" + " 62: 4f42 lw t5,16(sp)\n" + " 64: 4fd2 lw t6,20(sp)\n" + " 66: 02010113 addi sp,sp,32\n" + " 6a: 03462e03 lw t3,52(a2)\n" + " 6e: 1101 addi sp,sp,-32\n" + " 70: c006 sw ra,0(sp)\n" + " 72: c22a sw a0,4(sp)\n" + " 74: c42e sw a1,8(sp)\n" + " 76: c632 sw a2,12(sp)\n" + " 78: c876 sw t4,16(sp)\n" + " 7a: 000fa583 lw a1,0(t6)\n" + " 7e: 000f2603 lw a2,0(t5)\n" + " 82: 9e02 jalr t3\n" + " 84: 8e2a mv t3,a0\n" + " 86: 4082 lw ra,0(sp)\n" + " 88: 4512 lw a0,4(sp)\n" + " 8a: 45a2 lw a1,8(sp)\n" + " 8c: 4632 lw a2,12(sp)\n" + " 8e: 4ec2 lw t4,16(sp)\n" + " 90: 02010113 addi sp,sp,32\n" + " 94: 01cea023 sw t3,0(t4)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -257,37 +257,37 @@ call_primitive_few_free_regs_test() -> ?BACKEND:assert_all_native_free(State7), Stream = ?BACKEND:stream(State7), Dump = << - " 0: 00100f93 li t6,1\n" - " 4: 00200f13 li t5,2\n" - " 8: 00300e93 li t4,3\n" - " c: 00400e13 li t3,4\n" - " 10: 00500393 li t2,5\n" - " 14: 0e400313 li t1,228\n" - " 18: 00c30333 add t1,t1,a2\n" - " 1c: 00032303 lw t1,0(t1)\n" - " 20: fe010113 addi sp,sp,-32\n" - " 24: 00112023 sw ra,0(sp)\n" - " 28: 00a12223 sw a0,4(sp)\n" - " 2c: 00b12423 sw a1,8(sp)\n" - " 30: 00c12623 sw a2,12(sp)\n" - " 34: 01d12823 sw t4,16(sp)\n" - " 38: 01e12a23 sw t5,20(sp)\n" - " 3c: 01f12c23 sw t6,24(sp)\n" - " 40: 000f0513 mv a0,t5\n" - " 44: 000f8593 mv a1,t6\n" - " 48: 000e0613 mv a2,t3\n" - " 4c: 000e8693 mv a3,t4\n" - " 50: 00038713 mv a4,t2\n" - " 54: 000300e7 jalr t1\n" - " 58: 00050313 mv t1,a0\n" - " 5c: 00012083 lw ra,0(sp)\n" - " 60: 00412503 lw a0,4(sp)\n" - " 64: 00812583 lw a1,8(sp)\n" - " 68: 00c12603 lw a2,12(sp)\n" - " 6c: 01012e83 lw t4,16(sp)\n" - " 70: 01412f03 lw t5,20(sp)\n" - " 74: 01812f83 lw t6,24(sp)\n" - " 78: 02010113 addi sp,sp,32" + " 0: 4f85 li t6,1\n" + " 2: 4f09 li t5,2\n" + " 4: 4e8d li t4,3\n" + " 6: 4e11 li t3,4\n" + " 8: 4395 li t2,5\n" + " a: 0e400313 li t1,228\n" + " e: 9332 add t1,t1,a2\n" + " 10: 00032303 lw t1,0(t1)\n" + " 14: 1101 addi sp,sp,-32\n" + " 16: c006 sw ra,0(sp)\n" + " 18: c22a sw a0,4(sp)\n" + " 1a: c42e sw a1,8(sp)\n" + " 1c: c632 sw a2,12(sp)\n" + " 1e: c876 sw t4,16(sp)\n" + " 20: ca7a sw t5,20(sp)\n" + " 22: cc7e sw t6,24(sp)\n" + " 24: 857a mv a0,t5\n" + " 26: 85fe mv a1,t6\n" + " 28: 8672 mv a2,t3\n" + " 2a: 86f6 mv a3,t4\n" + " 2c: 871e mv a4,t2\n" + " 2e: 9302 jalr t1\n" + " 30: 832a mv t1,a0\n" + " 32: 4082 lw ra,0(sp)\n" + " 34: 4512 lw a0,4(sp)\n" + " 36: 45a2 lw a1,8(sp)\n" + " 38: 4632 lw a2,12(sp)\n" + " 3a: 4ec2 lw t4,16(sp)\n" + " 3c: 4f52 lw t5,20(sp)\n" + " 3e: 4fe2 lw t6,24(sp)\n" + " 40: 02010113 addi sp,sp,32" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -297,21 +297,22 @@ call_ext_only_test() -> State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0085af83 lw t6,8(a1)\n" - " 4: ffff8f93 addi t6,t6,-1\n" - " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9c63 bnez t6,0x24\n" - " 10: 00000f97 auipc t6,0x0\n" - " 14: 014f8f93 addi t6,t6,20 # 0x24\n" - " 18: 01f5a223 sw t6,4(a1)\n" - " 1c: 00862f83 lw t6,8(a2)\n" - " 20: 000f8067 jr t6\n" - " 24: 01062f83 lw t6,16(a2)\n" - " 28: 02800613 li a2,40\n" - " 2c: 00200693 li a3,2\n" - " 30: 00200713 li a4,2\n" - " 34: fff00793 li a5,-1\n" - " 38: 000f8067 jr t6" + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f9b63 bnez t6,0x20\n" + " e: 00000f97 auipc t6,0x0\n" + " 12: 0fc9 addi t6,t6,18 # 0x20\n" + " 14: 0001 nop\n" + " 16: 01f5a223 sw t6,4(a1)\n" + " 1a: 00862f83 lw t6,8(a2)\n" + " 1e: 8f82 jr t6\n" + " 20: 01062f83 lw t6,16(a2)\n" + " 24: 02400613 li a2,36\n" + " 28: 4689 li a3,2\n" + " 2a: 4709 li a4,2\n" + " 2c: 57fd li a5,-1\n" + " 2e: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -323,12 +324,12 @@ call_primitive_last_5_args_test() -> ]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04c62f03 lw t5,76(a2)\n" - " 8: 00800613 li a2,8\n" - " c: 2cb00693 li a3,715\n" - " 10: 000f8713 mv a4,t6\n" - " 14: 000f0067 jr t5" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04c62f03 lw t5,76(a2)\n" + " 8: 4621 li a2,8\n" + " a: 2cb00693 li a3,715\n" + " e: 877e mv a4,t6\n" + " 10: 8f02 jr t5" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -338,21 +339,22 @@ call_ext_last_test() -> State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0085af83 lw t6,8(a1)\n" - " 4: ffff8f93 addi t6,t6,-1\n" - " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9c63 bnez t6,0x24\n" - " 10: 00000f97 auipc t6,0x0\n" - " 14: 014f8f93 addi t6,t6,20 # 0x24\n" - " 18: 01f5a223 sw t6,4(a1)\n" - " 1c: 00862f83 lw t6,8(a2)\n" - " 20: 000f8067 jr t6\n" - " 24: 01062f83 lw t6,16(a2)\n" - " 28: 02800613 li a2,40\n" - " 2c: 00200693 li a3,2\n" - " 30: 00200713 li a4,2\n" - " 34: 00a00793 li a5,10\n" - " 38: 000f8067 jr t6" + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f9b63 bnez t6,0x20\n" + " e: 00000f97 auipc t6,0x0\n" + " 12: 0fc9 addi t6,t6,18 # 0x20\n" + " 14: 0001 nop\n" + " 16: 01f5a223 sw t6,4(a1)\n" + " 1a: 00862f83 lw t6,8(a2)\n" + " 1e: 8f82 jr t6\n" + " 20: 01062f83 lw t6,16(a2)\n" + " 24: 02400613 li a2,36\n" + " 28: 4689 li a3,2\n" + " 2a: 4709 li a4,2\n" + " 2c: 47a9 li a5,10\n" + " 2e: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -362,9 +364,9 @@ call_primitive_last_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 00062f83 lw t6,0(a2)\n" - " 4: 02a00613 li a2,42\n" - " 8: 000f8067 jr t6" + " 0: 00062f83 lw t6,0(a2)\n" + " 4: 02a00613 li a2,42\n" + " 8: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -386,22 +388,22 @@ return_if_not_equal_to_ctx_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 05462f83 lw t6,84(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 000f80e7 jalr t6\n" - " 1c: 00050f93 mv t6,a0\n" - " 20: 00012083 lw ra,0(sp)\n" - " 24: 00412503 lw a0,4(sp)\n" - " 28: 00812583 lw a1,8(sp)\n" - " 2c: 00c12603 lw a2,12(sp)\n" - " 30: 01010113 addi sp,sp,16\n" - " 34: 00af8663 beq t6,a0,0x40\n" - " 38: 000f8513 mv a0,t6\n" - " 3c: 00008067 ret" + " 0: 05462f83 lw t6,84(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9f82 jalr t6\n" + " 10: 8faa mv t6,a0\n" + " 12: 4082 lw ra,0(sp)\n" + " 14: 4512 lw a0,4(sp)\n" + " 16: 45a2 lw a1,8(sp)\n" + " 18: 4632 lw a2,12(sp)\n" + " 1a: 0141 addi sp,sp,16\n" + " 1c: 00af8463 beq t6,a0,0x24\n" + " 20: 857e mv a0,t6\n" + " 22: 8082 ret" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -418,23 +420,23 @@ return_if_not_equal_to_ctx_test_() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: 05462f83 lw t6,84(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 000f80e7 jalr t6\n" - " 1c: 00050f93 mv t6,a0\n" - " 20: 00012083 lw ra,0(sp)\n" - " 24: 00412503 lw a0,4(sp)\n" - " 28: 00812583 lw a1,8(sp)\n" - " 2c: 00c12603 lw a2,12(sp)\n" - " 30: 01010113 addi sp,sp,16\n" - " 34: 000f8f13 mv t5,t6\n" - " 38: 00af0663 beq t5,a0,0x44\n" - " 3c: 000f0513 mv a0,t5\n" - " 40: 00008067 ret" + " 0: 05462f83 lw t6,84(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9f82 jalr t6\n" + " 10: 8faa mv t6,a0\n" + " 12: 4082 lw ra,0(sp)\n" + " 14: 4512 lw a0,4(sp)\n" + " 16: 45a2 lw a1,8(sp)\n" + " 18: 4632 lw a2,12(sp)\n" + " 1a: 0141 addi sp,sp,16\n" + " 1c: 8f7e mv t5,t6\n" + " 1e: 00af0463 beq t5,a0,0x26\n" + " 22: 857a mv a0,t5\n" + " 24: 8082 ret" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -447,9 +449,9 @@ move_to_cp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 000f2f83 lw t6,0(t5)\n" - " 8: 05f52e23 sw t6,92(a0)" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 000f2f83 lw t6,0(t5)\n" + " 8: 05f52e23 sw t6,92(a0)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -459,9 +461,9 @@ increment_sp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f83 lw t6,20(a0)\n" - " 4: 01cf8f93 addi t6,t6,28\n" - " 8: 01f52a23 sw t6,20(a0)" + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 0ff1 addi t6,t6,28\n" + " 6: 01f52a23 sw t6,20(a0)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -485,10 +487,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000fd463 bgez t6,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000fd363 bgez t6,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -503,10 +505,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01efd463 bge t6,t5,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01efd363 bge t6,t5,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -521,11 +523,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02a00e93 li t4,42\n" - " c: 01dfd463 bge t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01dfd363 bge t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -541,12 +543,12 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 40000e93 li t4,1024\n" - " c: 01dfd463 bge t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2\n" - " 14: 0ec0006f j 0x100" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 40000e93 li t4,1024\n" + " c: 01dfd363 bge t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2\n" + " 12: a0fd j 0x100" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -561,10 +563,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000f9463 bnez t6,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -579,10 +581,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000f9463 bnez t6,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -597,11 +599,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: fff00e93 li t4,-1\n" - " c: 01df9463 bne t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 5efd li t4,-1\n" + " a: 01df9363 bne t6,t4,0x10\n" + " e: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -616,10 +618,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000f9463 bnez t6,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -634,10 +636,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000f9463 bnez t6,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -652,11 +654,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03b00e93 li t4,59\n" - " c: 01df8463 beq t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -671,11 +673,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03b00e93 li t4,59\n" - " c: 01df8463 beq t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -690,11 +692,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02a00e93 li t4,42\n" - " c: 01df8463 beq t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -711,12 +713,12 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 7cb00e93 li t4,1995\n" - " c: 01df8463 beq t6,t4,0x14\n" - " 10: 001f0f13 addi t5,t5,1\n" - " 14: 0ec0006f j 0x100" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 7cb00e93 li t4,1995\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f05 addi t5,t5,1\n" + " 12: a0fd j 0x100" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -730,11 +732,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02a00e93 li t4,42\n" - " c: 01df8463 beq t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -749,11 +751,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03b00e93 li t4,59\n" - " c: 01df9463 bne t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -768,11 +770,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03b00e93 li t4,59\n" - " c: 01df9463 bne t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -787,11 +789,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02a00e93 li t4,42\n" - " c: 01df9463 bne t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -806,11 +808,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02a00e93 li t4,42\n" - " c: 01df9463 bne t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -825,11 +827,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01ff9e93 slli t4,t6,0x1f\n" - " c: 000ec463 bltz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ec363 bltz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -844,11 +846,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01ff9e93 slli t4,t6,0x1f\n" - " c: 000ec463 bltz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ec363 bltz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -863,11 +865,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01ff9e93 slli t4,t6,0x1f\n" - " c: 000ed463 bgez t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ed363 bgez t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -882,11 +884,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01ff9e93 slli t4,t6,0x1f\n" - " c: 000ed463 bgez t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ed363 bgez t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -901,11 +903,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 007ffe93 andi t4,t6,7\n" - " c: 000e8463 beqz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 007ffe93 andi t4,t6,7\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -920,11 +922,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 005ffe93 andi t4,t6,5\n" - " c: 000e8463 beqz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 005ffe93 andi t4,t6,5\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -939,11 +941,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 007ffe93 andi t4,t6,7\n" - " c: 000e8463 beqz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 007ffe93 andi t4,t6,7\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -958,12 +960,12 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: ffffce93 not t4,t6\n" - " c: 01ce9e93 slli t4,t4,0x1c\n" - " 10: 000e8463 beqz t4,0x18\n" - " 14: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: ffffce93 not t4,t6\n" + " c: 0ef2 slli t4,t4,0x1c\n" + " e: 000e8363 beqz t4,0x14\n" + " 12: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -978,12 +980,12 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: ffffcf93 not t6,t6\n" - " c: 01cf9f93 slli t6,t6,0x1c\n" - " 10: 000f8463 beqz t6,0x18\n" - " 14: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: ffffcf93 not t6,t6\n" + " c: 0ff2 slli t6,t6,0x1c\n" + " e: 000f8363 beqz t6,0x14\n" + " 12: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -998,14 +1000,14 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000f8e93 mv t4,t6\n" - " c: 03f00e13 li t3,63\n" - " 10: 01cefeb3 and t4,t4,t3\n" - " 14: 00800e13 li t3,8\n" - " 18: 01ce8463 beq t4,t3,0x20\n" - " 1c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 8efe mv t4,t6\n" + " a: 03f00e13 li t3,63\n" + " e: 01cefeb3 and t4,t4,t3\n" + " 12: 4e21 li t3,8\n" + " 14: 01ce8363 beq t4,t3,0x1a\n" + " 18: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -1020,10 +1022,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01efd463 bge t6,t5,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01efd363 bge t6,t5,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -1044,13 +1046,13 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03f00e93 li t4,63\n" - " c: 01dfffb3 and t6,t6,t4\n" - " 10: 00800e93 li t4,8\n" - " 14: 01df8463 beq t6,t4,0x1c\n" - " 18: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03f00e93 li t4,63\n" + " c: 01dfffb3 and t6,t6,t4\n" + " 10: 4ea1 li t4,8\n" + " 12: 01df8363 beq t6,t4,0x18\n" + " 16: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -1066,11 +1068,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 003ffe93 andi t4,t6,3\n" - " c: 000e8463 beqz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 003ffe93 andi t4,t6,3\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -1095,13 +1097,13 @@ if_else_block_test() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03b00e93 li t4,59\n" - " c: 01df9663 bne t6,t4,0x18\n" - " 10: 002f0f13 addi t5,t5,2\n" - " 14: 0080006f j 0x1c\n" - " 18: 004f0f13 addi t5,t5,4" + "0: 01852f83 lw t6,24(a0)\n" + "4: 01c52f03 lw t5,28(a0)\n" + "8: 03b00e93 li t4,59\n" + "c: 01df9463 bne t6,t4,0x14\n" + "10: 0f09 addi t5,t5,2\n" + "12: a011 j 0x16\n" + "14: 0f11 addi t5,t5,4" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1114,8 +1116,8 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 003fdf93 srli t6,t6,0x3" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 003fdf93 srli t6,t6,0x3" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -1127,8 +1129,8 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 003fdf13 srli t5,t6,0x3" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 003fdf13 srli t5,t6,0x3" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -1141,8 +1143,8 @@ shift_left_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 003f9f93 slli t6,t6,0x3" + "0: 01852f83 lw t6,24(a0)\n" + "4: 0f8e slli t6,t6,0x3" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1160,27 +1162,29 @@ call_only_or_schedule_next_and_label_relocation_test() -> Stream = ?BACKEND:stream(State8), Dump = << - " 0: 00000697 auipc a3,0x0\n" - " 4: 04c68067 jr 76(a3) # 0x4c\n" - " 8: 00000697 auipc a3,0x0\n" - " c: 01068067 jr 16(a3) # 0x18\n" - " 10: 00000697 auipc a3,0x0\n" - " 14: 03468067 jr 52(a3) # 0x44\n" - " 18: 0085af83 lw t6,8(a1)\n" - " 1c: ffff8f93 addi t6,t6,-1\n" - " 20: 01f5a423 sw t6,8(a1)\n" - " 24: 000f8663 beqz t6,0x30\n" - " 28: 01c0006f j 0x44\n" - " 2c: 00000013 nop\n" - " 30: 00000f97 auipc t6,0x0\n" - " 34: 014f8f93 addi t6,t6,20 # 0x44\n" - " 38: 01f5a223 sw t6,4(a1)\n" - " 3c: 00862f83 lw t6,8(a2)\n" - " 40: 000f8067 jr t6\n" - " 44: 00062f83 lw t6,0(a2)\n" - " 48: 000f8067 jr t6\n" - " 4c: 00462f83 lw t6,4(a2)\n" - " 50: 000f8067 jr t6" + " 0: 00000697 auipc a3,0x0\n" + " 4: 04668067 jr 70(a3) # 0x46\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01068067 jr 16(a3) # 0x18\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 03068067 jr 48(a3) # 0x40\n" + " 18: 0085af83 lw t6,8(a1)\n" + " 1c: 1ffd addi t6,t6,-1\n" + " 1e: 01f5a423 sw t6,8(a1)\n" + " 22: 000f8663 beqz t6,0x2e\n" + " 26: a829 j 0x40\n" + " 28: 0001 nop\n" + " 2a: 00000013 nop\n" + " 2e: 00000f97 auipc t6,0x0\n" + " 32: 0fd1 addi t6,t6,20 # 0x42\n" + " 34: 0001 nop\n" + " 36: 01f5a223 sw t6,4(a1)\n" + " 3a: 00862f83 lw t6,8(a2)\n" + " 3e: 8f82 jr t6\n" + " 40: 00062f83 lw t6,0(a2)\n" + " 44: 8f82 jr t6\n" + " 46: 00462f83 lw t6,4(a2)\n" + " 4a: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1207,26 +1211,28 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), State8 = ?BACKEND:update_branches(State7), Stream = ?BACKEND:stream(State8), - % Extract the final section starting at 0x218 (after jump table 24 bytes + 128 loads 512 bytes) - % RISC-V: Jump table is 3×8=24 bytes, loads are 4 bytes each + % Extract the final section starting at 0x118 (after jump table 24 bytes + 128 loads 256 bytes) + % RISC-V: Jump table is 3×8=24 bytes, loads are 2 bytes each (compressed) Dump = << - " 218: 0085af83 lw t6,8(a1)\n" - " 21c: ffff8f93 addi t6,t6,-1\n" - " 220: 01f5a423 sw t6,8(a1)\n" - " 224: 000f8663 beqz t6,0x230\n" - " 228: 01c0006f j 0x244\n" - " 22c: 00000013 nop\n" - " 230: 00000f97 auipc t6,0x0\n" - " 234: 014f8f93 addi t6,t6,20 # 0x244\n" - " 238: 01f5a223 sw t6,4(a1)\n" - " 23c: 00862f83 lw t6,8(a2)\n" - " 240: 000f8067 jr t6\n" - " 244: 00062f83 lw t6,0(a2)\n" - " 248: 000f8067 jr t6\n" - " 24c: 00462f83 lw t6,4(a2)\n" - " 250: 000f8067 jr t6" + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f8663 beqz t6,0x16\n" + " e: a829 j 0x28\n" + " 10: 0001 nop\n" + " 12: 00000013 nop\n" + " 16: 00000f97 auipc t6,0x0\n" + " 1a: 0fd1 addi t6,t6,20 # 0x2a\n" + " 1c: 0001 nop\n" + " 1e: 01f5a223 sw t6,4(a1)\n" + " 22: 00862f83 lw t6,8(a2)\n" + " 26: 8f82 jr t6\n" + " 28: 00062f83 lw t6,0(a2)\n" + " 2c: 8f82 jr t6\n" + " 2e: 00462f83 lw t6,4(a2)\n" + " 32: 8f82 jr t6" >>, - {_, RelevantBinary} = split_binary(Stream, 16#218), + {_, RelevantBinary} = split_binary(Stream, 16#118), ?assertEqual(dump_to_bin(Dump), RelevantBinary). call_bif_with_large_literal_integer_test() -> @@ -1245,59 +1251,59 @@ call_bif_with_large_literal_integer_test() -> Stream = ?BACKEND:stream(State6), Dump = << - " 0: 02062f83 lw t6,32(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 00058513 mv a0,a1\n" - " 1c: 00200593 li a1,2\n" - " 20: 000f80e7 jalr t6\n" - " 24: 00050f93 mv t6,a0\n" - " 28: 00012083 lw ra,0(sp)\n" - " 2c: 00412503 lw a0,4(sp)\n" - " 30: 00812583 lw a1,8(sp)\n" - " 34: 00c12603 lw a2,12(sp)\n" - " 38: 01010113 addi sp,sp,16\n" - " 3c: 03c62f03 lw t5,60(a2)\n" - " 40: fe010113 addi sp,sp,-32\n" - " 44: 00112023 sw ra,0(sp)\n" - " 48: 00a12223 sw a0,4(sp)\n" - " 4c: 00b12423 sw a1,8(sp)\n" - " 50: 00c12623 sw a2,12(sp)\n" - " 54: 01f12823 sw t6,16(sp)\n" - " 58: 3b7ff5b7 lui a1,0x3b7ff\n" - " 5c: 89558593 addi a1,a1,-1899 # 0x3b7fe895\n" - " 60: 000f00e7 jalr t5\n" - " 64: 00050f13 mv t5,a0\n" - " 68: 00012083 lw ra,0(sp)\n" - " 6c: 00412503 lw a0,4(sp)\n" - " 70: 00812583 lw a1,8(sp)\n" - " 74: 00c12603 lw a2,12(sp)\n" - " 78: 01012f83 lw t6,16(sp)\n" - " 7c: 02010113 addi sp,sp,32\n" - " 80: ff010113 addi sp,sp,-16\n" - " 84: 00112023 sw ra,0(sp)\n" - " 88: 00a12223 sw a0,4(sp)\n" - " 8c: 00b12423 sw a1,8(sp)\n" - " 90: 00c12623 sw a2,12(sp)\n" - " 94: 00000593 li a1,0\n" - " 98: 00100613 li a2,1\n" - " 9c: 01852683 lw a3,24(a0)\n" - " a0: 000f0713 mv a4,t5\n" - " a4: 000f80e7 jalr t6\n" - " a8: 00050f93 mv t6,a0\n" - " ac: 00012083 lw ra,0(sp)\n" - " b0: 00412503 lw a0,4(sp)\n" - " b4: 00812583 lw a1,8(sp)\n" - " b8: 00c12603 lw a2,12(sp)\n" - " bc: 01010113 addi sp,sp,16\n" - " c0: 000f9863 bnez t6,0xd0\n" - " c4: 01862f83 lw t6,24(a2)\n" - " c8: 0c800613 li a2,200\n" - " cc: 000f8067 jr t6\n" - " d0: 01f52c23 sw t6,24(a0)" + " 0: 02062f83 lw t6,32(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 852e mv a0,a1\n" + " 10: 4589 li a1,2\n" + " 12: 9f82 jalr t6\n" + " 14: 8faa mv t6,a0\n" + " 16: 4082 lw ra,0(sp)\n" + " 18: 4512 lw a0,4(sp)\n" + " 1a: 45a2 lw a1,8(sp)\n" + " 1c: 4632 lw a2,12(sp)\n" + " 1e: 0141 addi sp,sp,16\n" + " 20: 03c62f03 lw t5,60(a2)\n" + " 24: 1101 addi sp,sp,-32\n" + " 26: c006 sw ra,0(sp)\n" + " 28: c22a sw a0,4(sp)\n" + " 2a: c42e sw a1,8(sp)\n" + " 2c: c632 sw a2,12(sp)\n" + " 2e: c87e sw t6,16(sp)\n" + " 30: 3b7ff5b7 lui a1,0x3b7ff\n" + " 34: 89558593 addi a1,a1,-1899 # 0x3b7fe895\n" + " 38: 9f02 jalr t5\n" + " 3a: 8f2a mv t5,a0\n" + " 3c: 4082 lw ra,0(sp)\n" + " 3e: 4512 lw a0,4(sp)\n" + " 40: 45a2 lw a1,8(sp)\n" + " 42: 4632 lw a2,12(sp)\n" + " 44: 4fc2 lw t6,16(sp)\n" + " 46: 02010113 addi sp,sp,32\n" + " 4a: 1141 addi sp,sp,-16\n" + " 4c: c006 sw ra,0(sp)\n" + " 4e: c22a sw a0,4(sp)\n" + " 50: c42e sw a1,8(sp)\n" + " 52: c632 sw a2,12(sp)\n" + " 54: 4581 li a1,0\n" + " 56: 4605 li a2,1\n" + " 58: 4d14 lw a3,24(a0)\n" + " 5a: 877a mv a4,t5\n" + " 5c: 9f82 jalr t6\n" + " 5e: 8faa mv t6,a0\n" + " 60: 4082 lw ra,0(sp)\n" + " 62: 4512 lw a0,4(sp)\n" + " 64: 45a2 lw a1,8(sp)\n" + " 66: 4632 lw a2,12(sp)\n" + " 68: 0141 addi sp,sp,16\n" + " 6a: 000f9763 bnez t6,0x78\n" + " 6e: 01862f83 lw t6,24(a2)\n" + " 72: 07200613 li a2,114\n" + " 76: 8f82 jr t6\n" + " 78: 01f52c23 sw t6,24(a0)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1310,18 +1316,19 @@ get_list_test() -> State5 = ?BACKEND:free_native_registers(State4, [Reg]), ?BACKEND:assert_all_native_free(State5), Stream = ?BACKEND:stream(State5), - Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 00300f13 li t5,3\n" - " 8: ffff4f13 not t5,t5\n" - " c: 01efffb3 and t6,t6,t5\n" - " 10: 004fae83 lw t4,4(t6)\n" - " 14: 01452f03 lw t5,20(a0)\n" - " 18: 01df2223 sw t4,4(t5)\n" - " 1c: 000fae83 lw t4,0(t6)\n" - " 20: 01452f03 lw t5,20(a0)\n" - " 24: 01df2023 sw t4,0(t5)" - >>, + Dump = + << + "0: 01852f83 lw t6,24(a0)\n" + "4: 4f0d li t5,3\n" + "6: ffff4f13 not t5,t5\n" + "a: 01efffb3 and t6,t6,t5\n" + "e: 004fae83 lw t4,4(t6)\n" + "12: 01452f03 lw t5,20(a0)\n" + "16: 01df2223 sw t4,4(t5)\n" + "1a: 000fae83 lw t4,0(t6)\n" + "1e: 01452f03 lw t5,20(a0)\n" + "22: 01df2023 sw t4,0(t5)" + >>, ?assertEqual(dump_to_bin(Dump), Stream). is_integer_test() -> @@ -1352,29 +1359,32 @@ is_integer_test() -> State4 = ?BACKEND:add_label(State3, Label, 16#100), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), - Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: ffffcf13 not t5,t6\n" - " 8: 01cf1f13 slli t5,t5,0x1c\n" - " c: 040f0463 beqz t5,0x54\n" - " 10: 000f8f13 mv t5,t6\n" - " 14: 00300e93 li t4,3\n" - " 18: 01df7f33 and t5,t5,t4\n" - " 1c: 00200e93 li t4,2\n" - " 20: 01df0663 beq t5,t4,0x2c\n" - " 24: 0dc0006f j 0x100\n" - " 28: 00000013 nop\n" - " 2c: 00300f13 li t5,3\n" - " 30: ffff4f13 not t5,t5\n" - " 34: 01efffb3 and t6,t6,t5\n" - " 38: 000faf83 lw t6,0(t6)\n" - " 3c: 03f00f13 li t5,63\n" - " 40: 01efffb3 and t6,t6,t5\n" - " 44: 00800f13 li t5,8\n" - " 48: 01ef8663 beq t6,t5,0x54\n" - " 4c: 0b40006f j 0x100\n" - " 50: 00000013 nop" - >>, + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: ffffcf13 not t5,t6\n" + " 8: 0f72 slli t5,t5,0x1c\n" + " a: 020f0f63 beqz t5,0x48\n" + " e: 8f7e mv t5,t6\n" + " 10: 4e8d li t4,3\n" + " 12: 01df7f33 and t5,t5,t4\n" + " 16: 4e89 li t4,2\n" + " 18: 01df0663 beq t5,t4,0x24\n" + " 1c: a0d5 j 0x100\n" + " 1e: 0001 nop\n" + " 20: 00000013 nop\n" + " 24: 4f0d li t5,3\n" + " 26: ffff4f13 not t5,t5\n" + " 2a: 01efffb3 and t6,t6,t5\n" + " 2e: 000faf83 lw t6,0(t6)\n" + " 32: 03f00f13 li t5,63\n" + " 36: 01efffb3 and t6,t6,t5\n" + " 3a: 4f21 li t5,8\n" + " 3c: 01ef8663 beq t6,t5,0x48\n" + " 40: a0c1 j 0x100\n" + " 42: 0001 nop\n" + " 44: 00000013 nop" + >>, ?assertEqual(dump_to_bin(Dump), Stream). cond_jump_to_label(Cond, Label, MMod, MSt0) -> @@ -1411,34 +1421,37 @@ is_number_test() -> State4 = ?BACKEND:add_label(State3, Label, 16#100), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), - Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: ffffcf13 not t5,t6\n" - " 8: 01cf1f13 slli t5,t5,0x1c\n" - " c: 040f0e63 beqz t5,0x68\n" - " 10: 000f8f13 mv t5,t6\n" - " 14: 00300e93 li t4,3\n" - " 18: 01df7f33 and t5,t5,t4\n" - " 1c: 00200e93 li t4,2\n" - " 20: 01df0663 beq t5,t4,0x2c\n" - " 24: 0dc0006f j 0x100\n" - " 28: 00000013 nop\n" - " 2c: 00300f13 li t5,3\n" - " 30: ffff4f13 not t5,t5\n" - " 34: 01efffb3 and t6,t6,t5\n" - " 38: 000faf83 lw t6,0(t6)\n" - " 3c: 000f8f13 mv t5,t6\n" - " 40: 03f00e93 li t4,63\n" - " 44: 01df7f33 and t5,t5,t4\n" - " 48: 00800e93 li t4,8\n" - " 4c: 01df0e63 beq t5,t4,0x68\n" - " 50: 03f00f13 li t5,63\n" - " 54: 01efffb3 and t6,t6,t5\n" - " 58: 01800f13 li t5,24\n" - " 5c: 01ef8663 beq t6,t5,0x68\n" - " 60: 0a00006f j 0x100\n" - " 64: 00000013 nop" - >>, + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: ffffcf13 not t5,t6\n" + " 8: 0f72 slli t5,t5,0x1c\n" + " a: 040f0763 beqz t5,0x58\n" + " e: 8f7e mv t5,t6\n" + " 10: 4e8d li t4,3\n" + " 12: 01df7f33 and t5,t5,t4\n" + " 16: 4e89 li t4,2\n" + " 18: 01df0663 beq t5,t4,0x24\n" + " 1c: a0d5 j 0x100\n" + " 1e: 0001 nop\n" + " 20: 00000013 nop\n" + " 24: 4f0d li t5,3\n" + " 26: ffff4f13 not t5,t5\n" + " 2a: 01efffb3 and t6,t6,t5\n" + " 2e: 000faf83 lw t6,0(t6)\n" + " 32: 8f7e mv t5,t6\n" + " 34: 03f00e93 li t4,63\n" + " 38: 01df7f33 and t5,t5,t4\n" + " 3c: 4ea1 li t4,8\n" + " 3e: 01df0d63 beq t5,t4,0x58\n" + " 42: 03f00f13 li t5,63\n" + " 46: 01efffb3 and t6,t6,t5\n" + " 4a: 4f61 li t5,24\n" + " 4c: 01ef8663 beq t6,t5,0x58\n" + " 50: a845 j 0x100\n" + " 52: 0001 nop\n" + " 54: 00000013 nop" + >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_test() -> @@ -1456,13 +1469,14 @@ is_boolean_test() -> State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8a63 beq t6,t5,0x1c\n" - " c: 00b00f13 li t5,11\n" - " 10: 01ef8663 beq t6,t5,0x1c\n" - " 14: 0ec0006f j 0x100\n" - " 18: 00000013 nop" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8963 beq t6,t5,0x1a\n" + " c: 4f2d li t5,11\n" + " e: 01ef8663 beq t6,t5,0x1a\n" + " 12: a0fd j 0x100\n" + " 14: 0001 nop\n" + " 16: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1480,15 +1494,16 @@ is_boolean_far_test() -> State4 = ?BACKEND:add_label(State3, Label, 16#1000), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), - Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8a63 beq t6,t5,0x1c\n" - " c: 00b00f13 li t5,11\n" - " 10: 01ef8663 beq t6,t5,0x1c\n" - " 14: 7ed0006f j 0x1000\n" - " 18: 00000013 nop" - >>, + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8963 beq t6,t5,0x1a\n" + " c: 4f2d li t5,11\n" + " e: 01ef8663 beq t6,t5,0x1a\n" + " 12: 7ef0006f j 0x1000\n" + " 16: 00000013 nop" + >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_far_known_test() -> @@ -1505,15 +1520,16 @@ is_boolean_far_known_test() -> ?BACKEND:assert_all_native_free(State4), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), - Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8a63 beq t6,t5,0x1c\n" - " c: 00b00f13 li t5,11\n" - " 10: 01ef8663 beq t6,t5,0x1c\n" - " 14: 00001f17 auipc t5,0x1\n" - " 18: fecf0067 jr -20(t5) # 0x1000" - >>, + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8963 beq t6,t5,0x1a\n" + " c: 4f2d li t5,11\n" + " e: 01ef8663 beq t6,t5,0x1a\n" + " 12: 00001f17 auipc t5,0x1\n" + " 16: feef0067 jr -18(t5) # 0x1000" + >>, ?assertEqual(dump_to_bin(Dump), Stream). %% Test OP_WAIT_TIMEOUT pattern that uses set_continuation_to_offset and continuation_entry_point @@ -1542,53 +1558,54 @@ wait_timeout_test() -> State10 = ?BACKEND:update_branches(State9), Stream = ?BACKEND:stream(State10), - Dump = << - " 0: 00000f97 auipc t6,0x0\n" - " 4: 024f8f93 addi t6,t6,36\n" - " 8: 01f5a223 sw t6,4(a1)\n" - " c: 00001fb7 lui t6,0x1\n" - " 10: 388f8f93 addi t6,t6,904\n" - " 14: 07862f03 lw t5,120(a2)\n" - " 18: 000f8613 mv a2,t6\n" - " 1c: 02a00693 li a3,42\n" - " 20: 000f0067 jr t5\n" - " 24: 05462f83 lw t6,84(a2)\n" - " 28: ff010113 addi sp,sp,-16\n" - " 2c: 00112023 sw ra,0(sp)\n" - " 30: 00a12223 sw a0,4(sp)\n" - " 34: 00b12423 sw a1,8(sp)\n" - " 38: 00c12623 sw a2,12(sp)\n" - " 3c: 000f80e7 jalr t6\n" - " 40: 00050f93 mv t6,a0\n" - " 44: 00012083 lw ra,0(sp)\n" - " 48: 00412503 lw a0,4(sp)\n" - " 4c: 00812583 lw a1,8(sp)\n" - " 50: 00c12603 lw a2,12(sp)\n" - " 54: 01010113 addi sp,sp,16\n" - " 58: 00af8663 beq t6,a0,0x60\n" - " 5c: 000f8513 mv a0,t6\n" - " 60: 00008067 ret\n" - " 64: 08400f93 li t6,132\n" - " 68: 00cf8fb3 add t6,t6,a2\n" - " 6c: 000faf83 lw t6,0(t6)\n" - " 70: ff010113 addi sp,sp,-16\n" - " 74: 00112023 sw ra,0(sp)\n" - " 78: 00a12223 sw a0,4(sp)\n" - " 7c: 00b12423 sw a1,8(sp)\n" - " 80: 00c12623 sw a2,12(sp)\n" - " 84: 00200593 li a1,2\n" - " 88: 000f80e7 jalr t6\n" - " 8c: 00050f93 mv t6,a0\n" - " 90: 00012083 lw ra,0(sp)\n" - " 94: 00412503 lw a0,4(sp)\n" - " 98: 00812583 lw a1,8(sp)\n" - " 9c: 00c12603 lw a2,12(sp)\n" - " a0: 01010113 addi sp,sp,16\n" - " a4: 000f9863 bnez t6,0xb4\n" - " a8: 07c62f83 lw t6,124(a2)\n" - " ac: 02a00613 li a2,42\n" - " b0: 000f8067 jr t6" - >>, + Dump = + << + " 0: 00000f97 auipc t6,0x0\n" + " 4: 020f8f93 addi t6,t6,32 # 0x20\n" + " 8: 01f5a223 sw t6,4(a1)\n" + " c: 6f85 lui t6,0x1\n" + " e: 388f8f93 addi t6,t6,904 # 0x1388\n" + " 12: 07862f03 lw t5,120(a2)\n" + " 16: 867e mv a2,t6\n" + " 18: 02a00693 li a3,42\n" + " 1c: 8f02 jr t5\n" + " 1e: 05462f83 lw t6,84(a2)\n" + " 22: 1141 addi sp,sp,-16\n" + " 24: c006 sw ra,0(sp)\n" + " 26: c22a sw a0,4(sp)\n" + " 28: c42e sw a1,8(sp)\n" + " 2a: c632 sw a2,12(sp)\n" + " 2c: 9f82 jalr t6\n" + " 2e: 8faa mv t6,a0\n" + " 30: 4082 lw ra,0(sp)\n" + " 32: 4512 lw a0,4(sp)\n" + " 34: 45a2 lw a1,8(sp)\n" + " 36: 4632 lw a2,12(sp)\n" + " 38: 0141 addi sp,sp,16\n" + " 3a: 00af8463 beq t6,a0,0x42\n" + " 3e: 857e mv a0,t6\n" + " 40: 8082 ret\n" + " 42: 08400f93 li t6,132\n" + " 46: 9fb2 add t6,t6,a2\n" + " 48: 000faf83 lw t6,0(t6)\n" + " 4c: 1141 addi sp,sp,-16\n" + " 4e: c006 sw ra,0(sp)\n" + " 50: c22a sw a0,4(sp)\n" + " 52: c42e sw a1,8(sp)\n" + " 54: c632 sw a2,12(sp)\n" + " 56: 4589 li a1,2\n" + " 58: 9f82 jalr t6\n" + " 5a: 8faa mv t6,a0\n" + " 5c: 4082 lw ra,0(sp)\n" + " 5e: 4512 lw a0,4(sp)\n" + " 60: 45a2 lw a1,8(sp)\n" + " 62: 4632 lw a2,12(sp)\n" + " 64: 0141 addi sp,sp,16\n" + " 66: 000f9763 bnez t6,0x74\n" + " 6a: 07c62f83 lw t6,124(a2)\n" + " 6e: 02a00613 li a2,42\n" + " 72: 8f82 jr t6" + >>, ?assertEqual(dump_to_bin(Dump), Stream). %% Test OP_WAIT pattern that uses set_continuation_to_label @@ -1602,25 +1619,26 @@ wait_test() -> State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), Stream = ?BACKEND:stream(State4), - Dump = << - " 0: 00000697 auipc a3,0x0\n" - " 4: 00068067 jr a3\n" - " 8: 00000697 auipc a3,0x0\n" - " c: 00068067 jr a3\n" - " 10: 00000697 auipc a3,0x0\n" - " 14: 00068067 jr a3\n" - " 18: 00000697 auipc a3,0x0\n" - " 1c: 00068067 jr a3\n" - " 20: 00000697 auipc a3,0x0\n" - " 24: 00068067 jr a3\n" - " 28: 00000697 auipc a3,0x0\n" - " 2c: 00068067 jr a3\n" - " 30: 00000f97 auipc t6,0x0\n" - " 34: 004f8f93 addi t6,t6,4\n" - " 38: 01f5a223 sw t6,4(a1)\n" - " 3c: 07462f83 lw t6,116(a2)\n" - " 40: 000f8067 jr t6" - >>, + Dump = + << + " 0: ffffffff .insn 4, 0xffffffff\n" + " 4: ffffffff .insn 4, 0xffffffff\n" + " 6: ffffffff .insn 4, 0xffffffff\n" + " a: ffffffff .insn 4, 0xffffffff\n" + " c: ffffffff .insn 4, 0xffffffff\n" + " 10: ffffffff .insn 4, 0xffffffff\n" + " 12: ffffffff .insn 4, 0xffffffff\n" + " 16: ffffffff .insn 4, 0xffffffff\n" + " 18: ffffffff .insn 4, 0xffffffff\n" + " 1c: ffffffff .insn 4, 0xffffffff\n" + " 1e: ffffffff .insn 4, 0xffffffff\n" + " 22: ffffffff .insn 4, 0xffffffff\n" + " 24: ffffffff .insn 4, 0xffffffff\n" + " 28: ffffffff .insn 4, 0xffffffff\n" + " 2c: 01f5a223 sw t6,4(a1)\n" + " 30: 07462f83 lw t6,116(a2)\n" + " 34: 8f82 jr t6" + >>, ?assertEqual(dump_to_bin(Dump), Stream). %% Test return_labels_and_lines/2 function @@ -1638,31 +1656,32 @@ return_labels_and_lines_test() -> Stream = ?BACKEND:stream(State3), % Should have generated auipc + addi + ret + labels table + lines table - % auipc = 4 bytes, addi = 4 bytes, ret = 4 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes - % Total minimum: 36 bytes - ?assert(byte_size(Stream) >= 36), + % auipc = 4 bytes, addi = 2 bytes (compressed), ret = 2 bytes, labels table = 3*2*2 = 12 bytes, lines table = 3*2*2 = 12 bytes + % Total: 4 + 2 + 2 + 12 + 12 = 32 bytes + ?assert(byte_size(Stream) >= 32), % Expected: auipc a0, 0 + addi a0, a0, 12 + ret + labels table + lines table - % The data tables start at offset 0xC (12), so we load PC + 12 into a0 - Dump = << - " 0: 00000517 auipc a0,0x0\n" - " 4: 00c50513 addi a0,a0,12\n" - " 8: 00008067 ret\n" - " c: 0200 addi s0,sp,256\n" - " e: 0100 addi s0,sp,128\n" - " 10: 0000 unimp\n" - " 12: 1000 addi s0,sp,32\n" - " 14: 0200 addi s0,sp,256\n" - " 16: 0000 unimp\n" - " 18: 2000 fld fs0,0(s0)\n" - " 1a: 0200 addi s0,sp,256\n" - " 1c: 0a00 addi s0,sp,272\n" - " 1e: 0000 unimp\n" - " 20: 1000 addi s0,sp,32\n" - " 22: 1400 addi s0,sp,544\n" - " 24: 0000 unimp\n" - " 26: 2000 fld fs0,0(s0)" - >>, + % The data tables start at offset 0x8 (8), but we need offset 0xC (12) because of alignment + Dump = + << + " 0: 00000517 auipc a0,0x0\n" + " 4: 0531 addi a0,a0,12 # 0xc\n" + " 6: 8082 ret\n" + " 8: 0200 addi s0,sp,256\n" + " a: 0100 addi s0,sp,128\n" + " c: 0000 unimp\n" + " e: 1000 addi s0,sp,32\n" + " 10: 0200 addi s0,sp,256\n" + " 12: 0000 unimp\n" + " 14: 2000 fld fs0,0(s0)\n" + " 16: 0200 addi s0,sp,256\n" + " 18: 0a00 addi s0,sp,272\n" + " 1a: 0000 unimp\n" + " 1c: 1000 addi s0,sp,32\n" + " 1e: 1400 addi s0,sp,544\n" + " 20: 0000 unimp\n" + " 22: 2000 fld fs0,0(s0)" + >>, ?assertEqual(dump_to_bin(Dump), Stream). %% Test call_primitive with {free, {x_reg, X}} @@ -1674,40 +1693,41 @@ gc_bif2_test() -> ]), Stream = ?BACKEND:stream(State2), - Dump = << - " 0: 02062f83 lw t6,32(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 00058513 mv a0,a1\n" - " 1c: 02a00593 li a1,42\n" - " 20: 000f80e7 jalr t6\n" - " 24: 00050f93 mv t6,a0\n" - " 28: 00012083 lw ra,0(sp)\n" - " 2c: 00412503 lw a0,4(sp)\n" - " 30: 00812583 lw a1,8(sp)\n" - " 34: 00c12603 lw a2,12(sp)\n" - " 38: 01010113 addi sp,sp,16\n" - " 3c: ff010113 addi sp,sp,-16\n" - " 40: 00112023 sw ra,0(sp)\n" - " 44: 00a12223 sw a0,4(sp)\n" - " 48: 00b12423 sw a1,8(sp)\n" - " 4c: 00c12623 sw a2,12(sp)\n" - " 50: 00000593 li a1,0\n" - " 54: 00300613 li a2,3\n" - " 58: 01452f03 lw t5,20(a0)\n" - " 5c: 000f2683 lw a3,0(t5)\n" - " 60: 01852703 lw a4,24(a0)\n" - " 64: 000f80e7 jalr t6\n" - " 68: 00050f93 mv t6,a0\n" - " 6c: 00012083 lw ra,0(sp)\n" - " 70: 00412503 lw a0,4(sp)\n" - " 74: 00812583 lw a1,8(sp)\n" - " 78: 00c12603 lw a2,12(sp)\n" - " 7c: 01010113 addi sp,sp,16" - >>, + Dump = + << + " 0: 02062f83 lw t6,32(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 852e mv a0,a1\n" + " 10: 02a00593 li a1,42\n" + " 14: 9f82 jalr t6\n" + " 16: 8faa mv t6,a0\n" + " 18: 4082 lw ra,0(sp)\n" + " 1a: 4512 lw a0,4(sp)\n" + " 1c: 45a2 lw a1,8(sp)\n" + " 1e: 4632 lw a2,12(sp)\n" + " 20: 0141 addi sp,sp,16\n" + " 22: 1141 addi sp,sp,-16\n" + " 24: c006 sw ra,0(sp)\n" + " 26: c22a sw a0,4(sp)\n" + " 28: c42e sw a1,8(sp)\n" + " 2a: c632 sw a2,12(sp)\n" + " 2c: 4581 li a1,0\n" + " 2e: 460d li a2,3\n" + " 30: 01452f03 lw t5,20(a0)\n" + " 34: 000f2683 lw a3,0(t5)\n" + " 38: 4d18 lw a4,24(a0)\n" + " 3a: 9f82 jalr t6\n" + " 3c: 8faa mv t6,a0\n" + " 3e: 4082 lw ra,0(sp)\n" + " 40: 4512 lw a0,4(sp)\n" + " 42: 45a2 lw a1,8(sp)\n" + " 44: 4632 lw a2,12(sp)\n" + " 46: 0141 addi sp,sp,16" + >>, ?assertEqual(dump_to_bin(Dump), Stream). %% Test case where parameter value is in a1 @@ -1718,27 +1738,28 @@ memory_ensure_free_with_roots_test() -> ]), Stream = ?BACKEND:stream(State1), - Dump = << - " 0: 0b000f93 li t6,176\n" - " 4: 00cf8fb3 add t6,t6,a2\n" - " 8: 000faf83 lw t6,0(t6)\n" - " c: ff010113 addi sp,sp,-16\n" - " 10: 00112023 sw ra,0(sp)\n" - " 14: 00a12223 sw a0,4(sp)\n" - " 18: 00b12423 sw a1,8(sp)\n" - " 1c: 00c12623 sw a2,12(sp)\n" - " 20: 00058f13 mv t5,a1\n" - " 24: 000f0613 mv a2,t5\n" - " 28: 00400693 li a3,4\n" - " 2c: 00100713 li a4,1\n" - " 30: 000f80e7 jalr t6\n" - " 34: 00050f93 mv t6,a0\n" - " 38: 00012083 lw ra,0(sp)\n" - " 3c: 00412503 lw a0,4(sp)\n" - " 40: 00812583 lw a1,8(sp)\n" - " 44: 00c12603 lw a2,12(sp)\n" - " 48: 01010113 addi sp,sp,16" - >>, + Dump = + << + " 0: 0b000f93 li t6,176\n" + " 4: 9fb2 add t6,t6,a2\n" + " 6: 000faf83 lw t6,0(t6)\n" + " a: 1141 addi sp,sp,-16\n" + " c: c006 sw ra,0(sp)\n" + " e: c22a sw a0,4(sp)\n" + " 10: c42e sw a1,8(sp)\n" + " 12: c632 sw a2,12(sp)\n" + " 14: 8f2e mv t5,a1\n" + " 16: 867a mv a2,t5\n" + " 18: 4691 li a3,4\n" + " 1a: 4705 li a4,1\n" + " 1c: 9f82 jalr t6\n" + " 1e: 8faa mv t6,a0\n" + " 20: 4082 lw ra,0(sp)\n" + " 22: 4512 lw a0,4(sp)\n" + " 24: 45a2 lw a1,8(sp)\n" + " 26: 4632 lw a2,12(sp)\n" + " 28: 0141 addi sp,sp,16" + >>, ?assertEqual(dump_to_bin(Dump), Stream). call_ext_test() -> @@ -1747,29 +1768,31 @@ call_ext_test() -> State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]), ?BACKEND:assert_all_native_free(State2), Stream = ?BACKEND:stream(State2), - Dump = << - " 0: 0085af83 lw t6,8(a1)\n" - " 4: ffff8f93 addi t6,t6,-1\n" - " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9c63 bnez t6,0x24\n" - " 10: 00000f97 auipc t6,0x0\n" - " 14: 014f8f93 addi t6,t6,20 # 0x24\n" - " 18: 01f5a223 sw t6,4(a1)\n" - " 1c: 00862f83 lw t6,8(a2)\n" - " 20: 000f8067 jr t6\n" - " 24: 0005af03 lw t5,0(a1)\n" - " 28: 000f2f03 lw t5,0(t5)\n" - " 2c: 018f1f13 slli t5,t5,0x18\n" - " 30: 15000f93 li t6,336\n" - " 34: 00000013 nop\n" - " 38: 01ff6f33 or t5,t5,t6\n" - " 3c: 05e52e23 sw t5,92(a0)\n" - " 40: 01062f83 lw t6,16(a2)\n" - " 44: 00200613 li a2,2\n" - " 48: 00500693 li a3,5\n" - " 4c: fff00713 li a4,-1\n" - " 50: 000f8067 jr t6" - >>, + Dump = + << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f9b63 bnez t6,0x20\n" + " e: 00000f97 auipc t6,0x0\n" + " 12: 0fc9 addi t6,t6,18 # 0x20\n" + " 14: 0001 nop\n" + " 16: 01f5a223 sw t6,4(a1)\n" + " 1a: 00862f83 lw t6,8(a2)\n" + " 1e: 8f82 jr t6\n" + " 20: 0005af03 lw t5,0(a1)\n" + " 24: 000f2f03 lw t5,0(t5)\n" + " 28: 0f62 slli t5,t5,0x18\n" + " 2a: 11800f93 li t6,280\n" + " 2e: 00000013 nop\n" + " 32: 01ff6f33 or t5,t5,t6\n" + " 36: 05e52e23 sw t5,92(a0)\n" + " 3a: 01062f83 lw t6,16(a2)\n" + " 3e: 4609 li a2,2\n" + " 40: 4695 li a3,5\n" + " 42: 577d li a4,-1\n" + " 44: 8f82 jr t6" + >>, ?assertEqual(dump_to_bin(Dump), Stream). call_fun_test() -> @@ -1801,56 +1824,58 @@ call_fun_test() -> ]), ?BACKEND:assert_all_native_free(State9), Stream = ?BACKEND:stream(State9), - Dump = << - " 0: 0085af83 lw t6,8(a1)\n" - " 4: ffff8f93 addi t6,t6,-1\n" - " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9c63 bnez t6,0x24\n" - " 10: 00000f97 auipc t6,0x0\n" - " 14: 014f8f93 addi t6,t6,20 # 0x24\n" - " 18: 01f5a223 sw t6,4(a1)\n" - " 1c: 00862f83 lw t6,8(a2)\n" - " 20: 000f8067 jr t6\n" - " 24: 01852f83 lw t6,24(a0)\n" - " 28: 000f8f13 mv t5,t6\n" - " 2c: 000f0e93 mv t4,t5\n" - " 30: 00300e13 li t3,3\n" - " 34: 01cefeb3 and t4,t4,t3\n" - " 38: 00200e13 li t3,2\n" - " 3c: 01ce8c63 beq t4,t3,0x54\n" - " 40: 04c62f83 lw t6,76(a2)\n" - " 44: 04400613 li a2,68\n" - " 48: 18b00693 li a3,395\n" - " 4c: 000f0713 mv a4,t5\n" - " 50: 000f8067 jr t6\n" - " 54: 00300e93 li t4,3\n" - " 58: fffece93 not t4,t4\n" - " 5c: 01df7f33 and t5,t5,t4\n" - " 60: 000f2f03 lw t5,0(t5)\n" - " 64: 000f0e93 mv t4,t5\n" - " 68: 03f00e13 li t3,63\n" - " 6c: 01cefeb3 and t4,t4,t3\n" - " 70: 01400e13 li t3,20\n" - " 74: 01ce8c63 beq t4,t3,0x8c\n" - " 78: 04c62f83 lw t6,76(a2)\n" - " 7c: 07c00613 li a2,124\n" - " 80: 18b00693 li a3,395\n" - " 84: 000f0713 mv a4,t5\n" - " 88: 000f8067 jr t6\n" - " 8c: 0005ae83 lw t4,0(a1)\n" - " 90: 000eae83 lw t4,0(t4)\n" - " 94: 018e9e93 slli t4,t4,0x18\n" - " 98: 30000f13 li t5,768\n" - " 9c: 00000013 nop\n" - " a0: 01eeeeb3 or t4,t4,t5\n" - " a4: 05d52e23 sw t4,92(a0)\n" - " a8: 08000f13 li t5,128\n" - " ac: 00cf0f33 add t5,t5,a2\n" - " b0: 000f2f03 lw t5,0(t5)\n" - " b4: 000f8613 mv a2,t6\n" - " b8: 00000693 li a3,0\n" - " bc: 000f0067 jr t5" - >>, + Dump = + << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f9b63 bnez t6,0x20\n" + " e: 00000f97 auipc t6,0x0\n" + " 12: 0fc9 addi t6,t6,18 # 0x20\n" + " 14: 0001 nop\n" + " 16: 01f5a223 sw t6,4(a1)\n" + " 1a: 00862f83 lw t6,8(a2)\n" + " 1e: 8f82 jr t6\n" + " 20: 01852f83 lw t6,24(a0)\n" + " 24: 8f7e mv t5,t6\n" + " 26: 8efa mv t4,t5\n" + " 28: 4e0d li t3,3\n" + " 2a: 01cefeb3 and t4,t4,t3\n" + " 2e: 4e09 li t3,2\n" + " 30: 01ce8a63 beq t4,t3,0x44\n" + " 34: 04c62f83 lw t6,76(a2)\n" + " 38: 03800613 li a2,56\n" + " 3c: 18b00693 li a3,395\n" + " 40: 877a mv a4,t5\n" + " 42: 8f82 jr t6\n" + " 44: 4e8d li t4,3\n" + " 46: fffece93 not t4,t4\n" + " 4a: 01df7f33 and t5,t5,t4\n" + " 4e: 000f2f03 lw t5,0(t5)\n" + " 52: 8efa mv t4,t5\n" + " 54: 03f00e13 li t3,63\n" + " 58: 01cefeb3 and t4,t4,t3\n" + " 5c: 4e51 li t3,20\n" + " 5e: 01ce8a63 beq t4,t3,0x72\n" + " 62: 04c62f83 lw t6,76(a2)\n" + " 66: 06600613 li a2,102\n" + " 6a: 18b00693 li a3,395\n" + " 6e: 877a mv a4,t5\n" + " 70: 8f82 jr t6\n" + " 72: 0005ae83 lw t4,0(a1)\n" + " 76: 000eae83 lw t4,0(t4)\n" + " 7a: 0ee2 slli t4,t4,0x18\n" + " 7c: 27000f13 li t5,624\n" + " 80: 00000013 nop\n" + " 84: 01eeeeb3 or t4,t4,t5\n" + " 88: 05d52e23 sw t4,92(a0)\n" + " 8c: 08000f13 li t5,128\n" + " 90: 9f32 add t5,t5,a2\n" + " 92: 000f2f03 lw t5,0(t5)\n" + " 96: 867e mv a2,t6\n" + " 98: 4681 li a3,0\n" + " 9a: 8f02 jr t5" + >>, ?assertEqual(dump_to_bin(Dump), Stream). move_to_vm_register_test0(State, Source, Dest, Dump) -> @@ -1868,252 +1893,252 @@ move_to_vm_register_test_() -> [ ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, 0}, << - " 0: 00000f93 li t6,0\n" - " 4: 01f52c23 sw t6,24(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 4f81 li t6,0\n" + " 2: 01f52c23 sw t6,24(a0)\n" + " 6: a8ed j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, extra}, << - " 0: 00000f93 li t6,0\n" - " 4: 05f52c23 sw t6,88(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 4f81 li t6,0\n" + " 2: 05f52c23 sw t6,88(a0)\n" + " 6: a8ed j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {ptr, t5}, << - " 0: 00000f93 li t6,0\n" - " 4: 01ff2023 sw t6,0(t5)\n" - " 8: 0f80006f j 0x100" + " 0: 4f81 li t6,0\n" + " 2: 01ff2023 sw t6,0(t5)\n" + " 6: a8ed j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << - " 0: 00000f13 li t5,0\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 01efa423 sw t5,8(t6)\n" - " c: 0f40006f j 0x100" + " 0: 4f01 li t5,0\n" + " 2: 01452f83 lw t6,20(a0)\n" + " 6: 01efa423 sw t5,8(t6)\n" + " a: a8dd j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << - " 0: 00000f13 li t5,0\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 05efa823 sw t5,80(t6)\n" - " c: 0f40006f j 0x100" + " 0: 4f01 li t5,0\n" + " 2: 01452f83 lw t6,20(a0)\n" + " 6: 05efa823 sw t5,80(t6)\n" + " a: a8dd j 0x100" >>) end), %% Test: Immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << - " 0: 02a00f93 li t6,42\n" - " 4: 01f52c23 sw t6,24(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 02a00f93 li t6,42\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: a8e5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, extra}, << - " 0: 02a00f93 li t6,42\n" - " 4: 05f52c23 sw t6,88(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 02a00f93 li t6,42\n" + " 4: 05f52c23 sw t6,88(a0)\n" + " 8: a8e5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 2}, << - " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 01efa423 sw t5,8(t6)\n" - " c: 0f40006f j 0x100" + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 01efa423 sw t5,8(t6)\n" + " c: a8d5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << - " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 05efa823 sw t5,80(t6)\n" - " c: 0f40006f j 0x100" + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 05efa823 sw t5,80(t6)\n" + " c: a8d5 j 0x100" >>) end), %% Test: Immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 99, {ptr, a3}, << - " 0: 06300f93 li t6,99\n" - " 4: 01f6a023 sw t6,0(a3)\n" - " 8: 0f80006f j 0x100" + " 0: 06300f93 li t6,99\n" + " 4: 01f6a023 sw t6,0(a3)\n" + " 8: a8e5 j 0x100" >>) end), %% Test: x_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << - " 0: 01c52f83 lw t6,28(a0)\n" - " 4: 03f52023 sw t6,32(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 01c52f83 lw t6,28(a0)\n" + " 4: 03f52023 sw t6,32(a0)\n" + " 8: a8e5 j 0x100" >>) end), %% Test: x_reg to ptr ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, a1}, << - " 0: 01c52f83 lw t6,28(a0)\n" - " 4: 01f5a023 sw t6,0(a1)\n" - " 8: 0f80006f j 0x100" + " 0: 01c52f83 lw t6,28(a0)\n" + " 4: 01f5a023 sw t6,0(a1)\n" + " 8: a8e5 j 0x100" >>) end), %% Test: ptr to x_reg ?_test(begin move_to_vm_register_test0(State0, {ptr, t3}, {x_reg, 3}, << - " 0: 000e2f83 lw t6,0(t3)\n" - " 4: 03f52223 sw t6,36(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 000e2f83 lw t6,0(t3)\n" + " 4: 03f52223 sw t6,36(a0)\n" + " 8: a8e5 j 0x100" >>) end), %% Test: x_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01452f03 lw t5,20(a0)\n" - " 8: 01ff2223 sw t6,4(t5)\n" - " c: 0f40006f j 0x100" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01452f03 lw t5,20(a0)\n" + " 8: 01ff2223 sw t6,4(t5)\n" + " c: a8d5 j 0x100" >>) end), %% Test: y_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 000f2f83 lw t6,0(t5)\n" - " 8: 03f52223 sw t6,36(a0)\n" - " c: 0f40006f j 0x100" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 000f2f83 lw t6,0(t5)\n" + " 8: 03f52223 sw t6,36(a0)\n" + " c: a8d5 j 0x100" >>) end), %% Test: y_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 004f2f83 lw t6,4(t5)\n" - " 8: 03f52223 sw t6,36(a0)\n" - " c: 0f40006f j 0x100" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 004f2f83 lw t6,4(t5)\n" + " 8: 03f52223 sw t6,36(a0)\n" + " c: a8d5 j 0x100" >>) end), %% Test: Native register to x_reg ?_test(begin move_to_vm_register_test0(State0, t4, {x_reg, 0}, << - " 0: 01d52c23 sw t4,24(a0)\n" - " 4: 0fc0006f j 0x100" + " 0: 01d52c23 sw t4,24(a0)\n" + " 4: a8f5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, t5, {x_reg, extra}, << - " 0: 05e52c23 sw t5,88(a0)\n" - " 4: 0fc0006f j 0x100" + " 0: 05e52c23 sw t5,88(a0)\n" + " 4: a8f5 j 0x100" >>) end), %% Test: Native register to ptr ?_test(begin move_to_vm_register_test0(State0, t3, {ptr, a3}, << - " 0: 01c6a023 sw t3,0(a3)\n" - " 4: 0fc0006f j 0x100" + " 0: 01c6a023 sw t3,0(a3)\n" + " 4: a8f5 j 0x100" >>) end), %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, a1, {y_reg, 0}, << - " 0: 01452f83 lw t6,20(a0)\n" - " 4: 00bfa023 sw a1,0(t6)\n" - " 8: 0f80006f j 0x100" + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 00bfa023 sw a1,0(t6)\n" + " 8: a8e5 j 0x100" >>) end), %% Test: Large immediate to x_reg (uses lui + addi in RISC-V) ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << - " 0: 12345fb7 lui t6,0x12345\n" - " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01f52c23 sw t6,24(a0)\n" - " c: 0f40006f j 0x100" + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01f52c23 sw t6,24(a0)\n" + " c: a8d5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, << - " 0: 12345fb7 lui t6,0x12345\n" - " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 05f52c23 sw t6,88(a0)\n" - " c: 0f40006f j 0x100" + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 05f52c23 sw t6,88(a0)\n" + " c: a8d5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << - " 0: 12345fb7 lui t6,0x12345\n" - " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01452f03 lw t5,20(a0)\n" - " c: 01ff2423 sw t6,8(t5)\n" - " 10: 0f00006f j 0x100" + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01452f03 lw t5,20(a0)\n" + " c: 01ff2423 sw t6,8(t5)\n" + " 10: a8c5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << - " 0: 12345fb7 lui t6,0x12345\n" - " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01452f03 lw t5,20(a0)\n" - " c: 05ff2823 sw t6,80(t5)\n" - " 10: 0f00006f j 0x100" + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01452f03 lw t5,20(a0)\n" + " c: 05ff2823 sw t6,80(t5)\n" + " 10: a8c5 j 0x100" >>) end), %% Test: Large immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {ptr, a3}, << - " 0: 12345fb7 lui t6,0x12345\n" - " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01f6a023 sw t6,0(a3)\n" - " c: 0f40006f j 0x100" + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01f6a023 sw t6,0(a3)\n" + " c: a8d5 j 0x100" >>) end), %% Test: x_reg to y_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << - " 0: 05452f83 lw t6,84(a0)\n" - " 4: 01452f03 lw t5,20(a0)\n" - " 8: 07ff2e23 sw t6,124(t5)\n" - " c: 0f40006f j 0x100" + " 0: 05452f83 lw t6,84(a0)\n" + " 4: 01452f03 lw t5,20(a0)\n" + " 8: 07ff2e23 sw t6,124(t5)\n" + " c: a8d5 j 0x100" >>) end), %% Test: y_reg to x_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 07cf2f83 lw t6,124(t5)\n" - " 8: 05f52a23 sw t6,84(a0)\n" - " c: 0f40006f j 0x100" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 07cf2f83 lw t6,124(t5)\n" + " 8: 05f52a23 sw t6,84(a0)\n" + " c: a8d5 j 0x100" >>) end), %% Test: Large y_reg index (32) that exceeds str immediate offset limit ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 32}, << - " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 08000e93 li t4,128\n" - " c: 01fe8eb3 add t4,t4,t6\n" - " 10: 01eea023 sw t5,0(t4)\n" - " 14: 0ec0006f j 0x100" + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 08000e93 li t4,128\n" + " c: 9efe add t4,t4,t6\n" + " e: 01eea023 sw t5,0(t4)\n" + " 12: a0fd j 0x100" >>) end), %% Test: Negative immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, -1, {x_reg, 0}, << - " 0: fff00f93 li t6,-1\n" - " 4: 01f52c23 sw t6,24(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 5ffd li t6,-1\n" + " 2: 01f52c23 sw t6,24(a0)\n" + " 6: a8ed j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, -100, {x_reg, 0}, << - " 0: f9c00f93 li t6,-100\n" - " 4: 01f52c23 sw t6,24(a0)\n" - " 8: 0f80006f j 0x100" + " 0: f9c00f93 li t6,-100\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: a8e5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, -1000, {x_reg, 0}, << - " 0: c1800f93 li t6,-1000\n" - " 4: 01f52c23 sw t6,24(a0)\n" - " 8: 0f80006f j 0x100" + " 0: c1800f93 li t6,-1000\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: a8e5 j 0x100" >>) end) ] @@ -2134,87 +2159,87 @@ move_array_element_test_() -> %% move_array_element: reg[x] to x_reg ?_test(begin move_array_element_test0(State0, a3, 2, {x_reg, 0}, << - " 0: 0086af83 lw t6,8(a3)\n" - " 4: 01f52c23 sw t6,24(a0)" + " 0: 0086af83 lw t6,8(a3)\n" + " 4: 01f52c23 sw t6,24(a0)" >>) end), %% move_array_element: reg[x] to ptr ?_test(begin move_array_element_test0(State0, a3, 3, {ptr, t4}, << - " 0: 00c6af83 lw t6,12(a3)\n" - " 4: 01fea023 sw t6,0(t4)" + " 0: 00c6af83 lw t6,12(a3)\n" + " 4: 01fea023 sw t6,0(t4)" >>) end), %% move_array_element: reg[x] to y_reg ?_test(begin move_array_element_test0(State0, a3, 1, {y_reg, 2}, << - " 0: 0046af03 lw t5,4(a3)\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 01efa423 sw t5,8(t6)" + " 0: 0046af03 lw t5,4(a3)\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 01efa423 sw t5,8(t6)" >>) end), %% move_array_element: reg[x] to native reg (t4) ?_test(begin move_array_element_test0(State0, a3, 1, t4, << - " 0: 0046ae83 lw t4,4(a3)" + " 0: 0046ae83 lw t4,4(a3)" >>) end), %% move_array_element: reg[x] to y_reg ?_test(begin move_array_element_test0(State0, a3, 7, {y_reg, 31}, << - " 0: 01c6af03 lw t5,28(a3)\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 07efae23 sw t5,124(t6)" + " 0: 01c6af03 lw t5,28(a3)\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 07efae23 sw t5,124(t6)" >>) end), %% move_array_element: reg[x] to x_reg ?_test(begin move_array_element_test0(State0, a3, 7, {x_reg, 15}, << - " 0: 01c6af83 lw t6,28(a3)\n" - " 4: 05f52a23 sw t6,84(a0)" + " 0: 01c6af83 lw t6,28(a3)\n" + " 4: 05f52a23 sw t6,84(a0)" >>) end), %% move_array_element: reg_x[reg_y] to x_reg ?_test(begin {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), move_array_element_test0(State1, a3, {free, Reg}, {x_reg, 2}, << - " 0: 0106af83 lw t6,16(a3)\n" - " 4: 002f9f93 slli t6,t6,0x2\n" - " 8: 01f68fb3 add t6,a3,t6\n" - " c: 000faf83 lw t6,0(t6)\n" - " 10: 03f52023 sw t6,32(a0)" + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 01f68fb3 add t6,a3,t6\n" + " a: 000faf83 lw t6,0(t6)\n" + " e: 03f52023 sw t6,32(a0)" >>) end), %% move_array_element: reg_x[reg_y] to pointer (large x reg) ?_test(begin {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), move_array_element_test0(State1, a3, {free, Reg}, {ptr, t4}, << - " 0: 0106af83 lw t6,16(a3)\n" - " 4: 002f9f93 slli t6,t6,0x2\n" - " 8: 01f68fb3 add t6,a3,t6\n" - " c: 000faf83 lw t6,0(t6)\n" - " 10: 01fea023 sw t6,0(t4)" + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 01f68fb3 add t6,a3,t6\n" + " a: 000faf83 lw t6,0(t6)\n" + " e: 01fea023 sw t6,0(t4)" >>) end), %% move_array_element: reg_x[reg_y] to y_reg ?_test(begin {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), move_array_element_test0(State1, a3, {free, Reg}, {y_reg, 31}, << - " 0: 0106af83 lw t6,16(a3)\n" - " 4: 002f9f93 slli t6,t6,0x2\n" - " 8: 01f68fb3 add t6,a3,t6\n" - " c: 000faf83 lw t6,0(t6)\n" - " 10: 01452f03 lw t5,20(a0)\n" - " 14: 07ff2e23 sw t6,124(t5)" + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 01f68fb3 add t6,a3,t6\n" + " a: 000faf83 lw t6,0(t6)\n" + " e: 01452f03 lw t5,20(a0)\n" + " 12: 07ff2e23 sw t6,124(t5)" >>) end), %% move_array_element with integer index and x_reg destination ?_test(begin {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 008faf03 lw t5,8(t6)\n" - " 8: 03e52623 sw t5,44(a0)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 008faf03 lw t5,8(t6)\n" + " 8: 03e52623 sw t5,44(a0)" >>) end) ] @@ -2232,7 +2257,7 @@ get_array_element_test_() -> {State1, Reg} = ?BACKEND:get_array_element(State0, t3, 4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 010e2f83 lw t6,16(t3)" + " 0: 010e2f83 lw t6,16(t3)" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(t6, Reg) @@ -2252,8 +2277,8 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01f6a423 sw t6,8(a3)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01f6a423 sw t6,8(a3)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2262,11 +2287,11 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, t3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 000e0f13 mv t5,t3\n" - " 8: 002f1f13 slli t5,t5,0x2\n" - " c: 01e68f33 add t5,a3,t5\n" - " 10: 01ff2023 sw t6,0(t5)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 8f72 mv t5,t3\n" + " 6: 0f0a slli t5,t5,0x2\n" + " 8: 01e68f33 add t5,a3,t5\n" + " c: 01ff2023 sw t6,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2275,11 +2300,11 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {ptr, t6}, a3, t3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 000faf83 lw t6,0(t6)\n" - " 4: 000e0f13 mv t5,t3\n" - " 8: 002f1f13 slli t5,t5,0x2\n" - " c: 01e68f33 add t5,a3,t5\n" - " 10: 01ff2023 sw t6,0(t5)" + " 0: 000faf83 lw t6,0(t6)\n" + " 4: 8f72 mv t5,t3\n" + " 6: 0f0a slli t5,t5,0x2\n" + " 8: 01e68f33 add t5,a3,t5\n" + " c: 01ff2023 sw t6,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2288,12 +2313,12 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, a3, t3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 008f2f83 lw t6,8(t5)\n" - " 8: 000e0f13 mv t5,t3\n" - " c: 002f1f13 slli t5,t5,0x2\n" - " 10: 01e68f33 add t5,a3,t5\n" - " 14: 01ff2023 sw t6,0(t5)" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 008f2f83 lw t6,8(t5)\n" + " 8: 8f72 mv t5,t3\n" + " a: 0f0a slli t5,t5,0x2\n" + " c: 01e68f33 add t5,a3,t5\n" + " 10: 01ff2023 sw t6,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2302,8 +2327,8 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2, 1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01f6a423 sw t6,8(a3)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01f6a423 sw t6,8(a3)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2315,11 +2340,11 @@ move_to_array_element_test_() -> State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 001e0f13 addi t5,t3,1\n" - " 8: 002f1f13 slli t5,t5,0x2\n" - " c: 01e68f33 add t5,a3,t5\n" - " 10: 01ff2023 sw t6,0(t5)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 001e0f13 addi t5,t3,1\n" + " 8: 0f0a slli t5,t5,0x2\n" + " a: 01e68f33 add t5,a3,t5\n" + " e: 01ff2023 sw t6,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2331,11 +2356,11 @@ move_to_array_element_test_() -> State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 02a00f93 li t6,42\n" - " 4: 001e0f13 addi t5,t3,1\n" - " 8: 002f1f13 slli t5,t5,0x2\n" - " c: 01e68f33 add t5,a3,t5\n" - " 10: 01ff2023 sw t6,0(t5)" + " 0: 02a00f93 li t6,42\n" + " 4: 001e0f13 addi t5,t3,1\n" + " 8: 0f0a slli t5,t5,0x2\n" + " a: 01e68f33 add t5,a3,t5\n" + " e: 01ff2023 sw t6,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -2355,7 +2380,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: 02a00f93 li t6,42" + " 0: 02a00f93 li t6,42" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2365,7 +2390,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: fd600f93 li t6,-42" + " 0: fd600f93 li t6,-42" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2375,7 +2400,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: f0100f93 li t6,-255" + " 0: f0100f93 li t6,-255" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2386,8 +2411,8 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State2), ?assertEqual(t6, Reg), Dump = << - " 0: f0000f93 li t6,-256\n" - " 4: 0fc0006f j 0x100" + " 0: f0000f93 li t6,-256\n" + " 4: a8f5 j 0x100" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2397,7 +2422,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t5, Reg), Dump = << - " 0: 000f2f03 lw t5,0(t5)" + " 0: 000f2f03 lw t5,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2407,7 +2432,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: 02c52f83 lw t6,44(a0)" + " 0: 02c52f83 lw t6,44(a0)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2417,8 +2442,8 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 00cf2f83 lw t6,12(t5)" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 00cf2f83 lw t6,12(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2427,7 +2452,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, 42, t5), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 02a00f13 li t5,42" + " 0: 02a00f13 li t5,42" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2436,7 +2461,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, t6, t4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 000f8e93 mv t4,t6" + " 0: 8efe mv t4,t6" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2445,7 +2470,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {ptr, t6}, t3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 000fae03 lw t3,0(t6)" + " 0: 000fae03 lw t3,0(t6)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2454,7 +2479,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, a3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 02052683 lw a3,32(a0)" + " 0: 5114 lw a3,32(a0)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2463,8 +2488,8 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, a1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f83 lw t6,20(a0)\n" - " 4: 008fa583 lw a1,8(t6)" + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 008fa583 lw a1,8(t6)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2476,12 +2501,12 @@ move_to_native_register_test_() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 06052f03 lw t5,96(a0)\n" - " 8: 004fae83 lw t4,4(t6)\n" - " c: 01df2c23 sw t4,24(t5)\n" - " 10: 008fae83 lw t4,8(t6)\n" - " 14: 01df2e23 sw t4,28(t5)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 06052f03 lw t5,96(a0)\n" + " 8: 004fae83 lw t4,4(t6)\n" + " c: 01df2c23 sw t4,24(t5)\n" + " 10: 008fae83 lw t4,8(t6)\n" + " 14: 01df2e23 sw t4,28(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -2504,21 +2529,21 @@ add_test_() -> [ ?_test(begin add_test0(State0, a2, 2, << - " 0: 00260613 addi a2,a2,2\n" - " 4: 0fc0006f j 0x100" + " 0: 0609 addi a2,a2,2\n" + " 2: a8fd j 0x100" >>) end), ?_test(begin add_test0(State0, a2, 256, << - " 0: 10000f93 li t6,256\n" - " 4: 01f60633 add a2,a2,t6\n" - " 8: 0f80006f j 0x100" + " 0: 10000f93 li t6,256\n" + " 4: 967e add a2,a2,t6\n" + " 6: a8ed j 0x100" >>) end), ?_test(begin add_test0(State0, a2, a3, << - " 0: 00d60633 add a2,a2,a3\n" - " 4: 0fc0006f j 0x100" + " 0: 9636 add a2,a2,a3\n" + " 2: a8fd j 0x100" >>) end) ] @@ -2540,21 +2565,21 @@ sub_test_() -> [ ?_test(begin sub_test0(State0, a2, 2, << - " 0: ffe60613 addi a2,a2,-2\n" - " 4: 0fc0006f j 0x100" + " 0: 1679 addi a2,a2,-2\n" + " 2: a8fd j 0x100" >>) end), ?_test(begin sub_test0(State0, a2, 256, << - " 0: 10000f93 li t6,256\n" - " 4: 41f60633 sub a2,a2,t6\n" - " 8: 0f80006f j 0x100" + " 0: 10000f93 li t6,256\n" + " 4: 41f60633 sub a2,a2,t6\n" + " 8: a8e5 j 0x100" >>) end), ?_test(begin sub_test0(State0, a2, a3, << - " 0: 40d60633 sub a2,a2,a3\n" - " 4: 0fc0006f j 0x100" + " 0: 8e15 sub a2,a2,a3\n" + " 2: a8fd j 0x100" >>) end) ] @@ -2574,61 +2599,61 @@ mul_test_() -> [ ?_test(begin mul_test0(State0, a2, 2, << - " 0: 00161613 slli a2,a2,0x1" + " 0: 0606 slli a2,a2,0x1" >>) end), ?_test(begin mul_test0(State0, a2, 3, << - " 0: 00161f93 slli t6,a2,0x1\n" - " 4: 00cf8633 add a2,t6,a2" + " 0: 00161f93 slli t6,a2,0x1\n" + " 4: 00cf8633 add a2,t6,a2" >>) end), ?_test(begin mul_test0(State0, a2, 4, << - " 0: 00261613 slli a2,a2,0x2" + " 0: 060a slli a2,a2,0x2" >>) end), ?_test(begin mul_test0(State0, a2, 5, << - " 0: 00261f93 slli t6,a2,0x2\n" - " 4: 00cf8633 add a2,t6,a2" + " 0: 00261f93 slli t6,a2,0x2\n" + " 4: 00cf8633 add a2,t6,a2" >>) end), ?_test(begin mul_test0(State0, a2, 6, << - " 0: 00161f93 slli t6,a2,0x1\n" - " 4: 00cf8633 add a2,t6,a2\n" - " 8: 00161613 slli a2,a2,0x1" + " 0: 00161f93 slli t6,a2,0x1\n" + " 4: 00cf8633 add a2,t6,a2\n" + " 8: 0606 slli a2,a2,0x1" >>) end), ?_test(begin mul_test0(State0, a2, 7, << - " 0: 00361f93 slli t6,a2,0x3\n" - " 4: 40cf8633 sub a2,t6,a2" + " 0: 00361f93 slli t6,a2,0x3\n" + " 4: 40cf8633 sub a2,t6,a2" >>) end), ?_test(begin mul_test0(State0, a2, 8, << - " 0: 00361613 slli a2,a2,0x3" + " 0: 060e slli a2,a2,0x3" >>) end), ?_test(begin mul_test0(State0, a2, 9, << - " 0: 00361f93 slli t6,a2,0x3\n" - " 4: 00cf8633 add a2,t6,a2" + " 0: 00361f93 slli t6,a2,0x3\n" + " 4: 00cf8633 add a2,t6,a2" >>) end), ?_test(begin mul_test0(State0, a2, 10, << - " 0: 00261f93 slli t6,a2,0x2\n" - " 4: 00cf8633 add a2,t6,a2\n" - " 8: 00161613 slli a2,a2,0x1" + " 0: 00261f93 slli t6,a2,0x2\n" + " 4: 00cf8633 add a2,t6,a2\n" + " 8: 0606 slli a2,a2,0x1" >>) end), ?_test(begin mul_test0(State0, a2, 11, << - " 0: 00b00f93 li t6,11\n" - " 4: 03f60633 mul a2,a2,t6" + " 0: 4fad li t6,11\n" + " 2: 03f60633 mul a2,a2,t6" >>) end) ] @@ -2648,24 +2673,24 @@ set_args1_y_reg_test() -> Stream = ?BACKEND:stream(State1), % Expected disassembly for loading from y_reg and calling primitive Dump = << - " 0: 04300f93 li t6,67\n" - " 4: 002f9f93 slli t6,t6,0x2\n" - " 8: 00cf8fb3 add t6,t6,a2\n" - " c: 000faf83 lw t6,0(t6)\n" - " 10: ff010113 addi sp,sp,-16\n" - " 14: 00112023 sw ra,0(sp)\n" - " 18: 00a12223 sw a0,4(sp)\n" - " 1c: 00b12423 sw a1,8(sp)\n" - " 20: 00c12623 sw a2,12(sp)\n" - " 24: 01452f03 lw t5,20(a0)\n" - " 28: 014f2503 lw a0,20(t5)\n" - " 2c: 000f80e7 jalr t6\n" - " 30: 00050f93 mv t6,a0\n" - " 34: 00012083 lw ra,0(sp)\n" - " 38: 00412503 lw a0,4(sp)\n" - " 3c: 00812583 lw a1,8(sp)\n" - " 40: 00c12603 lw a2,12(sp)\n" - " 44: 01010113 addi sp,sp,16" + " 0: 04300f93 li t6,67\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 9fb2 add t6,t6,a2\n" + " 8: 000faf83 lw t6,0(t6)\n" + " c: 1141 addi sp,sp,-16\n" + " e: c006 sw ra,0(sp)\n" + " 10: c22a sw a0,4(sp)\n" + " 12: c42e sw a1,8(sp)\n" + " 14: c632 sw a2,12(sp)\n" + " 16: 01452f03 lw t5,20(a0)\n" + " 1a: 014f2503 lw a0,20(t5)\n" + " 1e: 9f82 jalr t6\n" + " 20: 8faa mv t6,a0\n" + " 22: 4082 lw ra,0(sp)\n" + " 24: 4512 lw a0,4(sp)\n" + " 26: 45a2 lw a1,8(sp)\n" + " 28: 4632 lw a2,12(sp)\n" + " 2a: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2677,10 +2702,10 @@ large_y_reg_read_test() -> Stream = ?BACKEND:stream(State1), % Expected: uses helper with temp register for large offset Dump = << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 1ec00f93 li t6,492\n" - " 8: 01ef8fb3 add t6,t6,t5\n" - " c: 000faf83 lw t6,0(t6)" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 1ec00f93 li t6,492\n" + " 8: 9ffa add t6,t6,t5\n" + " a: 000faf83 lw t6,0(t6)" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(t6, Reg). @@ -2693,11 +2718,11 @@ large_y_reg_write_test() -> Stream = ?BACKEND:stream(State1), % Expected: uses helper with temp registers for large offset Dump = << - " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 1ec00e93 li t4,492\n" - " c: 01fe8eb3 add t4,t4,t6\n" - " 10: 01eea023 sw t5,0(t4)" + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 1ec00e93 li t4,492\n" + " c: 9efe add t4,t4,t6\n" + " e: 01eea023 sw t5,0(t4)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2715,15 +2740,15 @@ large_y_reg_read_register_exhaustion_test() -> Stream = ?BACKEND:stream(StateFinal), % Expected: uses t0+t1 fallback sequence when temps are exhausted Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 01452283 lw t0,20(a0)\n" - " 18: 08c00313 li t1,140\n" - " 1c: 00530333 add t1,t1,t0\n" - " 20: 00032303 lw t1,0(t1)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 01452283 lw t0,20(a0)\n" + " 18: 08c00313 li t1,140\n" + " 1c: 9316 add t1,t1,t0\n" + " 1e: 00032303 lw t1,0(t1)" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(t1, ResultReg). @@ -2743,15 +2768,15 @@ large_y_reg_write_register_exhaustion_test() -> Stream = ?BACKEND:stream(StateFinal), % Expected: uses t1/t0 fallback sequence Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 01452303 lw t1,20(a0)\n" - " 18: 0c800293 li t0,200\n" - " 1c: 006282b3 add t0,t0,t1\n" - " 20: 01f2a023 sw t6,0(t0)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 01452303 lw t1,20(a0)\n" + " 18: 0c800293 li t0,200\n" + " 1c: 929a add t0,t0,t1\n" + " 1e: 01f2a023 sw t6,0(t0)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2762,8 +2787,8 @@ y_reg_boundary_direct_test() -> Stream = ?BACKEND:stream(State1), % Expected: uses direct addressing since 31 * 4 = 124 < 2048 Dump = << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 07cf2f83 lw t6,124(t5)" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 07cf2f83 lw t6,124(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(t6, Reg). @@ -2774,7 +2799,7 @@ debugger_test() -> State1 = ?BACKEND:debugger(State0), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 00100073 ebreak" + " 0: 9002 ebreak" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2791,15 +2816,15 @@ and_register_exhaustion_negative_test() -> StateResult = ?BACKEND:and_(StateNoRegs, t6, -4), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: 00300293 li t0,3\n" - " 1c: fff2c293 not t0,t0\n" - " 20: 005fffb3 and t6,t6,t0" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 428d li t0,3\n" + " 1a: fff2c293 not t0,t0\n" + " 1e: 005fffb3 and t6,t6,t0" >>, ?assertEqual(dump_to_bin(ExpectedDump), Stream). @@ -2816,14 +2841,14 @@ and_register_exhaustion_positive_test() -> StateResult = ?BACKEND:and_(StateNoRegs, t6, 16#3F), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: 03f00293 li t0,63\n" - " 1c: 005fffb3 and t6,t6,t0" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 03f00293 li t0,63\n" + " 1c: 005fffb3 and t6,t6,t0" >>, ?assertEqual(dump_to_bin(ExpectedDump), Stream). @@ -2843,21 +2868,21 @@ alloc_boxed_integer_fragment_small_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 03c62f83 lw t6,60(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 02a00593 li a1,42\n" - " 1c: 00000613 li a2,0\n" - " 20: 000f80e7 jalr t6\n" - " 24: 00050f93 mv t6,a0\n" - " 28: 00012083 lw ra,0(sp)\n" - " 2c: 00412503 lw a0,4(sp)\n" - " 30: 00812583 lw a1,8(sp)\n" - " 34: 00c12603 lw a2,12(sp)\n" - " 38: 01010113 addi sp,sp,16" + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 02a00593 li a1,42\n" + " 12: 4601 li a2,0\n" + " 14: 9f82 jalr t6\n" + " 16: 8faa mv t6,a0\n" + " 18: 4082 lw ra,0(sp)\n" + " 1a: 4512 lw a0,4(sp)\n" + " 1c: 45a2 lw a1,8(sp)\n" + " 1e: 4632 lw a2,12(sp)\n" + " 20: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2874,28 +2899,28 @@ alloc_boxed_integer_fragment_large_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 03c62f83 lw t6,60(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 9abce5b7 lui a1,0x9abce\n" - " 1c: ef058593 addi a1,a1,-272 # 0x9abcdef0\n" - " 20: 12345637 lui a2,0x12345\n" - " 24: 67860613 addi a2,a2,1656 # 0x12345678\n" - " 28: 000f80e7 jalr t6\n" - " 2c: 00050f93 mv t6,a0\n" - " 30: 00012083 lw ra,0(sp)\n" - " 34: 00412503 lw a0,4(sp)\n" - " 38: 00812583 lw a1,8(sp)\n" - " 3c: 00c12603 lw a2,12(sp)\n" - " 40: 01010113 addi sp,sp,16\n" - " 44: 04c62f03 lw t5,76(a2)\n" - " 48: 04800613 li a2,72\n" - " 4c: 28b00693 li a3,651\n" - " 50: 000f8713 mv a4,t6\n" - " 54: 000f0067 jr t5" + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9abce5b7 lui a1,0x9abce\n" + " 12: ef058593 addi a1,a1,-272 # 0x9abcdef0\n" + " 16: 12345637 lui a2,0x12345\n" + " 1a: 67860613 addi a2,a2,1656 # 0x12345678\n" + " 1e: 9f82 jalr t6\n" + " 20: 8faa mv t6,a0\n" + " 22: 4082 lw ra,0(sp)\n" + " 24: 4512 lw a0,4(sp)\n" + " 26: 45a2 lw a1,8(sp)\n" + " 28: 4632 lw a2,12(sp)\n" + " 2a: 0141 addi sp,sp,16\n" + " 2c: 04c62f03 lw t5,76(a2)\n" + " 30: 03000613 li a2,48\n" + " 34: 28b00693 li a3,651\n" + " 38: 877e mv a4,t6\n" + " 3a: 8f02 jr t5" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2911,29 +2936,29 @@ call_func_ptr_stack_alignment_test() -> Stream = ?BACKEND:stream(State5), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: fe010113 addi sp,sp,-32\n" - " 14: 00112023 sw ra,0(sp)\n" - " 18: 00a12223 sw a0,4(sp)\n" - " 1c: 00b12423 sw a1,8(sp)\n" - " 20: 00c12623 sw a2,12(sp)\n" - " 24: 01d12823 sw t4,16(sp)\n" - " 28: 01e12a23 sw t5,20(sp)\n" - " 2c: 01f12c23 sw t6,24(sp)\n" - " 30: 02a00513 li a0,42\n" - " 34: 000e00e7 jalr t3\n" - " 38: 00050e13 mv t3,a0\n" - " 3c: 00012083 lw ra,0(sp)\n" - " 40: 00412503 lw a0,4(sp)\n" - " 44: 00812583 lw a1,8(sp)\n" - " 48: 00c12603 lw a2,12(sp)\n" - " 4c: 01012e83 lw t4,16(sp)\n" - " 50: 01412f03 lw t5,20(sp)\n" - " 54: 01812f83 lw t6,24(sp)\n" - " 58: 02010113 addi sp,sp,32" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 1101 addi sp,sp,-32\n" + " 12: c006 sw ra,0(sp)\n" + " 14: c22a sw a0,4(sp)\n" + " 16: c42e sw a1,8(sp)\n" + " 18: c632 sw a2,12(sp)\n" + " 1a: c876 sw t4,16(sp)\n" + " 1c: ca7a sw t5,20(sp)\n" + " 1e: cc7e sw t6,24(sp)\n" + " 20: 02a00513 li a0,42\n" + " 24: 9e02 jalr t3\n" + " 26: 8e2a mv t3,a0\n" + " 28: 4082 lw ra,0(sp)\n" + " 2a: 4512 lw a0,4(sp)\n" + " 2c: 45a2 lw a1,8(sp)\n" + " 2e: 4632 lw a2,12(sp)\n" + " 30: 4ec2 lw t4,16(sp)\n" + " 32: 4f52 lw t5,20(sp)\n" + " 34: 4fe2 lw t6,24(sp)\n" + " 36: 02010113 addi sp,sp,32" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2965,35 +2990,35 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: fe010113 addi sp,sp,-32\n" - " 1c: 00112023 sw ra,0(sp)\n" - " 20: 00a12223 sw a0,4(sp)\n" - " 24: 00b12423 sw a1,8(sp)\n" - " 28: 00c12623 sw a2,12(sp)\n" - " 2c: 00612823 sw t1,16(sp)\n" - " 30: 01c12a23 sw t3,20(sp)\n" - " 34: 01d12c23 sw t4,24(sp)\n" - " 38: 01f12e23 sw t6,28(sp)\n" - " 3c: 00038613 mv a2,t2\n" - " 40: 00300693 li a3,3\n" - " 44: 00100713 li a4,1\n" - " 48: 000f00e7 jalr t5\n" - " 4c: 00050f13 mv t5,a0\n" - " 50: 00012083 lw ra,0(sp)\n" - " 54: 00412503 lw a0,4(sp)\n" - " 58: 00812583 lw a1,8(sp)\n" - " 5c: 00c12603 lw a2,12(sp)\n" - " 60: 01012303 lw t1,16(sp)\n" - " 64: 01412e03 lw t3,20(sp)\n" - " 68: 01812e83 lw t4,24(sp)\n" - " 6c: 01c12f83 lw t6,28(sp)\n" - " 70: 02010113 addi sp,sp,32" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 468d li a3,3\n" + " 2e: 4705 li a4,1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -3006,35 +3031,35 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: fe010113 addi sp,sp,-32\n" - " 1c: 00112023 sw ra,0(sp)\n" - " 20: 00a12223 sw a0,4(sp)\n" - " 24: 00b12423 sw a1,8(sp)\n" - " 28: 00c12623 sw a2,12(sp)\n" - " 2c: 00612823 sw t1,16(sp)\n" - " 30: 01c12a23 sw t3,20(sp)\n" - " 34: 01d12c23 sw t4,24(sp)\n" - " 38: 01f12e23 sw t6,28(sp)\n" - " 3c: 00038613 mv a2,t2\n" - " 40: 00100693 li a3,1\n" - " 44: 00030713 mv a4,t1\n" - " 48: 000f00e7 jalr t5\n" - " 4c: 00050f13 mv t5,a0\n" - " 50: 00012083 lw ra,0(sp)\n" - " 54: 00412503 lw a0,4(sp)\n" - " 58: 00812583 lw a1,8(sp)\n" - " 5c: 00c12603 lw a2,12(sp)\n" - " 60: 01012303 lw t1,16(sp)\n" - " 64: 01412e03 lw t3,20(sp)\n" - " 68: 01812e83 lw t4,24(sp)\n" - " 6c: 01c12f83 lw t6,28(sp)\n" - " 70: 02010113 addi sp,sp,32" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 4685 li a3,1\n" + " 2e: 871a mv a4,t1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -3047,35 +3072,35 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: fe010113 addi sp,sp,-32\n" - " 1c: 00112023 sw ra,0(sp)\n" - " 20: 00a12223 sw a0,4(sp)\n" - " 24: 00b12423 sw a1,8(sp)\n" - " 28: 00c12623 sw a2,12(sp)\n" - " 2c: 00612823 sw t1,16(sp)\n" - " 30: 01c12a23 sw t3,20(sp)\n" - " 34: 01d12c23 sw t4,24(sp)\n" - " 38: 01f12e23 sw t6,28(sp)\n" - " 3c: 00038613 mv a2,t2\n" - " 40: 00030693 mv a3,t1\n" - " 44: 00100713 li a4,1\n" - " 48: 000f00e7 jalr t5\n" - " 4c: 00050f13 mv t5,a0\n" - " 50: 00012083 lw ra,0(sp)\n" - " 54: 00412503 lw a0,4(sp)\n" - " 58: 00812583 lw a1,8(sp)\n" - " 5c: 00c12603 lw a2,12(sp)\n" - " 60: 01012303 lw t1,16(sp)\n" - " 64: 01412e03 lw t3,20(sp)\n" - " 68: 01812e83 lw t4,24(sp)\n" - " 6c: 01c12f83 lw t6,28(sp)\n" - " 70: 02010113 addi sp,sp,32" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 869a mv a3,t1\n" + " 2e: 4705 li a4,1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(t5, ResultReg) @@ -3089,39 +3114,39 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: fd010113 addi sp,sp,-48\n" - " 1c: 00112023 sw ra,0(sp)\n" - " 20: 00a12223 sw a0,4(sp)\n" - " 24: 00b12423 sw a1,8(sp)\n" - " 28: 00c12623 sw a2,12(sp)\n" - " 2c: 00612823 sw t1,16(sp)\n" - " 30: 00712a23 sw t2,20(sp)\n" - " 34: 01c12c23 sw t3,24(sp)\n" - " 38: 01d12e23 sw t4,28(sp)\n" - " 3c: 03e12023 sw t5,32(sp)\n" - " 40: 03f12223 sw t6,36(sp)\n" - " 44: 00058313 mv t1,a1\n" - " 48: 000f0513 mv a0,t5\n" - " 4c: 00068593 mv a1,a3\n" - " 50: 000300e7 jalr t1\n" - " 54: 00a12423 sw a0,8(sp)\n" - " 58: 00012083 lw ra,0(sp)\n" - " 5c: 00412503 lw a0,4(sp)\n" - " 60: 00812583 lw a1,8(sp)\n" - " 64: 00c12603 lw a2,12(sp)\n" - " 68: 01012303 lw t1,16(sp)\n" - " 6c: 01412383 lw t2,20(sp)\n" - " 70: 01812e03 lw t3,24(sp)\n" - " 74: 01c12e83 lw t4,28(sp)\n" - " 78: 02012f03 lw t5,32(sp)\n" - " 7c: 02412f83 lw t6,36(sp)\n" - " 80: 03010113 addi sp,sp,48" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: c006 sw ra,0(sp)\n" + " 1e: c22a sw a0,4(sp)\n" + " 20: c42e sw a1,8(sp)\n" + " 22: c632 sw a2,12(sp)\n" + " 24: c81a sw t1,16(sp)\n" + " 26: ca1e sw t2,20(sp)\n" + " 28: cc72 sw t3,24(sp)\n" + " 2a: ce76 sw t4,28(sp)\n" + " 2c: d07a sw t5,32(sp)\n" + " 2e: d27e sw t6,36(sp)\n" + " 30: 832e mv t1,a1\n" + " 32: 857a mv a0,t5\n" + " 34: 85b6 mv a1,a3\n" + " 36: 9302 jalr t1\n" + " 38: c42a sw a0,8(sp)\n" + " 3a: 4082 lw ra,0(sp)\n" + " 3c: 4512 lw a0,4(sp)\n" + " 3e: 45a2 lw a1,8(sp)\n" + " 40: 4632 lw a2,12(sp)\n" + " 42: 4342 lw t1,16(sp)\n" + " 44: 43d2 lw t2,20(sp)\n" + " 46: 4e62 lw t3,24(sp)\n" + " 48: 4ef2 lw t4,28(sp)\n" + " 4a: 5f02 lw t5,32(sp)\n" + " 4c: 5f92 lw t6,36(sp)\n" + " 4e: 03010113 addi sp,sp,48" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -3135,37 +3160,37 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: fd010113 addi sp,sp,-48\n" - " 1c: 00112023 sw ra,0(sp)\n" - " 20: 00a12223 sw a0,4(sp)\n" - " 24: 00b12423 sw a1,8(sp)\n" - " 28: 00c12623 sw a2,12(sp)\n" - " 2c: 00612823 sw t1,16(sp)\n" - " 30: 00712a23 sw t2,20(sp)\n" - " 34: 01c12c23 sw t3,24(sp)\n" - " 38: 01d12e23 sw t4,28(sp)\n" - " 3c: 03f12023 sw t6,32(sp)\n" - " 40: 00862303 lw t1,8(a2)\n" - " 44: 000f0513 mv a0,t5\n" - " 48: 00068593 mv a1,a3\n" - " 4c: 000300e7 jalr t1\n" - " 50: 00050f13 mv t5,a0\n" - " 54: 00012083 lw ra,0(sp)\n" - " 58: 00412503 lw a0,4(sp)\n" - " 5c: 00812583 lw a1,8(sp)\n" - " 60: 00c12603 lw a2,12(sp)\n" - " 64: 01012303 lw t1,16(sp)\n" - " 68: 01412383 lw t2,20(sp)\n" - " 6c: 01812e03 lw t3,24(sp)\n" - " 70: 01c12e83 lw t4,28(sp)\n" - " 74: 02012f83 lw t6,32(sp)\n" - " 78: 03010113 addi sp,sp,48" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: c006 sw ra,0(sp)\n" + " 1e: c22a sw a0,4(sp)\n" + " 20: c42e sw a1,8(sp)\n" + " 22: c632 sw a2,12(sp)\n" + " 24: c81a sw t1,16(sp)\n" + " 26: ca1e sw t2,20(sp)\n" + " 28: cc72 sw t3,24(sp)\n" + " 2a: ce76 sw t4,28(sp)\n" + " 2c: d07e sw t6,32(sp)\n" + " 2e: 00862303 lw t1,8(a2)\n" + " 32: 857a mv a0,t5\n" + " 34: 85b6 mv a1,a3\n" + " 36: 9302 jalr t1\n" + " 38: 8f2a mv t5,a0\n" + " 3a: 4082 lw ra,0(sp)\n" + " 3c: 4512 lw a0,4(sp)\n" + " 3e: 45a2 lw a1,8(sp)\n" + " 40: 4632 lw a2,12(sp)\n" + " 42: 4342 lw t1,16(sp)\n" + " 44: 43d2 lw t2,20(sp)\n" + " 46: 4e62 lw t3,24(sp)\n" + " 48: 4ef2 lw t4,28(sp)\n" + " 4a: 5f82 lw t6,32(sp)\n" + " 4c: 03010113 addi sp,sp,48" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -3180,12 +3205,12 @@ jump_to_continuation_test_() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:jump_to_continuation(State0, {free, a0}), Stream = ?BACKEND:stream(State1), - % Expected: riscv32 PIC sequence - simpler than ARM, no prolog/epilog needed + % Expected: riscv32 PIC sequence Dump = << - " 0: 00000f97 auipc t6,0x0\n" - " 4: 00af8fb3 add t6,t6,a0\n" - " 8: 000f8067 jr t6" + " 0: 00000f97 auipc t6,0x0\n" + " 4: 9faa add t6,t6,a0\n" + " 6: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -3200,18 +3225,18 @@ jump_to_continuation_test_() -> % NetOffset = 0 - 32 = -32 (0xFFFFFFE0) Dump = << - " 0: 00000697 auipc a3,0x0\n" - " 4: 00068067 jr a3\n" - " 8: 00000697 auipc a3,0x0\n" - " c: 00068067 jr a3\n" - " 10: 00000697 auipc a3,0x0\n" - " 14: 00068067 jr a3\n" - " 18: 00000697 auipc a3,0x0\n" - " 1c: 00068067 jr a3\n" - " 20: 00000f97 auipc t6,0x0\n" - " 24: fe0f8f93 addi t6,t6,-32\n" - " 28: 00af8fb3 add t6,t6,a0\n" - " 2c: 000f8067 jr t6" + " 0: ffffffff .insn 4, 0xffffffff\n" + " 4: ffffffff .insn 4, 0xffffffff\n" + " 8: ffffffff .insn 4, 0xffffffff\n" + " c: ffffffff .insn 4, 0xffffffff\n" + " 10: ffffffff .insn 4, 0xffffffff\n" + " 14: ffffffff .insn 4, 0xffffffff\n" + " 18: ffffffff .insn 4, 0xffffffff\n" + " 1c: ffffffff .insn 4, 0xffffffff\n" + " 20: 00000f97 auipc t6,0x0\n" + " 24: 1f81 addi t6,t6,-32 # 0x0\n" + " 26: 9faa add t6,t6,a0\n" + " 28: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -3246,88 +3271,92 @@ add_beam_test() -> Dump = << % jump table (new 8-byte format) - " 0: 00000697 auipc a3,0x0\n" - " 4: 10c68067 jr 268(a3) # 0x10c\n" - " 8: 00000697 auipc a3,0x0\n" - " c: 01868067 jr 24(a3) # 0x20\n" - " 10: 00000697 auipc a3,0x0\n" - " 14: 04c68067 jr 76(a3) # 0x5c\n" - " 18: 00000697 auipc a3,0x0\n" - " 1c: 0ec68067 jr 236(a3) # 0x104\n" + " 0: 00000697 auipc a3,0x0\n" + " 4: 0e068067 jr 224(a3) # 0xe0\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01868067 jr 24(a3) # 0x20\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 04868067 jr 72(a3) # 0x58\n" + " 18: 00000697 auipc a3,0x0\n" + " 1c: 0c268067 jr 194(a3) # 0xda\n" % label 1 % {move,{integer,9},{x,1}}. - " 20: 09f00f93 li t6,159\n" - " 24: 01f52e23 sw t6,28(a0)\n" + " 20: 09f00f93 li t6,159\n" + " 24: 01f52e23 sw t6,28(a0)\n" % {move,{integer,8},{x,0}} - " 28: 08f00f93 li t6,143\n" - " 2c: 01f52c23 sw t6,24(a0)\n" + " 28: 08f00f93 li t6,143\n" + " 2c: 01f52c23 sw t6,24(a0)\n" % {call_only,2,{f,2}}. - " 30: 0085af83 lw t6,8(a1)\n" - " 34: ffff8f93 addi t6,t6,-1\n" - " 38: 01f5a423 sw t6,8(a1)\n" - " 3c: 000f8663 beqz t6,0x48\n" - " 40: 01c0006f j 0x5c\n" - " 44: 00000013 nop\n" - " 48: 00000f97 auipc t6,0x0\n" - " 4c: 014f8f93 addi t6,t6,20 # 0x5c\n" - " 50: 01f5a223 sw t6,4(a1)\n" - " 54: 00862f83 lw t6,8(a2)\n" - " 58: 000f8067 jr t6\n" + " 30: 0085af83 lw t6,8(a1)\n" + " 34: 1ffd addi t6,t6,-1\n" + " 36: 01f5a423 sw t6,8(a1)\n" + " 3a: 000f8663 beqz t6,0x46\n" + " 3e: a829 j 0x58\n" + " 40: 0001 nop\n" + " 42: 00000013 nop\n" + " 46: 00000f97 auipc t6,0x0\n" + " 4a: 0fd1 addi t6,t6,20 # 0x5a\n" + " 4c: 0001 nop\n" + " 4e: 01f5a223 sw t6,4(a1)\n" + " 52: 00862f83 lw t6,8(a2)\n" + " 56: 8f82 jr t6\n" % label 2 % {allocate,1,1}. - " 5c: 01462f83 lw t6,20(a2)\n" - " 60: ff010113 addi sp,sp,-16\n" - " 64: 00112023 sw ra,0(sp)\n" - " 68: 00a12223 sw a0,4(sp)\n" - " 6c: 00b12423 sw a1,8(sp)\n" - " 70: 00c12623 sw a2,12(sp)\n" - " 74: 00100613 li a2,1\n" - " 78: 00000693 li a3,0\n" - " 7c: 00100713 li a4,1\n" - " 80: 000f80e7 jalr t6\n" - " 84: 00050f93 mv t6,a0\n" - " 88: 00012083 lw ra,0(sp)\n" - " 8c: 00412503 lw a0,4(sp)\n" - " 90: 00812583 lw a1,8(sp)\n" - " 94: 00c12603 lw a2,12(sp)\n" - " 98: 01010113 addi sp,sp,16\n" - " 9c: 01ff9f13 slli t5,t6,0x1f\n" - " a0: 000f4863 bltz t5,0xb0\n" - " a4: 01862f83 lw t6,24(a2)\n" - " a8: 0a800613 li a2,168\n" - " ac: 000f8067 jr t6\n" + " 58: 01462f83 lw t6,20(a2)\n" + " 5c: 1141 addi sp,sp,-16\n" + " 5e: c006 sw ra,0(sp)\n" + " 60: c22a sw a0,4(sp)\n" + " 62: c42e sw a1,8(sp)\n" + " 64: c632 sw a2,12(sp)\n" + " 66: 4605 li a2,1\n" + " 68: 4681 li a3,0\n" + " 6a: 4705 li a4,1\n" + " 6c: 9f82 jalr t6\n" + " 6e: 8faa mv t6,a0\n" + " 70: 4082 lw ra,0(sp)\n" + " 72: 4512 lw a0,4(sp)\n" + " 74: 45a2 lw a1,8(sp)\n" + " 76: 4632 lw a2,12(sp)\n" + " 78: 0141 addi sp,sp,16\n" + " 7a: 01ff9f13 slli t5,t6,0x1f\n" + " 7e: 000f4763 bltz t5,0x8c\n" + " 82: 01862f83 lw t6,24(a2)\n" + " 86: 08600613 li a2,134\n" + " 8a: 8f82 jr t6\n" % {init_yregs,{list,[{y,0}]}}. %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), - " b0: 03b00f13 li t5,59\n" - " b4: 01452f83 lw t6,20(a0)\n" - " b8: 01efa023 sw t5,0(t6)\n" + " 8c: 03b00f13 li t5,59\n" + " 90: 01452f83 lw t6,20(a0)\n" + " 94: 01efa023 sw t5,0(t6)\n" % {call,1,{f,3}} %% call_or_schedule_next(State9, 3), - " bc: 0005af03 lw t5,0(a1)\n" - " c0: 000f2f03 lw t5,0(t5)\n" - " c4: 018f1f13 slli t5,t5,0x18\n" - " c8: 41000f93 li t6,1040\n" - " cc: 00000013 nop\n" - " d0: 01ff6f33 or t5,t5,t6\n" - " d4: 05e52e23 sw t5,92(a0)\n" - " d8: 0085af83 lw t6,8(a1)\n" - " dc: ffff8f93 addi t6,t6,-1\n" - " e0: 01f5a423 sw t6,8(a1)\n" - " e4: 000f8663 beqz t6,0xf0\n" - " e8: 01c0006f j 0x104\n" - " ec: 00000013 nop\n" - " f0: 00000f97 auipc t6,0x0\n" - " f4: 014f8f93 addi t6,t6,20 # 0x104\n" - " f8: 01f5a223 sw t6,4(a1)\n" - " fc: 00862f83 lw t6,8(a2)\n" - " 100: 000f8067 jr t6\n" + " 98: 0005af03 lw t5,0(a1)\n" + " 9c: 000f2f03 lw t5,0(t5)\n" + " a0: 0f62 slli t5,t5,0x18\n" + " a2: 36800f93 li t6,872\n" + " a6: 00000013 nop\n" + " aa: 01ff6f33 or t5,t5,t6\n" + " ae: 05e52e23 sw t5,92(a0)\n" + " b2: 0085af83 lw t6,8(a1)\n" + " b6: 1ffd addi t6,t6,-1\n" + " b8: 01f5a423 sw t6,8(a1)\n" + " bc: 000f8663 beqz t6,0xc8\n" + " c0: a829 j 0xda\n" + " c2: 0001 nop\n" + " c4: 00000013 nop\n" + " c8: 00000f97 auipc t6,0x0\n" + " cc: 0fd1 addi t6,t6,20 # 0xdc\n" + " ce: 0001 nop\n" + " d0: 01f5a223 sw t6,4(a1)\n" + " d4: 00862f83 lw t6,8(a2)\n" + " d8: 8f82 jr t6\n" %% (continuation) % label 3 - " 104: 00462f83 lw t6,4(a2)\n" - " 108: 000f8067 jr t6\n" + " da: 00462f83 lw t6,4(a2)\n" + " de: 8f82 jr t6\n" % label 0 - " 10c: 00462f83 lw t6,4(a2)\n" - " 110: 000f8067 jr t6\n" + " e0: 00462f83 lw t6,4(a2)\n" + " e4: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -3346,15 +3375,15 @@ dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) -> dump_to_bin0(Tail, addr, Acc); dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) -> dump_to_bin0(Tail, addr, Acc); -dump_to_bin0(<<$ , Tail/binary>>, addr, Acc) -> +dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) -> dump_to_bin0(Tail, addr, Acc); dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) -> dump_to_bin0(Tail, hex, Acc); -dump_to_bin0(<<$ , Tail/binary>>, hex, Acc) -> +dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) -> dump_to_bin0(Tail, hex, Acc); %% Handle RISC-V 32-bit instructions (8 consecutive hex digits) dump_to_bin0(<>, hex, Acc) when - (Sp =:= $ orelse Sp =:= $\s) andalso + (Sp =:= $\t orelse Sp =:= $\s) andalso ?IS_HEX_DIGIT(H1) andalso ?IS_HEX_DIGIT(H2) andalso ?IS_HEX_DIGIT(H3) andalso @@ -3369,7 +3398,7 @@ dump_to_bin0(<>, hex, Acc) when dump_to_bin0(Rest, instr, [<> | Acc]); %% Handle 32-bits undefined instruction (ARM format with space: "1234 5678") dump_to_bin0(<>, hex, Acc) when - (Sp =:= $ orelse Sp =:= $\s) andalso + (Sp =:= $\t orelse Sp =:= $\s) andalso ?IS_HEX_DIGIT(H1) andalso ?IS_HEX_DIGIT(H2) andalso ?IS_HEX_DIGIT(H3) andalso @@ -3384,7 +3413,7 @@ dump_to_bin0(<>, hex, Acc) dump_to_bin0(Rest, instr, [<>, <> | Acc]); %% Handle 16-bit ARM32 Thumb instructions (4 hex digits) dump_to_bin0(<>, hex, Acc) when - (Sp =:= $ orelse Sp =:= $\s) andalso + (Sp =:= $\t orelse Sp =:= $\s) andalso ?IS_HEX_DIGIT(H1) andalso ?IS_HEX_DIGIT(H2) andalso ?IS_HEX_DIGIT(H3) andalso diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl index 7117ee5f69..cf989e746d 100644 --- a/tests/libs/jit/jit_tests_common.erl +++ b/tests/libs/jit/jit_tests_common.erl @@ -78,6 +78,7 @@ find_binutils(Arch) -> ArchStr = atom_to_list(Arch), BinutilsList = [ {ArchStr ++ "-esp-elf-as", ArchStr ++ "-esp-elf-objdump"}, + {ArchStr ++ "-unknown-elf-as", ArchStr ++ "-unknown-elf-objdump"}, {ArchStr ++ "-elf-as", ArchStr ++ "-elf-objdump"}, {ArchStr ++ "-none-eabi-as", ArchStr ++ "-none-eabi-objdump"}, {ArchStr ++ "-linux-gnu-as", ArchStr ++ "-linux-gnu-objdump"} @@ -118,7 +119,7 @@ get_as_flags(aarch64) -> get_as_flags(x86_64) -> "--64"; get_as_flags(riscv32) -> - "-march=rv32ima". + "-march=rv32imac". %% Parse objdump output lines and extract binary data -spec asm_lines([binary()], binary(), atom()) -> binary(). From 59723dcbeaf4700b5e0803b36ddcf0c2df72def6 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 7 Sep 2025 21:19:56 +0200 Subject: [PATCH 94/97] JIT: Add DWARF support Signed-off-by: Paul Guyot --- CMakeLists.txt | 1 + CMakeModules/BuildErlang.cmake | 2 +- libs/jit/src/CMakeLists.txt | 9 +- libs/jit/src/compact_term.hrl | 52 + libs/jit/src/jit.erl | 425 ++++--- libs/jit/src/jit_aarch64.erl | 13 + libs/jit/src/jit_armv6m.erl | 13 + libs/jit/src/jit_backend_dwarf_impl.hrl | 39 + libs/jit/src/jit_dwarf.erl | 1496 +++++++++++++++++++++++ libs/jit/src/jit_precompile.erl | 192 ++- libs/jit/src/jit_x86_64.erl | 13 + src/libAtomVM/CMakeLists.txt | 3 + src/libAtomVM/jit.c | 415 +++++++ src/libAtomVM/jit.h | 28 + src/libAtomVM/module.c | 12 + tests/erlang_tests/CMakeLists.txt | 1 + tests/libs/jit/CMakeLists.txt | 7 +- tests/libs/jit/jit_dwarf_tests.erl | 276 +++++ tests/libs/jit/jit_tests.erl | 38 +- tests/libs/jit/tests.erl | 1 + 20 files changed, 2850 insertions(+), 186 deletions(-) create mode 100644 libs/jit/src/compact_term.hrl create mode 100644 libs/jit/src/jit_backend_dwarf_impl.hrl create mode 100644 libs/jit/src/jit_dwarf.erl create mode 100644 tests/libs/jit/jit_dwarf_tests.erl diff --git a/CMakeLists.txt b/CMakeLists.txt index 307917422f..875a16349b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ option(AVM_DISABLE_SMP "Disable SMP." OFF) option(AVM_DISABLE_TASK_DRIVER "Disable task driver support." OFF) option(AVM_DISABLE_JIT "Disable just in time compilation." ON) option(AVM_ENABLE_PRECOMPILED "Enable execution of precompiled code, even if JIT is disabled." OFF) +option(AVM_DISABLE_JIT_DWARF "Disable DWARF debug and profiling info for JIT." ON) option(AVM_USE_32BIT_FLOAT "Use 32 bit floats." OFF) option(AVM_VERBOSE_ABORT "Print module and line number on VM abort" OFF) option(AVM_RELEASE "Build an AtomVM release" OFF) diff --git a/CMakeModules/BuildErlang.cmake b/CMakeModules/BuildErlang.cmake index 2bc6754d87..986a2900a1 100644 --- a/CMakeModules/BuildErlang.cmake +++ b/CMakeModules/BuildErlang.cmake @@ -22,7 +22,6 @@ macro(pack_archive avm_name) set(multiValueArgs ERLC_FLAGS MODULES) cmake_parse_arguments(PACK_ARCHIVE "" "" "${multiValueArgs}" ${ARGN}) - list(JOIN PACK_ARCHIVE_ERLC_FLAGS " " PACK_ARCHIVE_ERLC_FLAGS) foreach(module_name IN LISTS ${PACK_ARCHIVE_MODULES} PACK_ARCHIVE_MODULES PACK_ARCHIVE_UNPARSED_ARGUMENTS) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam @@ -83,6 +82,7 @@ macro(pack_precompiled_archive avm_name) string(REGEX REPLACE "\\+.*$" "" jit_target_arch "${jit_target_arch_variant}") set(jit_compiler_modules ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit.beam + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_precompile.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_stream_binary.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${jit_target_arch}.beam diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index ae62643c30..df155f9d0d 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -24,6 +24,7 @@ include(BuildErlang) set(ERLANG_MODULES jit + jit_dwarf jit_precompile jit_stream_binary jit_stream_mmap @@ -37,7 +38,13 @@ set(ERLANG_MODULES jit_x86_64_asm ) -pack_precompiled_archive(jit ${ERLANG_MODULES}) +if (NOT AVM_DISABLE_JIT_DWARF) + set(erlc_flags -DJIT_DWARF) +else() + set(erlc_flags) +endif() + +pack_precompiled_archive(jit ERLC_FLAGS ${erlc_flags} MODULES ${ERLANG_MODULES}) include(../../../version.cmake) diff --git a/libs/jit/src/compact_term.hrl b/libs/jit/src/compact_term.hrl new file mode 100644 index 0000000000..3739b4404d --- /dev/null +++ b/libs/jit/src/compact_term.hrl @@ -0,0 +1,52 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + + +-define(COMPACT_LITERAL, 0). +-define(COMPACT_INTEGER, 1). +-define(COMPACT_ATOM, 2). +-define(COMPACT_XREG, 3). +-define(COMPACT_YREG, 4). +-define(COMPACT_LABEL, 5). +-define(COMPACT_EXTENDED, 7). +-define(COMPACT_LARGE_LITERAL, 8). +-define(COMPACT_LARGE_INTEGER, 9). +-define(COMPACT_LARGE_ATOM, 10). +-define(COMPACT_LARGE_XREG, 11). +-define(COMPACT_LARGE_YREG, 12). + +% OTP-20+ format +-define(COMPACT_EXTENDED_LIST, 16#17). +-define(COMPACT_EXTENDED_FP_REGISTER, 16#27). +-define(COMPACT_EXTENDED_ALLOCATION_LIST, 16#37). +-define(COMPACT_EXTENDED_LITERAL, 16#47). +% https://github.com/erlang/otp/blob/master/lib/compiler/src/beam_asm.erl#L433 +-define(COMPACT_EXTENDED_TYPED_REGISTER, 16#57). + +-define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_WORDS, 0). +-define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FLOATS, 1). +-define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FUNS, 2). + +-define(COMPACT_LARGE_IMM_MASK, 16#18). +-define(COMPACT_11BITS_VALUE, 16#8). +-define(COMPACT_NBITS_VALUE, 16#18). + +-define(COMPACT_LARGE_INTEGER_11BITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_11BITS_VALUE)). +-define(COMPACT_LARGE_INTEGER_NBITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_NBITS_VALUE)). diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 71d38c615f..83cb0c0d8d 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -24,7 +24,8 @@ stream/1, backend/1, beam_chunk_header/3, - compile/6 + compile/6, + decode_value64/1 ]). % NIFs @@ -46,38 +47,7 @@ -include("opcodes.hrl"). -include("primitives.hrl"). -include("term.hrl"). - --define(COMPACT_LITERAL, 0). --define(COMPACT_INTEGER, 1). --define(COMPACT_ATOM, 2). --define(COMPACT_XREG, 3). --define(COMPACT_YREG, 4). --define(COMPACT_LABEL, 5). --define(COMPACT_EXTENDED, 7). --define(COMPACT_LARGE_LITERAL, 8). --define(COMPACT_LARGE_INTEGER, 9). --define(COMPACT_LARGE_ATOM, 10). --define(COMPACT_LARGE_XREG, 11). --define(COMPACT_LARGE_YREG, 12). - -% OTP-20+ format --define(COMPACT_EXTENDED_LIST, 16#17). --define(COMPACT_EXTENDED_FP_REGISTER, 16#27). --define(COMPACT_EXTENDED_ALLOCATION_LIST, 16#37). --define(COMPACT_EXTENDED_LITERAL, 16#47). -% https://github.com/erlang/otp/blob/master/lib/compiler/src/beam_asm.erl#L433 --define(COMPACT_EXTENDED_TYPED_REGISTER, 16#57). - --define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_WORDS, 0). --define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FLOATS, 1). --define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FUNS, 2). - --define(COMPACT_LARGE_IMM_MASK, 16#18). --define(COMPACT_11BITS_VALUE, 16#8). --define(COMPACT_NBITS_VALUE, 16#18). - --define(COMPACT_LARGE_INTEGER_11BITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_11BITS_VALUE)). --define(COMPACT_LARGE_INTEGER_NBITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_NBITS_VALUE)). +-include("compact_term.hrl"). -define(BOXED_FUN_SIZE, 3). -define(FLOAT_SIZE_64, 2). @@ -114,6 +84,20 @@ -define(ASSERT_ALL_NATIVE_FREE(St), ok). -define(ASSERT(Expr), ok). +-ifdef(JIT_DWARF). +-define(DWARF_OPCODE(MMod, MSt, Opcode), MMod:dwarf_opcode(MSt, Opcode)). +-define(DWARF_LABEL(MMod, MSt, Label), MMod:dwarf_label(MSt, Label)). +-define(DWARF_FUNCTION(MMod, MSt, FunctionName, Arity), + MMod:dwarf_function(MSt, (State0#state.atom_resolver)(FunctionName), Arity) +). +-define(DWARF_LINE(MMod, MSt, Line), MMod:dwarf_line(MSt, Line)). +-else. +-define(DWARF_OPCODE(_MMod, MSt, _Opcode), MSt). +-define(DWARF_LABEL(MMod, MSt, _Label), MSt). +-define(DWARF_FUNCTION(_MMod, MSt, _FunctionName, _Arity), MSt). +-define(DWARF_LINE(_MMod, MSt, _Line), MSt). +-endif. + %%----------------------------------------------------------------------------- %% @param LabelsCount number of labels %% @param Arch code for the architecture @@ -162,17 +146,17 @@ compile(CodeChunk, _AtomResolver, _LiteralResolver, _TypeResolver, _MMod, _MSt) error(badarg, [CodeChunk]). % 1 -first_pass( - <>, MMod, MSt0, State0 -) -> +first_pass(<>, MMod, MSt, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_literal(Rest0), ?TRACE("OP_LABEL ~p\n", [Label]), + MSt0 = ?DWARF_LABEL(MMod, MSt, Label), MSt1 = MMod:add_label(MSt0, Label), ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest1, MMod, MSt1, State0); % 2 -first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> +first_pass(<>, MMod, MSt, #state{tail_cache = TC} = State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"func_info/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_ModuleAtom, Rest1} = decode_atom(Rest0), {_FunctionName, Rest2} = decode_atom(Rest1), @@ -194,12 +178,14 @@ first_pass(<>, MMod, MSt0, #state{tail_cache = TC} MSt3 = MMod:free_native_registers(MSt2, [OffsetReg]), State0 end, - ?ASSERT_ALL_NATIVE_FREE(MSt3), - first_pass(Rest3, MMod, MSt3, State1); + MSt4 = ?DWARF_FUNCTION(MMod, MSt3, _FunctionName, _Arity), + ?ASSERT_ALL_NATIVE_FREE(MSt4), + first_pass(Rest3, MMod, MSt4, State1); % 3 first_pass( - <>, MMod, MSt0, #state{labels_count = LabelsCount} = State + <>, MMod, MSt, #state{labels_count = LabelsCount} = State ) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"int_call_end/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_INT_CALL_END\n", []), MSt1 = MMod:add_label(MSt0, LabelsCount), @@ -208,7 +194,8 @@ first_pass( ]), {State, MSt2}; % 4 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), @@ -217,7 +204,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest2, MMod, MSt1, State0); % 5 -first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> +first_pass(<>, MMod, MSt, #state{tail_cache = TC} = State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_last/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), @@ -250,7 +238,8 @@ first_pass(<>, MMod, MSt0, #state{tail_cache = TC} ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest3, MMod, MSt3, State1); % 6 -first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> +first_pass(<>, MMod, MSt, #state{tail_cache = TC} = State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_only/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), @@ -268,7 +257,8 @@ first_pass(<>, MMod, MSt0, #state{tail_cache = TC} ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest2, MMod, MSt1, State1); % 7 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_ext/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Arity, Rest1} = decode_literal(Rest0), {Index, Rest2} = decode_literal(Rest1), @@ -280,7 +270,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 8 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_ext_last/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Arity, Rest1} = decode_literal(Rest0), {Index, Rest2} = decode_literal(Rest1), @@ -293,7 +284,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest3, MMod, MSt2, State0); % 9 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bif0/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Bif, Rest1} = decode_literal(Rest0), {MSt1, FuncPtr} = MMod:call_primitive(MSt0, ?PRIM_GET_IMPORTED_BIF, [ @@ -309,7 +301,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest2, MMod, MSt5, State0); % 10 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bif1/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Bif, Rest2} = decode_literal(Rest1), @@ -326,7 +319,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 11 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bif2/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Bif, Rest2} = decode_literal(Rest1), @@ -344,7 +338,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest5, MMod, MSt6, State0); % 12 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"allocate/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {StackNeed, Rest1} = decode_literal(Rest0), {Live, Rest2} = decode_literal(Rest1), @@ -356,7 +351,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 13 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"allocate_heap/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {StackNeed, Rest1} = decode_literal(Rest0), {HeapNeed, Rest2} = decode_allocator_list(MMod, Rest1), @@ -369,7 +365,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest3, MMod, MSt2, State0); % 16 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"test_heap/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {HeapNeed, Rest1} = decode_allocator_list(MMod, Rest0), {Live, Rest2} = decode_literal(Rest1), @@ -381,7 +378,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 18 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"deallocate/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {NWords, Rest1} = decode_literal(Rest0), ?TRACE("OP_DEALLOCATE ~p\n", [NWords]), @@ -392,7 +390,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 19 -first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> +first_pass(<>, MMod, MSt, #state{tail_cache = TC} = State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"return/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_RETURN\n", []), % Optimized return: check if returning within same module @@ -428,7 +427,8 @@ first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = St ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest, MMod, MSt6, State1); % 20 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"send/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_SEND\n", []), {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_SEND, [ @@ -438,7 +438,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest, MMod, MSt2, State0); % 21 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"remove_message/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_REMOVE_MESSAGE\n", []), {MSt1, Reg1} = MMod:call_primitive(MSt0, ?PRIM_CANCEL_TIMEOUT, [ @@ -456,7 +457,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest, MMod, MSt6, State0); % 22 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"timeout/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_TIMEOUT\n", []), {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_TIMEOUT, [ @@ -466,7 +468,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest0, MMod, MSt2, State0); % 23 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"loop_rec/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ @@ -482,7 +485,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest2, MMod, MSt7, State0); % 24 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"loop_rec_end/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), ?TRACE("OP_LOOP_REC_END ~p\n", [Label]), @@ -498,7 +502,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest1, MMod, MSt5, State0); % 25 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"wait/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), ?TRACE("OP_WAIT ~p\n", [Label]), @@ -507,7 +512,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 26 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"wait_timeout/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, OffsetRef0} = MMod:set_continuation_to_offset(MSt0), @@ -533,7 +539,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt9), first_pass(Rest2, MMod, MSt9, State0); % 39 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_lt/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -549,7 +556,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 40 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_ge/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -565,7 +573,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 41 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_eq/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -584,7 +593,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 42 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_ne/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -598,7 +608,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 43 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_eq_exact/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -627,7 +638,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 44 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_ne_exact/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -652,7 +664,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 45 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_integer/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -663,7 +676,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 46 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_float/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -672,7 +686,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 47 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_number/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -705,7 +720,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest2, MMod, MSt4, State0); % 48 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_atom/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -717,7 +733,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 49 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_pid/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -728,7 +745,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 50 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_reference/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -750,7 +768,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest2, MMod, MSt8, State0); % 51 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_port/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -761,7 +780,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 52 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_nil/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -772,7 +792,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest2, MMod, MSt4, State0); % 53 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_binary/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -782,7 +803,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 55 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_list/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -800,7 +822,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 56 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_nonempty_list/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -812,7 +835,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 57 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_tuple/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -821,7 +845,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 58 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"test_arity/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -835,7 +860,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, MMod, MSt6, State0); % 59 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"select_val/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {DefaultLabel, Rest2} = decode_label(Rest1), @@ -865,7 +891,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest4, MMod, MSt3, State0); % 60 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"select_tuple_arity/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {DefaultLabel, Rest2} = decode_label(Rest1), @@ -889,7 +916,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 61 -first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> +first_pass(<>, MMod, MSt, #state{tail_cache = TC} = State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"jump/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), ?TRACE("OP_JUMP ~p\n", [Label]), @@ -907,7 +935,8 @@ first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = Sta end; % 62 % Same implementation as OP_TRY, to confirm. -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"catch/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), {Label, Rest2} = decode_label(Rest1), @@ -916,7 +945,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 63 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"catch_end/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_CATCH_END ~p\n", [Dest]), @@ -927,7 +957,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest1, MMod, MSt5, State0); % 64 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"move/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Source, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -937,7 +968,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest2, MMod, MSt4, State0); % 65 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_list/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, List, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, HeadDest, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -953,7 +985,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt10), first_pass(Rest3, MMod, MSt10, State0); % 66 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_tuple_element/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Source, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {Element, Rest2} = decode_literal(Rest1), @@ -966,7 +999,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, MMod, MSt6, State0); % 67 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"set_tuple_element/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, NewElement, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Tuple, Rest2} = decode_compact_term(Rest1, MMod, MSt1, State0), @@ -979,7 +1013,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, MMod, MSt6, State0); % 69 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_list/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Head, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Tail, Rest2} = decode_compact_term(Rest1, MMod, MSt1, State0), @@ -994,7 +1029,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest3, MMod, MSt7, State0); % 72 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"badmatch/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), ?TRACE("OP_BADMATCH ~p\n", [Arg1]), @@ -1004,7 +1040,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 73 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"if_end/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_IF_END\n", []), MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RAISE_ERROR, [ @@ -1013,7 +1050,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest0, MMod, MSt1, State0); % 74 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"case_end/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), ?TRACE("OP_CASE_END ~p\n", [Arg1]), @@ -1023,7 +1061,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 75 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_fun/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {ArgsCount, Rest1} = decode_literal(Rest0), ?TRACE("OP_CALL_FUN ~p\n", [ArgsCount]), @@ -1036,7 +1075,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest1, MMod, MSt4, State0); % 77 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_function/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1045,7 +1085,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 78 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_ext_only/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Arity, Rest1} = decode_literal(Rest0), {Index, Rest2} = decode_literal(Rest1), @@ -1055,7 +1096,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 96 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fmove/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FPRegIndex, Rest1} = decode_literal(Rest0), {MSt1, Dest, Rest2} = decode_dest(Rest1, MMod, MSt0), @@ -1065,7 +1107,8 @@ first_pass(<>, MMod, MSt MSt4 = MMod:free_native_registers(MSt3, [ResultReg, Dest]), ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest2, MMod, MSt4, State0); -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fmove/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {FPReg, Rest2} = decode_fp_register(Rest1), @@ -1078,7 +1121,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest2, MMod, MSt6, State0); % 97 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fconv/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Reg} = MMod:move_to_native_register(MSt1, SrcValue), @@ -1099,23 +1143,28 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest2, MMod, MSt8, State0); % 98 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fadd/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), first_pass_float3(?PRIM_FADD, Rest0, MMod, MSt0, State0); % 99 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fsub/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), first_pass_float3(?PRIM_FSUB, Rest0, MMod, MSt0, State0); % 100 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fmul/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), first_pass_float3(?PRIM_FMUL, Rest0, MMod, MSt0, State0); % 101 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fdiv/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), first_pass_float3(?PRIM_FDIV, Rest0, MMod, MSt0, State0); % 102 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fnegate/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Label, Rest1} = decode_label(Rest0), {{fp_reg, FPRegIndex1}, Rest2} = decode_fp_register(Rest1), @@ -1128,7 +1177,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest3, MMod, MSt2, State0); % 104 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), {Label, Rest2} = decode_label(Rest1), @@ -1137,7 +1187,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 105 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try_end/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_TRY_END ~p\n", [Dest]), @@ -1146,7 +1197,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest1, MMod, MSt3, State0); % 106 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try_case/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_TRY_CASE ~p\n", [Dest]), @@ -1155,7 +1207,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest1, MMod, MSt3, State0); % 107 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try_case_end/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), ?TRACE("OP_TRY_CASE_END ~p\n", [Arg1]), @@ -1165,7 +1218,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 108 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"raise/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Stacktrace, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, ExcValue, Rest2} = decode_compact_term(Rest1, MMod, MSt1, State0), @@ -1176,7 +1230,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 112 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"apply/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Arity, Rest1} = decode_literal(Rest0), {MSt1, Module} = read_any_xreg(Arity, MMod, MSt0), @@ -1191,7 +1246,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest1, MMod, MSt6, State0); % 113 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"apply_last/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Arity, Rest1} = decode_literal(Rest0), {NWords, Rest2} = decode_literal(Rest1), @@ -1209,7 +1265,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest2, MMod, MSt8, State0); % 114 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_boolean/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1222,7 +1279,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest2, MMod, MSt4, State0); % 115 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_function2/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1253,7 +1311,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest3, MMod, MSt8, State0); % 117 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_integer2/7">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1291,7 +1350,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt18), first_pass(Rest7, MMod, MSt18, State0); % 118 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_float2/7">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1328,7 +1388,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt17), first_pass(Rest7, MMod, MSt17, State0); % 119 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_binary2/7">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1416,7 +1477,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt28), first_pass(Rest7, MMod, MSt28, State0); % 120 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_bits2/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1446,7 +1508,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt15), first_pass(Rest5, MMod, MSt15, State0); % 121 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_test_tail2/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1464,7 +1527,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt10), first_pass(Rest3, MMod, MSt10, State0); % 124 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"gc_bif1/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Live, Rest2} = decode_literal(Rest1), @@ -1489,7 +1553,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest5, MMod, MSt7, State0); % 125 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"gc_bif2/6">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Live, Rest2} = decode_literal(Rest1), @@ -1515,7 +1580,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest6, MMod, MSt8, State0); % 129 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_bitstr/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1541,7 +1607,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest2, MMod, MSt8, State0); % 132 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_match_string/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1561,7 +1628,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt9), first_pass(Rest4, MMod, MSt9, State0); % 133 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_init_writable/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_BS_INIT_WRITABLE\n", []), HeapSize = term_binary_heap_size(0, MMod), @@ -1578,7 +1646,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest0, MMod, MSt6, State0); % 136 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"trim/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {NWords, Rest1} = decode_literal(Rest0), {_NRemaining, Rest2} = decode_literal(Rest1), @@ -1587,7 +1656,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest2, MMod, MSt1, State0); % 138 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_utf8/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1602,7 +1672,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest5, MMod, MSt6, State0); % 139 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_utf8/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1614,7 +1685,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest4, MMod, MSt3, State0); % 140 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_utf16/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1631,7 +1703,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest5, MMod, MSt6, State0); % 141 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_utf16/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1645,7 +1718,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest4, MMod, MSt3, State0); % 142 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_utf32/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1662,7 +1736,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest5, MMod, MSt6, State0); % 143 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_utf32/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1676,7 +1751,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest4, MMod, MSt3, State0); % 152 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"gc_bif3/7">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Live, Rest2} = decode_literal(Rest1), @@ -1711,12 +1787,14 @@ first_pass( ) -> {Line, Rest1} = decode_literal(Rest0), ?TRACE("OP_LINE ~p\n", [Line]), - Offset = MMod:offset(MSt), - first_pass(Rest1, MMod, MSt, State0#state{ + MSt0 = ?DWARF_LINE(MMod, MSt, Line), + Offset = MMod:offset(MSt0), + first_pass(Rest1, MMod, MSt0, State0#state{ line_offsets = [{Line, Offset} | AccLines] }); % 154 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_map_assoc/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Label, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1797,7 +1875,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt18), first_pass(Rest6, MMod, MSt18, State0); % 155 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_map_exact/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Label, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1871,7 +1950,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt14), first_pass(Rest6, MMod, MSt14, State0); % 156 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_map/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1880,7 +1960,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 157 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"has_map_fields/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1926,7 +2007,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest5, MMod, MSt7, State0); % 158 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_map_elements/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1983,8 +2065,9 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(Rest6, MMod, MSt14, State0); % 159 first_pass( - <>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0 + <>, MMod, MSt, #state{atom_resolver = AtomResolver} = State0 ) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_tagged_tuple/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -2019,7 +2102,8 @@ first_pass( ?ASSERT_ALL_NATIVE_FREE(MSt14), first_pass(Rest4, MMod, MSt14, State0); % 160 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"build_stacktrace/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_STACKTRACE_BUILD, [ctx]), MSt2 = MMod:move_to_vm_register(MSt1, ResultReg, {x_reg, 0}), @@ -2027,7 +2111,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest0, MMod, MSt3, State0); % 161 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"raw_raise/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, ExClassReg} = MMod:move_to_native_register(MSt0, {x_reg, 0}), MSt2 = MMod:if_block(MSt1, {ExClassReg, '==', ?ERROR_ATOM}, fun(BSt0) -> @@ -2043,7 +2128,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest0, MMod, MSt5, State0); % 162 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_hd/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1), @@ -2055,7 +2141,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, MMod, MSt6, State0); % 163 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_tl/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1), @@ -2067,7 +2154,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, MMod, MSt6, State0); % 164 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_tuple2/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), {ListSize, Rest2} = decode_extended_list_header(Rest1), @@ -2092,7 +2180,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest3, MMod, MSt7, State0); % 165 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_tail/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Src, Rest1} = decode_typed_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -2115,7 +2204,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt14), first_pass(Rest3, MMod, MSt14, State0); % 166 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_start_match3/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -2128,7 +2218,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 167 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_position/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Src, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -2144,7 +2235,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt9), first_pass(Rest3, MMod, MSt9, State0); % 168 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_set_position/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Src, Rest1} = decode_typed_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Pos, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt1, State0), @@ -2156,7 +2248,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest2, MMod, MSt6, State0); % 169 -first_pass(<>, MMod, MSt0, State) -> +first_pass(<>, MMod, MSt, State) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"swap/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, ArgA, Rest1} = decode_dest(Rest0, MMod, MSt0), {MSt2, ArgB, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -2168,7 +2261,8 @@ first_pass(<>, MMod, MSt0, State) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest2, MMod, MSt6, State); % 170 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_start_match4/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_atom_or_label(Rest0, State0), {Live, Rest2} = decode_literal(Rest1), @@ -2189,7 +2283,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 171 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"make_fun3/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FunIndex, Rest1} = decode_literal(Rest0), {MSt1, Dest, Rest2} = decode_dest(Rest1, MMod, MSt0), @@ -2217,7 +2312,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest4, MMod, MSt7, State0); % 172 -first_pass(<>, MMod, MSt0, State) -> +first_pass(<>, MMod, MSt, State) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"init_yregs/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {ListSize, Rest1} = decode_extended_list_header(Rest0), ?TRACE("OP_INIT_YREGS ~p\n", [ListSize]), @@ -2234,7 +2330,8 @@ first_pass(<>, MMod, MSt0, State) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest2, MMod, MSt1, State); % 173 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_bind/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, RegA, Rest1} = decode_dest(Rest0, MMod, MSt0), {MSt2, RegB, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -2243,7 +2340,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 174 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_clear/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, RegA, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_RECV_MARKER_CLEAR ~p\n", [RegA]), @@ -2251,7 +2349,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 175 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_reserve/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_RECV_MARKER_RESERVE ~p\n", [Dest]), @@ -2261,7 +2360,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest1, MMod, MSt3, State0); % 176 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_use/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, RegA, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_RECV_MARKER_USE ~p\n", [RegA]), @@ -2270,8 +2370,9 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(Rest1, MMod, MSt2, State0); % 177 first_pass( - <>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0 + <>, MMod, MSt, #state{atom_resolver = AtomResolver} = State0 ) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_create_bin/6">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {Alloc, Rest2} = decode_allocator_list(MMod, Rest1), @@ -2403,7 +2504,8 @@ first_pass( ?ASSERT_ALL_NATIVE_FREE(MSt19), first_pass(Rest7, MMod, MSt19, State1); % 178 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_fun2/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Tag, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {ArgsCount, Rest2} = decode_literal(Rest1), @@ -2419,7 +2521,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, MMod, MSt6, State0); % 180 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"badrecord/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), ?TRACE("OP_BADRECORD ~p\n", [Arg1]), @@ -2430,8 +2533,9 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(Rest1, MMod, MSt2, State0); % 181 first_pass( - <>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0 + <>, MMod, MSt, #state{atom_resolver = AtomResolver} = State0 ) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"update_record/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {HintAtomIndex, Rest1} = decode_atom(Rest0), Hint = AtomResolver(HintAtomIndex), @@ -2507,7 +2611,8 @@ first_pass( ?ASSERT_ALL_NATIVE_FREE(MSt11), first_pass(Rest6, MMod, MSt11, State0); % 182 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_match/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, MatchState, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index be1b62f9b4..a82731c73b 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -72,6 +72,17 @@ add_label/3 ]). +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2 +]). +-endif. + +-compile([warnings_as_errors]). + -include_lib("jit.hrl"). -include("primitives.hrl"). @@ -199,6 +210,8 @@ -define(PARAMETER_REGS, [r0, r1, r2, r3, r4, r5]). -define(SCRATCH_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6, r17]). +-include("jit_backend_dwarf_impl.hrl"). + %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. %% sizeof(uintptr_t) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 81271bf540..921b1be1a8 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -72,6 +72,17 @@ add_label/3 ]). +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2 +]). +-endif. + +-compile([warnings_as_errors]). + -include_lib("jit.hrl"). -include("primitives.hrl"). @@ -216,6 +227,8 @@ -define(PARAMETER_REGS, [r0, r1, r2, r3]). -define(SCRATCH_REGS, [r7, r6, r5, r4, r3, r2, r1, r0, r12]). +-include("jit_backend_dwarf_impl.hrl"). + %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. %% sizeof(uintptr_t) diff --git a/libs/jit/src/jit_backend_dwarf_impl.hrl b/libs/jit/src/jit_backend_dwarf_impl.hrl new file mode 100644 index 0000000000..cfba532531 --- /dev/null +++ b/libs/jit/src/jit_backend_dwarf_impl.hrl @@ -0,0 +1,39 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-ifdef(JIT_DWARF). + +dwarf_opcode(#state{stream = Stream0} = State, OpCode) -> + Stream1 = jit_dwarf:opcode(Stream0, OpCode), + State#state{stream = Stream1}. + +dwarf_label(#state{stream = Stream0} = State, Label) -> + Stream1 = jit_dwarf:label(Stream0, Label), + State#state{stream = Stream1}. + +dwarf_line(#state{stream = Stream0} = State, Line) -> + Stream1 = jit_dwarf:line(Stream0, Line), + State#state{stream = Stream1}. + +dwarf_function(#state{stream = Stream0} = State, FunctionName, Arity) -> + Stream1 = jit_dwarf:function(Stream0, FunctionName, Arity), + State#state{stream = Stream1}. + +-endif. diff --git a/libs/jit/src/jit_dwarf.erl b/libs/jit/src/jit_dwarf.erl new file mode 100644 index 0000000000..10d482d0ee --- /dev/null +++ b/libs/jit/src/jit_dwarf.erl @@ -0,0 +1,1496 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_dwarf). + +-record(dwarf, { + % Backend module (jit_armv6m, etc.) + backend :: module(), + % Current module being compiled + module_name :: module(), + opcodes = [] :: [{Offset :: non_neg_integer(), Opcode :: atom(), Size :: non_neg_integer()}], + labels = [] :: [{Offset :: non_neg_integer(), Label :: non_neg_integer()}], + functions = [] :: [ + {Offset :: non_neg_integer(), FunctionName :: atom(), Arity :: non_neg_integer()} + ], + lines = [] :: [ + {Offset :: non_neg_integer(), Filename :: binary(), LineNumber :: pos_integer()} + ], + stream_module :: module(), + stream :: any(), + line_resolver :: fun((non_neg_integer()) -> false | {ok, binary(), pos_integer()}) +}). + +-type state() :: #dwarf{}. + +-export([ + new/5, + opcode/2, + label/2, + function/3, + line/2, + stream/1, + elf/2 +]). + +% jit_stream interface +-export([ + offset/1, + append/2, + replace/3, + map/4 +]). + +%%----------------------------------------------------------------------------- +%% @returns A new state +%% @doc Create a new state with the proxied stream. +%% @end +%%----------------------------------------------------------------------------- +-spec new(module(), module(), module(), pos_integer(), fun( + (non_neg_integer()) -> false | {ok, binary(), pos_integer()} +)) -> state(). +new(Backend, ModuleName, StreamModule, MaxSize, LineResolver) -> + Stream = StreamModule:new(MaxSize), + #dwarf{ + backend = Backend, + module_name = ModuleName, + stream_module = StreamModule, + stream = Stream, + line_resolver = LineResolver, + % Add jump table symbol at offset 0, size will be calculated + opcodes = [{0, jump_table, 0}] + }. + +%%----------------------------------------------------------------------------- +%% @param Stream stream to get the offset from +%% @returns The current offset +%% @doc Get the current offset in the stream +%% @end +%%----------------------------------------------------------------------------- +-spec offset(state()) -> non_neg_integer(). +offset(#dwarf{stream_module = StreamModule, stream = Stream}) -> + StreamModule:offset(Stream). + +%%----------------------------------------------------------------------------- +%% @param Stream stream to append to +%% @param Binary binary to append to the stream +%% @returns The updated stream +%% @doc Append a binary to the stream +%% @end +%%----------------------------------------------------------------------------- +-spec append(state(), binary()) -> state(). +append(#dwarf{stream_module = StreamModule, stream = Stream0} = State, Binary) -> + Stream1 = StreamModule:append(Stream0, Binary), + State#dwarf{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @param Stream stream to update +%% @param Offset offset to update from +%% @param Replacement binary to write at offset +%% @returns The updated stream +%% @doc Replace bytes at a given offset +%% @end +%%----------------------------------------------------------------------------- +-spec replace(state(), non_neg_integer(), binary()) -> state(). +replace(#dwarf{stream_module = StreamModule, stream = Stream0} = State, Offset, Replacement) -> + Stream1 = StreamModule:replace(Stream0, Offset, Replacement), + State#dwarf{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @param Stream stream to update +%% @param Offset offset to update from +%% @param Length length of the section to update +%% @param MapFunction function that updates the binary +%% @returns The updated stream +%% @doc Replace bytes at a given offset calling a map function +%% @end +%%----------------------------------------------------------------------------- +-spec map(state(), non_neg_integer(), pos_integer(), fun((binary()) -> binary())) -> state(). +map(#dwarf{stream_module = StreamModule, stream = Stream0} = State, Offset, Length, MapFunction) -> + Stream1 = StreamModule:map(Stream0, Offset, Length, MapFunction), + State#dwarf{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @param State current state +%% @param Opcode the opcode atom to record +%% @returns The updated state with opcode recorded at current offset +%% @doc Record an opcode at the current stream offset +%% @end +%%----------------------------------------------------------------------------- +-spec opcode(state(), binary()) -> state(); + (any(), binary()) -> any(). +opcode(#dwarf{stream_module = StreamModule, stream = Stream, opcodes = Opcodes0} = State, Opcode) -> + Offset = StreamModule:offset(Stream), + % Update size of previous opcode and add new opcode + Opcodes1 = update_previous_opcode_size(Opcodes0, Offset), + % Size will be calculated later + Opcodes2 = [{Offset, Opcode, 0} | Opcodes1], + State#dwarf{opcodes = Opcodes2}; +opcode(BackendStateDwarfDisabled, _Opcode) -> + BackendStateDwarfDisabled. + +%%----------------------------------------------------------------------------- +%% @param State current state +%% @param Label the label number to record +%% @returns The updated state with label recorded at current offset +%% @doc Record a label at the current stream offset +%% @end +%%----------------------------------------------------------------------------- +-spec label(state(), non_neg_integer()) -> state(); + (any(), non_neg_integer()) -> any(). +label( + #dwarf{stream_module = StreamModule, stream = Stream, labels = Labels0, opcodes = Opcodes0} = + State, + Label +) -> + Offset = StreamModule:offset(Stream), + % Update size of previous opcode before adding label + Opcodes1 = update_previous_opcode_size(Opcodes0, Offset), + Labels1 = [{Offset, Label} | Labels0], + State#dwarf{labels = Labels1, opcodes = Opcodes1}; +label(BackendStateDwarfDisabled, _Label) -> + BackendStateDwarfDisabled. + +%%----------------------------------------------------------------------------- +%% @param State current state +%% @param FunctionName the function name atom to record +%% @param Arity the function arity +%% @returns The updated state with function recorded at current offset +%% @doc Record a function at the current stream offset +%% @end +%%----------------------------------------------------------------------------- +-spec function(state(), atom(), non_neg_integer()) -> state(); + (any(), atom(), non_neg_integer()) -> any(). +function( + #dwarf{stream_module = StreamModule, stream = Stream, functions = Functions0} = State, + FunctionName, + Arity +) -> + Offset = StreamModule:offset(Stream), + Functions1 = [{Offset, FunctionName, Arity} | Functions0], + State#dwarf{functions = Functions1}; +function(BackendStateDwarfDisabled, _FunctionName, _Arity) -> + BackendStateDwarfDisabled. + +%%----------------------------------------------------------------------------- +%% @param State current state +%% @param Line the line number to record +%% @returns The updated state with line recorded at current offset +%% @doc Record a line number at the current stream offset +%% @end +%%----------------------------------------------------------------------------- +-spec line(state(), pos_integer()) -> state(); + (any(), pos_integer()) -> any(). +line( + #dwarf{ + stream_module = StreamModule, + stream = Stream, + lines = Lines0, + line_resolver = LineResolver, + module_name = ModuleName + } = State, + LineRef +) -> + Offset = StreamModule:offset(Stream), + case LineResolver(LineRef) of + {ok, Filename, LineNumber} -> + % Check if this is the first time we see the module file and add line 1 at offset 0 + Lines1 = maybe_add_initial_line(Lines0, ModuleName, Filename), + Lines2 = [{Offset, Filename, LineNumber} | Lines1], + State#dwarf{lines = Lines2}; + false -> + % No line information available, skip storing this line + State + end; +line(BackendStateDwarfDisabled, _LineRef) -> + BackendStateDwarfDisabled. + +%% Helper function to add line 1 at offset 0 for the module file if not already present +maybe_add_initial_line(Lines, ModuleName, Filename) -> + ExpectedBasename = <<(atom_to_binary(ModuleName, utf8))/binary, ".erl">>, + Basename = lists:last(binary:split(Filename, <<"/">>, [global])), + case Basename =:= ExpectedBasename of + true -> + % This is the module file, check if we already have an entry at offset 0 + case lists:any(fun({Offset, _, _}) -> Offset =:= 0 end, Lines) of + false -> + % Add line 1 at offset 0 for the jump table + [{0, Filename, 1} | Lines]; + true -> + % Already have an entry at offset 0, don't duplicate + Lines + end; + false -> + % Not the module file, no change needed + Lines + end. + +%% Helper function to update the size of the most recent opcode +update_previous_opcode_size([], _NewOffset) -> + % No previous opcode to update + []; +update_previous_opcode_size([{Offset, Opcode, 0} | Rest], NewOffset) -> + % Update the size of the most recent opcode + Size = NewOffset - Offset, + [{Offset, Opcode, Size} | Rest]; +update_previous_opcode_size([{Offset, Opcode, Size} | Rest], _NewOffset) when Size > 0 -> + % Previous opcode already has a calculated size, don't change it + [{Offset, Opcode, Size} | Rest]; +update_previous_opcode_size(Opcodes, _NewOffset) -> + % Unexpected format, return unchanged + Opcodes. + +-spec stream(state()) -> any(). +stream(#dwarf{stream = Stream}) -> + Stream. + +%%----------------------------------------------------------------------------- +%% @param State DWARF state containing debug information +%% @returns {ok, binary(), binary()} with ELF structure containing DWARF info, +%% (without and with native code in .text) or false if not compiled +%% with JIT_DWARF +%% @doc Generate ELF binaries with DWARF debug sections +%% @end +%%----------------------------------------------------------------------------- +-spec elf(state(), binary()) -> {ok, binary(), binary()} | false. +-ifdef(JIT_DWARF). +elf(#dwarf{module_name = ModuleName, backend = Backend} = State, NativeCode) -> + SourceFile = <<(atom_to_binary(ModuleName, utf8))/binary, ".erl">>, + + % Generate DWARF sections + DebugInfoSection = generate_debug_info_section_with_opcodes(State, SourceFile), + DebugLineSection = generate_debug_line_section(State, SourceFile), + DebugAbbrevSection = generate_debug_abbrev_section_with_opcodes(), + DebugStrSection = generate_debug_str_section(State, SourceFile), + + % Generate symbol table sections for function names + {SymtabSection, StrtabSection} = generate_symbol_table(State, Backend), + + % Create base sections list + BaseSections = [ + {<<".debug_info">>, DebugInfoSection}, + {<<".debug_line">>, DebugLineSection}, + {<<".debug_abbrev">>, DebugAbbrevSection}, + {<<".debug_str">>, DebugStrSection}, + {<<".symtab">>, SymtabSection}, + {<<".strtab">>, StrtabSection} + ], + + % Add ARM attributes section for armv6m backend + Sections = + case Backend of + jit_armv6m -> + ArmAttributesSection = generate_arm_attributes_section(), + BaseSections ++ [{<<".ARM.attributes">>, ArmAttributesSection}]; + _ -> + BaseSections + end, + + % Create complete ELF with text section and debug sections + {CombinedELF, TextSectionOffset} = create_elf_with_text_and_debug_sections( + Backend, Sections, NativeCode + ), + {ok, TextSectionOffset, CombinedELF}. +-else. +elf(_State, _NativeCode) -> + false. +-endif. + +-ifdef(JIT_DWARF). + +%% DWARF constants +-define(DW_TAG_compile_unit, 16#11). +-define(DW_TAG_subprogram, 16#2e). +-define(DW_TAG_lexical_block, 16#0b). +-define(DW_TAG_label, 16#0a). +-define(DW_AT_name, 16#03). +-define(DW_AT_comp_dir, 16#1b). +-define(DW_AT_producer, 16#25). +-define(DW_AT_language, 16#13). +-define(DW_AT_low_pc, 16#11). +-define(DW_AT_high_pc, 16#12). +-define(DW_AT_stmt_list, 16#10). +-define(DW_FORM_string, 16#08). +-define(DW_FORM_addr, 16#01). +-define(DW_FORM_data4, 16#06). +-define(DW_FORM_data1, 16#0b). +-define(DW_FORM_udata, 16#0f). +-define(DW_LANG_C, 16#02). +-define(DW_LANG_Erlang, 16#46). +-define(DW_LANG_Elixir, 16#47). +-define(DW_LANG_Gleam, 16#48). + +%% ELF constants +-define(EI_MAG0, 16#7f). +-define(EI_MAG1, $E). +-define(EI_MAG2, $L). +-define(EI_MAG3, $F). +-define(ELFCLASS32, 1). +-define(ELFCLASS64, 2). +-define(ELFDATA2LSB, 1). +-define(EV_CURRENT, 1). +-define(ET_REL, 1). +-define(EM_ARM, 40). +-define(EM_X86_64, 62). +-define(EM_AARCH64, 183). +-define(SHT_PROGBITS, 1). +-define(SHT_SYMTAB, 2). +-define(SHT_STRTAB, 3). +-define(SHT_ARM_ATTRIBUTES, 16#70000003). +-define(SHF_ALLOC, 2). +-define(SHF_EXECINSTR, 4). + +%% ARM EABI flags + +% EABI version 5 +-define(EF_ARM_EABI_VER5, 16#05000000). +% Soft float ABI +-define(EF_ARM_ABI_FLOAT_SOFT, 16#00000200). +% ARM architecture v6-M (Thumb-only) +-define(EF_ARM_ARCH_V6M, 16#00000009). + +%% Map JIT backend to ELF machine type +backend_to_machine_type(jit_x86_64) -> ?EM_X86_64; +backend_to_machine_type(jit_aarch64) -> ?EM_AARCH64; +backend_to_machine_type(jit_armv6m) -> ?EM_ARM. + +%% Map JIT backend to ELF flags +backend_to_elf_flags(jit_armv6m) -> + ?EF_ARM_EABI_VER5 bor ?EF_ARM_ABI_FLOAT_SOFT bor ?EF_ARM_ARCH_V6M; +backend_to_elf_flags(_) -> + 0. + +%% Find section index by name +find_section_index(SectionName, SectionNames) -> + find_section_index_helper(SectionName, SectionNames, 0). + +find_section_index_helper(_, [], _) -> + error({section_not_found}); +find_section_index_helper(SectionName, [SectionName | _], Index) -> + Index; +find_section_index_helper(SectionName, [_ | Rest], Index) -> + find_section_index_helper(SectionName, Rest, Index + 1). + +%% Find .symtab section index in section headers + +%% Generate ARM attributes section for ARMv6-M +generate_arm_attributes_section() -> + % ARM EABI attributes format according to ARM IHI 0045E + + % Build the tag-value pairs for file attributes + TagValuePairs = << + % CPU_arch attribute: ARMv6S-M (value 11) + 6, + 11, + % CPU_arch_profile attribute: 'M' profile (value 77 = 'M') + 7, + 77, + % ARM_ISA_use attribute: No ARM ISA (value 0) + 8, + 0, + % THUMB_ISA_use attribute: Thumb-1 only (value 1) + 9, + 1, + % FP_arch attribute: No FP (value 0) + 10, + 0, + % ABI_PCS_wchar_t attribute: 4 bytes (value 2) + 18, + 2, + % ABI_enum_size attribute: int-sized (value 2) + 26, + 2, + % ABI_align_needed attribute: 8-byte alignment (value 1) + 24, + 1, + % ABI_align_preserved attribute: 8-byte alignment (value 1) + 25, + 1 + >>, + + % Calculate file attributes subsection length (tag + length field + tag-value pairs) + FileAttributesLength = 1 + 4 + byte_size(TagValuePairs), + + % Build file attributes subsection + FileAttributes = << + % File attributes tag + 1, + % Length of this file attributes subsection + FileAttributesLength:32/little, + % The tag-value pairs + TagValuePairs/binary + >>, + + % Build vendor subsection ("aeabi" + null + file attributes) + VendorContent = <<"aeabi", 0, FileAttributes/binary>>, + VendorLength = byte_size(VendorContent), + + % Calculate total section length (format version + vendor length + vendor content) + TotalLength = 1 + 4 + VendorLength, + + % Build final section according to ARM EABI spec + << + % Format version 'A' + $A, + % Total section length (4 bytes, little-endian) + TotalLength:32/little, + % Vendor subsection content + VendorContent/binary + >>. + +generate_debug_str_section(#dwarf{module_name = ModuleName}, SourceFile) -> + % String table: null-terminated strings + Strings = [ + % Index 0: empty string + <<0>>, + % Index 1: source file name + SourceFile, + <<0>>, + % Index 2: producer + <<"AtomVM JIT Compiler v0.7.0">>, + <<0>>, + % Index 3: comp_dir + <<"/tmp">>, + <<0>>, + % Index 4: module name + atom_to_binary(ModuleName, utf8), + <<0>> + ], + iolist_to_binary(Strings). + +generate_debug_abbrev_section_with_opcodes() -> + % Abbreviation table + << + % Abbrev 1: DW_TAG_compile_unit + + % Abbreviation code + 1, + % Tag + ?DW_TAG_compile_unit, + % Has children (DW_CHILDREN_yes) + 1, + % Name attribute + ?DW_AT_name, + ?DW_FORM_string, + % Compilation directory + ?DW_AT_comp_dir, + ?DW_FORM_string, + % Producer + ?DW_AT_producer, + ?DW_FORM_string, + % Language + ?DW_AT_language, + ?DW_FORM_data4, + % Low PC + ?DW_AT_low_pc, + ?DW_FORM_addr, + % High PC + ?DW_AT_high_pc, + ?DW_FORM_addr, + % Statement list + ?DW_AT_stmt_list, + ?DW_FORM_data4, + % End of attributes + 0, + 0, + + % Abbrev 2: DW_TAG_lexical_block (for opcodes) + % Abbreviation code + 2, + % Tag + ?DW_TAG_lexical_block, + % Has no children + 0, + % Name attribute (opcode name) + ?DW_AT_name, + ?DW_FORM_string, + % Low PC + ?DW_AT_low_pc, + ?DW_FORM_addr, + % End of attributes + 0, + 0, + + % Abbrev 3: DW_TAG_label (for labels) + % Abbreviation code + 3, + % Tag + ?DW_TAG_label, + % Has no children + 0, + % Name attribute (label name) + ?DW_AT_name, + ?DW_FORM_string, + % Low PC + ?DW_AT_low_pc, + ?DW_FORM_addr, + % End of attributes + 0, + 0, + + % Abbrev 4: DW_TAG_subprogram (for functions) + % Abbreviation code + 4, + % Tag + ?DW_TAG_subprogram, + % Has no children + 0, + % Name attribute (module:function/arity) + ?DW_AT_name, + ?DW_FORM_string, + % Low PC + ?DW_AT_low_pc, + ?DW_FORM_addr, + % High PC + ?DW_AT_high_pc, + ?DW_FORM_addr, + % End of attributes + 0, + 0, + + % End of abbreviations + 0 + >>. + +generate_debug_info_section_with_opcodes( + #dwarf{functions = Functions, opcodes = Opcodes, labels = Labels, module_name = ModuleName} = + State, + SourceFile +) -> + % Calculate address ranges + {LowPC, HighPC} = calculate_address_range(State), + + % Build content first to calculate actual length + CompileUnitContent = << + % DWARF version + 4:16/little, + % Abbreviation offset + 0:32/little, + % Address size + 4, + % Compilation unit DIE (abbreviation 1) + 1, + % DW_AT_name + SourceFile/binary, + 0, + % DW_AT_comp_dir + "/tmp", + 0, + % DW_AT_producer + "AtomVM JIT Compiler v0.7.0", + 0, + % DW_AT_language + ?DW_LANG_Erlang:32/little, % for now, we always say Erlang + % DW_AT_low_pc + LowPC:32/little, + % DW_AT_high_pc + HighPC:32/little, + % DW_AT_stmt_list (offset into .debug_line) + 0:32/little + >>, + + % Generate DIEs for functions, opcodes and labels + FunctionDIEs = generate_function_dies_with_module(Functions, ModuleName), + OpcodeDIEs = generate_opcode_dies(Opcodes), + LabelDIEs = generate_label_dies(Labels), + + % End of children marker + EndMarker = <<0>>, + + % Calculate actual unit length (everything after the length field) + Content = + <>, + UnitLength = byte_size(Content), + + % Build final section with correct length + <>. + +generate_debug_line_section(#dwarf{lines = Lines, opcodes = _Opcodes}, SourceFile) -> + % Build header content first to calculate actual lengths + HeaderContent = << + % DWARF version + 4:16/little, + % Header length (placeholder, calculated below) + 0:32/little, + % Minimum instruction length (Thumb) + 2, + % Maximum operations per instruction + 1, + % Default is_stmt + 1, + % Line base + (-5):8/signed, + % Line range + 14, + % Opcode base + 13 + >>, + + % Standard opcode lengths (for opcodes 1-12, opcode_base-1 entries) + % DW_LNS_copy(1)=0, DW_LNS_advance_pc(2)=1, DW_LNS_advance_line(3)=1, etc. + StdOpcodeLengths = <<0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1>>, + + % Build file table with actual filenames from line data + UniqueFullPaths = + case Lines of + [] -> + [SourceFile]; + _ -> + % Extract unique filenames from Lines, don't add SourceFile as it may be a duplicate + Filenames = [Filename || {_Offset, Filename, _LineNum} <- Lines], + lists:usort(Filenames) + end, + + % Split paths into directories and filenames, avoiding duplicates + {Directories, FileEntries, _} = lists:foldl( + fun(FullPath, {DirAcc, FileAcc, FileSet}) -> + case filename:split(binary_to_list(FullPath)) of + [Basename] -> + % Just a filename, no directory + FileKey = {Basename, 0}, + case sets:is_element(FileKey, FileSet) of + % Skip duplicate + true -> {DirAcc, FileAcc, FileSet}; + false -> {DirAcc, [FileKey | FileAcc], sets:add_element(FileKey, FileSet)} + end; + PathParts -> + DirParts = lists:droplast(PathParts), + Dir = filename:join(DirParts), + Basename = lists:last(PathParts), + % Find or add directory to get proper index (1-based) + {NewDirAcc, DirIndex} = + case lists:search(fun(D) -> D =:= Dir end, DirAcc) of + {value, _} -> + % Find index of existing directory (1-based) + ExistingIndex = + length(lists:takewhile(fun(D) -> D =/= Dir end, DirAcc)) + 1, + {DirAcc, ExistingIndex}; + false -> + % Add new directory and return its 1-based index + NewIndex = length(DirAcc) + 1, + {DirAcc ++ [Dir], NewIndex} + end, + FileKey = {Basename, DirIndex}, + case sets:is_element(FileKey, FileSet) of + % Skip duplicate + true -> + {NewDirAcc, FileAcc, FileSet}; + false -> + {NewDirAcc, [FileKey | FileAcc], sets:add_element(FileKey, FileSet)} + end + end + end, + {[], [], sets:new()}, + UniqueFullPaths + ), + + % Build directory table + DirectoryTable = lists:foldl( + fun(Dir, Acc) -> + DirBin = list_to_binary(Dir), + <> + end, + <<>>, + Directories + ), + + % Build file table entries with proper ULEB128 encoding for directory index + FileTableEntries = lists:foldl( + fun({Filename, DirIndex}, Acc) -> + DirIndexEncoded = encode_uleb128(DirIndex), + <> + end, + <<>>, + lists:reverse(FileEntries) + ), + + FileTable = << + % Directory table + DirectoryTable/binary, + % End of directory table + 0, + % File table entries + FileTableEntries/binary, + % End of file table + 0 + >>, + + % Line number program - using actual line data with file mapping + FileMapping = lists:zip(UniqueFullPaths, lists:seq(1, length(FileEntries))), + Program = generate_line_program(Lines, FileMapping), + + % Calculate actual header length (everything from version to end of file table) + HeaderPlusTablesContent = <>, + % -4 for header_length field itself + HeaderLength = byte_size(HeaderContent) - 4 + byte_size(HeaderPlusTablesContent), + + % Build corrected header with actual length + CorrectedHeader = << + % DWARF version + 4:16/little, + % Header length (actual) + HeaderLength:32/little, + % Minimum instruction length (Thumb) + 2, + % Maximum operations per instruction + 1, + % Default is_stmt + 1, + % Line base + (-5):8/signed, + % Line range + 14, + % Opcode base + 13 + >>, + + % Calculate total unit length (everything after unit length field) + ContentAfterLength = + <>, + UnitLength = byte_size(ContentAfterLength), + + <>. + +create_elf_header_and_sections(Backend, Sections) -> + % Determine ELF format based on backend word size + WordSize = Backend:word_size(), + % 32 or 64 bits + WordSizeInBits = WordSize * 8, + ElfClass = + case WordSize of + 8 -> ?ELFCLASS64; + 4 -> ?ELFCLASS32 + end, + + % ELF format dependent sizes + {ElfHeaderSize, SectionHeaderSize} = + case WordSize of + % ELF64 + 8 -> {64, 64}; + % ELF32 + 4 -> {52, 40} + end, + + % Create section name string table (dynamic based on sections) + SectionNames = + [<<>>] ++ [SectionName || {SectionName, _Section} <- Sections] ++ [<<".shstrtab">>], + ShStrTab = create_string_table(SectionNames), + + % Calculate offsets + % null + debug sections + shstrtab + SectionCount = length(SectionNames), + + % String table index is the last section + ShStrTabIndex = SectionCount - 1, + + % Section data layout: debug sections + string table + {SectionData, SectionOffsets} = layout_sections(Sections, ShStrTab, ElfHeaderSize), + + % Section headers start after all section data + SectionHeaderOffset = ElfHeaderSize + byte_size(SectionData), + + % Get machine type and flags for this backend + MachineType = backend_to_machine_type(Backend), + ElfFlags = backend_to_elf_flags(Backend), + + % ELF header + ElfHeader = << + % Magic + ?EI_MAG0, + ?EI_MAG1, + ?EI_MAG2, + ?EI_MAG3, + % ELF class (32-bit or 64-bit) + ElfClass, + % Little endian + ?ELFDATA2LSB, + % ELF version + ?EV_CURRENT, + % OS ABI + 0, + % ABI version + 0, + % Padding + 0, + 0, + 0, + 0, + 0, + 0, + 0, + % Relocatable file + ?ET_REL:16/little, + % Architecture from backend + MachineType:16/little, + % Version + 1:32/little, + % Entry point - 32 or 64 bit depending on word size + 0:WordSizeInBits/little, + % Program header offset - 32 or 64 bit depending on word size + 0:WordSizeInBits/little, + % Section header offset - 32 or 64 bit depending on word size + SectionHeaderOffset:WordSizeInBits/little, + % Flags + ElfFlags:32/little, + % ELF header size + ElfHeaderSize:16/little, + % Program header entry size + 0:16/little, + % Program header count + 0:16/little, + % Section header entry size + SectionHeaderSize:16/little, + % Section count + SectionCount:16/little, + % String table index (.shstrtab) + ShStrTabIndex:16/little + >>, + + % Generate section headers + SectionHeaders = create_section_headers_proper( + SectionNames, Sections, SectionOffsets, ShStrTab, Backend, WordSizeInBits + ), + + <>. + +%% Helper functions +calculate_address_range(#dwarf{opcodes = Opcodes}) -> + case Opcodes of + [] -> + {0, 0}; + _ -> + % Use the new 3-tuple format {Offset, Opcode, Size} + OffsetsAndSizes = [{Offset, Size} || {Offset, _, Size} <- Opcodes], + Offsets = [Offset || {Offset, _} <- OffsetsAndSizes], + MinOffset = lists:min(Offsets), + % For max, use offset + size, or fallback to offset + 4 if size is 0 + MaxOffset = lists:max([ + case Size of + % Fallback for opcodes without calculated size + 0 -> Offset + 4; + _ -> Offset + Size + end + || {Offset, Size} <- OffsetsAndSizes + ]), + {MinOffset, MaxOffset} + end. + +generate_line_program(Lines, FileMapping) -> + case Lines of + [] -> + % No line data - generate simple program + << + % Set file to 1 using DW_LNS_set_file (opcode 4) with file index 1 + 4, + 1, + % End sequence: extended opcode + % Extended opcode prefix + 0, + % Length of extended opcode + 1, + % DW_LNE_end_sequence + 1 + >>; + _ -> + % Sort lines by offset + SortedLines = lists:sort( + fun({OffsetA, _, _}, {OffsetB, _, _}) -> + OffsetA =< OffsetB + end, + Lines + ), + generate_line_program_entries(SortedLines, FileMapping, 0, 1, 0) + end. + +generate_line_program_entries([], _FileMapping, _LastOffset, _LastLine, _LastFileIndex) -> + % End the sequence + << + % End sequence: extended opcode + % Extended opcode prefix + 0, + % Length of extended opcode + 1, + % DW_LNE_end_sequence + 1 + >>; +generate_line_program_entries( + [{Offset, Filename, LineNumber} | Rest], FileMapping, LastOffset, LastLine, LastFileIndex +) -> + % Generate DWARF line program opcodes + % For simplicity, we'll use DW_LNS_advance_pc and DW_LNS_advance_line + + % Find file index from mapping + FileIndex = + case lists:keyfind(Filename, 1, FileMapping) of + {Filename, Index} -> Index; + % Default to first file if not found + false -> 1 + end, + + % Calculate address and line deltas + AddressDelta = Offset - LastOffset, + LineDelta = LineNumber - LastLine, + + % Build opcodes + FileOpcodes = + if + FileIndex =/= LastFileIndex -> + % DW_LNS_set_file (opcode 4) with file index + <<4, FileIndex>>; + true -> + <<>> + end, + + InitialOpcodes = + if + LastOffset == 0 -> + % Set initial file index + <<4, FileIndex>>; + true -> + FileOpcodes + end, + + Opcodes = << + InitialOpcodes/binary, + % DW_LNS_advance_pc (opcode 2) with ULEB128 delta + 2, + (encode_uleb128(AddressDelta))/binary, + % DW_LNS_advance_line (opcode 3) with SLEB128 delta + 3, + (encode_sleb128(LineDelta))/binary, + % DW_LNS_copy (opcode 1) - emit a new row + 1 + >>, + + RestOpcodes = generate_line_program_entries(Rest, FileMapping, Offset, LineNumber, FileIndex), + <>. + +% Encode unsigned LEB128 +encode_uleb128(Value) when Value < 128 -> + <>; +encode_uleb128(Value) -> + Byte = (Value band 16#7F) bor 16#80, + Rest = encode_uleb128(Value bsr 7), + <>. + +% Encode signed LEB128 +encode_sleb128(Value) when Value >= -64, Value < 64 -> + ByteValue = Value band 16#7F, + <>; +encode_sleb128(Value) when Value >= 0 -> + encode_uleb128(Value); +encode_sleb128(Value) -> + encode_sleb128_negative(Value). + +encode_sleb128_negative(Value) -> + Byte = Value band 16#7F, + NewValue = Value bsr 7, + if + NewValue == -1, (Byte band 16#40) =/= 0 -> + <>; + true -> + ByteWithCont = Byte bor 16#80, + Rest = encode_sleb128_negative(NewValue), + <> + end. + +%% Generate DIEs for functions as DW_TAG_subprogram with module:func/arity naming +generate_function_dies_with_module(Functions, ModuleName) -> + % Filter and sort functions by address + ValidFunctions = lists:sort([ + {Offset, FunctionName, Arity} + || {Offset, FunctionName, Arity} <- Functions, Offset >= 0 + ]), + + % Generate DIE for each function + FunctionDIEsList = [ + generate_function_die_with_module(Offset, FunctionName, Arity, ModuleName) + || {Offset, FunctionName, Arity} <- ValidFunctions + ], + iolist_to_binary(FunctionDIEsList). + +%% Generate DIE for a single function with module name +generate_function_die_with_module(Offset, FunctionName, Arity, ModuleName) -> + % Create module:function/arity format + FunctionString = list_to_binary(io_lib:format("~s:~s/~B", [ModuleName, FunctionName, Arity])), + % Estimate function size (can be improved later) + FunctionSize = 100, + << + % Abbreviation code (4 = DW_TAG_subprogram) + 4, + % DW_AT_name + FunctionString/binary, + 0, + % DW_AT_low_pc + Offset:32/little, + % DW_AT_high_pc (low_pc + size) + (Offset + FunctionSize):32/little + >>. + +%% Generate DIEs for opcodes as DW_TAG_lexical_block +generate_opcode_dies(Opcodes) -> + % Filter and sort opcodes by address + ValidOpcodes = lists:sort([{Offset, Opcode} || {Offset, Opcode} <- Opcodes, Offset >= 0]), + + % Generate DIE for each opcode + OpcodeDIEsList = [generate_opcode_die(Offset, Opcode) || {Offset, Opcode} <- ValidOpcodes], + iolist_to_binary(OpcodeDIEsList). + +%% Generate DIE for a single opcode +generate_opcode_die(Offset, Opcode) -> + OpcodeString = list_to_binary(io_lib:format("~s@~B", [Opcode, Offset])), + << + % Abbreviation code (2 = DW_TAG_lexical_block) + 2, + % DW_AT_name + OpcodeString/binary, + 0, + % DW_AT_low_pc + Offset:32/little + >>. + +%% Generate DIEs for labels as DW_TAG_label +generate_label_dies(Labels) -> + % Filter and sort labels by address + ValidLabels = lists:sort([{Offset, Label} || {Offset, Label} <- Labels, Offset >= 0]), + + % Generate DIE for each label + LabelDIEsList = [generate_label_die(Offset, Label) || {Offset, Label} <- ValidLabels], + iolist_to_binary(LabelDIEsList). + +%% Generate DIE for a single label +generate_label_die(Offset, Label) -> + LabelString = list_to_binary(io_lib:format("label_~B", [Label])), + << + % Abbreviation code (3 = DW_TAG_label) + 3, + % DW_AT_name + LabelString/binary, + 0, + % DW_AT_low_pc + Offset:32/little + >>. + +%% Generate symbol table for function names and opcode symbols +generate_symbol_table( + #dwarf{functions = Functions, opcodes = Opcodes, labels = Labels, module_name = ModuleName}, + Backend +) -> + % Determine ELF format based on backend word size + WordSize = Backend:word_size(), + % Build string table for symbol names (functions) with module:function/arity format + FunctionNames = [ + list_to_binary(io_lib:format("~s:~s/~B", [ModuleName, FunctionName, Arity])) + || {_Offset, FunctionName, Arity} <- Functions + ], + % Build string table for opcode symbols with module:op_opcode@offset format + OpcodeNames = [ + list_to_binary(io_lib:format("~s:op_~s@~w", [ModuleName, Opcode, Offset])) + || {Offset, Opcode, _Size} <- Opcodes + ], + % Build string table for label symbols with module:label_X@offset format + LabelNames = [ + list_to_binary(io_lib:format("~s:label_~w@~w", [ModuleName, LabelNum, Offset])) + || {Offset, LabelNum} <- Labels + ], + % Add ARM mapping symbol to indicate Thumb code (for armv6m backend) + MappingSymbols = + case Backend of + % Thumb mapping symbol at start of .text section + jit_armv6m -> [<<"$t">>]; + _ -> [] + end, + SymbolNames = FunctionNames ++ OpcodeNames ++ LabelNames ++ MappingSymbols, + + % String table starts with null string + StrtabContent = lists:foldl( + fun(Name, Acc) -> + <> + end, + % Start with null string at offset 0 + <<0>>, + SymbolNames + ), + + % Calculate string offsets + {_, StringOffsets} = lists:foldl( + fun(Name, {CurrentOffset, Offsets}) -> + % +1 for null terminator + NextOffset = CurrentOffset + byte_size(Name) + 1, + {NextOffset, [CurrentOffset | Offsets]} + end, + % Start after null string + {1, []}, + SymbolNames + ), + ReversedOffsets = lists:reverse(StringOffsets), + + % Generate symbol table entries + % First entry is always the null symbol + NullSymbol = + case WordSize of + 8 -> + % ELF64: 24 bytes - st_name(4) + st_info(1) + st_other(1) + st_shndx(2) + st_value(8) + st_size(8) + <<0:32/little, 0, 0, 0:16/little, 0:64/little, 0:64/little>>; + 4 -> + % ELF32: 16 bytes - st_name(4) + st_value(4) + st_size(4) + st_info(1) + st_other(1) + st_shndx(2) + <<0:32/little, 0:32/little, 0:32/little, 0, 0, 0:16/little>> + end, + + % Generate function symbols + FunctionSymbols = lists:foldl( + fun({{Offset, _FunctionName, _Arity}, StringOffset}, Acc) -> + % Function name is now module:function/arity (already in FunctionNames) + % Estimated function size + FuncSize = 100, + + % Use raw offset for symbol address (no Thumb bit) + FunctionAddress = Offset, + + % Symbol table entry (format depends on word size) + Symbol = + case WordSize of + 8 -> + % ELF64: 24 bytes - st_name(4) + st_info(1) + st_other(1) + st_shndx(2) + st_value(8) + st_size(8) + << + StringOffset:32/little, + % st_info (STB_GLOBAL << 4 | STT_FUNC) + 16#12, + % st_other + 0, + % st_shndx (section index - .text will be section 1) + 1:16/little, + % st_value (function address) + FunctionAddress:64/little, + % st_size (function size) + FuncSize:64/little + >>; + 4 -> + % ELF32: 16 bytes - st_name(4) + st_value(4) + st_size(4) + st_info(1) + st_other(1) + st_shndx(2) + << + StringOffset:32/little, + FunctionAddress:32/little, + FuncSize:32/little, + 16#12, + 0, + 1:16/little + >> + end, + <> + end, + <<>>, + lists:zip(Functions, lists:sublist(ReversedOffsets, length(Functions))) + ), + + % Generate opcode symbols + OpcodeStringOffsets = lists:sublist(ReversedOffsets, length(Functions) + 1, length(Opcodes)), + OpcodeSymbols = lists:foldl( + fun({{Offset, _Opcode, Size}, StringOffset}, Acc) -> + % Use raw offset for symbol address (no Thumb bit) + OpcodeAddress = Offset, + + % Symbol table entry (format depends on word size) + Symbol = + case WordSize of + 8 -> + % ELF64: 24 bytes - st_name(4) + st_info(1) + st_other(1) + st_shndx(2) + st_value(8) + st_size(8) + << + StringOffset:32/little, + % st_info (STB_GLOBAL << 4 | STT_NOTYPE) + 16#10, + % st_other + 0, + % st_shndx (section index - .text will be section 1) + 1:16/little, + % st_value (opcode address) + OpcodeAddress:64/little, + % st_size (actual calculated opcode size) + Size:64/little + >>; + 4 -> + % ELF32: 16 bytes - st_name(4) + st_value(4) + st_size(4) + st_info(1) + st_other(1) + st_shndx(2) + << + StringOffset:32/little, + OpcodeAddress:32/little, + Size:32/little, + 16#10, + 0, + 1:16/little + >> + end, + <> + end, + <<>>, + lists:zip(Opcodes, OpcodeStringOffsets) + ), + + % Generate label symbols + LabelStringOffsets = lists:sublist( + ReversedOffsets, length(Functions) + length(Opcodes) + 1, length(Labels) + ), + LabelSymbols = lists:foldl( + fun({{Offset, _LabelNum}, StringOffset}, Acc) -> + % Use raw offset for symbol address + LabelAddress = Offset, + + % Symbol table entry (format depends on word size) + Symbol = + case WordSize of + 8 -> + % ELF64: 24 bytes - st_name(4) + st_info(1) + st_other(1) + st_shndx(2) + st_value(8) + st_size(8) + << + StringOffset:32/little, + % st_info (STB_GLOBAL << 4 | STT_NOTYPE) + 16#10, + % st_other + 0, + % st_shndx (section index - .text will be section 1) + 1:16/little, + % st_value (label address) + LabelAddress:64/little, + % st_size (label size - 0 for point labels) + 0:64/little + >>; + 4 -> + % ELF32: 16 bytes - st_name(4) + st_value(4) + st_size(4) + st_info(1) + st_other(1) + st_shndx(2) + << + StringOffset:32/little, + LabelAddress:32/little, + 0:32/little, + 16#10, + 0, + 1:16/little + >> + end, + <> + end, + <<>>, + lists:zip(Labels, LabelStringOffsets) + ), + + % Generate mapping symbols for ARM (Thumb indicator) + MappingSymbolOffsets = + case Backend of + jit_armv6m -> + lists:sublist( + ReversedOffsets, length(Functions) + length(Opcodes) + length(Labels) + 1, 1 + ); + _ -> + [] + end, + MappingSymbolBinaries = + case Backend of + jit_armv6m -> + [StringOffset] = MappingSymbolOffsets, + % $t mapping symbol at address 0 (start of .text) to indicate Thumb code + MappingSymbol = << + % st_name (offset in string table for "$t") + StringOffset:32/little, + % st_value (address 0 - start of .text section) + 0:32/little, + % st_size (0 for mapping symbols) + 0:32/little, + % st_info (STB_LOCAL << 4 | STT_NOTYPE) - local symbol + 16#00, + % st_other + 0, + % st_shndx (section index - .text will be section 1) + 1:16/little + >>, + <>; + _ -> + <<>> + end, + + % Symbol table must have local symbols first, then global symbols + SymtabContent = + <>, + + {SymtabContent, StrtabContent}. + +%% Create string table from list of binaries +create_string_table(Binaries) -> + <<<> || Binary <- Binaries>>. + +%% Layout sections in memory and calculate offsets +layout_sections(Sections, ShStrTab, BaseOffset) -> + {Data, Offsets} = lists:foldl( + fun({_Name, SectionData}, {AccData, AccOffsets}) -> + Offset = BaseOffset + byte_size(AccData), + NewData = <>, + NewOffsets = [Offset | AccOffsets], + {NewData, NewOffsets} + end, + {<<>>, []}, + Sections + ), + + % Add string table at the end + ShStrTabOffset = BaseOffset + byte_size(Data), + FinalData = <>, + FinalOffsets = [ShStrTabOffset | lists:reverse(Offsets)], + + {FinalData, FinalOffsets}. + +%% Create properly formatted section headers +create_section_headers_proper( + SectionNames, Sections, SectionOffsets, ShStrTab, Backend, WordSizeInBits +) -> + % Create null section header (index 0) + % Size depends on ELF format: 40 bytes (ELF32) or 64 bytes (ELF64) + SectionHeaderSizeBits = + case WordSizeInBits of + % 64 bytes * 8 bits + 64 -> 512; + % 40 bytes * 8 bits + 32 -> 320 + end, + NullHeader = <<0:SectionHeaderSizeBits/little>>, + + % Create section headers for all sections (indices 1-6) + % SectionOffsets from layout_sections: [ShStrTabOffset, ...SectionOffsets in order...] + [_ShStrTabOffset | SectionOffsetsInOrder] = SectionOffsets, + + SectionHeaders = lists:foldl( + fun({_Index, {{SectionName, SectionData}, FileOffset}}, Acc) -> + % Calculate name offset in string table by finding the null-terminated section name + SectionNameWithNull = <>, + {NameOffset, _Length} = binary:match(ShStrTab, SectionNameWithNull), + + % Determine section type, properties, and flags + {SectionType, SectionFlags, Link, Info, EntrySize} = + case SectionName of + <<".symtab">> -> + % Find .strtab index dynamically + StrtabIndex = find_section_index(<<".strtab">>, SectionNames), + % Local symbols: null symbol + mapping symbol (for armv6m) + NumLocalSymbols = + case Backend of + % null + $t mapping symbol + jit_armv6m -> 2; + % only null symbol + _ -> 1 + end, + % SHT_SYMTAB, link to strtab, info = first non-local symbol, entsize = 16 + SymTabEntrySize = + case WordSizeInBits of + 32 -> 16; + 64 -> 24 + end, + {?SHT_SYMTAB, 0, StrtabIndex, NumLocalSymbols, SymTabEntrySize}; + % SHT_STRTAB + <<".strtab">> -> + {3, 0, 0, 0, 0}; + % ARM attributes + <<".ARM.attributes">> -> + {?SHT_ARM_ATTRIBUTES, 0, 0, 0, 0}; + % .text section - executable code + <<".text">> -> + {?SHT_PROGBITS, ?SHF_ALLOC bor ?SHF_EXECINSTR, 0, 0, 0}; + % Debug sections and other progbits + _ -> + {?SHT_PROGBITS, 0, 0, 0, 0} + end, + + Header = << + % Name offset - always 32-bit + NameOffset:32/little, + % Type - always 32-bit + SectionType:32/little, + % Flags - 32/64 bit depending on word size + SectionFlags:WordSizeInBits/little, + % Address - 32/64 bit depending on word size + 0:WordSizeInBits/little, + % File offset - 32/64 bit depending on word size + FileOffset:WordSizeInBits/little, + % Size - 32/64 bit depending on word size + (byte_size(SectionData)):WordSizeInBits/little, + % Link - always 32-bit + Link:32/little, + % Info - always 32-bit + Info:32/little, + % Address align - 32/64 bit depending on word size + 1:WordSizeInBits/little, + % Entry size - 32/64 bit depending on word size + EntrySize:WordSizeInBits/little + >>, + <> + end, + <<>>, + lists:zip(lists:seq(1, length(Sections)), lists:zip(Sections, SectionOffsetsInOrder)) + ), + + % Create string table section header (index 7, the last section) + + % Calculate offset for ".shstrtab" + ShStrTabNameWithNull = <<".shstrtab", 0>>, + {ShStrTabNameOffset, _Length} = binary:match(ShStrTab, ShStrTabNameWithNull), + % First in offsets (ShStrTabOffset is added at the beginning) + ShStrTabFileOffset = lists:nth(1, SectionOffsets), + ShStrTabHeader = << + % Name offset - always 32-bit + ShStrTabNameOffset:32/little, + % Type - always 32-bit + ?SHT_STRTAB:32/little, + % Flags - 32/64 bit depending on word size + 0:WordSizeInBits/little, + % Address - 32/64 bit depending on word size + 0:WordSizeInBits/little, + % File offset - 32/64 bit depending on word size + ShStrTabFileOffset:WordSizeInBits/little, + % Size - 32/64 bit depending on word size + (byte_size(ShStrTab)):WordSizeInBits/little, + % Link - always 32-bit + 0:32/little, + % Info - always 32-bit + 0:32/little, + % Address align - 32/64 bit depending on word size + 1:WordSizeInBits/little, + % Entry size - 32/64 bit depending on word size + 0:WordSizeInBits/little + >>, + + <>. + +%% @doc Add .text section containing native code to existing debug-only ELF +%% @doc Create complete ELF with .text section and debug sections from the start +create_elf_with_text_and_debug_sections(Backend, DebugSections, NativeCode) -> + % Add .text section as the first section + TextSection = {<<".text">>, NativeCode}, + AllSections = [TextSection | DebugSections], + + % Calculate text section offset: it's the first section after the ELF header + WordSize = Backend:word_size(), + TextSectionOffset = + case WordSize of + % ELF64 header size + 8 -> 64; + % ELF32 header size + 4 -> 52 + end, + + % Create complete ELF with all sections + ElfBinary = create_elf_header_and_sections(Backend, AllSections), + + {ElfBinary, TextSectionOffset}. + +-endif. diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index 930b79dc37..bd0476f68d 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -19,14 +19,20 @@ % -module(jit_precompile). --export([start/0, compile/3, atom_resolver/1, type_resolver/1]). +-export([start/0, compile/4, atom_resolver/1, type_resolver/1]). -include_lib("jit.hrl"). +-include("compact_term.hrl"). + %% @doc Precompile BEAM files on command line start() -> - [Target, Dir | Files] = init:get_plain_arguments(), - lists:foreach(fun(File) -> compile(Target, Dir, File) end, Files). + [Target, Dir | Files0] = init:get_plain_arguments(), + {Files, Dwarf} = case Files0 of + ["-g" | FilesT] -> {FilesT, true}; + _ -> {Files0, true} + end, + lists:foreach(fun(File) -> compile(Target, Dir, Dwarf, File) end, Files). %% @doc Parse target string to extract base architecture and requested variant %% Examples: @@ -50,10 +56,10 @@ parse_target(Target) -> {BaseTarget, RequestedVariant} end. -compile(Target, Dir, Path) -> +compile(Target, Dir, Dwarf, Path) -> try {ok, InitialBinary} = file:read_file(Path), - {ok, _Module, InitialChunks} = beam_lib:all_chunks(InitialBinary), + {ok, Module, InitialChunks} = beam_lib:all_chunks(InitialBinary), FilteredChunks0 = lists:keydelete("avmN", 1, InitialChunks), FilteredChunks = lists:keydelete("Code", 1, FilteredChunks0), {"Code", CodeChunk} = lists:keyfind("Code", 1, InitialChunks), @@ -84,6 +90,17 @@ compile(Target, Dir, Path) -> end, TypeResolver = type_resolver(TypesChunk), + % Parse line table (Line chunk) for DWARF line information + LineResolver = + case lists:keyfind("Line", 1, InitialChunks) of + {"Line", LineTable} -> + fun(LineRef) -> resolve_line_info(Module, LineTable, LineRef) end; + false -> + io:format("LineResolver -- Line chunk not found\n"), + % No line table - return false + fun(_LineRef) -> false end + end, + % Parse target to extract arch and variant {BaseTarget, RequestedVariant} = parse_target(Target), Backend = list_to_atom("jit_" ++ BaseTarget), @@ -97,21 +114,42 @@ compile(Target, Dir, Path) -> _ -> error({unsupported_target, Target}) end, - Stream0 = jit_stream_binary:new(0), <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk, + <> = jit:beam_chunk_header(LabelsCount, Arch, RequestedVariant), - Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, Arch, RequestedVariant) - ), + Stream2 = case Dwarf of + true -> + Stream0 = jit_dwarf:new(Backend, Module, jit_stream_binary, 0, LineResolver), + Backend:new(RequestedVariant, jit_dwarf, Stream0); + false -> + Backend:new(RequestedVariant, jit_stream_binary, <>) + end, - Stream2 = Backend:new(RequestedVariant, jit_stream_binary, Stream1), {LabelsCount, Stream3} = jit:compile( CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2 ), - NativeCode = Backend:stream(Stream3), - UpdatedChunks = FilteredChunks ++ [{"avmN", NativeCode}], + NewChunks = + case Dwarf of + true -> + DwarfStream = Backend:stream(Stream3), + NativeCode = jit_dwarf:stream(DwarfStream), + + case jit_dwarf:elf(DwarfStream, NativeCode) of + false -> + % No debug info - just store native code with info header + [{"avmN", <>}]; + {ok, TextSectionOffset, ELF} -> + % Update BEAM chunk header structure and combine with ELF. + EmbeddedElfChunk = update_avmn_chunk_with_elf(Info, ELF, TextSectionOffset), + [{"avmN", EmbeddedElfChunk}] + end; + false -> + [{"avmN", Backend:stream(Stream3)}] + end, + + UpdatedChunks = FilteredChunks ++ NewChunks, {ok, Binary} = beam_lib:build_module(UpdatedChunks), Basename = filename:basename(Path), UpdatedFile = filename:join(Dir, Basename), @@ -253,3 +291,133 @@ parse_extra(0, 0, 1, <>, LowerBound, UpperBound, parse_extra(0, 0, 0, Rest, LowerBound, UpperBound, Value + 1); parse_extra(0, 0, 0, Rest, LowerBound, UpperBound, Unit) -> {Rest, LowerBound, UpperBound, Unit}. + +%% @doc Update existing Info by updating offset +update_avmn_chunk_with_elf(Info, ElfBinary, TextSectionOffset) -> + % Parse Info to update the offset: LabelsCount + Version + ArchCount + NativeCodeArch + <> = Info, + + % Calculate new offset: from start of ELF to .text section + NewOffset = TextSectionOffset, + + % Create updated Info with new offset + UpdatedInfo = <>, + + % Build updated chunk: InfoSize + UpdatedInfo + ELF + <<(byte_size(UpdatedInfo)):32, UpdatedInfo/binary, ElfBinary/binary>>. + +%% @doc Resolve a line reference to filename and line number +resolve_line_info( + Module, + <>, + LineRef +) when Version =:= 0, LineRef > 0, LineRef =< NumRefs -> + resolve_line_info0(Module, 1, 0, LineRef, NumRefs, Rest, false); +resolve_line_info(_Module, <>, _) when Version =/= 0 -> + io:format("resolve_line_info -- unknown Line table version (~p)\n", [Version]), + false; +resolve_line_info( + _Module, + <<_Version:32, _Flags:32, _NumInstr:32, _NumRefs:32, _NumFilenames:32, _Rest/binary>>, + 0 +) -> + false; +resolve_line_info( + _Module, + <<_Version:32, _Flags:32, _NumInstr:32, NumRefs:32, _NumFilenames:32, _Rest/binary>>, + LineRef +) -> + io:format("resolve_line_info -- invalid lineref (~p) (NumRefs = ~p)\n", [LineRef, NumRefs]), + false. + +resolve_line_info0( + Module, CurrentLineRef, _CurrentLocationIx, _LineRef, NumRefs, LocationData, {Line, LocationIx} +) when CurrentLineRef > NumRefs -> + resolve_line_info1(Module, LocationIx, LocationData, Line); +resolve_line_info0( + Module, + LineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <<_:4, ?COMPACT_INTEGER:4, _/binary>> = Bin, + false +) -> + {Line, Rest} = jit:decode_value64(Bin), + resolve_line_info0( + Module, LineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, {Line, CurrentLocationIx} + ); +resolve_line_info0( + Module, + CurrentLineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <<_:4, ?COMPACT_INTEGER:4, _/binary>> = Bin, + Acc +) -> + {_Line, Rest} = jit:decode_value64(Bin), + resolve_line_info0(Module, CurrentLineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, Acc); +resolve_line_info0( + Module, + LineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <>, + false +) -> + Line = (Val bsl 8) bor NextByte, + resolve_line_info0( + Module, LineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, {Line, CurrentLocationIx} + ); +resolve_line_info0( + Module, + CurrentLineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <<_Val:3, ?COMPACT_LARGE_INTEGER_11BITS:5, _NextByte, Rest/binary>>, + Acc +) -> + resolve_line_info0(Module, CurrentLineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, Acc); +resolve_line_info0( + Module, + LineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <>, + false +) -> + resolve_line_info0( + Module, LineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, {Line, CurrentLocationIx} + ); +resolve_line_info0( + Module, + CurrentLineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <>, + Acc +) -> + resolve_line_info0(Module, CurrentLineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, Acc); +resolve_line_info0( + Module, + CurrentLineRef, + _CurrentLocationIx, + LineRef, + NumRefs, + <<_:4, AtomTag:4, _/binary>> = Bin, + Acc +) when AtomTag =:= ?COMPACT_LARGE_ATOM; AtomTag =:= ?COMPACT_ATOM -> + {NewLocationIx, Rest} = jit:decode_value64(Bin), + resolve_line_info0(Module, CurrentLineRef, NewLocationIx, LineRef, NumRefs, Rest, Acc). + +resolve_line_info1(Module, 0, _LocationData, Line) -> + {ok, <<(atom_to_binary(Module, utf8))/binary, ".erl">>, Line}; +resolve_line_info1(_Module, 1, <>, Line) -> + {ok, Filename, Line}; +resolve_line_info1(Module, N, <>, Line) -> + resolve_line_info1(Module, N - 1, Rest, Line). diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 26b08de0d7..332ae3c25a 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -72,6 +72,17 @@ add_label/3 ]). +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2 +]). +-endif. + +-compile([warnings_as_errors]). + -include_lib("jit.hrl"). -include("primitives.hrl"). @@ -184,6 +195,8 @@ -define(PARAMETER_REGS, [rdi, rsi, rdx, rcx, r8, r9]). -define(SCRATCH_REGS, [rdi, rsi, rdx, rcx, r8, r9, r10, r11]). +-include("jit_backend_dwarf_impl.hrl"). + %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. %% sizeof(uintptr_t) diff --git a/src/libAtomVM/CMakeLists.txt b/src/libAtomVM/CMakeLists.txt index 2a5c58bd05..fec49b6bdf 100644 --- a/src/libAtomVM/CMakeLists.txt +++ b/src/libAtomVM/CMakeLists.txt @@ -179,6 +179,9 @@ endif() if (NOT AVM_DISABLE_JIT AND NOT AVM_ENABLE_PRECOMPILED) target_compile_definitions(libAtomVM PUBLIC AVM_NO_EMU) endif() +if (AVM_DISABLE_JIT_DWARF OR AVM_DISABLE_JIT) + target_compile_definitions(libAtomVM PUBLIC AVM_NO_JIT_DWARF) +endif() if(HAVE_PLATFORM_SMP_H) target_compile_definitions(libAtomVM PUBLIC HAVE_PLATFORM_SMP_H) diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 90c97ba8c9..bfae4a2d40 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -38,6 +38,134 @@ #include #include +#ifndef AVM_NO_JIT_DWARF +#include +#include + +#if TERM_BYTES == 4 +// ELF32 structures +typedef struct +{ + unsigned char e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint32_t e_entry; + uint32_t e_phoff; + uint32_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Elf_Ehdr; + +typedef struct +{ + uint32_t sh_name; + uint32_t sh_type; + uint32_t sh_flags; + uint32_t sh_addr; + uint32_t sh_offset; + uint32_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint32_t sh_addralign; + uint32_t sh_entsize; +} Elf_Shdr; + +typedef struct +{ + uint32_t st_name; + uint32_t st_value; + uint32_t st_size; + unsigned char st_info; + unsigned char st_other; + uint16_t st_shndx; +} Elf_Sym; + +typedef struct +{ + uint32_t p_type; + uint32_t p_offset; + uint32_t p_vaddr; + uint32_t p_paddr; + uint32_t p_filesz; + uint32_t p_memsz; + uint32_t p_flags; + uint32_t p_align; +} Elf_Phdr; +#elif TERM_BYTES == 8 +// ELF64 structures +typedef struct +{ + unsigned char e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint64_t e_entry; + uint64_t e_phoff; + uint64_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Elf_Ehdr; + +typedef struct +{ + uint32_t sh_name; + uint32_t sh_type; + uint64_t sh_flags; + uint64_t sh_addr; + uint64_t sh_offset; + uint64_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint64_t sh_addralign; + uint64_t sh_entsize; +} Elf_Shdr; + +typedef struct +{ + uint32_t st_name; + unsigned char st_info; + unsigned char st_other; + uint16_t st_shndx; + uint64_t st_value; + uint64_t st_size; +} Elf_Sym; + +typedef struct +{ + uint32_t p_type; + uint32_t p_flags; + uint64_t p_offset; + uint64_t p_vaddr; + uint64_t p_paddr; + uint64_t p_filesz; + uint64_t p_memsz; + uint64_t p_align; +} Elf_Phdr; +#else +#error TERM_BYTES should be 4 or 8 +#endif + +// ELF constants +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define STT_FUNC 2 +#define STB_GLOBAL 1 +#define PT_LOAD 1 +#define PF_X 1 // Execute +#define PF_R 4 // Read + +#endif //#define ENABLE_TRACE #include "trace.h" @@ -1763,3 +1891,290 @@ const ModuleNativeInterface module_native_interface = { }; #endif + +#ifndef AVM_NO_JIT_DWARF + +// GDB JIT interface structures and constants +typedef enum +{ + JIT_NOACTION = 0, + JIT_REGISTER_FN, + JIT_UNREGISTER_FN +} jit_actions_t; + +struct jit_code_entry +{ + struct jit_code_entry *next_entry; + struct jit_code_entry *prev_entry; + const char *symfile_addr; + uint64_t symfile_size; +}; + +struct jit_descriptor +{ + uint32_t version; + uint32_t action_flag; + struct jit_code_entry *relevant_entry; + struct jit_code_entry *first_entry; +}; + +// Global GDB JIT interface descriptor +// This must have C linkage and specific symbol names for GDB to find it +struct jit_descriptor __jit_debug_descriptor = { 1, 0, NULL, NULL }; + +// GDB sets breakpoint on this function to be notified of new JIT code +void __attribute__((noinline)) __jit_debug_register_code(void) +{ + // GDB will set a breakpoint here +} + +// Create a minimal ELF file for debugging with proper PIE support +static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_data, size_t original_elf_size, + uintptr_t load_address, size_t *new_elf_size) +{ + TRACE("create_minimal_elf_for_debugging: original_elf_size=%zu, load_address=0x%lx\n", + original_elf_size, load_address); + + // Extract symbol table and string table from original ELF + const char *symtab_data = NULL; + size_t symtab_size = 0; + const char *strtab_data = NULL; + size_t strtab_size = 0; + + // Parse original ELF to extract symbol and string tables + if (original_elf_size < sizeof(Elf_Ehdr)) { + fprintf(stderr, "ERROR: Original ELF too small for header\n"); + return NULL; + } + + const Elf_Ehdr *ehdr = (const Elf_Ehdr *) original_elf_data; + const Elf_Shdr *shdrs = (const Elf_Shdr *) (original_elf_data + ehdr->e_shoff); + + // Find .symtab and .strtab sections + for (int i = 0; i < ehdr->e_shnum; i++) { + if (shdrs[i].sh_type == SHT_SYMTAB) { + symtab_data = (const char *) original_elf_data + shdrs[i].sh_offset; + symtab_size = shdrs[i].sh_size; + } else if (shdrs[i].sh_type == SHT_STRTAB && i != ehdr->e_shstrndx) { + strtab_data = (const char *) original_elf_data + shdrs[i].sh_offset; + strtab_size = shdrs[i].sh_size; + } + } + + if (!symtab_data || !strtab_data) { + fprintf(stderr, "ERROR: Could not find symbol or string table in original ELF\n"); + return NULL; + } + + // Section name strings: "\0.text\0.symtab\0.strtab\0.shstrtab\0" + const char *section_names = "\0.text\0.symtab\0.strtab\0.shstrtab\0"; + size_t shstrtab_size = 32; // strlen of section_names + + // Calculate size of new minimal ELF (ELF header + 1 program header + 5 section headers + data) + size_t elf_size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (5 * sizeof(Elf_Shdr)) + symtab_size + strtab_size + shstrtab_size; + + uint8_t *new_elf = (uint8_t *) malloc(elf_size); + if (!new_elf) { + fprintf(stderr, "ERROR: Failed to allocate memory for new ELF\n"); + return NULL; + } + memset(new_elf, 0, elf_size); + + // Create ELF header + const Elf_Ehdr *orig_ehdr = (const Elf_Ehdr *) original_elf_data; + Elf_Ehdr *new_ehdr = (Elf_Ehdr *) new_elf; + memcpy(new_ehdr->e_ident, orig_ehdr->e_ident, 16); + new_ehdr->e_type = orig_ehdr->e_type; + new_ehdr->e_machine = orig_ehdr->e_machine; + new_ehdr->e_version = orig_ehdr->e_version; + new_ehdr->e_entry = 0; + new_ehdr->e_phoff = sizeof(Elf_Ehdr); + new_ehdr->e_shoff = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr); + new_ehdr->e_flags = orig_ehdr->e_flags; + new_ehdr->e_ehsize = sizeof(Elf_Ehdr); + new_ehdr->e_phentsize = sizeof(Elf_Phdr); + new_ehdr->e_phnum = 1; + new_ehdr->e_shentsize = sizeof(Elf_Shdr); + new_ehdr->e_shnum = 5; // null, .text, .symtab, .strtab, .shstrtab + new_ehdr->e_shstrndx = 4; // .shstrtab is the section name string table + + // Create program header (PT_LOAD segment) + Elf_Phdr *new_phdr = (Elf_Phdr *) (new_elf + sizeof(Elf_Ehdr)); + new_phdr->p_type = PT_LOAD; + new_phdr->p_flags = PF_R | PF_X; + + new_phdr->p_offset = 0; + new_phdr->p_vaddr = load_address; + new_phdr->p_paddr = load_address; + + // Find the actual .text section size from the original ELF + const Elf_Shdr *orig_shdrs = (const Elf_Shdr *) (original_elf_data + orig_ehdr->e_shoff); + + size_t code_size = 0; + + // Look for .text section in original ELF + for (int i = 0; i < orig_ehdr->e_shnum; i++) { + const Elf_Shdr *shdr = &orig_shdrs[i]; + if (shdr->sh_type == 1 && (shdr->sh_flags & 6) == 6) { // SHT_PROGBITS + SHF_ALLOC + SHF_EXECINSTR + code_size = shdr->sh_size; + break; + } + } + + if (code_size == 0) { + fprintf(stderr, "ERROR: Could not find .text section in original ELF\n"); + free(new_elf); + return NULL; + } + + new_phdr->p_filesz = code_size; // Size in file + new_phdr->p_memsz = code_size; // Size in memory + new_phdr->p_align = 1; + + // Create section headers + Elf_Shdr *new_shdrs = (Elf_Shdr *) (new_elf + sizeof(Elf_Ehdr) + sizeof(Elf_Phdr)); + size_t current_offset = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (5 * sizeof(Elf_Shdr)); + + // Section 0: null section (required) + new_shdrs[0] = (Elf_Shdr){ 0 }; + + // Section 1: .text section + new_shdrs[1].sh_name = 1; // ".text\0" at offset 1 in section names + new_shdrs[1].sh_type = 1; // SHT_PROGBITS + new_shdrs[1].sh_flags = 6; // SHF_ALLOC | SHF_EXECINSTR + new_shdrs[1].sh_addr = load_address; + new_shdrs[1].sh_offset = 0; // No actual .text data in this ELF, but debugger uses load_address + new_shdrs[1].sh_size = code_size; // Set proper size so debugger knows the extent + new_shdrs[1].sh_addralign = 1; + + // Section 2: .symtab + new_shdrs[2].sh_name = 7; // ".symtab\0" at offset 7 in section names + new_shdrs[2].sh_type = SHT_SYMTAB; + new_shdrs[2].sh_offset = current_offset; + new_shdrs[2].sh_size = symtab_size; + new_shdrs[2].sh_link = 3; // Points to .strtab + +#if TERM_BYTES == 8 + new_shdrs[2].sh_addralign = 8; +#else + new_shdrs[2].sh_addralign = 4; +#endif + + new_shdrs[2].sh_entsize = sizeof(Elf_Sym); + current_offset += symtab_size; + + // Section 3: .strtab + new_shdrs[3].sh_name = 15; // ".strtab\0" at offset 15 in section names + new_shdrs[3].sh_type = SHT_STRTAB; + new_shdrs[3].sh_offset = current_offset; + new_shdrs[3].sh_size = strtab_size; + new_shdrs[3].sh_addralign = 1; + current_offset += strtab_size; + + // Section 4: .shstrtab (section name string table) + new_shdrs[4].sh_name = 23; // ".shstrtab\0" at offset 23 in section names + new_shdrs[4].sh_type = SHT_STRTAB; + new_shdrs[4].sh_offset = current_offset; + new_shdrs[4].sh_size = shstrtab_size; + new_shdrs[4].sh_addralign = 1; + + // Copy symbol table data and patch symbol addresses + uint8_t *new_symtab = new_elf + new_shdrs[2].sh_offset; + memcpy(new_symtab, symtab_data, symtab_size); + + // With PT_LOAD program header, the debugger should automatically apply the base address + // Copy string table data + uint8_t *new_strtab = new_elf + new_shdrs[3].sh_offset; + memcpy(new_strtab, strtab_data, strtab_size); + + // Copy section name string table data + uint8_t *new_shstrtab = new_elf + new_shdrs[4].sh_offset; + memcpy(new_shstrtab, section_names, shstrtab_size); + + *new_elf_size = elf_size; + return new_elf; +} + +void jit_debug_register_code(Module *mod, const void *native_code, size_t native_size, ModuleNativeEntryPoint entry_point) +{ + UNUSED(mod); + + if (!native_code || native_size < 8) { + fprintf(stderr, "jit_debug_register_code: no native code or too small\n"); + return; + } + + // Parse the NativeCodeChunk header to find where the ELF starts + const uint8_t *data = (const uint8_t *) native_code; + uint32_t info_size = READ_32_UNALIGNED(data); + + if (info_size + 4 > native_size) { + fprintf(stderr, "jit_debug_register_code: invalid info_size\n"); + return; + } + + // Check if there's an ELF header after the NativeCodeChunk header + const uint8_t *elf_start = data + 4 + info_size; + size_t elf_size = native_size - (4 + info_size); + + if (elf_size < 16) { + fprintf(stderr, "jit_debug_register_code: no space for ELF header\n"); + return; + } + + // Check for ELF magic: 0x7f, 'E', 'L', 'F' + if (elf_start[0] != 0x7f || elf_start[1] != 'E' || elf_start[2] != 'L' || elf_start[3] != 'F') { + fprintf(stderr, "jit_debug_register_code: no ELF header found, not registering debug info\n"); + return; + } + + // Allocate memory for the JIT code entry (but not for the ELF data itself) + struct jit_code_entry *entry = malloc(sizeof(struct jit_code_entry)); + if (!entry) { + return; + } + + // Use the actual mapped entry point address as the load address + uintptr_t load_address = (uintptr_t) entry_point; + + // Create a minimal ELF file with proper symbols for debugging + size_t new_elf_size; + const uint8_t *new_elf = create_minimal_elf_for_debugging(elf_start, elf_size, load_address, &new_elf_size); + + if (!new_elf) { + fprintf(stderr, "ERROR: Failed to create minimal ELF for debugging\n"); + return; + } + + // Initialize the entry with the new ELF + entry->next_entry = NULL; + entry->prev_entry = NULL; + entry->symfile_addr = (const char *) new_elf; + entry->symfile_size = new_elf_size; + + // Add to GDB's linked list + if (__jit_debug_descriptor.first_entry) { + __jit_debug_descriptor.first_entry->prev_entry = entry; + entry->next_entry = __jit_debug_descriptor.first_entry; + } + __jit_debug_descriptor.first_entry = entry; + + // TODO: Store entry pointer in module for later unregistration + + // Notify GDB that new code has been registered + __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; + __jit_debug_descriptor.relevant_entry = entry; + __jit_debug_register_code(); +} + +void jit_debug_unregister_code(Context *ctx, Module *mod) +{ + UNUSED(ctx); + UNUSED(mod); + + // TODO: Implement unregistration + // Need to store the jit_code_entry pointer in the module structure + // and retrieve it here to properly unregister +} + +#endif // AVM_NO_JIT_DWARF diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index c7764166dd..0bfbb7aad8 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -225,6 +225,34 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream); */ enum TrapAndLoadResult jit_trap_and_load(Context *ctx, Module *mod, uint32_t label); +#ifndef AVM_NO_JIT_DWARF +/** + * @brief Register JIT-compiled code with debug info with GDB/LLDB + * + * @details This function registers native code and associated DWARF debug + * information with the debugger using the GDB JIT interface. This allows + * debuggers to show function names and source line information for JIT code. + * + * @param mod The module containing the JIT code + * @param native_code Pointer to the native machine code + * @param native_size Size of the native code in bytes + * @param entry_point The actual mapped entry point address + */ +void jit_debug_register_code(Module *mod, const void *native_code, size_t native_size, ModuleNativeEntryPoint entry_point); + +/** + * @brief Unregister JIT-compiled code from debugger + * + * @details This function unregisters previously registered JIT code from + * the debugger. Should be called when a module is unloaded. + * + * @param ctx The context + * @param mod The module being unloaded + */ +void jit_debug_unregister_code(Context *ctx, Module *mod); + +#endif + #ifdef __cplusplus } #endif diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index 3500992fb1..1b5528b522 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -349,6 +349,13 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary size_t offset = ENDIAN_SWAP_32(native_code->info_size) + ENDIAN_SWAP_32(native_code->architectures[arch_index].offset) + sizeof(native_code->info_size); ModuleNativeEntryPoint module_entry_point = sys_map_native_code((const uint8_t *) &native_code->info_size, ENDIAN_SWAP_32(native_code->size), offset); module_set_native_code(mod, ENDIAN_SWAP_32(native_code->labels), module_entry_point); + +#ifndef AVM_NO_JIT_DWARF + // Register debug info with debugger (will check for embedded ELF) + const void *chunk_start = (const uint8_t *) &native_code->info_size; + size_t chunk_size = ENDIAN_SWAP_32(native_code->size); + jit_debug_register_code(mod, chunk_start, chunk_size, module_entry_point); +#endif break; } } @@ -466,6 +473,11 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary COLD_FUNC void module_destroy(Module *module) { +#ifndef AVM_NO_JIT_DWARF + // Unregister DWARF debug info from debugger if it was registered + jit_debug_unregister_code(NULL, module); +#endif + free(module->labels); free(module->imported_funcs); free(module->literals_table); diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt index 267a4b3d29..6d6285886f 100644 --- a/tests/erlang_tests/CMakeLists.txt +++ b/tests/erlang_tests/CMakeLists.txt @@ -27,6 +27,7 @@ macro(jit_precompile module_name) ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_precompile.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_stream_binary.beam + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${AVM_JIT_TARGET_ARCH}.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${AVM_JIT_TARGET_ARCH}_asm.beam ) diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 45473d9f10..6aa216b5dc 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -26,6 +26,7 @@ set(ERLANG_MODULES tests jit_tests jit_tests_common + jit_dwarf_tests jit_aarch64_tests jit_aarch64_asm_tests jit_armv6m_tests @@ -36,5 +37,9 @@ set(ERLANG_MODULES jit_x86_64_asm_tests ) -pack_archive(test_jit_lib ERLC_FLAGS -DTEST MODULES ${ERLANG_MODULES}) +if (NOT AVM_DISABLE_JIT_DWARF) + pack_archive(test_jit_lib ERLC_FLAGS -DTEST -DJIT_DWARF MODULES ${ERLANG_MODULES}) +else() + pack_archive(test_jit_lib ERLC_FLAGS -DTEST MODULES ${ERLANG_MODULES}) +endif() pack_eunit(test_jit estdlib eavmlib etest jit) diff --git a/tests/libs/jit/jit_dwarf_tests.erl b/tests/libs/jit/jit_dwarf_tests.erl new file mode 100644 index 0000000000..0588ae25f2 --- /dev/null +++ b/tests/libs/jit/jit_dwarf_tests.erl @@ -0,0 +1,276 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_dwarf_tests). + +-include_lib("eunit/include/eunit.hrl"). +-include("../../../libs/jit/src/opcodes.hrl"). + +basic_dwarf_state_test() -> + % Create a basic DWARF state + State = jit_dwarf:new(jit_armv6m, test_module, jit_stream_binary, 1024), + + % Add some test data + State1 = jit_dwarf:opcode(State, ?OP_FUNC_INFO), + State2 = jit_dwarf:function(State1, get_value, 2), + State3 = jit_dwarf:line(State2, 42), + + % Verify state contains our data + ?assert(is_tuple(State3)), + + % Test stream interface + Stream = jit_dwarf:stream(State3), + ?assert(is_binary(Stream)). + +elf_generation_test() -> + % Create state with some debug info + State = jit_dwarf:new(jit_armv6m, test_module, jit_stream_binary, 1024), + % Some opcode + State1 = jit_dwarf:opcode(State, <<"test_opcode/2">>), + State2 = jit_dwarf:function(State1, test_func, 1), + State3 = jit_dwarf:line(State2, 100), + + % Generate ELF + case jit_dwarf:elf(State3, <<>>) of + false -> + ok; + {ok, ElfBinary, _ElfWithText} -> + % Verify ELF magic + <<127, $E, $L, $F, _Rest/binary>> = ElfBinary, + + % Verify ELF header structure + ?assert(byte_size(ElfBinary) >= 52), + + % Extract and verify key header fields + <<_Magic:4/binary, Class, Endian, _Version, _OSABI, _ABIVersion:8/binary, + Type:16/little, Machine:16/little, _ElfVersion:32/little, _Entry:32/little, + _PHOff:32/little, SHOff:32/little, _Flags:32/little, EHSize:16/little, + _PHEntSize:16/little, _PHNum:16/little, _SHEntSize:16/little, SHNum:16/little, + _SHStrNdx:16/little, _/binary>> = ElfBinary, + + % Verify basic ELF structure + + % ELFCLASS32 + ?assertEqual(1, Class), + % ELFDATA2LSB + ?assertEqual(1, Endian), + % ET_REL + ?assertEqual(1, Type), + % EM_ARM + ?assertEqual(40, Machine), + % ELF header size + ?assertEqual(52, EHSize), + + % Verify we have the expected sections + + % null + 4 debug sections + shstrtab + ?assert(SHNum >= 6), + % Section headers after ELF header + ?assert(SHOff > 52), + + % Verify the ELF is complete (section headers exist) + + % Headers should exist + ExpectedMinSize = SHOff + (SHNum * 40), + ?assert(byte_size(ElfBinary) >= ExpectedMinSize) + end. + +section_header_test() -> + State = jit_dwarf:new(jit_armv6m, test_module, jit_stream_binary, 1024), + State1 = jit_dwarf:function(State, main, 0), + + case jit_dwarf:elf(State1, <<>>) of + false -> + ok; + {ok, ElfBinary, _ElfWithText} -> + % Extract section header info from ELF header (parse full header) + <<_ElfMagic:16/binary, _Type:16/little, _Machine:16/little, _Version:32/little, + _Entry:32/little, _PHOff:32/little, SHOff:32/little, _Flags:32/little, + _EHSize:16/little, _PHEntSize:16/little, _PHNum:16/little, _SHEntSize:16/little, + SHNum:16/little, _SHStrNdx:16/little, _Rest/binary>> = ElfBinary, + + % Verify we can read section headers + SectionHeadersSize = SHNum * 40, + SectionHeadersStart = SHOff, + + ?assert(byte_size(ElfBinary) >= SectionHeadersStart + SectionHeadersSize), + + % Extract first section header (should be null) + <<_:SectionHeadersStart/binary, NullHeader:40/binary, _/binary>> = ElfBinary, + % All zeros + ?assertEqual(<<0:320>>, NullHeader) + end. + +string_table_test() -> + State = jit_dwarf:new(jit_armv6m, string_test, jit_stream_binary, 1024), + + case jit_dwarf:elf(State, <<>>) of + false -> + ok; + {ok, ElfBinary, _ElfWithText} -> + % Find string table section - parse ELF header + <<_ElfMagic2:16/binary, _Type2:16/little, _Machine2:16/little, _Version2:32/little, + _Entry2:32/little, _PHOff2:32/little, SHOff:32/little, _Flags2:32/little, + _EHSize2:16/little, _PHEntSize2:16/little, _PHNum2:16/little, _SHEntSize2:16/little, + _SHNum:16/little, SHStrNdx:16/little, _Rest2/binary>> = ElfBinary, + + % Extract string table section header + StrTabHeaderOffset = SHOff + (SHStrNdx * 40), + <<_:StrTabHeaderOffset/binary, _StrName:32/little, _StrType:32/little, + _StrFlags:32/little, _StrAddr:32/little, StrOffset:32/little, StrSize:32/little, + _/binary>> = ElfBinary, + + % Extract string table data + <<_:StrOffset/binary, StringTable:StrSize/binary, _/binary>> = ElfBinary, + + % Verify string table contains expected section names + StrTabStrings = binary:split(StringTable, <<0>>, [global]), + + ?assert(lists:member(<<".debug_info">>, StrTabStrings)), + ?assert(lists:member(<<".shstrtab">>, StrTabStrings)) + end. + +elf_with_text_test() -> + % Test the new elf_with_text/2 function that creates complete ELF with .text section + State = jit_dwarf:new(jit_x86_64, test_module, jit_stream_binary, 1024), + + % Some dummy x86_64 native code (mov rax, 1; ret) + NativeCode = <<16#48, 16#c7, 16#c0, 16#01, 16#00, 16#00, 16#00, 16#c3>>, + + % Generate complete ELF with debug info and .text section + case jit_dwarf:elf(State, NativeCode) of + false -> + ok; + {ok, _DebugOnlyELF, CombinedELF} -> + % Verify ELF magic + <<127, $E, $L, $F, _Rest/binary>> = CombinedELF, + + % Parse ELF header to check section count (should be 9: null + 6 debug sections + .text + shstrtab) + <<_ElfMagic:16/binary, _Type:16/little, _Machine:16/little, _Version:32/little, + _Entry:32/little, _PHOff:32/little, _SHOff:32/little, _Flags:32/little, + _EHSize:16/little, _PHEntSize:16/little, _PHNum:16/little, _SHEntSize:16/little, + SHNum:16/little, _SHStrNdx:16/little, _/binary>> = CombinedELF, + + % Should have 9 sections total + ?assertEqual(9, SHNum), + + % Verify the native code is present in the binary + ?assert(binary:match(CombinedELF, NativeCode) =/= nomatch), + + % Verify ELF is larger due to added .text section + {ok, DebugOnlyELF, _} = jit_dwarf:elf(State, <<>>), + ?assert(byte_size(CombinedELF) > byte_size(DebugOnlyELF)) + end. + +text_section_properties_test() -> + % Test that the .text section has proper properties + State = jit_dwarf:new(jit_aarch64, test_module, jit_stream_binary, 1024), + + % AArch64 native code (mov x0, #42; ret) + NativeCode = <<16#d2800540, 16#d65f03c0>>, + + case jit_dwarf:elf(State, NativeCode) of + false -> + ok; + {ok, _DebugOnlyELF, CombinedELF} -> + % Parse ELF to find .text section + <<_ElfMagic:16/binary, _Type:16/little, _Machine:16/little, _Version:32/little, + _Entry:32/little, _PHOff:32/little, SHOff:32/little, _Flags:32/little, + _EHSize:16/little, _PHEntSize:16/little, _PHNum:16/little, _SHEntSize:16/little, + SHNum:16/little, SHStrNdx:16/little, RestOfFile/binary>> = CombinedELF, + + % Extract section headers + SectionHeadersStart = SHOff - 52, + <<_SectionData:SectionHeadersStart/binary, SectionHeaders/binary>> = RestOfFile, + + % Extract string table to find .text section by name + StringTableHeaderOffset = SHStrNdx * 40, + <<_:StringTableHeaderOffset/binary, _StrName:32/little, _StrType:32/little, + _StrFlags:32/little, _StrAddr:32/little, StrOffset:32/little, StrSize:32/little, + _/binary>> = SectionHeaders, + + % Extract string table content + StrTableFileOffset = StrOffset - 52, + <<_:StrTableFileOffset/binary, StringTable:StrSize/binary, _/binary>> = _SectionData, + + % Find .text section by scanning all section headers + TextSectionFound = find_text_section(SectionHeaders, StringTable, SHNum, 0), + ?assert(TextSectionFound =/= not_found), + + {TextType, TextFlags, TextSize, TextAddr} = TextSectionFound, + + % Verify .text section properties + SHT_PROGBITS = 1, + SHF_ALLOC = 2, + SHF_EXECINSTR = 4, + ExpectedFlags = SHF_ALLOC bor SHF_EXECINSTR, + + ?assertEqual(SHT_PROGBITS, TextType), + ?assertEqual(ExpectedFlags, TextFlags), + ?assertEqual(byte_size(NativeCode), TextSize), + % Should be 0 for relocatable + ?assertEqual(0, TextAddr) + end. + +different_architectures_test() -> + % Test elf_with_text with different JIT backends + Backends = [jit_x86_64, jit_aarch64, jit_armv6m], + % Simple nop instruction + NativeCode = <<16#90>>, + + lists:foreach( + fun(Backend) -> + State = jit_dwarf:new(Backend, test_module, jit_stream_binary, 1024), + case jit_dwarf:elf(State, NativeCode) of + false -> + ok; + {ok, _DebugOnlyELF, CombinedELF} -> + % Verify ELF magic and basic structure + <<127, $E, $L, $F, _Rest/binary>> = CombinedELF, + % Verify native code is present + ?assert(binary:match(CombinedELF, NativeCode) =/= nomatch) + end + end, + Backends + ). + +% Helper function to find .text section in ELF +find_text_section(_Headers, _StringTable, 0, _Index) -> + not_found; +find_text_section(Headers, StringTable, Remaining, Index) -> + HeaderOffset = Index * 40, + <<_:HeaderOffset/binary, NameOffset:32/little, Type:32/little, Flags:32/little, Addr:32/little, + _Offset:32/little, Size:32/little, _/binary>> = Headers, + + % Extract section name from string table + SectionName = extract_string_at_offset(StringTable, NameOffset), + + case SectionName of + <<".text">> -> + {Type, Flags, Size, Addr}; + _ -> + find_text_section(Headers, StringTable, Remaining - 1, Index + 1) + end. + +% Helper function to extract null-terminated string at given offset +extract_string_at_offset(StringTable, Offset) -> + <<_:Offset/binary, Rest/binary>> = StringTable, + [String | _] = binary:split(Rest, <<0>>, []), + String. diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl index c7d88480d1..56206f7eac 100644 --- a/tests/libs/jit/jit_tests.erl +++ b/tests/libs/jit/jit_tests.erl @@ -100,13 +100,35 @@ 16#00, 16#00, 16#06, 16#00, 16#00, 16#00, 16#00, 16#41, 16#51, 16#61, 16#81, 16#91, 16#B1>> ). -compile_minimal_x86_64_test() -> +-ifdef(JIT_DWARF). +compile_stream_setup(CodeChunk) -> + Stream0 = jit_dwarf:new(jit_x86_64, test_module, jit_stream_binary, 0), + <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk, + Stream1 = jit_dwarf:append( + Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) + ), + Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_dwarf, Stream1), + {LabelsCount, Stream2}. + +compile_stream_finalize(Stream3) -> + DwarfStream = jit_x86_64:stream(Stream3), + jit_dwarf:stream(DwarfStream). +-else. +compile_stream_setup(CodeChunk) -> Stream0 = jit_stream_binary:new(0), - <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_0, + <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk, Stream1 = jit_stream_binary:append( Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) ), Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), + {LabelsCount, Stream2}. + +compile_stream_finalize(Stream3) -> + jit_x86_64:stream(Stream3). +-endif. + +compile_minimal_x86_64_test() -> + {LabelsCount, Stream2} = compile_stream_setup(?CODE_CHUNK_0), {_LabelsCount, Stream3} = jit:compile( ?CODE_CHUNK_0, fun(_) -> undefined end, @@ -115,7 +137,7 @@ compile_minimal_x86_64_test() -> jit_x86_64, Stream2 ), - Stream4 = jit_x86_64:stream(Stream3), + Stream4 = compile_stream_finalize(Stream3), <<16:32, LabelsCount:32, ?JIT_FORMAT_VERSION:16, 1:16, ?JIT_ARCH_X86_64:16, ?JIT_VARIANT_PIC:16, 0:32, Code/binary>> = Stream4, {JumpTable, _} = split_binary(Code, (LabelsCount + 1) * 5), @@ -148,13 +170,7 @@ backend_to_arch(jit_aarch64) -> ?JIT_ARCH_AARCH64; backend_to_arch(jit_armv6m) -> ?JIT_ARCH_ARMV6M. compile_stream_for_backend(Backend, CodeChunk, AtomChunk, TypeChunk) -> - Stream0 = jit_stream_binary:new(0), - <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk, - Arch = backend_to_arch(Backend), - Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, Arch, ?JIT_VARIANT_PIC) - ), - Stream2 = Backend:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), + {LabelsCount, Stream2} = compile_stream_setup(CodeChunk), AtomResolver = jit_precompile:atom_resolver(AtomChunk), LiteralResolver = fun(_) -> test_literal end, @@ -164,7 +180,7 @@ compile_stream_for_backend(Backend, CodeChunk, AtomChunk, TypeChunk) -> {LabelsCount, Stream3} = jit:compile( CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2 ), - Backend:stream(Stream3). + compile_stream_finalize(Stream3). term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> CompiledCode = compile_stream_for_backend( diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index 2d130cad03..5411862592 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -27,6 +27,7 @@ start() -> etest:test([ jit_tests, + jit_dwarf_tests, jit_aarch64_tests, jit_aarch64_asm_tests, jit_armv6m_tests, From 75e515f9f7b5ef83b0fe9e3b95e3da2a3fa00ebb Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 18 Oct 2025 19:36:01 +0200 Subject: [PATCH 95/97] riscv32: Add DWARF support Signed-off-by: Paul Guyot --- libs/jit/src/jit_dwarf.erl | 4 +++- libs/jit/src/jit_riscv32.erl | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/libs/jit/src/jit_dwarf.erl b/libs/jit/src/jit_dwarf.erl index 10d482d0ee..535431881c 100644 --- a/libs/jit/src/jit_dwarf.erl +++ b/libs/jit/src/jit_dwarf.erl @@ -350,6 +350,7 @@ elf(_State, _NativeCode) -> -define(EM_ARM, 40). -define(EM_X86_64, 62). -define(EM_AARCH64, 183). +-define(EM_RISCV, 243). -define(SHT_PROGBITS, 1). -define(SHT_SYMTAB, 2). -define(SHT_STRTAB, 3). @@ -369,7 +370,8 @@ elf(_State, _NativeCode) -> %% Map JIT backend to ELF machine type backend_to_machine_type(jit_x86_64) -> ?EM_X86_64; backend_to_machine_type(jit_aarch64) -> ?EM_AARCH64; -backend_to_machine_type(jit_armv6m) -> ?EM_ARM. +backend_to_machine_type(jit_armv6m) -> ?EM_ARM; +backend_to_machine_type(jit_riscv32) -> ?EM_RISCV. %% Map JIT backend to ELF flags backend_to_elf_flags(jit_armv6m) -> diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 191fc3cdc8..e4bc9746ee 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -237,6 +237,8 @@ -define(PARAMETER_REGS, [a0, a1, a2, a3, a4, a5, a6, a7]). -define(SCRATCH_REGS, [t6, t5, t4, t2, t1, t0]). +-include("jit_backend_dwarf_impl.hrl"). + %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. %% sizeof(uintptr_t) From abd9f489138f03d3a11a14d4b631b3d314ce4cc4 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 23 Oct 2025 07:57:13 +0200 Subject: [PATCH 96/97] dwarf: WIP Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 18 +- libs/jit/src/jit_armv6m.erl | 18 +- libs/jit/src/jit_dwarf.erl | 432 +++++++++++++++++++++------ libs/jit/src/jit_riscv32.erl | 18 +- libs/jit/src/jit_x86_64.erl | 18 +- src/libAtomVM/jit.c | 553 +++++++++++++++++++++++++++++++++-- 6 files changed, 952 insertions(+), 105 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index a82731c73b..9b35569f5d 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -77,7 +77,8 @@ dwarf_opcode/2, dwarf_label/2, dwarf_function/3, - dwarf_line/2 + dwarf_line/2, + dwarf_ctx_register/0 ]). -endif. @@ -87,6 +88,10 @@ -include("primitives.hrl"). +-ifdef(JIT_DWARF). +-include("jit_dwarf.hrl"). +-endif. + %-define(ASSERT(Expr), true = Expr). -define(ASSERT(_Expr), ok). @@ -2338,3 +2343,14 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) -spec add_label(state(), integer() | reference(), integer()) -> state(). add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. + +-ifdef(JIT_DWARF). +%%----------------------------------------------------------------------------- +%% @doc Return the DWARF register number for the ctx parameter +%% @returns The DWARF register number where ctx is passed (x0/r0 in aarch64) +%% @end +%%----------------------------------------------------------------------------- +-spec dwarf_ctx_register() -> non_neg_integer(). +dwarf_ctx_register() -> + ?DWARF_X0_REG_AARCH64. +-endif. diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 921b1be1a8..7343473b18 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -77,7 +77,8 @@ dwarf_opcode/2, dwarf_label/2, dwarf_function/3, - dwarf_line/2 + dwarf_line/2, + dwarf_ctx_register/0 ]). -endif. @@ -87,6 +88,10 @@ -include("primitives.hrl"). +-ifdef(JIT_DWARF). +-include("jit_dwarf.hrl"). +-endif. + -define(ASSERT(Expr), true = Expr). %% ARMv6-M AAPCS32 ABI: r0-r3 are used for argument passing and return value. @@ -3160,3 +3165,14 @@ add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label -spec add_label(state(), integer() | reference(), integer()) -> state(). add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. + +-ifdef(JIT_DWARF). +%%----------------------------------------------------------------------------- +%% @doc Return the DWARF register number for the ctx parameter +%% @returns The DWARF register number where ctx is passed (r0 in ARM) +%% @end +%%----------------------------------------------------------------------------- +-spec dwarf_ctx_register() -> non_neg_integer(). +dwarf_ctx_register() -> + ?DWARF_R0_REG_ARMV6M. +-endif. diff --git a/libs/jit/src/jit_dwarf.erl b/libs/jit/src/jit_dwarf.erl index 535431881c..b68d20f681 100644 --- a/libs/jit/src/jit_dwarf.erl +++ b/libs/jit/src/jit_dwarf.erl @@ -20,6 +20,8 @@ -module(jit_dwarf). +-include("jit_dwarf.hrl"). + -record(dwarf, { % Backend module (jit_armv6m, etc.) backend :: module(), @@ -279,6 +281,7 @@ elf(#dwarf{module_name = ModuleName, backend = Backend} = State, NativeCode) -> DebugLineSection = generate_debug_line_section(State, SourceFile), DebugAbbrevSection = generate_debug_abbrev_section_with_opcodes(), DebugStrSection = generate_debug_str_section(State, SourceFile), + DebugArangesSection = generate_debug_aranges_section(State), % Generate symbol table sections for function names {SymtabSection, StrtabSection} = generate_symbol_table(State, Backend), @@ -289,6 +292,7 @@ elf(#dwarf{module_name = ModuleName, backend = Backend} = State, NativeCode) -> {<<".debug_line">>, DebugLineSection}, {<<".debug_abbrev">>, DebugAbbrevSection}, {<<".debug_str">>, DebugStrSection}, + {<<".debug_aranges">>, DebugArangesSection}, {<<".symtab">>, SymtabSection}, {<<".strtab">>, StrtabSection} ], @@ -315,58 +319,6 @@ elf(_State, _NativeCode) -> -ifdef(JIT_DWARF). -%% DWARF constants --define(DW_TAG_compile_unit, 16#11). --define(DW_TAG_subprogram, 16#2e). --define(DW_TAG_lexical_block, 16#0b). --define(DW_TAG_label, 16#0a). --define(DW_AT_name, 16#03). --define(DW_AT_comp_dir, 16#1b). --define(DW_AT_producer, 16#25). --define(DW_AT_language, 16#13). --define(DW_AT_low_pc, 16#11). --define(DW_AT_high_pc, 16#12). --define(DW_AT_stmt_list, 16#10). --define(DW_FORM_string, 16#08). --define(DW_FORM_addr, 16#01). --define(DW_FORM_data4, 16#06). --define(DW_FORM_data1, 16#0b). --define(DW_FORM_udata, 16#0f). --define(DW_LANG_C, 16#02). --define(DW_LANG_Erlang, 16#46). --define(DW_LANG_Elixir, 16#47). --define(DW_LANG_Gleam, 16#48). - -%% ELF constants --define(EI_MAG0, 16#7f). --define(EI_MAG1, $E). --define(EI_MAG2, $L). --define(EI_MAG3, $F). --define(ELFCLASS32, 1). --define(ELFCLASS64, 2). --define(ELFDATA2LSB, 1). --define(EV_CURRENT, 1). --define(ET_REL, 1). --define(EM_ARM, 40). --define(EM_X86_64, 62). --define(EM_AARCH64, 183). --define(EM_RISCV, 243). --define(SHT_PROGBITS, 1). --define(SHT_SYMTAB, 2). --define(SHT_STRTAB, 3). --define(SHT_ARM_ATTRIBUTES, 16#70000003). --define(SHF_ALLOC, 2). --define(SHF_EXECINSTR, 4). - -%% ARM EABI flags - -% EABI version 5 --define(EF_ARM_EABI_VER5, 16#05000000). -% Soft float ABI --define(EF_ARM_ABI_FLOAT_SOFT, 16#00000200). -% ARM architecture v6-M (Thumb-only) --define(EF_ARM_ARCH_V6M, 16#00000009). - %% Map JIT backend to ELF machine type backend_to_machine_type(jit_x86_64) -> ?EM_X86_64; backend_to_machine_type(jit_aarch64) -> ?EM_AARCH64; @@ -477,6 +429,52 @@ generate_debug_str_section(#dwarf{module_name = ModuleName}, SourceFile) -> ], iolist_to_binary(Strings). +generate_debug_aranges_section(#dwarf{backend = Backend} = State) -> + % Get word size and calculate address range + WordSize = Backend:word_size(), + WordSizeInBits = WordSize * 8, + {LowPC, HighPC} = calculate_address_range(State), + Length = HighPC - LowPC, + + % Calculate padding needed to align descriptor to 2*address_size + % Header so far: version(2) + debug_info_offset(4) + addr_size(1) + seg_size(1) = 8 bytes + % Need to align to 2*WordSize boundary + HeaderSize = 8, + TupleAlignment = 2 * WordSize, + PaddingSize = (TupleAlignment - (HeaderSize rem TupleAlignment)) rem TupleAlignment, + Padding = <<0:(PaddingSize*8)/little>>, + + % Header + Header = << + % DWARF version + 2:16/little, + % Debug info offset (always 0 - first compile unit) + 0:32/little, + % Address size + WordSize, + % Segment size (0 for flat address space) + 0 + >>, + + % Address descriptors + Descriptors = << + % Address range descriptor + LowPC:WordSizeInBits/little, % Start address + Length:WordSizeInBits/little, % Length + % Terminating entry (two zero addresses) + 0:WordSizeInBits/little, + 0:WordSizeInBits/little + >>, + + % Combine all parts + HeaderAndTable = <
>, + + % Calculate total length (header + table - 4 for the length field itself) + TotalLength = byte_size(HeaderAndTable), + + % Build final section with length prefix + <>. + generate_debug_abbrev_section_with_opcodes() -> % Abbreviation table << @@ -508,7 +506,7 @@ generate_debug_abbrev_section_with_opcodes() -> ?DW_FORM_addr, % Statement list ?DW_AT_stmt_list, - ?DW_FORM_data4, + ?DW_FORM_sec_offset, % End of attributes 0, 0, @@ -552,8 +550,8 @@ generate_debug_abbrev_section_with_opcodes() -> 4, % Tag ?DW_TAG_subprogram, - % Has no children - 0, + % Has children (ctx parameter) + 1, % Name attribute (module:function/arity) ?DW_AT_name, ?DW_FORM_string, @@ -567,18 +565,144 @@ generate_debug_abbrev_section_with_opcodes() -> 0, 0, + % Abbrev 5: DW_TAG_formal_parameter (for ctx parameter with type) + % Abbreviation code + 5, + % Tag + ?DW_TAG_formal_parameter, + % Has no children + 0, + % Name attribute (parameter name) + ?DW_AT_name, + ?DW_FORM_string, + % Type attribute (reference to type DIE) + ?DW_AT_type, + ?DW_FORM_ref4, + % Location attribute (register location) + ?DW_AT_location, + ?DW_FORM_exprloc, + % End of attributes + 0, + 0, + + % Abbrev 6: DW_TAG_base_type (for term/uintptr_t) + % Abbreviation code + 6, + % Tag + ?DW_TAG_base_type, + % Has no children + 0, + % Name attribute + ?DW_AT_name, + ?DW_FORM_string, + % Byte size + ?DW_AT_byte_size, + ?DW_FORM_data1, + % Encoding + ?DW_AT_encoding, + ?DW_FORM_data1, + % End of attributes + 0, + 0, + + % Abbrev 7: DW_TAG_pointer_type (for Context*) + % Abbreviation code + 7, + % Tag + ?DW_TAG_pointer_type, + % Has no children + 0, + % Byte size + ?DW_AT_byte_size, + ?DW_FORM_data1, + % Type attribute (points to Context structure) + ?DW_AT_type, + ?DW_FORM_ref4, + % End of attributes + 0, + 0, + + % Abbrev 8: DW_TAG_structure_type (for Context) + % Abbreviation code + 8, + % Tag + ?DW_TAG_structure_type, + % Has children (members) + 1, + % Name attribute + ?DW_AT_name, + ?DW_FORM_string, + % Byte size + ?DW_AT_byte_size, + ?DW_FORM_data4, + % End of attributes + 0, + 0, + + % Abbrev 9: DW_TAG_member (for structure members) + % Abbreviation code + 9, + % Tag + ?DW_TAG_member, + % Has no children + 0, + % Name attribute + ?DW_AT_name, + ?DW_FORM_string, + % Type attribute + ?DW_AT_type, + ?DW_FORM_ref4, + % Data member location (offset from structure start) + ?DW_AT_data_member_location, + ?DW_FORM_data4, + % End of attributes + 0, + 0, + + % Abbrev 10: DW_TAG_array_type (for term x[MAX_REG+1]) + % Abbreviation code + 10, + % Tag + ?DW_TAG_array_type, + % Has children (subrange) + 1, + % Type attribute (element type) + ?DW_AT_type, + ?DW_FORM_ref4, + % End of attributes + 0, + 0, + + % Abbrev 11: DW_TAG_subrange_type (for array bounds) + % Abbreviation code + 11, + % Tag + ?DW_TAG_subrange_type, + % Has no children + 0, + % Upper bound + ?DW_AT_upper_bound, + ?DW_FORM_data1, + % End of attributes + 0, + 0, + % End of abbreviations 0 >>. generate_debug_info_section_with_opcodes( - #dwarf{functions = Functions, opcodes = Opcodes, labels = Labels, module_name = ModuleName} = + #dwarf{functions = Functions, opcodes = Opcodes, labels = Labels, module_name = ModuleName, backend = Backend} = State, SourceFile ) -> % Calculate address ranges {LowPC, HighPC} = calculate_address_range(State), + % Get word size from backend and convert to bits + WordSize = Backend:word_size(), + WordSizeInBits = WordSize * 8, + % Build content first to calculate actual length CompileUnitContent = << % DWARF version @@ -586,7 +710,7 @@ generate_debug_info_section_with_opcodes( % Abbreviation offset 0:32/little, % Address size - 4, + WordSize, % Compilation unit DIE (abbreviation 1) 1, % DW_AT_name @@ -601,24 +725,35 @@ generate_debug_info_section_with_opcodes( % DW_AT_language ?DW_LANG_Erlang:32/little, % for now, we always say Erlang % DW_AT_low_pc - LowPC:32/little, + LowPC:WordSizeInBits/little, % DW_AT_high_pc - HighPC:32/little, + HighPC:WordSizeInBits/little, % DW_AT_stmt_list (offset into .debug_line) 0:32/little >>, + % Calculate base offset for type DIEs + % DW_FORM_ref4 offsets are relative to start of compile unit (the length field itself) + % So we need to add 4 bytes for the length field + % CompileUnitContent already includes the header (version + abbrev_offset + addr_size) + TypeDIEsBaseOffset = 4 + byte_size(CompileUnitContent), + io:format("DEBUG CU: ContentSize=~p + 4 (length) = ~p~n", + [byte_size(CompileUnitContent), TypeDIEsBaseOffset]), + + % Generate type DIEs and get the Context* type offset + {TypeDIEs, ContextPtrTypeOffset} = generate_type_dies(State, TypeDIEsBaseOffset), + % Generate DIEs for functions, opcodes and labels - FunctionDIEs = generate_function_dies_with_module(Functions, ModuleName), - OpcodeDIEs = generate_opcode_dies(Opcodes), - LabelDIEs = generate_label_dies(Labels), + FunctionDIEs = generate_function_dies_with_module(Functions, ModuleName, State, ContextPtrTypeOffset, HighPC), + OpcodeDIEs = generate_opcode_dies(Opcodes, Backend), + LabelDIEs = generate_label_dies(Labels, Backend), % End of children marker EndMarker = <<0>>, % Calculate actual unit length (everything after the length field) Content = - <>, UnitLength = byte_size(Content), @@ -739,10 +874,18 @@ generate_debug_line_section(#dwarf{lines = Lines, opcodes = _Opcodes}, SourceFil FileMapping = lists:zip(UniqueFullPaths, lists:seq(1, length(FileEntries))), Program = generate_line_program(Lines, FileMapping), - % Calculate actual header length (everything from version to end of file table) + % Calculate actual header length (everything after header_length field to end of file table) HeaderPlusTablesContent = <>, - % -4 for header_length field itself - HeaderLength = byte_size(HeaderContent) - 4 + byte_size(HeaderPlusTablesContent), + % -6 to exclude version (2 bytes) and header_length field itself (4 bytes) + DebugHeaderContentSize = byte_size(HeaderContent), + DebugStdOpcodeSize = byte_size(StdOpcodeLengths), + DebugFileTableSize = byte_size(FileTable), + DebugHeaderPlusTablesSize = byte_size(HeaderPlusTablesContent), + io:format("DEBUG: HeaderContent=~p StdOpcodes=~p FileTable=~p HeaderPlusTables=~p~n", + [DebugHeaderContentSize, DebugStdOpcodeSize, DebugFileTableSize, DebugHeaderPlusTablesSize]), + HeaderLength = byte_size(HeaderContent) - 6 + byte_size(HeaderPlusTablesContent), + io:format("DEBUG: HeaderLength = ~p - 6 + ~p = ~p~n", + [DebugHeaderContentSize, DebugHeaderPlusTablesSize, HeaderLength]), % Build corrected header with actual length CorrectedHeader = << @@ -1014,27 +1157,120 @@ encode_sleb128_negative(Value) -> <> end. +%% Generate type DIEs for Context structure and return the Context* type offset +generate_type_dies(#dwarf{backend = Backend}, BaseOffset) -> + % Get word size from backend + WordSize = Backend:word_size(), + + % Abbrev 6: term base type (uintptr_t) + TermTypeDIE = << + 6, % Abbreviation code + "term", 0, % Name + WordSize, % Byte size + ?DW_ATE_unsigned % Encoding (unsigned) + >>, + TermTypeOffset = BaseOffset, + io:format("DEBUG TYPE OFFSETS: Base=~p Term=~p~n", [BaseOffset, TermTypeOffset]), + + % Abbrev 10: Array type for x[MAX_REG+1] (term x[17]) + % Abbrev 11: Subrange type + XArraySubrangeDIE = << + 11, % Abbreviation code + 16 % Upper bound (MAX_REG = 16, so array is [0..16]) + >>, + XArrayTypeDIE = << + 10, % Abbreviation code + TermTypeOffset:32/little, % Type (term) + XArraySubrangeDIE/binary, + 0 % End of children + >>, + XArrayTypeOffset = BaseOffset + byte_size(TermTypeDIE), + + % Abbrev 8: Context structure type + % Only include the x array member for now (most important for debugging) + XOffset = case Backend of + jit_x86_64 -> 16#30; + jit_aarch64 -> 16#30; + _ -> 16#18 % riscv32 and armv6m + end, + XMemberDIE = << + 9, % Abbreviation code + "x", 0, % Name + XArrayTypeOffset:32/little, % Type (term array) + XOffset:32/little % Data member location + >>, + % Estimate Context size (actual size varies, but this is good enough) + ContextSize = 512, + ContextStructDIE = << + 8, % Abbreviation code + "Context", 0, % Name + ContextSize:32/little, % Byte size + XMemberDIE/binary, + 0 % End of children + >>, + ContextStructOffset = BaseOffset + byte_size(TermTypeDIE) + byte_size(XArrayTypeDIE), + + % Abbrev 7: Context* pointer type + ContextPtrTypeDIE = << + 7, % Abbreviation code + WordSize, % Byte size + ContextStructOffset:32/little % Type (Context) + >>, + ContextPtrTypeOffset = BaseOffset + byte_size(TermTypeDIE) + byte_size(XArrayTypeDIE) + byte_size(ContextStructDIE), + + % Combine all type DIEs + AllTypes = <>, + + {AllTypes, ContextPtrTypeOffset}. + %% Generate DIEs for functions as DW_TAG_subprogram with module:func/arity naming -generate_function_dies_with_module(Functions, ModuleName) -> +generate_function_dies_with_module(Functions, ModuleName, #dwarf{backend = Backend}, ContextPtrTypeOffset, CodeSize) -> % Filter and sort functions by address ValidFunctions = lists:sort([ {Offset, FunctionName, Arity} || {Offset, FunctionName, Arity} <- Functions, Offset >= 0 ]), + % Calculate function sizes by finding the next function's offset + % For the last function, use CodeSize to determine its end + FunctionsWithSizes = case ValidFunctions of + [] -> []; + _ -> + lists:zipwith( + fun({Offset, Name, Arity}, NextFunc) -> + Size = case NextFunc of + {NextOffset, _, _} -> NextOffset - Offset; + end_of_code -> CodeSize - Offset % Last function extends to end of code + end, + {Offset, Name, Arity, Size} + end, + ValidFunctions, + tl(ValidFunctions) ++ [end_of_code] + ) + end, + % Generate DIE for each function FunctionDIEsList = [ - generate_function_die_with_module(Offset, FunctionName, Arity, ModuleName) - || {Offset, FunctionName, Arity} <- ValidFunctions + generate_function_die_with_module(Offset, FunctionName, Arity, Size, ModuleName, Backend, ContextPtrTypeOffset) + || {Offset, FunctionName, Arity, Size} <- FunctionsWithSizes ], iolist_to_binary(FunctionDIEsList). %% Generate DIE for a single function with module name -generate_function_die_with_module(Offset, FunctionName, Arity, ModuleName) -> +generate_function_die_with_module(Offset, FunctionName, Arity, FunctionSize, ModuleName, Backend, ContextPtrTypeOffset) -> % Create module:function/arity format FunctionString = list_to_binary(io_lib:format("~s:~s/~B", [ModuleName, FunctionName, Arity])), - % Estimate function size (can be improved later) - FunctionSize = 100, + + % Get the DWARF register number for ctx from the backend + CtxRegNum = Backend:dwarf_ctx_register(), + + % Generate ctx parameter DIE + CtxParamDIE = generate_ctx_parameter_die(CtxRegNum, ContextPtrTypeOffset), + + % Get word size for addresses and convert to bits + WordSize = Backend:word_size(), + WordSizeInBits = WordSize * 8, + << % Abbreviation code (4 = DW_TAG_subprogram) 4, @@ -1042,23 +1278,52 @@ generate_function_die_with_module(Offset, FunctionName, Arity, ModuleName) -> FunctionString/binary, 0, % DW_AT_low_pc - Offset:32/little, + Offset:WordSizeInBits/little, % DW_AT_high_pc (low_pc + size) - (Offset + FunctionSize):32/little + (Offset + FunctionSize):WordSizeInBits/little, + % Child: ctx parameter + CtxParamDIE/binary, + % End of children marker + 0 + >>. + +%% Generate DIE for ctx parameter +generate_ctx_parameter_die(CtxRegNum, ContextPtrTypeOffset) -> + % DW_FORM_exprloc requires a ULEB128 length followed by the expression + % Expression: DW_OP_reg0 + register_number (single byte) + % DW_OP_regN means the value is in register N + RegOpcode = ?DW_OP_reg0 + CtxRegNum, + LocationExpr = <>, + LocationExprLen = encode_uleb128(byte_size(LocationExpr)), + + << + % Abbreviation code (5 = DW_TAG_formal_parameter) + 5, + % DW_AT_name + "ctx", + 0, + % DW_AT_type (reference to Context* type) + ContextPtrTypeOffset:32/little, + % DW_AT_location (exprloc: length + expression) + LocationExprLen/binary, + LocationExpr/binary >>. %% Generate DIEs for opcodes as DW_TAG_lexical_block -generate_opcode_dies(Opcodes) -> +generate_opcode_dies(Opcodes, Backend) -> % Filter and sort opcodes by address ValidOpcodes = lists:sort([{Offset, Opcode} || {Offset, Opcode} <- Opcodes, Offset >= 0]), % Generate DIE for each opcode - OpcodeDIEsList = [generate_opcode_die(Offset, Opcode) || {Offset, Opcode} <- ValidOpcodes], + OpcodeDIEsList = [generate_opcode_die(Offset, Opcode, Backend) || {Offset, Opcode} <- ValidOpcodes], iolist_to_binary(OpcodeDIEsList). %% Generate DIE for a single opcode -generate_opcode_die(Offset, Opcode) -> +generate_opcode_die(Offset, Opcode, Backend) -> OpcodeString = list_to_binary(io_lib:format("~s@~B", [Opcode, Offset])), + WordSize = Backend:word_size(), + WordSizeInBits = WordSize * 8, + << % Abbreviation code (2 = DW_TAG_lexical_block) 2, @@ -1066,21 +1331,24 @@ generate_opcode_die(Offset, Opcode) -> OpcodeString/binary, 0, % DW_AT_low_pc - Offset:32/little + Offset:WordSizeInBits/little >>. %% Generate DIEs for labels as DW_TAG_label -generate_label_dies(Labels) -> +generate_label_dies(Labels, Backend) -> % Filter and sort labels by address ValidLabels = lists:sort([{Offset, Label} || {Offset, Label} <- Labels, Offset >= 0]), % Generate DIE for each label - LabelDIEsList = [generate_label_die(Offset, Label) || {Offset, Label} <- ValidLabels], + LabelDIEsList = [generate_label_die(Offset, Label, Backend) || {Offset, Label} <- ValidLabels], iolist_to_binary(LabelDIEsList). %% Generate DIE for a single label -generate_label_die(Offset, Label) -> +generate_label_die(Offset, Label, Backend) -> LabelString = list_to_binary(io_lib:format("label_~B", [Label])), + WordSize = Backend:word_size(), + WordSizeInBits = WordSize * 8, + << % Abbreviation code (3 = DW_TAG_label) 3, @@ -1088,7 +1356,7 @@ generate_label_die(Offset, Label) -> LabelString/binary, 0, % DW_AT_low_pc - Offset:32/little + Offset:WordSizeInBits/little >>. %% Generate symbol table for function names and opcode symbols diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index e4bc9746ee..f27bc35e40 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -78,7 +78,8 @@ dwarf_opcode/2, dwarf_label/2, dwarf_function/3, - dwarf_line/2 + dwarf_line/2, + dwarf_ctx_register/0 ]). -endif. @@ -88,6 +89,10 @@ -include("primitives.hrl"). +-ifdef(JIT_DWARF). +-include("jit_dwarf.hrl"). +-endif. + -define(ASSERT(Expr), true = Expr). %% RISC-V32 ILP32 ABI: a0-a7 are used for argument passing (8 registers). @@ -3057,3 +3062,14 @@ add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label -spec add_label(state(), integer() | reference(), integer()) -> state(). add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. + +-ifdef(JIT_DWARF). +%%----------------------------------------------------------------------------- +%% @doc Return the DWARF register number for the ctx parameter +%% @returns The DWARF register number where ctx is passed (a0 in RISC-V) +%% @end +%%----------------------------------------------------------------------------- +-spec dwarf_ctx_register() -> non_neg_integer(). +dwarf_ctx_register() -> + ?DWARF_A0_REG_RISCV32. +-endif. diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 332ae3c25a..9ef6c6441f 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -77,7 +77,8 @@ dwarf_opcode/2, dwarf_label/2, dwarf_function/3, - dwarf_line/2 + dwarf_line/2, + dwarf_ctx_register/0 ]). -endif. @@ -87,6 +88,10 @@ -include("primitives.hrl"). +-ifdef(JIT_DWARF). +-include("jit_dwarf.hrl"). +-endif. + -define(ASSERT(Expr), true = Expr). %% System V X86_64 calling conventions which we apply here. @@ -2067,3 +2072,14 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) -spec add_label(state(), integer() | reference(), integer()) -> state(). add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. + +-ifdef(JIT_DWARF). +%%----------------------------------------------------------------------------- +%% @doc Return the DWARF register number for the ctx parameter +%% @returns The DWARF register number where ctx is passed (rdi in x86_64) +%% @end +%%----------------------------------------------------------------------------- +-spec dwarf_ctx_register() -> non_neg_integer(). +dwarf_ctx_register() -> + ?DWARF_RDI_REG_X86_64. +-endif. diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index bfae4a2d40..060bd01974 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -165,9 +165,12 @@ typedef struct #define PF_X 1 // Execute #define PF_R 4 // Read +// ELF symbol type extraction +#define ELF_ST_TYPE(info) ((info) & 0xf) + #endif -//#define ENABLE_TRACE +#define ENABLE_TRACE #include "trace.h" // Verify matching atom index in default_atoms.hrl @@ -1928,6 +1931,296 @@ void __attribute__((noinline)) __jit_debug_register_code(void) // GDB will set a breakpoint here } +// DWARF parsing helpers for address patching + +// Read unsigned LEB128 (used in DWARF for variable-length integers) +static size_t read_uleb128(const uint8_t *data, size_t *offset, uint64_t *value) +{ + *value = 0; + int shift = 0; + size_t start = *offset; + + while (1) { + uint8_t byte = data[(*offset)++]; + *value |= ((uint64_t)(byte & 0x7f)) << shift; + if ((byte & 0x80) == 0) { + break; + } + shift += 7; + } + + return *offset - start; +} + +// Structure to hold parsed abbreviation entry +typedef struct { + uint64_t code; + uint64_t tag; + uint8_t has_children; + // Attributes stored as pairs of (name, form) + uint64_t *attrs; // Dynamic array of attribute name/form pairs + size_t attr_count; +} dwarf_abbrev_t; + +// Parse a single abbreviation from .debug_abbrev +static bool parse_abbrev(const uint8_t *abbrev_data, size_t abbrev_size, size_t *offset, dwarf_abbrev_t *abbrev) +{ + if (*offset >= abbrev_size) { + return false; + } + + // Read abbreviation code + read_uleb128(abbrev_data, offset, &abbrev->code); + if (abbrev->code == 0) { + return false; // End of abbreviation table + } + + // Read tag + read_uleb128(abbrev_data, offset, &abbrev->tag); + + // Read has_children flag + abbrev->has_children = abbrev_data[(*offset)++]; + + // Count attributes first + size_t temp_offset = *offset; + size_t count = 0; + while (temp_offset < abbrev_size) { + uint64_t name, form; + read_uleb128(abbrev_data, &temp_offset, &name); + read_uleb128(abbrev_data, &temp_offset, &form); + if (name == 0 && form == 0) { + break; + } + count++; + } + + // Allocate and read attributes + abbrev->attr_count = count; + if (count > 0) { + abbrev->attrs = malloc(count * 2 * sizeof(uint64_t)); + for (size_t i = 0; i < count; i++) { + read_uleb128(abbrev_data, offset, &abbrev->attrs[i * 2]); // name + read_uleb128(abbrev_data, offset, &abbrev->attrs[i * 2 + 1]); // form + } + } else { + abbrev->attrs = NULL; + } + + // Skip terminator (0, 0) + (*offset) += 2; + + return true; +} + +// Parse all abbreviations from .debug_abbrev +static dwarf_abbrev_t *parse_abbrev_table(const uint8_t *abbrev_data, size_t abbrev_size, size_t *count) +{ + // First pass: count abbreviations + size_t offset = 0; + size_t abbrev_count = 0; + + while (offset < abbrev_size) { + uint64_t code; + read_uleb128(abbrev_data, &offset, &code); + if (code == 0) { + break; + } + + // Skip tag + uint64_t tag; + read_uleb128(abbrev_data, &offset, &tag); + offset++; // has_children + + // Skip attributes + while (offset < abbrev_size) { + uint64_t name, form; + read_uleb128(abbrev_data, &offset, &name); + read_uleb128(abbrev_data, &offset, &form); + if (name == 0 && form == 0) { + break; + } + } + + abbrev_count++; + } + + if (abbrev_count == 0) { + *count = 0; + return NULL; + } + + // Second pass: parse abbreviations + dwarf_abbrev_t *abbrevs = calloc(abbrev_count, sizeof(dwarf_abbrev_t)); + offset = 0; + size_t i = 0; + + while (offset < abbrev_size && i < abbrev_count) { + if (!parse_abbrev(abbrev_data, abbrev_size, &offset, &abbrevs[i])) { + break; + } + i++; + } + + *count = i; + return abbrevs; +} + +// Free abbreviation table +static void free_abbrev_table(dwarf_abbrev_t *abbrevs, size_t count) +{ + for (size_t i = 0; i < count; i++) { + free(abbrevs[i].attrs); + } + free(abbrevs); +} + +// Find abbreviation by code +static const dwarf_abbrev_t *find_abbrev(const dwarf_abbrev_t *abbrevs, size_t count, uint64_t code) +{ + for (size_t i = 0; i < count; i++) { + if (abbrevs[i].code == code) { + return &abbrevs[i]; + } + } + return NULL; +} + +// Get size of a DWARF form value +static size_t get_form_size(uint64_t form, uint8_t addr_size, const uint8_t *data, size_t offset) +{ + switch (form) { + case 0x01: // DW_FORM_addr + return addr_size; + case 0x03: // DW_FORM_block2 + return 2 + (data[offset] | (data[offset + 1] << 8)); + case 0x04: // DW_FORM_block4 + return 4 + (data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16) | (data[offset + 3] << 24)); + case 0x05: // DW_FORM_data2 + return 2; + case 0x06: // DW_FORM_data4 + return 4; + case 0x07: // DW_FORM_data8 + return 8; + case 0x08: // DW_FORM_string + return strlen((const char *)&data[offset]) + 1; + case 0x09: // DW_FORM_block + case 0x18: // DW_FORM_exprloc + // Variable length - LEB128 size followed by data + { + uint64_t block_len; + size_t temp = offset; + size_t leb_size = read_uleb128(data, &temp, &block_len); + return leb_size + block_len; // LEB128 size + block data + } + case 0x0f: // DW_FORM_udata + // Just a LEB128 value + { + uint64_t val; + size_t temp = offset; + return read_uleb128(data, &temp, &val); + } + case 0x13: // DW_FORM_ref4 + return 4; + case 0x0b: // DW_FORM_data1 + return 1; + case 0x0e: // DW_FORM_strp + return 4; + case 0x10: // DW_FORM_ref_addr + return addr_size; + case 0x11: // DW_FORM_ref1 + return 1; + case 0x12: // DW_FORM_ref2 + return 2; + case 0x14: // DW_FORM_ref8 + return 8; + case 0x17: // DW_FORM_sec_offset + return 4; + case 0x19: // DW_FORM_flag_present + return 0; + default: + TRACE("Unknown DWARF form: 0x%llx\n", (unsigned long long)form); + return 0; + } +} + +// Patch addresses in .debug_info using parsed abbreviations +static void patch_debug_info_addresses(uint8_t *debug_info, size_t debug_info_size, + const dwarf_abbrev_t *abbrevs, size_t abbrev_count, + uintptr_t load_address) +{ + if (debug_info_size < 11) { + return; + } + + // Parse compile unit header + uint8_t addr_size = debug_info[10]; + TRACE("Patching .debug_info with addr_size=%d\n", addr_size); + + // Skip: length(4) + version(2) + abbrev_offset(4) + addr_size(1) = 11 bytes + size_t offset = 11; + int patch_count = 0; + + // Parse DIEs + while (offset < debug_info_size) { + uint64_t abbrev_code; + size_t code_size = read_uleb128(debug_info, &offset, &abbrev_code); + + if (abbrev_code == 0) { + // Null DIE - end of siblings + continue; + } + + const dwarf_abbrev_t *abbrev = find_abbrev(abbrevs, abbrev_count, abbrev_code); + if (!abbrev) { + TRACE("Warning: Unknown abbreviation code %llu at offset %zu\n", + (unsigned long long)abbrev_code, offset - code_size); + break; + } + + // Process attributes + for (size_t i = 0; i < abbrev->attr_count; i++) { + uint64_t attr_name = abbrev->attrs[i * 2]; + uint64_t attr_form = abbrev->attrs[i * 2 + 1]; + + // Check if this is an address attribute (DW_FORM_addr) + if (attr_form == 0x01) { // DW_FORM_addr + // This is an address - patch it + if (addr_size == 8) { + uint64_t *addr = (uint64_t *)&debug_info[offset]; + uint64_t old_val = *addr; + *addr += load_address; + TRACE("Patched .debug_info[%zu] (attr 0x%llx): 0x%llx -> 0x%llx\n", + offset, (unsigned long long)attr_name, (unsigned long long)old_val, (unsigned long long)*addr); + patch_count++; + } else if (addr_size == 4) { + uint32_t *addr = (uint32_t *)&debug_info[offset]; + uint32_t old_val = *addr; + *addr += (uint32_t)load_address; + TRACE("Patched .debug_info[%zu] (attr 0x%llx): 0x%x -> 0x%x\n", + offset, (unsigned long long)attr_name, old_val, *addr); + patch_count++; + } + } + + // Skip to next attribute + size_t form_size = get_form_size(attr_form, addr_size, debug_info, offset); + if (form_size == 0) { + TRACE("Failed to get form size for form 0x%llx at offset %zu\n", + (unsigned long long)attr_form, offset); + return; + } + offset += form_size; + + if (offset > debug_info_size) { + TRACE("Offset exceeded debug_info size\n"); + return; + } + } + } + + TRACE("Total .debug_info patches: %d\n", patch_count); +} + // Create a minimal ELF file for debugging with proper PIE support static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_data, size_t original_elf_size, uintptr_t load_address, size_t *new_elf_size) @@ -1935,13 +2228,23 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat TRACE("create_minimal_elf_for_debugging: original_elf_size=%zu, load_address=0x%lx\n", original_elf_size, load_address); - // Extract symbol table and string table from original ELF + // Extract symbol table, string table, and DWARF sections from original ELF const char *symtab_data = NULL; size_t symtab_size = 0; const char *strtab_data = NULL; size_t strtab_size = 0; - - // Parse original ELF to extract symbol and string tables + const char *debug_info_data = NULL; + size_t debug_info_size = 0; + const char *debug_line_data = NULL; + size_t debug_line_size = 0; + const char *debug_abbrev_data = NULL; + size_t debug_abbrev_size = 0; + const char *debug_str_data = NULL; + size_t debug_str_size = 0; + const char *debug_aranges_data = NULL; + size_t debug_aranges_size = 0; + + // Parse original ELF to extract symbol, string, and DWARF tables if (original_elf_size < sizeof(Elf_Ehdr)) { fprintf(stderr, "ERROR: Original ELF too small for header\n"); return NULL; @@ -1949,15 +2252,33 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat const Elf_Ehdr *ehdr = (const Elf_Ehdr *) original_elf_data; const Elf_Shdr *shdrs = (const Elf_Shdr *) (original_elf_data + ehdr->e_shoff); + const char *shstrtab = (const char *) (original_elf_data + shdrs[ehdr->e_shstrndx].sh_offset); - // Find .symtab and .strtab sections + // Find .symtab, .strtab, and .debug_* sections for (int i = 0; i < ehdr->e_shnum; i++) { + const char *section_name = shstrtab + shdrs[i].sh_name; + if (shdrs[i].sh_type == SHT_SYMTAB) { symtab_data = (const char *) original_elf_data + shdrs[i].sh_offset; symtab_size = shdrs[i].sh_size; } else if (shdrs[i].sh_type == SHT_STRTAB && i != ehdr->e_shstrndx) { strtab_data = (const char *) original_elf_data + shdrs[i].sh_offset; strtab_size = shdrs[i].sh_size; + } else if (strcmp(section_name, ".debug_info") == 0) { + debug_info_data = (const char *) original_elf_data + shdrs[i].sh_offset; + debug_info_size = shdrs[i].sh_size; + } else if (strcmp(section_name, ".debug_line") == 0) { + debug_line_data = (const char *) original_elf_data + shdrs[i].sh_offset; + debug_line_size = shdrs[i].sh_size; + } else if (strcmp(section_name, ".debug_abbrev") == 0) { + debug_abbrev_data = (const char *) original_elf_data + shdrs[i].sh_offset; + debug_abbrev_size = shdrs[i].sh_size; + } else if (strcmp(section_name, ".debug_str") == 0) { + debug_str_data = (const char *) original_elf_data + shdrs[i].sh_offset; + debug_str_size = shdrs[i].sh_size; + } else if (strcmp(section_name, ".debug_aranges") == 0) { + debug_aranges_data = (const char *) original_elf_data + shdrs[i].sh_offset; + debug_aranges_size = shdrs[i].sh_size; } } @@ -1966,12 +2287,25 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat return NULL; } - // Section name strings: "\0.text\0.symtab\0.strtab\0.shstrtab\0" - const char *section_names = "\0.text\0.symtab\0.strtab\0.shstrtab\0"; - size_t shstrtab_size = 32; // strlen of section_names + TRACE("Found DWARF sections: .debug_info=%zu .debug_line=%zu .debug_abbrev=%zu .debug_str=%zu .debug_aranges=%zu\n", + debug_info_size, debug_line_size, debug_abbrev_size, debug_str_size, debug_aranges_size); + + // Section name strings: "\0.text\0.symtab\0.strtab\0.shstrtab\0.debug_info\0.debug_line\0.debug_abbrev\0.debug_str\0.debug_aranges\0" + const char *section_names = "\0.text\0.symtab\0.strtab\0.shstrtab\0.debug_info\0.debug_line\0.debug_abbrev\0.debug_str\0.debug_aranges\0"; + size_t shstrtab_size = 103; // strlen of section_names - // Calculate size of new minimal ELF (ELF header + 1 program header + 5 section headers + data) - size_t elf_size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (5 * sizeof(Elf_Shdr)) + symtab_size + strtab_size + shstrtab_size; + // Count how many sections we have (null + .text + .symtab + .strtab + .shstrtab + debug sections) + int section_count = 5; // Base sections + if (debug_info_data) section_count++; + if (debug_line_data) section_count++; + if (debug_abbrev_data) section_count++; + if (debug_str_data) section_count++; + if (debug_aranges_data) section_count++; + + // Calculate size of new minimal ELF (ELF header + 1 program header + section headers + data) + size_t elf_size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (section_count * sizeof(Elf_Shdr)) + + symtab_size + strtab_size + shstrtab_size + + debug_info_size + debug_line_size + debug_abbrev_size + debug_str_size + debug_aranges_size; uint8_t *new_elf = (uint8_t *) malloc(elf_size); if (!new_elf) { @@ -1984,7 +2318,8 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat const Elf_Ehdr *orig_ehdr = (const Elf_Ehdr *) original_elf_data; Elf_Ehdr *new_ehdr = (Elf_Ehdr *) new_elf; memcpy(new_ehdr->e_ident, orig_ehdr->e_ident, 16); - new_ehdr->e_type = orig_ehdr->e_type; + // Use ET_DYN for JIT debugging - GDB expects shared object type for JIT code + new_ehdr->e_type = 3; // ET_DYN new_ehdr->e_machine = orig_ehdr->e_machine; new_ehdr->e_version = orig_ehdr->e_version; new_ehdr->e_entry = 0; @@ -1995,8 +2330,8 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat new_ehdr->e_phentsize = sizeof(Elf_Phdr); new_ehdr->e_phnum = 1; new_ehdr->e_shentsize = sizeof(Elf_Shdr); - new_ehdr->e_shnum = 5; // null, .text, .symtab, .strtab, .shstrtab - new_ehdr->e_shstrndx = 4; // .shstrtab is the section name string table + new_ehdr->e_shnum = section_count; + new_ehdr->e_shstrndx = 4; // .shstrtab is the section name string table (always section 4) // Create program header (PT_LOAD segment) Elf_Phdr *new_phdr = (Elf_Phdr *) (new_elf + sizeof(Elf_Ehdr)); @@ -2027,13 +2362,19 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat return NULL; } - new_phdr->p_filesz = code_size; // Size in file - new_phdr->p_memsz = code_size; // Size in memory + // PT_LOAD will cover code + DWARF sections in virtual memory + // This allows the debugger to apply base address relocation to DWARF + // We'll set p_memsz later after we know the total size + new_phdr->p_filesz = 0; // No actual code in this file + new_phdr->p_memsz = 0; // Will be set later new_phdr->p_align = 1; // Create section headers Elf_Shdr *new_shdrs = (Elf_Shdr *) (new_elf + sizeof(Elf_Ehdr) + sizeof(Elf_Phdr)); - size_t current_offset = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (5 * sizeof(Elf_Shdr)); + size_t current_offset = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (section_count * sizeof(Elf_Shdr)); + + // Virtual address offset for DWARF sections (after code) + uintptr_t current_vaddr = load_address + code_size; // Section 0: null section (required) new_shdrs[0] = (Elf_Shdr){ 0 }; @@ -2077,20 +2418,186 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat new_shdrs[4].sh_offset = current_offset; new_shdrs[4].sh_size = shstrtab_size; new_shdrs[4].sh_addralign = 1; - - // Copy symbol table data and patch symbol addresses + current_offset += shstrtab_size; + + // Add DWARF sections if present + int next_section = 5; + + // Section 5: .debug_info (if present) + if (debug_info_data) { + new_shdrs[next_section].sh_name = 33; // ".debug_info\0" at offset 33 in section names + new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS + new_shdrs[next_section].sh_flags = 2; // SHF_ALLOC - make it part of PT_LOAD + new_shdrs[next_section].sh_addr = current_vaddr; + new_shdrs[next_section].sh_offset = current_offset; + new_shdrs[next_section].sh_size = debug_info_size; + new_shdrs[next_section].sh_addralign = 1; + current_offset += debug_info_size; + current_vaddr += debug_info_size; + next_section++; + } + + // Section 6: .debug_line (if present) + if (debug_line_data) { + new_shdrs[next_section].sh_name = 45; // ".debug_line\0" at offset 45 in section names + new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS + new_shdrs[next_section].sh_flags = 2; // SHF_ALLOC + new_shdrs[next_section].sh_addr = current_vaddr; + new_shdrs[next_section].sh_offset = current_offset; + new_shdrs[next_section].sh_size = debug_line_size; + new_shdrs[next_section].sh_addralign = 1; + current_offset += debug_line_size; + current_vaddr += debug_line_size; + next_section++; + } + + // Section 7: .debug_abbrev (if present) + if (debug_abbrev_data) { + new_shdrs[next_section].sh_name = 57; // ".debug_abbrev\0" at offset 57 in section names + new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS + new_shdrs[next_section].sh_flags = 2; // SHF_ALLOC + new_shdrs[next_section].sh_addr = current_vaddr; + new_shdrs[next_section].sh_offset = current_offset; + new_shdrs[next_section].sh_size = debug_abbrev_size; + new_shdrs[next_section].sh_addralign = 1; + current_offset += debug_abbrev_size; + current_vaddr += debug_abbrev_size; + next_section++; + } + + // Section 8: .debug_str (if present) + if (debug_str_data) { + new_shdrs[next_section].sh_name = 71; // ".debug_str\0" at offset 71 in section names + new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS + new_shdrs[next_section].sh_flags = 2; // SHF_ALLOC + new_shdrs[next_section].sh_addr = current_vaddr; + new_shdrs[next_section].sh_offset = current_offset; + new_shdrs[next_section].sh_size = debug_str_size; + new_shdrs[next_section].sh_addralign = 1; + current_offset += debug_str_size; + current_vaddr += debug_str_size; + next_section++; + } + + // Now set PT_LOAD to cover code + DWARF sections in virtual memory + new_phdr->p_memsz = current_vaddr - load_address; + TRACE("PT_LOAD covers 0x%lx to 0x%lx (size=0x%lx)\n", + (unsigned long)load_address, (unsigned long)current_vaddr, (unsigned long)new_phdr->p_memsz); + + // Section 9: .debug_aranges (if present) + // DISABLED: LLDB uses symbols for breakpoints, not .debug_aranges + // Keeping this corrupted actually made breakpoints work better! + if (false && debug_aranges_data) { + new_shdrs[next_section].sh_name = 82; // ".debug_aranges\0" at offset 82 in section names + new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS + new_shdrs[next_section].sh_offset = current_offset; + new_shdrs[next_section].sh_size = debug_aranges_size; + new_shdrs[next_section].sh_addralign = 1; + current_offset += debug_aranges_size; + next_section++; + } + + // Copy symbol table data uint8_t *new_symtab = new_elf + new_shdrs[2].sh_offset; memcpy(new_symtab, symtab_data, symtab_size); - // With PT_LOAD program header, the debugger should automatically apply the base address // Copy string table data uint8_t *new_strtab = new_elf + new_shdrs[3].sh_offset; memcpy(new_strtab, strtab_data, strtab_size); + TRACE("Copied symbol table: %zu bytes, %zu symbols\n", symtab_size, symtab_size / sizeof(Elf_Sym)); + + // Debug: print first few function symbols + Elf_Sym *syms = (Elf_Sym *)new_symtab; + size_t num_syms = symtab_size / sizeof(Elf_Sym); + for (size_t i = 0; i < num_syms && i < 10; i++) { + if (ELF_ST_TYPE(syms[i].st_info) == STT_FUNC) { + const char *sym_name = (const char *)(new_strtab + syms[i].st_name); + TRACE(" Symbol[%zu]: %s @ 0x%lx (size=%zu)\n", i, sym_name, + (unsigned long)syms[i].st_value, (size_t)syms[i].st_size); + } + } + + // With PT_LOAD program header, the debugger should automatically apply the base address + // Copy section name string table data uint8_t *new_shstrtab = new_elf + new_shdrs[4].sh_offset; memcpy(new_shstrtab, section_names, shstrtab_size); + // Copy DWARF section data + next_section = 5; + + if (debug_info_data) { + uint8_t *new_debug_info = new_elf + new_shdrs[next_section].sh_offset; + memcpy(new_debug_info, debug_info_data, debug_info_size); + + // No need to patch DWARF addresses - PT_LOAD handles relocation automatically + // since DWARF sections now have SHF_ALLOC and virtual addresses + TRACE("DWARF sections in PT_LOAD - debugger will apply base address\n"); + + next_section++; + } + + if (debug_line_data) { + uint8_t *new_debug_line = new_elf + new_shdrs[next_section].sh_offset; + memcpy(new_debug_line, debug_line_data, debug_line_size); + next_section++; + } + + if (debug_abbrev_data) { + uint8_t *new_debug_abbrev = new_elf + new_shdrs[next_section].sh_offset; + memcpy(new_debug_abbrev, debug_abbrev_data, debug_abbrev_size); + next_section++; + } + + if (debug_str_data) { + uint8_t *new_debug_str = new_elf + new_shdrs[next_section].sh_offset; + memcpy(new_debug_str, debug_str_data, debug_str_size); + next_section++; + } + + // DISABLED: .debug_aranges not needed for LLDB breakpoints + if (false && debug_aranges_data) { + uint8_t *new_debug_aranges = new_elf + new_shdrs[next_section].sh_offset; + memcpy(new_debug_aranges, debug_aranges_data, debug_aranges_size); + + // Patch .debug_aranges addresses to absolute addresses + // Structure: [length:4][version:2][debug_info_offset:4][addr_size:1][seg_size:1][padding:variable] + // [address:addr_size][length:addr_size][terminator:addr_size*2] + // Header is 4+2+4+1+1 = 12 bytes, then padding to align to 2*addr_size + if (debug_aranges_size >= 12) { + uint8_t addr_size = new_debug_aranges[10]; // Address size field at offset 4+2+4 = 10 + TRACE(".debug_aranges addr_size=%d\n", addr_size); + + // Calculate padding: header is 8 bytes (after the length field), align to 2*addr_size + size_t header_size = 8; // version(2) + debug_info_offset(4) + addr_size(1) + seg_size(1) + size_t tuple_alignment = 2 * addr_size; + size_t padding_size = (tuple_alignment - (header_size % tuple_alignment)) % tuple_alignment; + size_t descriptor_offset = 4 + header_size + padding_size; // Skip length field + header + padding + + TRACE(".debug_aranges descriptor at offset %zu (header=%zu, padding=%zu)\n", + descriptor_offset, header_size, padding_size); + + if (debug_aranges_size >= descriptor_offset + addr_size * 2) { + if (addr_size == 8) { + // Patch the address range start address (64-bit) + uint64_t *range_start = (uint64_t *)(new_debug_aranges + descriptor_offset); + uint64_t old_addr = *range_start; + *range_start += load_address; + TRACE("Patched .debug_aranges: 0x%llx -> 0x%llx\n", (unsigned long long)old_addr, (unsigned long long)*range_start); + } else if (addr_size == 4) { + // Patch the address range start address (32-bit) + uint32_t *range_start = (uint32_t *)(new_debug_aranges + descriptor_offset); + uint32_t old_addr = *range_start; + *range_start += (uint32_t)load_address; + TRACE("Patched .debug_aranges: 0x%x -> 0x%x\n", old_addr, *range_start); + } + } + } + + next_section++; + } + *new_elf_size = elf_size; return new_elf; } @@ -2146,6 +2653,14 @@ void jit_debug_register_code(Module *mod, const void *native_code, size_t native return; } + // Debug: dump ELF to file for inspection + FILE *f = fopen("/tmp/jit_debug.elf", "wb"); + if (f) { + fwrite(new_elf, 1, new_elf_size, f); + fclose(f); + TRACE("Wrote JIT ELF to /tmp/jit_debug.elf (%zu bytes)\n", new_elf_size); + } + // Initialize the entry with the new ELF entry->next_entry = NULL; entry->prev_entry = NULL; From f81becf0b5f780d343a288755562fd6d39750455 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 23 Oct 2025 08:23:17 +0200 Subject: [PATCH 97/97] dwarf: wip Signed-off-by: Paul Guyot --- libs/jit/src/jit_dwarf.hrl | 104 ++++++++++++++++++++++++++++++++++++ src/libAtomVM/jit.c | 106 +++++++++++++++++++------------------ 2 files changed, 159 insertions(+), 51 deletions(-) create mode 100644 libs/jit/src/jit_dwarf.hrl diff --git a/libs/jit/src/jit_dwarf.hrl b/libs/jit/src/jit_dwarf.hrl new file mode 100644 index 0000000000..4c071d5273 --- /dev/null +++ b/libs/jit/src/jit_dwarf.hrl @@ -0,0 +1,104 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +%% DWARF Tag constants +-define(DW_TAG_compile_unit, 16#11). +-define(DW_TAG_subprogram, 16#2e). +-define(DW_TAG_lexical_block, 16#0b). +-define(DW_TAG_label, 16#0a). +-define(DW_TAG_formal_parameter, 16#05). +-define(DW_TAG_pointer_type, 16#0f). +-define(DW_TAG_structure_type, 16#13). +-define(DW_TAG_member, 16#0d). +-define(DW_TAG_array_type, 16#01). +-define(DW_TAG_subrange_type, 16#21). +-define(DW_TAG_base_type, 16#24). + +%% DWARF Attribute constants +-define(DW_AT_name, 16#03). +-define(DW_AT_comp_dir, 16#1b). +-define(DW_AT_producer, 16#25). +-define(DW_AT_language, 16#13). +-define(DW_AT_low_pc, 16#11). +-define(DW_AT_high_pc, 16#12). +-define(DW_AT_stmt_list, 16#10). +-define(DW_AT_type, 16#49). +-define(DW_AT_data_member_location, 16#38). +-define(DW_AT_byte_size, 16#0b). +-define(DW_AT_encoding, 16#3e). +-define(DW_AT_location, 16#02). +-define(DW_AT_upper_bound, 16#2f). + +%% DWARF Form constants +-define(DW_FORM_string, 16#08). +-define(DW_FORM_addr, 16#01). +-define(DW_FORM_data4, 16#06). +-define(DW_FORM_data1, 16#0b). +-define(DW_FORM_udata, 16#0f). +-define(DW_FORM_ref4, 16#13). +-define(DW_FORM_sec_offset, 16#17). +-define(DW_FORM_exprloc, 16#18). + +%% DWARF Encoding constants +-define(DW_ATE_unsigned, 16#07). +-define(DW_ATE_signed, 16#05). + +%% DWARF Location expression opcodes +-define(DW_OP_reg0, 16#50). +-define(DW_OP_fbreg, 16#91). + +%% DWARF Language constants +-define(DW_LANG_C, 16#02). +-define(DW_LANG_Erlang, 16#46). +-define(DW_LANG_Elixir, 16#47). +-define(DW_LANG_Gleam, 16#48). + +%% ELF constants +-define(EI_MAG0, 16#7f). +-define(EI_MAG1, $E). +-define(EI_MAG2, $L). +-define(EI_MAG3, $F). +-define(ELFCLASS32, 1). +-define(ELFCLASS64, 2). +-define(ELFDATA2LSB, 1). +-define(EV_CURRENT, 1). +-define(ET_REL, 1). +-define(EM_ARM, 40). +-define(EM_X86_64, 62). +-define(EM_AARCH64, 183). +-define(EM_RISCV, 243). +-define(SHT_PROGBITS, 1). +-define(SHT_SYMTAB, 2). +-define(SHT_STRTAB, 3). +-define(SHT_ARM_ATTRIBUTES, 16#70000003). +-define(SHF_ALLOC, 2). +-define(SHF_EXECINSTR, 4). + +%% ARM EABI flags +-define(EF_ARM_EABI_VER5, 16#05000000). +-define(EF_ARM_ABI_FLOAT_SOFT, 16#00000200). +-define(EF_ARM_ARCH_V6M, 16#00000009). + +%% DWARF register numbers +%% These follow the DWARF register numbering conventions for each architecture +-define(DWARF_RDI_REG_X86_64, 5). % rdi register in x86_64 +-define(DWARF_X0_REG_AARCH64, 0). % x0 register in aarch64 +-define(DWARF_A0_REG_RISCV32, 10). % a0 register in RISC-V +-define(DWARF_R0_REG_ARMV6M, 0). % r0 register in ARM diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 060bd01974..fb5014df87 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -2302,9 +2302,30 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat if (debug_str_data) section_count++; if (debug_aranges_data) section_count++; + // Find the actual .text section size from the original ELF + const Elf_Ehdr *orig_ehdr = (const Elf_Ehdr *) original_elf_data; + const Elf_Shdr *orig_shdrs = (const Elf_Shdr *) (original_elf_data + orig_ehdr->e_shoff); + + size_t code_size = 0; + + // Look for .text section in original ELF + for (int i = 0; i < orig_ehdr->e_shnum; i++) { + const Elf_Shdr *shdr = &orig_shdrs[i]; + if (shdr->sh_type == 1 && (shdr->sh_flags & 6) == 6) { // SHT_PROGBITS + SHF_ALLOC + SHF_EXECINSTR + code_size = shdr->sh_size; + break; + } + } + + if (code_size == 0) { + fprintf(stderr, "ERROR: Could not find .text section in original ELF\n"); + return NULL; + } + // Calculate size of new minimal ELF (ELF header + 1 program header + section headers + data) + // IMPORTANT: We now include code_size so we can copy the actual JIT code into the file size_t elf_size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (section_count * sizeof(Elf_Shdr)) + - symtab_size + strtab_size + shstrtab_size + + code_size + symtab_size + strtab_size + shstrtab_size + debug_info_size + debug_line_size + debug_abbrev_size + debug_str_size + debug_aranges_size; uint8_t *new_elf = (uint8_t *) malloc(elf_size); @@ -2315,11 +2336,11 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat memset(new_elf, 0, elf_size); // Create ELF header - const Elf_Ehdr *orig_ehdr = (const Elf_Ehdr *) original_elf_data; Elf_Ehdr *new_ehdr = (Elf_Ehdr *) new_elf; memcpy(new_ehdr->e_ident, orig_ehdr->e_ident, 16); - // Use ET_DYN for JIT debugging - GDB expects shared object type for JIT code - new_ehdr->e_type = 3; // ET_DYN + // Use ET_EXEC for JIT debugging - code is loaded at fixed address + // ET_EXEC is the correct type for executables with PT_LOAD at specific addresses + new_ehdr->e_type = 2; // ET_EXEC new_ehdr->e_machine = orig_ehdr->e_machine; new_ehdr->e_version = orig_ehdr->e_version; new_ehdr->e_entry = 0; @@ -2338,43 +2359,25 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat new_phdr->p_type = PT_LOAD; new_phdr->p_flags = PF_R | PF_X; - new_phdr->p_offset = 0; + // PT_LOAD will start where code is in the file and map to load_address in memory + // p_offset will be set after we know where code is + new_phdr->p_offset = 0; // Will be set after we copy code new_phdr->p_vaddr = load_address; new_phdr->p_paddr = load_address; - - // Find the actual .text section size from the original ELF - const Elf_Shdr *orig_shdrs = (const Elf_Shdr *) (original_elf_data + orig_ehdr->e_shoff); - - size_t code_size = 0; - - // Look for .text section in original ELF - for (int i = 0; i < orig_ehdr->e_shnum; i++) { - const Elf_Shdr *shdr = &orig_shdrs[i]; - if (shdr->sh_type == 1 && (shdr->sh_flags & 6) == 6) { // SHT_PROGBITS + SHF_ALLOC + SHF_EXECINSTR - code_size = shdr->sh_size; - break; - } - } - - if (code_size == 0) { - fprintf(stderr, "ERROR: Could not find .text section in original ELF\n"); - free(new_elf); - return NULL; - } - - // PT_LOAD will cover code + DWARF sections in virtual memory - // This allows the debugger to apply base address relocation to DWARF - // We'll set p_memsz later after we know the total size - new_phdr->p_filesz = 0; // No actual code in this file - new_phdr->p_memsz = 0; // Will be set later + new_phdr->p_filesz = 0; // Will be set after we copy data + new_phdr->p_memsz = 0; // Will be set later after we know total size new_phdr->p_align = 1; // Create section headers Elf_Shdr *new_shdrs = (Elf_Shdr *) (new_elf + sizeof(Elf_Ehdr) + sizeof(Elf_Phdr)); size_t current_offset = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (section_count * sizeof(Elf_Shdr)); - // Virtual address offset for DWARF sections (after code) - uintptr_t current_vaddr = load_address + code_size; + // Copy the actual JIT code into the file right after section headers + // This allows GDB's BFD to recognize it as a valid object file + uint8_t *code_dest = new_elf + current_offset; + memcpy(code_dest, (void*)load_address, code_size); + size_t code_file_offset = current_offset; + current_offset += code_size; // Section 0: null section (required) new_shdrs[0] = (Elf_Shdr){ 0 }; @@ -2384,8 +2387,8 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat new_shdrs[1].sh_type = 1; // SHT_PROGBITS new_shdrs[1].sh_flags = 6; // SHF_ALLOC | SHF_EXECINSTR new_shdrs[1].sh_addr = load_address; - new_shdrs[1].sh_offset = 0; // No actual .text data in this ELF, but debugger uses load_address - new_shdrs[1].sh_size = code_size; // Set proper size so debugger knows the extent + new_shdrs[1].sh_offset = code_file_offset; // Point to code we copied into the file + new_shdrs[1].sh_size = code_size; new_shdrs[1].sh_addralign = 1; // Section 2: .symtab @@ -2421,19 +2424,19 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat current_offset += shstrtab_size; // Add DWARF sections if present + // DWARF sections don't need SHF_ALLOC - they're debug info only, not loaded at runtime int next_section = 5; // Section 5: .debug_info (if present) if (debug_info_data) { new_shdrs[next_section].sh_name = 33; // ".debug_info\0" at offset 33 in section names new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS - new_shdrs[next_section].sh_flags = 2; // SHF_ALLOC - make it part of PT_LOAD - new_shdrs[next_section].sh_addr = current_vaddr; + new_shdrs[next_section].sh_flags = 0; // No ALLOC - debug info only + new_shdrs[next_section].sh_addr = 0; new_shdrs[next_section].sh_offset = current_offset; new_shdrs[next_section].sh_size = debug_info_size; new_shdrs[next_section].sh_addralign = 1; current_offset += debug_info_size; - current_vaddr += debug_info_size; next_section++; } @@ -2441,13 +2444,12 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat if (debug_line_data) { new_shdrs[next_section].sh_name = 45; // ".debug_line\0" at offset 45 in section names new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS - new_shdrs[next_section].sh_flags = 2; // SHF_ALLOC - new_shdrs[next_section].sh_addr = current_vaddr; + new_shdrs[next_section].sh_flags = 0; + new_shdrs[next_section].sh_addr = 0; new_shdrs[next_section].sh_offset = current_offset; new_shdrs[next_section].sh_size = debug_line_size; new_shdrs[next_section].sh_addralign = 1; current_offset += debug_line_size; - current_vaddr += debug_line_size; next_section++; } @@ -2455,13 +2457,12 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat if (debug_abbrev_data) { new_shdrs[next_section].sh_name = 57; // ".debug_abbrev\0" at offset 57 in section names new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS - new_shdrs[next_section].sh_flags = 2; // SHF_ALLOC - new_shdrs[next_section].sh_addr = current_vaddr; + new_shdrs[next_section].sh_flags = 0; + new_shdrs[next_section].sh_addr = 0; new_shdrs[next_section].sh_offset = current_offset; new_shdrs[next_section].sh_size = debug_abbrev_size; new_shdrs[next_section].sh_addralign = 1; current_offset += debug_abbrev_size; - current_vaddr += debug_abbrev_size; next_section++; } @@ -2469,20 +2470,23 @@ static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_dat if (debug_str_data) { new_shdrs[next_section].sh_name = 71; // ".debug_str\0" at offset 71 in section names new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS - new_shdrs[next_section].sh_flags = 2; // SHF_ALLOC - new_shdrs[next_section].sh_addr = current_vaddr; + new_shdrs[next_section].sh_flags = 0; + new_shdrs[next_section].sh_addr = 0; new_shdrs[next_section].sh_offset = current_offset; new_shdrs[next_section].sh_size = debug_str_size; new_shdrs[next_section].sh_addralign = 1; current_offset += debug_str_size; - current_vaddr += debug_str_size; next_section++; } - // Now set PT_LOAD to cover code + DWARF sections in virtual memory - new_phdr->p_memsz = current_vaddr - load_address; - TRACE("PT_LOAD covers 0x%lx to 0x%lx (size=0x%lx)\n", - (unsigned long)load_address, (unsigned long)current_vaddr, (unsigned long)new_phdr->p_memsz); + // PT_LOAD covers only the .text section (code) + // DWARF sections are not loadable - they're debug info only + new_phdr->p_offset = code_file_offset; + new_phdr->p_memsz = code_size; + new_phdr->p_filesz = code_size; + TRACE("PT_LOAD covers 0x%lx to 0x%lx (size=0x%lx), filesz=0x%lx\n", + (unsigned long)load_address, (unsigned long)(load_address + code_size), + (unsigned long)new_phdr->p_memsz, (unsigned long)new_phdr->p_filesz); // Section 9: .debug_aranges (if present) // DISABLED: LLDB uses symbols for breakpoints, not .debug_aranges