From 47b43302d2e2ab92436a9402345c1327559e7f4d Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 25 Oct 2025 16:43:21 +0200 Subject: [PATCH 01/28] Implement support for private_append Add a new `term_reuse_binary` to reuse a refc binary, taking advantage of the private_append compiler optimization. Add handling of out-of-memory errors in term_alloc_refc_binary by raising an out-of-memory error instead of aborting. Replace uses of `term_create_empty_binary` with `term_create_uninitialized_binary` since we don't need to set the binary content to zeroes. Update the `and_/3` signature in JIT backends so it can return the result in a new register, and perform a few optimizations accordingly by removing unnecessary copies. Signed-off-by: Paul Guyot --- libs/jit/src/jit.erl | 288 ++++++++++++++++----------- libs/jit/src/jit_aarch64.erl | 17 +- libs/jit/src/jit_armv6m.erl | 35 ++-- libs/jit/src/jit_x86_64.erl | 27 ++- libs/jit/src/primitives.hrl | 3 +- libs/jit/src/term.hrl | 2 + src/libAtomVM/jit.c | 17 +- src/libAtomVM/jit.h | 1 + src/libAtomVM/opcodesswitch.h | 46 ++++- src/libAtomVM/term.c | 68 ++++++- src/libAtomVM/term.h | 27 ++- tests/libs/jit/jit_aarch64_tests.erl | 8 +- tests/libs/jit/jit_armv6m_tests.erl | 14 +- tests/libs/jit/jit_x86_64_tests.erl | 8 +- 14 files changed, 396 insertions(+), 165 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 1992bf5841..9babe4184c 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -363,7 +363,7 @@ first_pass(<>, MMod, MSt0, State0) -> % Same module: fast intra-module return fun(BSt0) -> % Mask to get lower 24 bits and shift right by 2 for offset - BSt1 = MMod:and_(BSt0, CpReg0, 16#FFFFFF), + {BSt1, CpReg0} = MMod:and_(BSt0, {free, CpReg0}, 16#FFFFFF), {BSt3, CPReg1} = MMod:shift_right(BSt1, {free, CpReg0}, 2), % Jump to continuation (this is a tail call) MMod:jump_to_continuation(BSt3, {free, CPReg1}) @@ -631,7 +631,7 @@ first_pass(<>, MMod, MSt0, State0) -> BSt1 = cond_jump_to_label( {Reg, '&', 
?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, BSt0 ), - BSt2 = MMod:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {BSt2, Reg} = MMod:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), BSt3 = MMod:move_array_element(BSt2, Reg, 0, Reg), % Optimization : ((Reg & 0x3F) != 0x8) && ((Reg & 0x3F) != 0x18) % is equivalent to (Reg & 0x2F) != 0x8 @@ -684,9 +684,9 @@ first_pass(<>, MMod, MSt0, State0) -> MSt3 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2 ), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, 0, Reg), - MSt6 = MMod:and_(MSt5, Reg, ?TERM_BOXED_TAG_MASK), + {MSt6, Reg} = MMod:and_(MSt5, {free, Reg}, ?TERM_BOXED_TAG_MASK), MSt7 = cond_jump_to_label( {'and', [{Reg, '!=', ?TERM_BOXED_REF}, {Reg, '!=', ?TERM_BOXED_EXTERNAL_REF}]}, Label, @@ -775,7 +775,7 @@ first_pass(<>, MMod, MSt0, State0) -> {Arity, Rest3} = decode_literal(Rest2), ?TRACE("OP_TEST_ARITY ~p, ~p, ~p\n", [Label, Arg1, Arity]), {MSt2, Reg} = MMod:move_to_native_register(MSt1, Arg1), - MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg), {MSt5, ArityReg} = MMod:shift_right(MSt4, {free, Reg}, 6), MSt6 = cond_jump_to_label({{free, ArityReg}, '!=', Arity}, Label, MMod, MSt5), @@ -882,7 +882,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt3, TailDest, Rest3} = decode_dest(Rest2, MMod, MSt2), ?TRACE("OP_GET_LIST ~p, ~p, ~p\n", [List, HeadDest, TailDest]), {MSt4, Reg} = MMod:move_to_native_register(MSt3, List), - MSt5 = MMod:and_(MSt4, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt6 = MMod:move_array_element(MSt5, Reg, ?LIST_HEAD_INDEX, HeadDest), MSt7 = MMod:free_native_registers(MSt6, [HeadDest]), MSt8 = MMod:move_array_element(MSt7, Reg, 
?LIST_TAIL_INDEX, TailDest), @@ -898,7 +898,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, Dest, Rest3} = decode_dest(Rest2, MMod, MSt1), ?TRACE("OP_GET_TUPLE_ELEMENT ~p, ~p, ~p\n", [Source, Element, Dest]), {MSt3, Reg} = MMod:move_to_native_register(MSt2, Source), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, Element + 1, Dest), MSt6 = MMod:free_native_registers(MSt5, [Reg, Dest]), ?ASSERT_ALL_NATIVE_FREE(MSt6), @@ -911,7 +911,7 @@ first_pass(<>, MMod, MSt0, State0) -> {Position, Rest3} = decode_literal(Rest2), ?TRACE("OP_SET_TUPLE_ELEMENT ~p, ~p, ~p\n", [NewElement, Tuple, Position]), {MSt3, Reg} = MMod:move_to_native_register(MSt2, Tuple), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_to_array_element(MSt4, NewElement, Reg, Position + 1), MSt6 = MMod:free_native_registers(MSt5, [NewElement, Reg]), ?ASSERT_ALL_NATIVE_FREE(MSt6), @@ -1011,7 +1011,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, ResultReg} = MMod:call_primitive(MSt1, ?PRIM_CONTEXT_ENSURE_FPREGS, [ctx]), MSt3 = MMod:free_native_registers(MSt2, [ResultReg]), {MSt4, Reg} = MMod:move_to_native_register(MSt3, SrcValue), - MSt5 = MMod:and_(MSt4, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt6 = MMod:move_to_vm_register(MSt5, {free, {ptr, Reg, 1}}, FPReg), ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest2, MMod, MSt6, State0); @@ -1294,7 +1294,7 @@ first_pass(<>, MMod, MSt0, State0) -> MMod:call_primitive_last(BlockSt, ?PRIM_RAISE_ERROR, [ctx, jit_state, offset, ?BADARG_ATOM]) end), {MSt8, BSOffsetReg1} = MMod:shift_right(MSt7, {free, BSOffsetReg0}, 3), - MSt9 = MMod:and_(MSt8, BSBinaryReg0, ?TERM_PRIMARY_CLEAR_MASK), + {MSt9, BSBinaryReg0} = MMod:and_(MSt8, {free, BSBinaryReg0}, ?TERM_PRIMARY_CLEAR_MASK), {MSt10, SizeReg} 
= MMod:get_array_element(MSt9, {free, BSBinaryReg0}, 1), {MSt13, SizeValue} = if @@ -1462,9 +1462,9 @@ first_pass(<>, MMod, MSt0, State0) -> MSt3 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2 ), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, 0, Reg), - MSt6 = MMod:and_(MSt5, Reg, ?TERM_BOXED_TAG_MASK), + {MSt6, Reg} = MMod:and_(MSt5, {free, Reg}, ?TERM_BOXED_TAG_MASK), MSt7 = cond_jump_to_label( {'and', [ {Reg, '!=', ?TERM_BOXED_REFC_BINARY}, @@ -1509,12 +1509,17 @@ first_pass(<>, MMod, MSt0, State0) -> ] ), MSt2 = handle_error_if({'(bool)', {free, MemoryEnsureFreeReg}, '==', false}, MMod, MSt1), - {MSt3, CreatedBin} = MMod:call_primitive(MSt2, ?PRIM_TERM_CREATE_EMPTY_BINARY, [ctx, 0]), - MSt4 = MMod:set_bs(MSt3, CreatedBin), - MSt5 = MMod:move_to_vm_register(MSt4, CreatedBin, {x_reg, 0}), - MSt6 = MMod:free_native_registers(MSt5, [CreatedBin]), - ?ASSERT_ALL_NATIVE_FREE(MSt6), - first_pass(Rest0, MMod, MSt6, State0); + {MSt3, CreatedBin} = MMod:call_primitive(MSt2, ?PRIM_TERM_CREATE_UNINITIALIZED_BINARY, [ctx, 0]), + MSt4 = MMod:if_block(MSt3, {CreatedBin, '==', ?TERM_INVALID_TERM}, fun(BSt0) -> + MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, offset, ?OUT_OF_MEMORY_ATOM + ]) + end), + MSt5 = MMod:set_bs(MSt4, CreatedBin), + MSt6 = MMod:move_to_vm_register(MSt5, CreatedBin, {x_reg, 0}), + MSt7 = MMod:free_native_registers(MSt6, [CreatedBin]), + ?ASSERT_ALL_NATIVE_FREE(MSt7), + first_pass(Rest0, MMod, MSt7, State0); % 136 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -1778,7 +1783,7 @@ first_pass(<>, MMod, MSt0, State0) -> Src, Live, {free, SrcSizeReg}, MMod, MSt7 ), {MSt9, NewMapPtrReg} = MMod:call_primitive(MSt8, ?PRIM_TERM_COPY_MAP, [ctx, NewSrc]), - MSt10 = MMod:and_(MSt9, NewMapPtrReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt10, NewMapPtrReg} = 
MMod:and_(MSt9, {free, NewMapPtrReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt11, Rest6} = lists:foldl( fun(_Index, {ASt0, ARest0}) -> {ASt1, Key, ARest1} = decode_compact_term(ARest0, MMod, ASt0, State0), @@ -1880,14 +1885,13 @@ first_pass(<>, MMod, MSt0, State0) -> ]) end), {MSt6, SrcReg} = MMod:move_to_native_register(MSt5, Src), - {MSt7, MapReg} = MMod:copy_to_native_register(MSt6, SrcReg), - MSt8 = MMod:and_(MSt7, MapReg, ?TERM_PRIMARY_CLEAR_MASK), - MSt9 = MMod:add(MSt8, MapReg, MMod:word_size() * 2), - {MSt10, Dest1, Rest5} = decode_dest(Rest4, MMod, MSt9), + {MSt7, MapReg} = MMod:and_(MSt6, SrcReg, ?TERM_PRIMARY_CLEAR_MASK), + MSt8 = MMod:add(MSt7, MapReg, MMod:word_size() * 2), + {MSt9, Dest1, Rest5} = decode_dest(Rest4, MMod, MSt8), ?TRACE(",~p", [Dest1]), - MSt11 = MMod:move_array_element(MSt10, MapReg, {free, PosReg1}, Dest1), - MSt12 = MMod:free_native_registers(MSt11, [Dest1]), - {MSt13, Rest6} = lists:foldl( + MSt10 = MMod:move_array_element(MSt9, MapReg, {free, PosReg1}, Dest1), + MSt11 = MMod:free_native_registers(MSt10, [Dest1]), + {MSt12, Rest6} = lists:foldl( fun(_Index, {AccMSt0, AccRest0}) -> {AccMSt1, Key, AccRest1} = decode_compact_term(AccRest0, MMod, AccMSt0, State0), ?TRACE(",~p", [Key]), @@ -1912,13 +1916,13 @@ first_pass(<>, MMod, MSt0, State0) -> AccMSt8 = MMod:free_native_registers(AccMSt7, [Dest]), {AccMSt8, AccRest2} end, - {MSt12, Rest5}, + {MSt11, Rest5}, lists:seq(2, ListSize div 2) ), ?TRACE("]\n", []), - MSt14 = MMod:free_native_registers(MSt13, [MapReg, SrcReg]), - ?ASSERT_ALL_NATIVE_FREE(MSt14), - first_pass(Rest6, MMod, MSt14, State0); + MSt13 = MMod:free_native_registers(MSt12, [MapReg, SrcReg]), + ?ASSERT_ALL_NATIVE_FREE(MSt13), + first_pass(Rest6, MMod, MSt13, State0); % 159 first_pass( <>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0 @@ -1933,7 +1937,7 @@ first_pass( MSt3 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2 ), - MSt4 = MMod:and_(MSt3, Reg, 
?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt5, TagReg0} = MMod:get_array_element(MSt4, Reg, 0), MSt6 = cond_jump_to_label( {TagReg0, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_TUPLE}, Label, MMod, MSt5 @@ -1987,7 +1991,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1), ?TRACE("OP_GET_HD ~p, ~p\n", [SrcValue, Dest]), {MSt3, Reg} = MMod:move_to_native_register(MSt2, SrcValue), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, ?LIST_HEAD_INDEX, Dest), MSt6 = MMod:free_native_registers(MSt5, [Dest, Reg]), ?ASSERT_ALL_NATIVE_FREE(MSt6), @@ -1999,7 +2003,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1), ?TRACE("OP_GET_TL ~p, ~p\n", [SrcValue, Dest]), {MSt3, Reg} = MMod:move_to_native_register(MSt2, SrcValue), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, ?LIST_TAIL_INDEX, Dest), MSt6 = MMod:free_native_registers(MSt5, [Dest, Reg]), ?ASSERT_ALL_NATIVE_FREE(MSt6), @@ -2011,7 +2015,7 @@ first_pass(<>, MMod, MSt0, State0) -> {ListSize, Rest2} = decode_extended_list_header(Rest1), ?TRACE("OP_PUT_TUPLE2 ~p, [", [Dest]), {MSt2, ResultReg} = MMod:call_primitive(MSt1, ?PRIM_TERM_ALLOC_TUPLE, [ctx, ListSize]), - MSt3 = MMod:and_(MSt2, ResultReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, ResultReg} = MMod:and_(MSt2, {free, ResultReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt4, Rest3} = lists:foldl( fun(Index, {AccMSt0, AccRest0}) -> {AccMSt1, Element, AccRest1} = decode_compact_term(AccRest0, MMod, AccMSt0, State0), @@ -2040,13 +2044,13 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateRegPtr, 1), {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, 
MatchStateRegPtr, 2), MSt6 = MMod:free_native_registers(MSt5, [MatchStateRegPtr]), - MSt7 = MMod:and_(MSt6, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt7, BSBinaryReg} = MMod:and_(MSt6, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt8, ResultTerm, NewMatchState} = do_get_tail( Src, Live, BSOffsetReg, BSBinaryReg, MMod, MSt7 ), MSt9 = MMod:free_native_registers(MSt8, [BSBinaryReg]), {MSt10, MatchStateReg1} = MMod:move_to_native_register(MSt9, NewMatchState), - MSt11 = MMod:and_(MSt10, MatchStateReg1, ?TERM_PRIMARY_CLEAR_MASK), + {MSt11, MatchStateReg1} = MMod:and_(MSt10, {free, MatchStateReg1}, ?TERM_PRIMARY_CLEAR_MASK), MSt12 = MMod:move_to_array_element(MSt11, BSOffsetReg, MatchStateReg1, 2), MSt13 = MMod:move_to_vm_register(MSt12, ResultTerm, Dest), MSt14 = MMod:free_native_registers(MSt13, [MatchStateReg1, BSOffsetReg, ResultTerm, Dest]), @@ -2073,7 +2077,7 @@ first_pass(<>, MMod, MSt0, State0) -> {_Live, Rest3} = decode_literal(Rest2), ?TRACE("OP_BS_GET_POSITION ~p, ~p, ~p\n", [Src, Dest, _Live]), {MSt3, Reg} = MMod:move_to_native_register(MSt2, Src), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, 2, Reg), MSt6 = MMod:shift_left(MSt5, Reg, 4), MSt7 = MMod:or_(MSt6, Reg, ?TERM_INTEGER_TAG), @@ -2136,7 +2140,7 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, ResultReg} = MMod:call_primitive(MSt1, ?PRIM_TERM_ALLOC_FUN, [ ctx, jit_state, FunIndex, NumFree ]), - MSt3 = MMod:and_(MSt2, ResultReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, ResultReg} = MMod:and_(MSt2, {free, ResultReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt4, Rest4} = lists:foldl( fun(Index, {AccMSt0, AccRest0}) -> {AccMSt1, Element, AccRest1} = decode_compact_term(AccRest0, MMod, AccMSt0, State0), @@ -2221,8 +2225,8 @@ first_pass( {ListLen, Rest6} = decode_extended_list_header(Rest5), % Compute binary size and verify types in first iteration NBSegments = ListLen div 6, - {Rest7, MSt2, 
BinaryLitSize, BinaryRegSize, State1} = lists:foldl( - fun(_Index, {AccRest0, AccMSt0, AccLiteralSize0, AccSizeReg0, AccState0}) -> + {Rest7, MSt2, BinaryLitSize, BinaryRegSize, State1, ReuseSourceBinary} = lists:foldl( + fun(Index, {AccRest0, AccMSt0, AccLiteralSize0, AccSizeReg0, AccState0, AccReuseSrc}) -> {AtomTypeIndex, AccRest1} = decode_atom(AccRest0), AtomType = AtomResolver(AtomTypeIndex), {_Seg, AccRest2} = decode_literal(AccRest1), @@ -2242,10 +2246,13 @@ first_pass( AccMSt2, AccState0 ), + NewReuseSrc = + AccReuseSrc orelse + (Index =:= 1 andalso AtomType =:= private_append andalso Size =:= ?ALL_ATOM), AccMSt4 = MMod:free_native_registers(AccMSt3, [Src, Size]), - {AccRest6, AccMSt4, AccLiteralSize1, AccSizeReg1, AccState1} + {AccRest6, AccMSt4, AccLiteralSize1, AccSizeReg1, AccState1, NewReuseSrc} end, - {Rest6, MSt1, 0, undefined, State0}, + {Rest6, MSt1, 0, undefined, State0, false}, lists:seq(1, NBSegments) ), {MSt4, BinaryTotalSize} = @@ -2302,12 +2309,30 @@ first_pass( ] ), MSt14 = handle_error_if({'(bool)', {free, MemoryEnsureFreeReg}, '==', false}, MMod, MSt13), - {MSt15, CreatedBin} = MMod:call_primitive(MSt14, ?PRIM_TERM_CREATE_EMPTY_BINARY, [ - ctx, {free, BinaryTotalSizeInBytes} - ]), + {MSt17, InitialCreatedBin} = + case ReuseSourceBinary of + false -> + % No reuse - create the binary now + {MSt15, CreatedBinResult} = MMod:call_primitive( + MSt14, ?PRIM_TERM_CREATE_UNINITIALIZED_BINARY, [ + ctx, {free, BinaryTotalSizeInBytes} + ] + ), + MSt16 = MMod:if_block(MSt15, {CreatedBinResult, '==', ?TERM_INVALID_TERM}, fun( + BSt0 + ) -> + MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, offset, ?OUT_OF_MEMORY_ATOM + ]) + end), + {MSt16, CreatedBinResult}; + true -> + % Will reuse - defer creation until first segment + {MSt14, {private_append, BinaryTotalSizeInBytes}} + end, % We redo the decoding. Rest7 should still be equal to previous value. 
- {Rest7, MSt16, FinalOffset} = lists:foldl( - fun(_Index, {AccRest0, AccMSt0, AccOffset0}) -> + {Rest7, MSt18, FinalOffset, CreatedBin} = lists:foldl( + fun(_Index, {AccRest0, AccMSt0, AccOffset0, AccCreatedBin}) -> {AtomTypeIndex, AccRest1} = decode_atom(AccRest0), AtomType = AtomResolver(AtomTypeIndex), {_Seg, AccRest2} = decode_literal(AccRest1), @@ -2316,30 +2341,30 @@ first_pass( {AccMSt2, Src, AccRest5} = decode_compact_term(AccRest4, MMod, AccMSt1, State1), {AccMSt3, Size, AccRest6} = decode_compact_term(AccRest5, MMod, AccMSt2, State1), ?TRACE("{~p,~p,~p,~p,~p,~p},", [AtomType, _Seg, SegmentUnit, Flags, Src, Size]), - {AccMSt4, AccOffset1} = first_pass_bs_create_bin_insert_value( + {AccMSt4, AccOffset1, AccCreatedBin1} = first_pass_bs_create_bin_insert_value( AtomType, Flags, Src, Size, SegmentUnit, Fail, - CreatedBin, + AccCreatedBin, AccOffset0, MMod, AccMSt3 ), AccMSt5 = MMod:free_native_registers(AccMSt4, [Flags, Src, Size]), - {AccRest6, AccMSt5, AccOffset1} + {AccRest6, AccMSt5, AccOffset1, AccCreatedBin1} end, - {Rest6, MSt15, 0}, + {Rest6, MSt17, 0, InitialCreatedBin}, lists:seq(1, NBSegments) ), ?TRACE("]\n", []), - MSt17 = MMod:free_native_registers(MSt16, [FinalOffset]), - MSt18 = MMod:move_to_vm_register(MSt17, CreatedBin, Dest), - MSt19 = MMod:free_native_registers(MSt18, [CreatedBin, Dest]), - ?ASSERT_ALL_NATIVE_FREE(MSt19), - first_pass(Rest7, MMod, MSt19, State1); + MSt19 = MMod:free_native_registers(MSt18, [FinalOffset]), + MSt20 = MMod:move_to_vm_register(MSt19, CreatedBin, Dest), + MSt21 = MMod:free_native_registers(MSt20, [CreatedBin, Dest]), + ?ASSERT_ALL_NATIVE_FREE(MSt21), + first_pass(Rest7, MMod, MSt21, State1); % 178 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -2376,12 +2401,12 @@ first_pass( {Size, Rest2} = decode_literal(Rest1), {MSt1, Src, Rest3} = decode_compact_term(Rest2, MMod, MSt0, State0), {MSt2, SrcReg} = MMod:move_to_native_register(MSt1, Src), - MSt3 = MMod:and_(MSt2, SrcReg, 
?TERM_PRIMARY_CLEAR_MASK), + {MSt3, SrcReg} = MMod:and_(MSt2, {free, SrcReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt4, Dest, Rest4} = decode_dest(Rest3, MMod, MSt3), {ListLen, Rest5} = decode_extended_list_header(Rest4), ?TRACE("OP_UPDATE_RECORD ~p, ~p, ~p, ~p, [", [Hint, Size, Src, Dest]), {MSt5, DestReg} = MMod:call_primitive(MSt4, ?PRIM_TERM_ALLOC_TUPLE, [ctx, Size]), - MSt6 = MMod:and_(MSt5, DestReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt6, DestReg} = MMod:and_(MSt5, {free, DestReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt7, ReuseReg} = MMod:move_to_native_register( MSt6, if @@ -2451,20 +2476,19 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt1, MatchState, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), {ListLen, Rest3} = decode_extended_list_header(Rest2), ?TRACE("OP_BS_MATCH ~p, ~p, [", [Fail, MatchState]), - {MSt2, MatchStateReg0} = MMod:copy_to_native_register(MSt1, MatchState), - MSt3 = MMod:and_(MSt2, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), - {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateReg0, 1), - {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateReg0, 2), - MSt6 = MMod:free_native_registers(MSt5, [MatchStateReg0]), - MSt7 = MMod:and_(MSt6, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt8, MatchStateReg1} = MMod:move_to_native_register(MSt7, MatchState), - {MSt9, Rest4, NewMatchState, NewBSOffsetReg} = first_pass_bs_match( - Fail, MatchStateReg1, BSBinaryReg, BSOffsetReg, ListLen, Rest3, MMod, MSt8, State0 + {MSt2, MatchStateReg0} = MMod:move_to_native_register(MSt1, MatchState), + {MSt3, MatchStateReg1} = MMod:and_(MSt2, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, BSBinaryReg} = MMod:get_array_element(MSt3, MatchStateReg1, 1), + {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateReg1, 2), + MSt6 = MMod:free_native_registers(MSt5, [MatchStateReg1]), + {MSt7, BSBinaryReg} = MMod:and_(MSt6, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK), + {MSt8, Rest4, MatchStateReg2, NewBSOffsetReg} = first_pass_bs_match( + Fail, 
MatchStateReg0, BSBinaryReg, BSOffsetReg, ListLen, Rest3, MMod, MSt7, State0 ), ?TRACE("]\n", []), - MSt10 = MMod:free_native_registers(MSt9, [BSBinaryReg, NewBSOffsetReg, NewMatchState]), - ?ASSERT_ALL_NATIVE_FREE(MSt10), - first_pass(Rest4, MMod, MSt10, State0). + MSt9 = MMod:free_native_registers(MSt8, [BSBinaryReg, NewBSOffsetReg, MatchStateReg2]), + ?ASSERT_ALL_NATIVE_FREE(MSt9), + first_pass(Rest4, MMod, MSt9, State0). first_pass_bs_create_bin_compute_size( AtomType, Src, _Size, _SegmentUnit, Fail, AccLiteralSize0, AccSizeReg0, MMod, MSt0, State0 @@ -2540,7 +2564,7 @@ first_pass_bs_create_bin_compute_size( ) when AtomType =:= binary orelse AtomType =:= append orelse AtomType =:= private_append -> MSt1 = verify_is_binary(Src, Fail, MMod, MSt0), {MSt2, Reg} = MMod:copy_to_native_register(MSt1, Src), - MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt4 = MMod:move_array_element(MSt3, Reg, 1, Reg), MSt5 = MMod:shift_left(MSt4, Reg, 3), case AccSizeReg0 of @@ -2566,7 +2590,7 @@ first_pass_bs_create_bin_compute_size( MSt1 = verify_is_binary(Src, Fail, MMod, MSt0), {MSt2, Reg0} = MMod:copy_to_native_register(MSt1, Size), {MSt3, Reg1} = MMod:copy_to_native_register(MSt2, Src), - MSt4 = MMod:and_(MSt3, Reg1, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg1} = MMod:and_(MSt3, {free, Reg1}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg1, 1, Reg1), MSt6 = MMod:shift_left(MSt5, Reg1, 3), MSt7 = MMod:if_block(MSt6, {{free, Reg0}, '!=', ?ALL_ATOM}, fun(BSt0) -> @@ -2600,7 +2624,7 @@ first_pass_bs_create_bin_insert_value( {MSt3, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt2, Offset, Size, 8 ), - {MSt3, NewOffset}; + {MSt3, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( utf16, Flags, Src, _Size, _SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0 ) -> @@ -2612,7 +2636,7 @@ first_pass_bs_create_bin_insert_value( {MSt4, NewOffset} = 
first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt3, Offset, Size, 8 ), - {MSt4, NewOffset}; + {MSt4, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( utf32, Flags, Src, _Size, _SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0 ) -> @@ -2627,7 +2651,7 @@ first_pass_bs_create_bin_insert_value( {MSt5, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt4, Offset, 4, 8 ), - {MSt5, NewOffset}; + {MSt5, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( integer, Flags, Src, Size, SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0 ) -> @@ -2648,7 +2672,7 @@ first_pass_bs_create_bin_insert_value( {MSt7, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt6, Offset, SizeValue, 1 ), - {MSt7, NewOffset}; + {MSt7, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( string, _Flags, Src, Size, SegmentUnit, Fail, CreatedBin, Offset, MMod, MSt0 ) -> @@ -2668,7 +2692,37 @@ first_pass_bs_create_bin_insert_value( {MSt6, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt5, Offset, BitSize, 1 ), - {MSt6, NewOffset}; + {MSt6, NewOffset, CreatedBin}; +first_pass_bs_create_bin_insert_value( + private_append, + _Flags, + Src, + _Size, + _SegmentUnit, + _Fail, + {private_append, BinaryTotalSizeInBytes}, + Offset, + MMod, + MSt0 +) -> + % Special case: first segment is private_append with undefined CreatedBin + % Get original size before reusing + {MSt1, OriginalSize} = term_binary_size(Src, MMod, MSt0), + % Reuse the source binary (content is already there, no need to copy) + {MSt2, CreatedBin} = MMod:call_primitive(MSt1, ?PRIM_TERM_REUSE_BINARY, [ + ctx, {free, Src}, {free, BinaryTotalSizeInBytes} + ]), + MSt3 = MMod:if_block(MSt2, {CreatedBin, '==', ?TERM_INVALID_TERM}, fun(BSt0) -> + MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, offset, ?OUT_OF_MEMORY_ATOM + ]) + end), + % Convert original size to bits and update offset + MSt4 = 
MMod:shift_left(MSt3, OriginalSize, 3), + {MSt5, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( + MMod, MSt4, Offset, OriginalSize, 1 + ), + {MSt5, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( AtomType, _Flags, Src, Size, _SegmentUnit, _Fail, CreatedBin, Offset, MMod, MSt0 ) when AtomType =:= binary orelse AtomType =:= append orelse AtomType =:= private_append -> @@ -2683,11 +2737,11 @@ first_pass_bs_create_bin_insert_value( {MSt3, NewOffset} = first_pass_bs_create_bin_insert_value_increment_offset( MMod, MSt2, Offset, SizeValue, 1 ), - {MSt3, NewOffset}; + {MSt3, NewOffset, CreatedBin}; first_pass_bs_create_bin_insert_value( - _OtherType, _Flag, _Src, _Size, _SegmentUnit, _Fail, _CreatedBin, Offset, _MMod, MSt0 + _OtherType, _Flag, _Src, _Size, _SegmentUnit, _Fail, CreatedBin, Offset, _MMod, MSt0 ) -> - {MSt0, Offset}. + {MSt0, Offset, CreatedBin}. first_pass_bs_create_bin_insert_value_increment_offset(_MMod, MSt0, Offset, Size, Unit) when is_integer(Offset) andalso is_integer(Size) andalso is_integer(Unit) @@ -2772,12 +2826,11 @@ first_pass_bs_match( first_pass_bs_match_skip(MatchState, BSOffsetReg, J1, Rest1, MMod, MSt0) end, % offset needs to be updated in the loop - {MSt2, MatchStateReg1} = MMod:copy_to_native_register(MSt1, NewMatchState), - MSt3 = MMod:and_(MSt2, MatchStateReg1, ?TERM_PRIMARY_CLEAR_MASK), - MSt4 = MMod:move_to_array_element(MSt3, NewBSOffsetReg, MatchStateReg1, 2), - MSt5 = MMod:free_native_registers(MSt4, [MatchStateReg1]), + {MSt2, MatchStateReg1} = MMod:and_(MSt1, NewMatchState, ?TERM_PRIMARY_CLEAR_MASK), + MSt3 = MMod:move_to_array_element(MSt2, NewBSOffsetReg, MatchStateReg1, 2), + MSt4 = MMod:free_native_registers(MSt3, [MatchStateReg1]), first_pass_bs_match( - Fail, NewMatchState, BSBinaryReg, NewBSOffsetReg, J2, Rest2, MMod, MSt5, State0 + Fail, NewMatchState, BSBinaryReg, NewBSOffsetReg, J2, Rest2, MMod, MSt4, State0 ). 
first_pass_bs_match_ensure_at_least( @@ -2862,7 +2915,7 @@ first_pass_bs_match_integer( MSt13 = MMod:free_native_registers(MSt12, [Result, Dest]), case MMod:available_regs(MSt9) of [] -> - MSt14 = MMod:and_(MSt13, MatchState, ?TERM_PRIMARY_CLEAR_MASK), + {MSt14, MatchState} = MMod:and_(MSt13, {free, MatchState}, ?TERM_PRIMARY_CLEAR_MASK), {MSt15, NewBSOffsetReg} = MMod:get_array_element(MSt14, MatchState, 2), MSt16 = MMod:or_(MSt15, MatchState, ?TERM_PRIMARY_BOXED), MSt17 = MMod:add(MSt16, NewBSOffsetReg, NumBits), @@ -2915,13 +2968,13 @@ first_pass_bs_match_binary( ), % Restore BSBinaryReg as it may have been gc'd as well {MSt9, MatchStateReg0} = MMod:copy_to_native_register(MSt8, NewMatchState), - MSt10 = MMod:and_(MSt9, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), + {MSt10, MatchStateReg0} = MMod:and_(MSt9, {free, MatchStateReg0}, ?TERM_PRIMARY_CLEAR_MASK), MSt11 = MMod:move_array_element(MSt10, MatchStateReg0, 1, BSBinaryReg), MSt12 = MMod:free_native_registers(MSt11, [MatchStateReg0]), {MSt13, ResultTerm} = MMod:call_primitive(MSt12, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ ctx, BSBinaryReg, {free, BSOffseBytesReg}, MatchedBytes ]), - MSt14 = MMod:and_(MSt13, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt14, BSBinaryReg} = MMod:and_(MSt13, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt15, Dest, Rest5} = decode_dest(Rest4, MMod, MSt14), ?TRACE("~p},", [Dest]), MSt16 = MMod:move_to_vm_register(MSt15, ResultTerm, Dest), @@ -2958,10 +3011,10 @@ do_get_tail( ), % Restore BSBinaryReg as it may have been gc'd as well {MSt7, MatchStateReg0} = MMod:copy_to_native_register(MSt6, NewMatchState), - MSt8 = MMod:and_(MSt7, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), + {MSt8, MatchStateReg0} = MMod:and_(MSt7, {free, MatchStateReg0}, ?TERM_PRIMARY_CLEAR_MASK), MSt9 = MMod:move_array_element(MSt8, MatchStateReg0, 1, BSBinaryReg), MSt10 = MMod:free_native_registers(MSt9, [MatchStateReg0]), - MSt11 = MMod:and_(MSt10, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt11, BSBinaryReg} 
= MMod:and_(MSt10, {free, BSBinaryReg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt12, TailBytesReg1} = MMod:get_array_element(MSt11, BSBinaryReg, 1), MSt13 = MMod:sub(MSt12, TailBytesReg0, BSOffseBytesReg), MSt14 = MMod:add(MSt13, BSBinaryReg, ?TERM_PRIMARY_BOXED), @@ -2995,7 +3048,7 @@ first_pass_bs_match_equal_colon_equal( MMod:jump_to_label(BSt0, Fail) end ), - MSt4 = MMod:and_(MSt3, Result, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Result} = MMod:and_(MSt3, {free, Result}, ?TERM_PRIMARY_CLEAR_MASK), {MSt5, IntValue} = MMod:get_array_element(MSt4, {free, Result}, 1), cond_jump_to_label({{free, IntValue}, '!=', PatternValue}, Fail, MMod, MSt5); _ -> @@ -3037,7 +3090,7 @@ term_is_boxed_with_tag_and_get_ptr(Label, Arg1, BoxedTag, MMod, MSt1) -> MSt3 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2 ), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt5, BoxTagReg} = MMod:get_array_element(MSt4, Reg, 0), MSt6 = cond_jump_to_label( {{free, BoxTagReg}, '&', ?TERM_BOXED_TAG_MASK, '!=', BoxedTag}, Label, MMod, MSt5 @@ -3062,28 +3115,30 @@ verify_is_function({typed, Func, _Other}, MMod, MSt0) -> ]), {MSt2, Reg}; verify_is_function(Func, MMod, MSt0) -> - {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Func), + {MSt1, Reg} = MMod:move_to_native_register(MSt0, Func), MSt2 = MMod:if_block(MSt1, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ ctx, jit_state, offset, ?BADFUN_ATOM, Reg ]) end), - MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), - MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg), - MSt5 = MMod:if_block(MSt4, {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> - MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ - ctx, jit_state, offset, ?BADFUN_ATOM, Reg - ]) - end), - MSt6 = MMod:free_native_registers(MSt5, [Reg]), - 
MMod:move_to_native_register(MSt6, Func). + {MSt3, BoxedPtrReg} = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), + MSt4 = MMod:move_array_element(MSt3, BoxedPtrReg, 0, BoxedPtrReg), + MSt5 = MMod:if_block( + MSt4, {BoxedPtrReg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> + MMod:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, Reg + ]) + end + ), + MSt6 = MMod:free_native_registers(MSt5, [BoxedPtrReg]), + {MSt6, Reg}. verify_is_binary_or_match_state(Label, Src, MMod, MSt0) -> {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Src), MSt2 = verify_is_boxed(MMod, MSt1, Reg, Label), - MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg), - MSt5 = MMod:and_(MSt4, Reg, ?TERM_BOXED_TAG_MASK), + {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_BOXED_TAG_MASK), MSt6 = cond_raise_badarg_or_jump_to_fail_label( {'and', [ {Reg, '!=', ?TERM_BOXED_REFC_BINARY}, @@ -3099,7 +3154,7 @@ verify_is_binary_or_match_state(Label, Src, MMod, MSt0) -> verify_is_boxed_with_tag(Label, {free, Reg}, BoxedTag, MMod, MSt0) when is_atom(Reg) -> MSt1 = verify_is_boxed(MMod, MSt0, Reg, Label), - MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = MMod:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = MMod:move_array_element(MSt2, Reg, 0, Reg), cond_raise_badarg_or_jump_to_fail_label( {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', BoxedTag}, Label, MMod, MSt3 @@ -3107,7 +3162,7 @@ verify_is_boxed_with_tag(Label, {free, Reg}, BoxedTag, MMod, MSt0) when is_atom( verify_is_boxed_with_tag(Label, Arg1, BoxedTag, MMod, MSt1) -> {MSt2, Reg} = MMod:copy_to_native_register(MSt1, Arg1), MSt3 = verify_is_boxed(MMod, MSt2, Reg, Label), - MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt4, Reg} = MMod:and_(MSt3, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt5 = MMod:move_array_element(MSt4, Reg, 
0, Reg), cond_raise_badarg_or_jump_to_fail_label( {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', BoxedTag}, Label, MMod, MSt5 @@ -3138,7 +3193,7 @@ verify_is_match_state_and_get_ptr(MMod, MSt0, Src) -> verify_is_match_state_and_get_ptr0(MMod, MSt2, Reg). verify_is_match_state_and_get_ptr0(MMod, MSt0, Reg) -> - MSt1 = MMod:and_(MSt0, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt1, Reg} = MMod:and_(MSt0, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), {MSt2, BoxTag} = MMod:get_array_element(MSt1, Reg, 0), MSt3 = cond_raise_badarg( {{free, BoxTag}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_BIN_MATCH_STATE}, MMod, MSt2 @@ -3207,9 +3262,9 @@ verify_is_any_integer(Arg1, Fail, MMod, MSt0) -> verify_is_binary(Arg1, FailLabel, MMod, MSt0) -> {MSt1, Reg} = MMod:copy_to_native_register(MSt0, Arg1), MSt2 = verify_is_boxed(MMod, MSt1, Reg, FailLabel), - MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt3, Reg} = MMod:and_(MSt2, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg), - MSt5 = MMod:and_(MSt4, Reg, ?TERM_BOXED_TAG_MASK), + {MSt5, Reg} = MMod:and_(MSt4, {free, Reg}, ?TERM_BOXED_TAG_MASK), MSt6 = cond_raise_badarg_or_jump_to_fail_label( {'and', [ {Reg, '!=', ?TERM_BOXED_REFC_BINARY}, @@ -3608,7 +3663,7 @@ term_get_tuple_arity(Tuple, MMod, MSt0) -> {free, TupleReg} -> MMod:move_to_native_register(MSt0, TupleReg); _ -> MMod:copy_to_native_register(MSt0, Tuple) end, - MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = MMod:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = MMod:move_array_element(MSt2, Reg, 0, Reg), {MSt4, ArityReg} = MMod:shift_right(MSt3, {free, Reg}, 6), {MSt4, ArityReg}. 
@@ -3623,7 +3678,7 @@ term_get_map_keys(Map, MMod, MSt0) -> {free, MapReg} -> MMod:move_to_native_register(MSt0, MapReg); _ -> MMod:copy_to_native_register(MSt0, Map) end, - MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = MMod:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = MMod:move_array_element(MSt2, Reg, 1, Reg), {MSt3, Reg}. @@ -3687,9 +3742,14 @@ term_binary_heap_size({free, Reg}, MMod, MSt0) -> {MSt1, Reg}. term_binary_size({free, BinReg}, MMod, MSt0) -> - MSt1 = MMod:and_(MSt0, BinReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt1, BinReg} = MMod:and_(MSt0, {free, BinReg}, ?TERM_PRIMARY_CLEAR_MASK), MSt2 = MMod:move_array_element(MSt1, BinReg, 1, BinReg), - {MSt2, BinReg}. + {MSt2, BinReg}; +term_binary_size(Src, MMod, MSt0) -> + {MSt1, SrcReg} = MMod:move_to_native_register(MSt0, Src), + {MSt2, SrcReg} = MMod:and_(MSt1, {free, SrcReg}, ?TERM_PRIMARY_CLEAR_MASK), + MSt3 = MMod:move_array_element(MSt2, SrcReg, 1, SrcReg), + {MSt3, SrcReg}. term_set_map_assoc(MapPtrReg, {free, PosReg}, {free, Key}, {free, Value}, MMod, MSt0) -> {MSt1, MapKeysReg} = MMod:get_array_element(MSt0, MapPtrReg, 1), @@ -3698,7 +3758,7 @@ term_set_map_assoc(MapPtrReg, {free, PosReg}, {free, Key}, {free, Value}, MMod, MMod:free_native_registers(MSt3, [PosReg, Value]). term_put_tuple_element({free, TupleReg}, PosReg, {free, Value}, MMod, MSt0) -> - MSt1 = MMod:and_(MSt0, TupleReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt1, TupleReg} = MMod:and_(MSt0, {free, TupleReg}, ?TERM_PRIMARY_CLEAR_MASK), MSt2 = MMod:move_to_array_element(MSt1, Value, TupleReg, PosReg, 1), MMod:free_native_registers(MSt2, [TupleReg, Value]). 
diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 1eba4fba80..234952045e 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -933,7 +933,7 @@ if_block_cond( ) when ?IS_GPR(Reg) -> % AND with mask OffsetBefore = StreamModule:offset(Stream0), - State1 = and_(State0, Reg, Mask), + {State1, Reg} = and_(State0, RegTuple, Mask), Stream1 = State1#state.stream, % Compare with value I2 = jit_aarch64_asm:cmp(Reg, Val), @@ -1945,9 +1945,18 @@ op_imm(#state{stream_module = StreamModule, stream = Stream0} = State, Op, RegA, %% @param Val immediate value to AND %% @return Updated backend state %%----------------------------------------------------------------------------- --spec and_(state(), aarch64_register(), integer()) -> state(). -and_(State, Reg, Val) -> - op_imm(State, and_, Reg, Reg, Val). +and_(State, {free, Reg}, Val) -> + NewState = op_imm(State, and_, Reg, Reg, Val), + {NewState, Reg}; +and_( + #state{available_regs = [ResultReg | T], used_regs = UR} = State, + Reg, + Val +) -> + NewState = op_imm( + State#state{available_regs = T, used_regs = [ResultReg | UR]}, and_, ResultReg, Reg, Val + ), + {NewState, ResultReg}. %%----------------------------------------------------------------------------- %% @doc Perform bitwise OR of a register with an immediate value. diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index b051850135..676cfdce93 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -74,6 +74,7 @@ -include_lib("jit.hrl"). -include("primitives.hrl"). +-include("term.hrl"). -define(ASSERT(Expr), true = Expr). 
@@ -1301,7 +1302,7 @@ if_block_cond( I1 = jit_armv6m_asm:mov(Temp, Reg), Stream1 = StreamModule:append(Stream0, I1), State1 = State0#state{stream = Stream1}, - State2 = and_(State1#state{available_regs = AT}, Temp, Mask), + {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask), Stream2 = State2#state.stream, % Compare with value I2 = jit_armv6m_asm:cmp(Temp, Val), @@ -1320,7 +1321,7 @@ if_block_cond( ) when ?IS_GPR(Reg) -> % AND with mask OffsetBefore = StreamModule:offset(Stream0), - State1 = and_(State0, Reg, Mask), + {State1, Reg} = and_(State0, RegTuple, Mask), Stream1 = State1#state.stream, % Compare with value I2 = jit_armv6m_asm:cmp(Reg, Val), @@ -2508,34 +2509,34 @@ get_module_index( %% JIT currentl calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to %% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool %% by using BICS for -4. -and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) -> +and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) -> I1 = jit_armv6m_asm:lsls(Reg, Reg, 8), I2 = jit_armv6m_asm:lsrs(Reg, Reg, 8), Stream1 = StreamModule:append(Stream0, <>), - State0#state{stream = Stream1}; + {State0#state{stream = Stream1}, Reg}; and_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, - Reg, + {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)), Stream1 = State1#state.stream, I = jit_armv6m_asm:bics(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}; + {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg}; and_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, - Reg, + {free, Reg}, Val ) -> State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = 
jit_armv6m_asm:ands(Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}; + {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg}; and_( #state{stream_module = StreamModule, available_regs = []} = State0, - Reg, + {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> % No available registers, use r0 as temp and save it to r12 @@ -2552,10 +2553,10 @@ and_( % Restore r0 from r12 Restore = jit_armv6m_asm:mov(r0, ?IP_REG), Stream4 = StreamModule:append(Stream3, Restore), - State0#state{stream = Stream4}; + {State0#state{stream = Stream4}, Reg}; and_( #state{stream_module = StreamModule, available_regs = []} = State0, - Reg, + {free, Reg}, Val ) -> % No available registers, use r0 as temp and save it to r12 @@ -2572,7 +2573,17 @@ and_( % Restore r0 from r12 Restore = jit_armv6m_asm:mov(r0, ?IP_REG), Stream4 = StreamModule:append(Stream3, Restore), - State0#state{stream = Stream4}. + {State0#state{stream = Stream4}, Reg}; +and_( + #state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} = + State0, + Reg, + ?TERM_PRIMARY_CLEAR_MASK +) -> + I1 = jit_armv6m_asm:lsrs(ResultReg, Reg, 2), + I2 = jit_armv6m_asm:lsls(ResultReg, ResultReg, 2), + Stream1 = StreamModule:append(State0#state.stream, <>), + {State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}. or_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index df8e7cf1d6..119e1dbbc1 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -1826,7 +1826,9 @@ get_module_index( Reg }. 
-and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> +and_(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Val) when + ?IS_GPR(Reg) +-> % 32 bits instructions on x86-64 zero the high 32 bits I1 = if @@ -1834,7 +1836,28 @@ and_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) - true -> jit_x86_64_asm:andq(Val, Reg) end, Stream1 = StreamModule:append(Stream0, I1), - State#state{stream = Stream1}. + {State#state{stream = Stream1}, Reg}; +and_( + #state{ + stream_module = StreamModule, + available_regs = [ResultReg | T], + used_regs = UR, + stream = Stream0 + } = State, + Reg, + Val +) when + ?IS_GPR(Reg) +-> + I1 = jit_x86_64_asm:movq(Reg, ResultReg), + I2 = + if + Val >= 0, Val =< 16#FFFFFFFF -> jit_x86_64_asm:andl(Val, ResultReg); + true -> jit_x86_64_asm:andq(Val, ResultReg) + end, + Stream1 = StreamModule:append(Stream0, I1), + Stream2 = StreamModule:append(Stream1, I2), + {State#state{stream = Stream2, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. or_(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> I1 = jit_x86_64_asm:orq(Val, Reg), diff --git a/libs/jit/src/primitives.hrl b/libs/jit/src/primitives.hrl index 67ff60ecc8..a99fc076df 100644 --- a/libs/jit/src/primitives.hrl +++ b/libs/jit/src/primitives.hrl @@ -72,7 +72,7 @@ -define(PRIM_TERM_FIND_MAP_POS, 49). -define(PRIM_BITSTRING_UTF8_SIZE, 50). -define(PRIM_BITSTRING_UTF16_SIZE, 51). --define(PRIM_TERM_CREATE_EMPTY_BINARY, 52). +-define(PRIM_TERM_CREATE_UNINITIALIZED_BINARY, 52). -define(PRIM_DECODE_FLAGS_LIST, 53). -define(PRIM_BITSTRING_INSERT_UTF8, 54). -define(PRIM_BITSTRING_INSERT_UTF16, 55). @@ -92,6 +92,7 @@ -define(PRIM_BITSTRING_GET_UTF32, 69). -define(PRIM_TERM_COPY_MAP, 70). -define(PRIM_STACKTRACE_BUILD, 71). +-define(PRIM_TERM_REUSE_BINARY, 72). % Parameters to ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS % -define(MEMORY_NO_SHRINK, 0). 
diff --git a/libs/jit/src/term.hrl b/libs/jit/src/term.hrl index 9270de3244..eca86c623a 100644 --- a/libs/jit/src/term.hrl +++ b/libs/jit/src/term.hrl @@ -74,3 +74,5 @@ -define(REFC_BINARY_MIN_64, 64). -define(TERM_BOXED_REFC_BINARY_SIZE, 6). -define(BINARY_HEADER_SIZE, 2). + +-define(TERM_INVALID_TERM, 0). diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 39bfa963a1..6539f43c61 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -1295,10 +1295,16 @@ static int jit_bitstring_utf16_size(int c) return utf16_size; } -static term jit_term_create_empty_binary(Context *ctx, size_t len) +static term jit_term_create_uninitialized_binary(Context *ctx, size_t len) { - TRACE("jit_term_create_empty_binary: len=%d\n", (int) len); - return term_create_empty_binary(len, &ctx->heap, ctx->global); + TRACE("jit_term_create_uninitialized_binary: len=%d\n", (int) len); + return term_create_uninitialized_binary(len, &ctx->heap, ctx->global); +} + +static term jit_term_reuse_binary(Context *ctx, term src, size_t len) +{ + TRACE("jit_term_reuse_binary: src=0x%lx, len=%d\n", src, (int) len); + return term_reuse_binary(src, len, &ctx->heap, ctx->global); } static int jit_decode_flags_list(Context *ctx, JITState *jit_state, term flags) @@ -1715,7 +1721,7 @@ const ModuleNativeInterface module_native_interface = { jit_term_find_map_pos, jit_bitstring_utf8_size, jit_bitstring_utf16_size, - jit_term_create_empty_binary, + jit_term_create_uninitialized_binary, jit_decode_flags_list, jit_bitstring_insert_utf8, jit_bitstring_insert_utf16, @@ -1734,7 +1740,8 @@ const ModuleNativeInterface module_native_interface = { jit_bitstring_get_utf16, jit_bitstring_get_utf32, term_copy_map, - jit_stacktrace_build + jit_stacktrace_build, + jit_term_reuse_binary }; #endif diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index ee53259886..af31ed3b17 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -158,6 +158,7 @@ struct ModuleNativeInterface term 
(*bitstring_get_utf32)(term src, int flags_value); term (*term_copy_map)(Context *ctx, term src); term (*stacktrace_build)(Context *ctx); + term (*term_reuse_binary)(Context *ctx, term src, size_t len); }; extern const ModuleNativeInterface module_native_interface; diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index d8fc4106b0..547dbbfe74 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -4073,7 +4073,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) if (UNLIKELY(memory_ensure_free_with_roots(ctx, words + term_binary_heap_size(size_val), live, x_regs, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { RAISE_ERROR(OUT_OF_MEMORY_ATOM); } - term t = term_create_empty_binary(size_val, &ctx->heap, ctx->global); + term t = term_create_uninitialized_binary(size_val, &ctx->heap, ctx->global); + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } ctx->bs = t; ctx->bs_offset = 0; @@ -4121,7 +4124,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) if (UNLIKELY(memory_ensure_free_with_roots(ctx, words + term_binary_heap_size(size_val / 8), live, x_regs, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { RAISE_ERROR(OUT_OF_MEMORY_ATOM); } - term t = term_create_empty_binary(size_val / 8, &ctx->heap, ctx->global); + term t = term_create_uninitialized_binary(size_val / 8, &ctx->heap, ctx->global); + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } ctx->bs = t; ctx->bs_offset = 0; @@ -4529,7 +4535,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) if (UNLIKELY(memory_ensure_free_opt(ctx, term_binary_heap_size(0), MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { RAISE_ERROR(OUT_OF_MEMORY_ATOM); } - term t = term_create_empty_binary(0, &ctx->heap, ctx->global); + term t = term_create_uninitialized_binary(0, &ctx->heap, ctx->global); + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } ctx->bs = t; ctx->bs_offset = 0; @@ -4594,7 +4603,10 @@ 
HOT_FUNC int scheduler_entry_point(GlobalContext *glb) #ifdef IMPL_EXECUTE_LOOP TRACE("bs_append/8, fail=%u size=" AVM_INT_FMT " unit=%u src=0x%" TERM_X_FMT " dreg=%c%i\n", (unsigned) fail, size_val, (unsigned) unit, src, T_DEST_REG(dreg)); src = x_regs[live]; - term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global); + term t = term_create_uninitialized_binary(src_size + size_val / 8, &ctx->heap, ctx->global); + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size); ctx->bs = t; @@ -4641,8 +4653,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) RAISE_ERROR(OUT_OF_MEMORY_ATOM); } DECODE_COMPACT_TERM(src, src_pc) - term t = term_create_empty_binary(src_size + size_val / 8, &ctx->heap, ctx->global); - memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), src_size); + term t = term_reuse_binary(src, src_size + size_val / 8, &ctx->heap, ctx->global); + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } ctx->bs = t; ctx->bs_offset = src_size * 8; @@ -6736,6 +6750,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) // Verify parameters and compute binary size in first iteration #ifdef IMPL_EXECUTE_LOOP size_t binary_size = 0; + term reuse_binary = term_invalid_term(); #endif for (size_t j = 0; j < nb_segments; j++) { term atom_type; @@ -6824,6 +6839,9 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) // We only support src as a binary of bytes here. 
segment_size = term_binary_size(src); segment_unit = 8; + if (atom_type == PRIVATE_APPEND_ATOM && j == 0) { + reuse_binary = src; + } } else { VERIFY_IS_INTEGER(size, "bs_create_bin/6", fail); avm_int_t signed_size_value = term_to_int(size); @@ -6864,7 +6882,17 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) if (UNLIKELY(memory_ensure_free_with_roots(ctx, alloc + term_binary_heap_size(binary_size / 8), live, x_regs, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { RAISE_ERROR(OUT_OF_MEMORY_ATOM); } - term t = term_create_empty_binary(binary_size / 8, &ctx->heap, ctx->global); + term t; + size_t original_size = 0; + if (term_is_invalid_term(reuse_binary)) { + t = term_create_uninitialized_binary(binary_size / 8, &ctx->heap, ctx->global); + } else { + original_size = term_binary_size(reuse_binary); + t = term_reuse_binary(reuse_binary, binary_size / 8, &ctx->heap, ctx->global); + } + if (UNLIKELY(term_is_invalid_term(t))) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } size_t offset = 0; for (size_t j = 0; j < nb_segments; j++) { @@ -6968,6 +6996,10 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) TRACE("bs_create_bin/6: current offset (%d) is not evenly divisible by 8\n", (int) offset); RAISE_ERROR(UNSUPPORTED_ATOM); } + if (reuse_binary == src && j == 0) { + segment_size = original_size * 8; + break; + } uint8_t *dst = (uint8_t *) term_binary_data(t) + (offset / 8); const uint8_t *bin = (const uint8_t *) term_binary_data(src); size_t binary_size = term_binary_size(src); diff --git a/src/libAtomVM/term.c b/src/libAtomVM/term.c index 838fd41eee..eb56bcf0ca 100644 --- a/src/libAtomVM/term.c +++ b/src/libAtomVM/term.c @@ -909,7 +909,7 @@ term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContex if (IS_NULL_PTR(refc)) { // TODO propagate error to callers of this function, e.g., as an invalid term fprintf(stderr, "memory_create_refc_binary: Unable to allocate %zu bytes for refc_binary.\n", size); - AVM_ABORT(); + return term_invalid_term(); } 
boxed_value[3] = (term) refc; refc->ref_count = 1; // added to mso list, increment ref count @@ -919,6 +919,72 @@ term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContex return ret; } +term term_reuse_binary(term src, size_t size, Heap *heap, GlobalContext *glb) +{ + if (!term_is_refc_binary(src) || term_refc_binary_is_const(src)) { + // Not a refc binary or it's a const refc binary - create a new one + size_t src_size = term_binary_size(src); + term t = term_create_uninitialized_binary(size, heap, glb); + // Copy the source data (up to the smaller of src_size and size) + size_t copy_size = src_size < size ? src_size : size; + memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), copy_size); + return t; + } + + term *boxed_value = term_to_term_ptr(src); + struct RefcBinary *old_refc = (struct RefcBinary *) boxed_value[3]; + size_t old_size = old_refc->size; + + // Only reuse if refcount is 1 (only this term references it) + if (old_refc->ref_count != 1) { + // Can't reuse - create a new binary instead + size_t src_size = term_binary_size(src); + term t = term_create_uninitialized_binary(size, heap, glb); + size_t copy_size = src_size < size ? src_size : size; + memcpy((void *) term_binary_data(t), (void *) term_binary_data(src), copy_size); + return t; + } + + // Lock the list of refc binaries while we're trying to realloc. + struct ListHead *refc_binaries = synclist_wrlock(&glb->refc_binaries); + + // Remove from list before realloc because realloc might move the memory + list_remove(&old_refc->head); + + // Realloc to new size. 
+ size_t n = sizeof(struct RefcBinary) + size; + struct RefcBinary *new_refc = realloc(old_refc, n); + if (IS_NULL_PTR(new_refc)) { + // Re-add to list before unlocking + list_append(refc_binaries, &old_refc->head); + synclist_unlock(&glb->refc_binaries); + fprintf(stderr, "term_reuse_binary: Unable to reallocate %zu bytes for refc_binary.\n", size); + return term_invalid_term(); + } + + // Update size + new_refc->size = size; + + // Zero the new part if size increased + if (size > old_size) { + memset((char *) &new_refc->data + old_size, 0, size - old_size); + } + + // Update the boxed value to point to the new refc BEFORE unlocking + // so other threads see a consistent state + boxed_value[1] = (term) size; + boxed_value[3] = (term) new_refc; + + // Re-add to list after realloc (whether pointer changed or not) + list_append(refc_binaries, &new_refc->head); + + // Unlock the list of refc binaries + synclist_unlock(&glb->refc_binaries); + + // Return the same term (boxed_value pointer hasn't changed) + return src; +} + static term find_binary(term binary_or_state) { term t = binary_or_state; diff --git a/src/libAtomVM/term.h b/src/libAtomVM/term.h index 9a38768bcb..9c925ea5c8 100644 --- a/src/libAtomVM/term.h +++ b/src/libAtomVM/term.h @@ -286,7 +286,8 @@ TermCompareResult term_compare(term t, term other, TermCompareOpts opts, GlobalC * @param is_const designates whether the data pointed to is "const", such as a term literal * @param heap the heap to allocate the binary in * @param glb the global context as refc binaries are global - * @return a term (reference) pointing to the newly allocated binary in the process heap. + * @return a term (reference) pointing to the newly allocated binary in the process heap or + * `term_invalid_term()` if there isn't enough memory to allocate the refc buffer. 
*/ term term_alloc_refc_binary(size_t size, bool is_const, Heap *heap, GlobalContext *glb); @@ -1262,7 +1263,8 @@ static inline const char *term_binary_data(term t) * @param size size of binary data buffer. * @param heap the heap to allocate the binary in * @param glb the global context as refc binaries are global -* @return a term pointing to the boxed binary pointer. +* @return a term pointing to the boxed binary pointer or `term_invalid_term()` +* if there isn't enough memory to allocate the refc buffer */ static inline term term_create_uninitialized_binary(size_t size, Heap *heap, GlobalContext *glb) { @@ -1350,7 +1352,9 @@ static inline void term_set_refc_binary_data(term t, const void *data) static inline term term_from_const_binary(const void *data, size_t size, Heap *heap, GlobalContext *glb) { term binary = term_alloc_refc_binary(size, true, heap, glb); - term_set_refc_binary_data(binary, data); + if (LIKELY(!term_is_invalid_term(binary))) { + term_set_refc_binary_data(binary, data); + } return binary; } @@ -1366,10 +1370,25 @@ static inline term term_from_const_binary(const void *data, size_t size, Heap *h static inline term term_create_empty_binary(size_t size, Heap *heap, GlobalContext *glb) { term t = term_create_uninitialized_binary(size, heap, glb); - memset((char *) term_binary_data(t), 0x00, size); + if (LIKELY(!term_is_invalid_term(t))) { + memset((char *) term_binary_data(t), 0x00, size); + } return t; } +/** +* @brief Reuse a binary. If the binary is a refc binary with a ref count of +* 1, try to reuse it. Otherwise, create a new binary and copy the data. +* +* @details Try to reuse a binary and return a term pointing to it. +* @param src binary to reuse. +* @param size size of binary data buffer. +* @param heap the heap to allocate memory in +* @param glb the global context as refc binaries are global +* @return a term pointing to the boxed binary pointer. 
+*/ +term term_reuse_binary(term src, size_t size, Heap *heap, GlobalContext *glb); + static inline bool term_normalize_binary_pos_len(term binary, avm_int_t pos, avm_int_t len, BinaryPosLen *pos_len) { avm_int_t size = (avm_int_t) term_binary_size(binary); diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 23291a400c..00e5de8bfe 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -892,7 +892,7 @@ call_bif_with_large_literal_integer_test() -> get_list_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, Reg, -4), + {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, -4), State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), State5 = ?BACKEND:free_native_registers(State4, [Reg]), @@ -922,7 +922,7 @@ is_integer_test() -> ?BACKEND:jump_to_label(BSt0, Label) end ), - MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), ?BACKEND:if_block( MSt3, @@ -972,7 +972,7 @@ is_number_test() -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), - BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), cond_jump_to_label( {'and', [ @@ -1081,7 +1081,7 @@ call_fun_test() -> ]) end ), - State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK), State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), State7 = ?BACKEND:if_block( State6, {RegCopy, '&', 
?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index ceaf926d7d..d4e4802fed 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -107,7 +107,7 @@ call_primitive_6_args_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), % Get bin_ptr from x_reg 0 (similar to get_list_test pattern) {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK), + {State2, RegA} = ?BACKEND:and_(State1, {free, RegA}, ?TERM_PRIMARY_CLEAR_MASK), % Get another register for the last parameter to test {free, Reg} handling {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}), % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments @@ -1549,7 +1549,7 @@ call_bif_with_large_literal_integer_test() -> get_list_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), State5 = ?BACKEND:free_native_registers(State4, [Reg]), @@ -1580,7 +1580,7 @@ is_integer_test() -> ?BACKEND:jump_to_label(BSt0, Label) end ), - MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), ?BACKEND:if_block( MSt3, @@ -1642,7 +1642,7 @@ is_number_test() -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), - BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {BSt2, Reg} = 
?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), cond_jump_to_label( {'and', [ @@ -2187,7 +2187,7 @@ call_fun_test() -> ]) end ), - State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK), State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), State7 = ?BACKEND:if_block( State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> @@ -3184,7 +3184,7 @@ and_register_exhaustion_negative_test() -> {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), {StateNoRegs, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), % Test negative immediate (-4) which should use BICS with r0 as temp - StateResult = ?BACKEND:and_(StateNoRegs, r7, -4), + {StateResult, r7} = ?BACKEND:and_(StateNoRegs, {free, r7}, -4), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << " 0: 6987 ldr r7, [r0, #24]\n" @@ -3210,7 +3210,7 @@ and_register_exhaustion_positive_test() -> {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), {StateNoRegs, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), % Test positive immediate (0x3F) which should use ANDS with r0 as temp - StateResult = ?BACKEND:and_(StateNoRegs, r7, 16#3F), + {StateResult, r7} = ?BACKEND:and_(StateNoRegs, {free, r7}, 16#3F), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << " 0: 6987 ldr r7, [r0, #24]\n" diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index 9aa86b6427..45fc71df51 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -957,7 +957,7 @@ call_bif_with_large_literal_integer_test() -> get_list_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, Reg, -4), + 
{State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, -4), State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), State5 = ?BACKEND:free_native_registers(State4, [Reg]), @@ -987,7 +987,7 @@ is_integer_test() -> ?BACKEND:jump_to_label(BSt0, Label) end ), - MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), ?BACKEND:if_block( MSt3, @@ -1039,7 +1039,7 @@ is_number_test() -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), - BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), cond_jump_to_label( {'and', [ @@ -1148,7 +1148,7 @@ call_fun_test() -> ]) end ), - State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK), State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), State7 = ?BACKEND:if_block( State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> From f28aba99fc46341fcd56bc3d3909a5d8a08adcb7 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 29 Sep 2025 21:49:52 +0200 Subject: [PATCH 02/28] JIT: factorize tail calls to reduce binary size Use a cache to remember tail calls that were already implemented and replace further implementations of the same tail call with a jump to the previous implementation. 
Coverage shows that all cases are covered in libs/estdlib/src and libs/jit/src: OP_RETURN: 50 misses, 1735 hits (97%) OP_JUMP/OP_CALL_LAST/OP_CALL_ONLY: 656 misses, 389 hits (37%) OP_CALL_LAST: 220 misses, 206 hits (48%) OP_FUNC_INFO: 58 misses, 1619 hits (97%) Signed-off-by: Paul Guyot --- libs/jit/src/jit.erl | 110 +++++++++++++++++++++++++++-------- libs/jit/src/jit_aarch64.erl | 8 +++ libs/jit/src/jit_armv6m.erl | 35 ++++++----- libs/jit/src/jit_x86_64.erl | 8 +++ 4 files changed, 124 insertions(+), 37 deletions(-) diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 9babe4184c..f4b57768bb 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -100,7 +100,8 @@ labels_count :: pos_integer(), atom_resolver :: fun((integer()) -> atom()), literal_resolver :: fun((integer()) -> any()), - type_resolver :: fun((integer()) -> any()) + type_resolver :: fun((integer()) -> any()), + tail_cache :: [{tuple(), non_neg_integer()}] }). -type stream() :: any(). @@ -142,7 +143,8 @@ compile( labels_count = LabelsCount, atom_resolver = AtomResolver, literal_resolver = LiteralResolver, - type_resolver = TypeResolver + type_resolver = TypeResolver, + tail_cache = [] }, {State1, MSt2} = first_pass(Opcodes, MMod, MSt1, State0), MSt3 = second_pass(MMod, MSt2, State1), @@ -170,18 +172,30 @@ first_pass( ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest1, MMod, MSt1, State0); % 2 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {_ModuleAtom, Rest1} = decode_atom(Rest0), {_FunctionName, Rest2} = decode_atom(Rest1), {_Arity, Rest3} = decode_literal(Rest2), ?TRACE("OP_FUNC_INFO ~p, ~p, ~p\n", [_ModuleAtom, _FunctionName, _Arity]), - % Implement function clause at the previous label. 
(TODO: optimize it out to save space) - MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RAISE_ERROR, [ - ctx, jit_state, offset, ?FUNCTION_CLAUSE_ATOM - ]), - ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest3, MMod, MSt1, State0); + % Implement function clause at the previous label. + Offset = MMod:offset(MSt0), + {MSt1, OffsetReg} = MMod:move_to_native_register(MSt0, Offset), + TailCacheKey = {call_primitive_last, ?PRIM_RAISE_ERROR, [OffsetReg, ?FUNCTION_CLAUSE_ATOM]}, + State1 = + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + MSt3 = MMod:call_primitive_last(MSt1, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, {free, OffsetReg}, ?FUNCTION_CLAUSE_ATOM + ]), + State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, CacheOffset} -> + MSt2 = MMod:jump_to_offset(MSt1, CacheOffset), + MSt3 = MMod:free_native_registers(MSt2, [OffsetReg]), + State0 + end, + ?ASSERT_ALL_NATIVE_FREE(MSt3), + first_pass(Rest3, MMod, MSt3, State1); % 3 first_pass( <>, MMod, MSt0, #state{labels_count = LabelsCount} = State @@ -203,26 +217,56 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest2, MMod, MSt1, State0); % 5 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), {NWords, Rest3} = decode_literal(Rest2), ?TRACE("OP_CALL_LAST ~p, ~p, ~p\n", [_Arity, Label, NWords]), - MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}), - MSt2 = MMod:increment_sp(MSt1, NWords + 1), - MSt3 = MMod:call_only_or_schedule_next(MSt2, Label), + TailCacheKey0 = {op_call_last, NWords, Label}, + case lists:keyfind(TailCacheKey0, 1, TC) of + false -> + Offset0 = MMod:offset(MSt0), + MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}), + MSt2 = MMod:increment_sp(MSt1, NWords + 1), + TailCacheKey1 = {op_call_only, Label}, + case lists:keyfind(TailCacheKey1, 1, TC) of + false -> + Offset1 = MMod:offset(MSt2), + MSt3 = 
MMod:call_only_or_schedule_next(MSt2, Label), + State1 = State0#state{ + tail_cache = [{TailCacheKey1, Offset1}, {TailCacheKey0, Offset0} | TC] + }; + {TailCacheKey1, Offset1} -> + MSt3 = MMod:jump_to_offset(MSt2, Offset1), + State1 = State0#state{ + tail_cache = [{TailCacheKey0, Offset0} | TC] + } + end; + {TailCacheKey0, Offset0} -> + MSt3 = MMod:jump_to_offset(MSt0, Offset0), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt3), - first_pass(Rest3, MMod, MSt3, State0); + first_pass(Rest3, MMod, MSt3, State1); % 6 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), ?TRACE("OP_CALL_ONLY ~p, ~p\n", [_Arity, Label]), - MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + TailCacheKey = {op_call_only, Label}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt0), + MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + State1 = State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, Offset} -> + MSt1 = MMod:jump_to_offset(MSt0, Offset), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest2, MMod, MSt1, State0); + first_pass(Rest2, MMod, MSt1, State1); % 7 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -348,7 +392,7 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 19 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_RETURN\n", []), % Optimized return: check if returning within same module @@ -371,9 +415,18 @@ first_pass(<>, MMod, MSt0, State0) -> ), MSt5 = MMod:free_native_registers(MSt4, [CpReg0]), % Different module: use existing slow path - MSt6 = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]), + TailCacheKey = {call_primitive_last, 
?PRIM_RETURN}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt5), + MSt6 = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]), + State1 = State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, Offset} -> + MSt6 = MMod:jump_to_offset(MSt5, Offset), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt6), - first_pass(Rest, MMod, MSt6, State0); + first_pass(Rest, MMod, MSt6, State1); % 20 first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), @@ -836,13 +889,22 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 61 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt0, #state{tail_cache = TC} = State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), ?TRACE("OP_JUMP ~p\n", [Label]), - MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), - ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest1, MMod, MSt1, State0); + TailCacheKey = {op_call_only, Label}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt0), + MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + ?ASSERT_ALL_NATIVE_FREE(MSt1), + first_pass(Rest1, MMod, MSt1, State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}); + {TailCacheKey, Offset} -> + MSt1 = MMod:jump_to_offset(MSt0, Offset), + ?ASSERT_ALL_NATIVE_FREE(MSt1), + first_pass(Rest1, MMod, MSt1, State0) + end; % 62 % Same implementation as OP_TRY, to confirm. first_pass(<>, MMod, MSt0, State0) -> diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 234952045e..48f4202bee 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -38,6 +38,7 @@ return_if_not_equal_to_ctx/2, jump_to_label/2, jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -531,6 +532,13 @@ jump_to_label( State#state{stream = Stream1, branches = [Reloc | AccBranches]} end. 
+jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + Rel = TargetOffset - Offset, + I1 = jit_aarch64_asm:b(Rel), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + %%----------------------------------------------------------------------------- %% @doc Jump to a continuation address stored in a register. %% This is used for optimized intra-module returns. diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 676cfdce93..f3269588c0 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -38,6 +38,7 @@ return_if_not_equal_to_ctx/2, jump_to_label/2, jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -727,6 +728,12 @@ jump_to_label( Stream1 = StreamModule:append(Stream0, CodeBlock), State1#state{stream = Stream1}. +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), + Stream1 = StreamModule:append(Stream0, CodeBlock), + State#state{stream = Stream1}. + %%----------------------------------------------------------------------------- %% @doc Jump to address in continuation pointer register %% The continuation points to a function prologue, so we need to compute @@ -789,15 +796,14 @@ jump_to_continuation( % Free all registers as this is a terminal instruction State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}. 
-branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) when - LabelOffset - Offset =< 2050, LabelOffset - Offset >= -2044 +branch_to_offset_code(_State, Offset, TargetOffset) when + TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044 -> % Near branch: use direct B instruction - Rel = LabelOffset - Offset, - CodeBlock = jit_armv6m_asm:b(Rel), - {State, CodeBlock}; -branch_to_label_code( - #state{available_regs = [TempReg | _]} = State0, Offset, Label, {Label, LabelOffset} + Rel = TargetOffset - Offset, + jit_armv6m_asm:b(Rel); +branch_to_offset_code( + #state{available_regs = [TempReg | _]}, Offset, TargetOffset ) -> % Far branch: use register-based sequence, need temporary register if @@ -808,19 +814,22 @@ branch_to_label_code( I3 = jit_armv6m_asm:bx(TempReg), % Unaligned : need nop I4 = jit_armv6m_asm:nop(), - LiteralValue = LabelOffset - Offset - 5, + LiteralValue = TargetOffset - Offset - 5, I5 = <>, - CodeBlock = <>; + <>; true -> % Unaligned I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), I2 = jit_armv6m_asm:add(TempReg, pc), I3 = jit_armv6m_asm:bx(TempReg), - LiteralValue = LabelOffset - Offset - 5, + LiteralValue = TargetOffset - Offset - 5, I4 = <>, - CodeBlock = <> - end, - {State0, CodeBlock}; + <> + end. + +branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> + CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), + {State, CodeBlock}; branch_to_label_code( #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false ) -> diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 119e1dbbc1..cb3e5ae8a3 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -38,6 +38,7 @@ return_if_not_equal_to_ctx/2, jump_to_label/2, jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -524,6 +525,13 @@ jump_to_label( State#state{stream = Stream1, branches = [Reloc | AccBranches]} end. 
+jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + RelOffset = TargetOffset - Offset, + I1 = jit_x86_64_asm:jmp(RelOffset), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + %%----------------------------------------------------------------------------- %% @doc Jump to a continuation address stored in a register. %% This is used for optimized intra-module returns. From a95a378b57c297cf48d9ea29a2f89831add27459 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 29 Sep 2025 23:24:21 +0200 Subject: [PATCH 03/28] armv6m: use literal pool to reduce binary size Signed-off-by: Paul Guyot --- libs/estdlib/src/code_server.erl | 5 +- libs/jit/src/jit_armv6m.erl | 89 +++-- tests/libs/jit/jit_armv6m_tests.erl | 577 +++++++++++++++------------- 3 files changed, 364 insertions(+), 307 deletions(-) diff --git a/libs/estdlib/src/code_server.erl b/libs/estdlib/src/code_server.erl index 427d5fa529..69aa359327 100644 --- a/libs/estdlib/src/code_server.erl +++ b/libs/estdlib/src/code_server.erl @@ -174,11 +174,12 @@ load(Module) -> BackendModule, BackendState0 ), - Stream1 = BackendModule:stream(BackendState1), + BackendState2 = BackendModule:flush(BackendState1), + Stream1 = BackendModule:stream(BackendState2), code_server:set_native_code(Module, LabelsCount, Stream1), End = erlang:system_time(millisecond), io:format("~B ms (bytecode: ~B bytes, native code: ~B bytes)\n", [ - End - Start, byte_size(Code), BackendModule:offset(BackendState1) + End - Start, byte_size(Code), BackendModule:offset(BackendState2) ]) catch T:V:S -> diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index f3269588c0..9e602c57d8 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -135,7 +135,8 @@ available_regs :: [armv6m_register()], used_regs :: [armv6m_register()], labels :: [{integer() | reference(), integer()}], - variant :: 
non_neg_integer() + variant :: non_neg_integer(), + literal_pool :: [{non_neg_integer(), armv6m_register(), non_neg_integer()}] }). -type state() :: #state{}. @@ -248,7 +249,8 @@ new(Variant, StreamModule, Stream) -> available_regs = ?AVAILABLE_REGS, used_regs = [], labels = [], - variant = Variant + variant = Variant, + literal_pool = [] }. %%----------------------------------------------------------------------------- @@ -633,7 +635,8 @@ call_primitive_last( State2 = set_registers_args(State1, ArgsForTailCall, 0), tail_call_with_jit_state_registers_only(State2, Temp) end, - State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. + State5 = State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}, + flush_literal_pool(State5). %%----------------------------------------------------------------------------- %% @doc Tail call to address in register, restoring prolog registers including @@ -726,13 +729,15 @@ jump_to_label( Offset = StreamModule:offset(Stream0), {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), Stream1 = StreamModule:append(Stream0, CodeBlock), - State1#state{stream = Stream1}. + State2 = State1#state{stream = Stream1}, + flush_literal_pool(State2). jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> Offset = StreamModule:offset(Stream0), CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), Stream1 = StreamModule:append(Stream0, CodeBlock), - State#state{stream = Stream1}. + State2 = State#state{stream = Stream1}, + flush_literal_pool(State2). %%----------------------------------------------------------------------------- %% @doc Jump to address in continuation pointer register @@ -794,7 +799,8 @@ jump_to_continuation( Code = <>, Stream2 = StreamModule:append(State1#state.stream, Code), % Free all registers as this is a terminal instruction - State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}. 
+ State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []}, + flush_literal_pool(State2). branch_to_offset_code(_State, Offset, TargetOffset) when TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044 @@ -1737,7 +1743,7 @@ set_registers_args( UsedRegs, Args ), - State0#state{ + State1#state{ stream = Stream1, available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs, used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs) @@ -2631,41 +2637,42 @@ mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Re I2 = jit_armv6m_asm:negs(Reg, Reg), Stream1 = StreamModule:append(Stream0, <>), State#state{stream = Stream1}; -mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) -> - %% Use a literal pool with a branch instruction (branch-over pattern) - %% Calculate where literal will be placed (must be word-aligned) - %% After LDR (2 bytes) + Branch (2 bytes) = 4 bytes from current position - CurrentOffset = StreamModule:offset(Stream0), - OffsetAfterInstructions = CurrentOffset + 4, - %% Find next word-aligned position for literal - LiteralPosition = - case OffsetAfterInstructions rem 4 of - % Already aligned - 0 -> OffsetAfterInstructions; - % Add 2 bytes padding to align - _ -> OffsetAfterInstructions + 2 +mov_immediate( + #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State, Reg, Val +) -> + LdrInstructionAddr = StreamModule:offset(Stream0), + I1 = jit_armv6m_asm:ldr(Reg, {pc, 0}), + Stream1 = StreamModule:append(Stream0, <>), + State#state{stream = Stream1, literal_pool = [{LdrInstructionAddr, Reg, Val} | LP]}. 
+ +flush_literal_pool(#state{literal_pool = []} = State) -> + State; +flush_literal_pool( + #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State +) -> + % Align + Offset = StreamModule:offset(Stream0), + Stream1 = + if + Offset rem 4 =:= 0 -> Stream0; + true -> StreamModule:append(Stream0, <<0:16>>) end, - PaddingNeeded = LiteralPosition - OffsetAfterInstructions, - - %% Calculate LDR PC-relative offset - %% PC = (current_instruction_address & ~3) + 4 - LdrInstructionAddr = CurrentOffset, - LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, - LiteralOffset = LiteralPosition - LdrPC, - - %% Generate: ldr rTemp, [pc, #LiteralOffset] ; Load from literal - I1 = jit_armv6m_asm:ldr(Reg, {pc, LiteralOffset}), - %% Calculate branch offset - %% Branch is at CurrentOffset + 2, need to jump past literal - BranchPosition = CurrentOffset + 2, - % After the 4-byte literal - TargetPosition = LiteralPosition + 4, - BranchOffset = TargetPosition - BranchPosition, - I2 = jit_armv6m_asm:b(BranchOffset), - %% Generate padding if needed (just zeros) - Padding = <<0:(PaddingNeeded * 8)>>, - Stream1 = StreamModule:append(Stream0, <>), - State#state{stream = Stream1}. + % Lay all values and update ldr instructions + Stream2 = lists:foldl( + fun({LdrInstructionAddr, Reg, Val}, AccStream) -> + LiteralPosition = StreamModule:offset(AccStream), + LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, + LiteralOffset = LiteralPosition - LdrPC, + LdrInstruction = jit_armv6m_asm:ldr(Reg, {pc, LiteralOffset}), + AccStream1 = StreamModule:append(AccStream, <>), + StreamModule:replace( + AccStream1, LdrInstructionAddr, LdrInstruction + ) + end, + Stream1, + lists:reverse(LP) + ), + State#state{stream = Stream2, literal_pool = []}. 
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when (Val >= 0 andalso Val =< 255) orelse is_atom(Val) diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index d4e4802fed..4f78d06163 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -312,13 +312,13 @@ call_primitive_last_5_args_test() -> " 6: 9700 str r7, [sp, #0]\n" " 8: 9902 ldr r1, [sp, #8]\n" " a: 2204 movs r2, #4\n" - " c: 4b00 ldr r3, [pc, #0] ; (0x10)\n" - " e: e001 b.n 0x14\n" - " 10: 02cb lsrs r3, r1, #16\n" - " 12: 0000 movs r0, r0\n" - " 14: 47b0 blx r6\n" - " 16: b002 add sp, #8\n" - " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n" + " e: 47b0 blx r6\n" + " 10: b002 add sp, #8\n" + " 12: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + % Literal pool + " 14: 02cb lsls r3, r1, #11\n" + " 16: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -535,17 +535,19 @@ if_block_test_() -> ?BACKEND:add(BSt0, RegB, 2) end ), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), Dump = << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 4d00 ldr r5, [pc, #0] ; (0x8)\n" - " 6: da04 bge.n 0x12\n" - " 8: 0400 lsls r0, r0, #16\n" - " a: 0000 movs r0, r0\n" - " c: 42af cmp r7, r5\n" - " e: dafe bge.n 0xe\n" - " 10: 3602 adds r6, #2" + " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" + " 6: da01 bge.n 0xc\n" + " 8: dafe bge.n 0x8\n" + " a: 3602 adds r6, #2\n" + " c: e078 b.n 0x100\n" + " e: 0000 movs r0, r0\n" + " 10: 0400 lsls r0, r0, #16\n" + " 12: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -713,17 +715,19 @@ if_block_test_() -> ?BACKEND:add(BSt0, RegB, 1) end ), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), Dump = << " 0: 6987 ldr r7, [r0, 
#24]\n" " 2: 69c6 ldr r6, [r0, #28]\n" - " 4: 4d00 ldr r5, [pc, #0] ; (0x8)\n" - " 6: e001 b.n 0xc\n" - " 8: 07cb lsls r3, r1, #31\n" - " a: 0000 movs r0, r0\n" - " c: 42af cmp r7, r5\n" - " e: d000 beq.n 0x12\n" - " 10: 3601 adds r6, #1" + " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" + " 6: 42af cmp r7, r5\n" + " 8: d000 beq.n 0xc\n" + " a: 3601 adds r6, #1\n" + " c: e078 b.n 0x100\n" + " e: 0000 movs r0, r0\n" + " 10: 07cb lsls r3, r1, #31\n" + " 12: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -1395,35 +1399,33 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> " 128: 3f01 subs r7, #1\n" " 12a: 60b7 str r7, [r6, #8]\n" " 12c: d004 beq.n 0x138\n" - " 12e: e011 b.n 0x154\n" + " 12e: e00f b.n 0x150\n" " 130: 46c0 nop ; (mov r8, r8)\n" " 132: 46c0 nop ; (mov r8, r8)\n" " 134: 46c0 nop ; (mov r8, r8)\n" " 136: 46c0 nop ; (mov r8, r8)\n" " 138: a700 add r7, pc, #0 ; (adr r7, 0x13c)\n" - " 13a: 4e01 ldr r6, [pc, #4] ; (0x140)\n" - " 13c: e002 b.n 0x144\n" - " 13e: 0000 movs r0, r0\n" - " 140: fedd ffff stcl2 15, cr13, [sp, #-1020] ; 0xfffffc04\n" - " 144: 19f6 adds r6, r6, r7\n" - " 146: 9f00 ldr r7, [sp, #0]\n" - " 148: 607e str r6, [r7, #4]\n" - " 14a: 6897 ldr r7, [r2, #8]\n" - " 14c: 9e05 ldr r6, [sp, #20]\n" - " 14e: 9705 str r7, [sp, #20]\n" - " 150: 46b6 mov lr, r6\n" - " 152: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 154: 6817 ldr r7, [r2, #0]\n" - " 156: 9e05 ldr r6, [sp, #20]\n" - " 158: 9705 str r7, [sp, #20]\n" - " 15a: 46b6 mov lr, r6\n" - " 15c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 15e: 46c0 nop ; (mov r8, r8)\n" - " 160: 6857 ldr r7, [r2, #4]\n" - " 162: 9e05 ldr r6, [sp, #20]\n" - " 164: 9705 str r7, [sp, #20]\n" - " 166: 46b6 mov lr, r6\n" - " 168: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 13a: 4e04 ldr r6, [pc, #16] ; (0x14c)\n" + " 13c: 19f6 adds r6, r6, r7\n" + " 13e: 9f00 ldr r7, [sp, #0]\n" + " 140: 607e str r6, [r7, #4]\n" + " 142: 6897 ldr r7, [r2, #8]\n" + " 144: 9e05 ldr r6, [sp, #20]\n" + " 146: 9705 
str r7, [sp, #20]\n" + " 148: 46b6 mov lr, r6\n" + " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 14c: fedd ffff mrc2 15, 6, pc, cr13, cr15, {7}\n" + " 150: 6817 ldr r7, [r2, #0]\n" + " 152: 9e05 ldr r6, [sp, #20]\n" + " 154: 9705 str r7, [sp, #20]\n" + " 156: 46b6 mov lr, r6\n" + " 158: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 15a: 46c0 nop ; (mov r8, r8)\n" + " 15c: 6857 ldr r7, [r2, #4]\n" + " 15e: 9e05 ldr r6, [sp, #20]\n" + " 160: 9705 str r7, [sp, #20]\n" + " 162: 46b6 mov lr, r6\n" + " 164: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, {_, RelevantBinary} = split_binary(Stream, 16#124), ?assertEqual(dump_to_bin(Dump), RelevantBinary). @@ -1459,35 +1461,33 @@ call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() -> " 128: 3f01 subs r7, #1\n" " 12a: 60b7 str r7, [r6, #8]\n" " 12c: d004 beq.n 0x138\n" - " 12e: e011 b.n 0x154\n" + " 12e: e00f b.n 0x150\n" " 130: 46c0 nop ; (mov r8, r8)\n" " 132: 46c0 nop ; (mov r8, r8)\n" " 134: 46c0 nop ; (mov r8, r8)\n" " 136: 46c0 nop ; (mov r8, r8)\n" " 138: a700 add r7, pc, #0 ; (adr r7, 0x13c)\n" - " 13a: 4e01 ldr r6, [pc, #4] ; (0x140)\n" - " 13c: e002 b.n 0x144\n" - " 13e: 0000 movs r0, r0\n" - " 140: fedd ffff stcl2 15, cr13, [sp, #-1020] ; 0xfffffc04\n" - " 144: 19f6 adds r6, r6, r7\n" - " 146: 9f00 ldr r7, [sp, #0]\n" - " 148: 607e str r6, [r7, #4]\n" - " 14a: 6897 ldr r7, [r2, #8]\n" - " 14c: 9e05 ldr r6, [sp, #20]\n" - " 14e: 9705 str r7, [sp, #20]\n" - " 150: 46b6 mov lr, r6\n" - " 152: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 154: 6817 ldr r7, [r2, #0]\n" - " 156: 9e05 ldr r6, [sp, #20]\n" - " 158: 9705 str r7, [sp, #20]\n" - " 15a: 46b6 mov lr, r6\n" - " 15c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 15e: 46c0 nop ; (mov r8, r8)\n" - " 160: 6857 ldr r7, [r2, #4]\n" - " 162: 9e05 ldr r6, [sp, #20]\n" - " 164: 9705 str r7, [sp, #20]\n" - " 166: 46b6 mov lr, r6\n" - " 168: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 13a: 4e04 ldr r6, [pc, #16] ; (0x14c)\n" + " 13c: 19f6 adds r6, r6, r7\n" + " 13e: 9f00 ldr 
r7, [sp, #0]\n" + " 140: 607e str r6, [r7, #4]\n" + " 142: 6897 ldr r7, [r2, #8]\n" + " 144: 9e05 ldr r6, [sp, #20]\n" + " 146: 9705 str r7, [sp, #20]\n" + " 148: 46b6 mov lr, r6\n" + " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 14c: fedd ffff mrc2 15, 6, pc, cr13, cr15, {7}\n" + " 150: 6817 ldr r7, [r2, #0]\n" + " 152: 9e05 ldr r6, [sp, #20]\n" + " 154: 9705 str r7, [sp, #20]\n" + " 156: 46b6 mov lr, r6\n" + " 158: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 15a: 46c0 nop ; (mov r8, r8)\n" + " 15c: 6857 ldr r7, [r2, #4]\n" + " 15e: 9e05 ldr r6, [sp, #20]\n" + " 160: 9705 str r7, [sp, #20]\n" + " 162: 46b6 mov lr, r6\n" + " 164: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, {_, RelevantBinary} = split_binary(Stream, 16#122), ?assertEqual(dump_to_bin(Dump), RelevantBinary). @@ -1517,32 +1517,31 @@ call_bif_with_large_literal_integer_test() -> " c: bc05 pop {r0, r2}\n" " e: 6bd6 ldr r6, [r2, #60] ; 0x3c\n" " 10: b4c5 push {r0, r2, r6, r7}\n" - " 12: 4901 ldr r1, [pc, #4] ; (0x18)\n" - " 14: e002 b.n 0x1c\n" - " 16: 0000 movs r0, r0\n" - " 18: e895 3b7f ldmia.w r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n" - " 1c: 47b0 blx r6\n" - " 1e: 4605 mov r5, r0\n" - " 20: bcc5 pop {r0, r2, r6, r7}\n" - " 22: b405 push {r0, r2}\n" - " 24: b082 sub sp, #8\n" - " 26: 9500 str r5, [sp, #0]\n" - " 28: 2100 movs r1, #0\n" - " 2a: 2201 movs r2, #1\n" - " 2c: 6983 ldr r3, [r0, #24]\n" - " 2e: 47b8 blx r7\n" - " 30: 4607 mov r7, r0\n" - " 32: b002 add sp, #8\n" - " 34: bc05 pop {r0, r2}\n" - " 36: 2f00 cmp r7, #0\n" - " 38: d105 bne.n 0x46\n" - " 3a: 6997 ldr r7, [r2, #24]\n" - " 3c: 223c movs r2, #60 ; 0x3c\n" - " 3e: 9e05 ldr r6, [sp, #20]\n" - " 40: 9705 str r7, [sp, #20]\n" - " 42: 46b6 mov lr, r6\n" - " 44: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 46: 6187 str r7, [r0, #24]" + " 12: 490b ldr r1, [pc, #44] ; (0x40)\n" + " 14: 47b0 blx r6\n" + " 16: 4605 mov r5, r0\n" + " 18: bcc5 pop {r0, r2, r6, r7}\n" + " 1a: b405 push {r0, r2}\n" + " 1c: b082 sub sp, #8\n" + " 1e: 9500 str 
r5, [sp, #0]\n" + " 20: 2100 movs r1, #0\n" + " 22: 2201 movs r2, #1\n" + " 24: 6983 ldr r3, [r0, #24]\n" + " 26: 47b8 blx r7\n" + " 28: 4607 mov r7, r0\n" + " 2a: b002 add sp, #8\n" + " 2c: bc05 pop {r0, r2}\n" + " 2e: 2f00 cmp r7, #0\n" + " 30: d108 bne.n 0x44\n" + " 32: 6997 ldr r7, [r2, #24]\n" + " 34: 2234 movs r2, #52 ; 0x34\n" + " 36: 9e05 ldr r6, [sp, #20]\n" + " 38: 9705 str r7, [sp, #20]\n" + " 3a: 46b6 mov lr, r6\n" + " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 3e: 0000 movs r0, r0\n" + " 40: e895 3b7f ldmia.w r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n" + " 44: 6187 str r7, [r0, #24]" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1879,48 +1878,46 @@ wait_timeout_test() -> Stream = ?BACKEND:stream(State10), Dump = << - " 0: a707 add r7, pc, #28 ; (adr r7, 0x22)\n" + " 0: a706 add r7, pc, #24 ; (adr r7, 0x1c)\n" " 2: 3701 adds r7, #1\n" " 4: 9e00 ldr r6, [sp, #0]\n" " 6: 6077 str r7, [r6, #4]\n" - " 8: 4f00 ldr r7, [pc, #0] ; (0xc)\n" - " a: e001 b.n 0x10\n" - " c: 1388 asrs r0, r1, #14\n" - " e: 0000 movs r0, r0\n" - " 10: 6f96 ldr r6, [r2, #120] ; 0x78\n" - " 14: 463a mov r2, r7\n" - " 16: 232a movs r3, #42 ; 0x2a\n" - " 18: 9f05 ldr r7, [sp, #20]\n" - " 1a: 9605 str r6, [sp, #20]\n" - " 1c: 46be mov lr, r7\n" - " 1e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 20: 46c0 nop ; (mov r8, r8)\n" - " 22: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 24: 6d57 ldr r7, [r2, #84] ; 0x54\n" - " 26: b405 push {r0, r2}\n" - " 28: 9902 ldr r1, [sp, #8]\n" - " 2a: 47b8 blx r7\n" - " 2c: 4607 mov r7, r0\n" - " 2e: bc05 pop {r0, r2}\n" - " 30: 4287 cmp r7, r0\n" - " 32: d001 beq.n 0x38\n" - " 34: 4638 mov r0, r7\n" - " 36: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 38: 2784 movs r7, #132 ; 0x84\n" - " 3a: 59d7 ldr r7, [r2, r7]\n" - " 3c: b405 push {r0, r2}\n" - " 3e: 2102 movs r1, #2\n" - " 40: 47b8 blx r7\n" - " 42: 4607 mov r7, r0\n" - " 44: bc05 pop {r0, r2}\n" - " 46: 2f00 cmp r7, #0\n" - " 48: d105 bne.n 0x56\n" - " 4a: 6fd7 ldr r7, [r2, #124] ; 0x7c\n" - 
" 4c: 222a movs r2, #42 ; 0x2a\n" - " 4e: 9e05 ldr r6, [sp, #20]\n" - " 50: 9705 str r7, [sp, #20]\n" - " 52: 46b6 mov lr, r6\n" - " 54: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 8: 4f03 ldr r7, [pc, #12] ; (0x18)\n" + " a: 6f96 ldr r6, [r2, #120] ; 0x78\n" + " c: 463a mov r2, r7\n" + " e: 232a movs r3, #42 ; 0x2a\n" + " 10: 9f05 ldr r7, [sp, #20]\n" + " 12: 9605 str r6, [sp, #20]\n" + " 14: 46be mov lr, r7\n" + " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 18: 1388 asrs r0, r1, #14\n" + " 1a: 0000 movs r0, r0\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 6d57 ldr r7, [r2, #84] ; 0x54\n" + " 20: b405 push {r0, r2}\n" + " 22: 9902 ldr r1, [sp, #8]\n" + " 24: 47b8 blx r7\n" + " 26: 4607 mov r7, r0\n" + " 28: bc05 pop {r0, r2}\n" + " 2a: 4287 cmp r7, r0\n" + " 2c: d001 beq.n 0x32\n" + " 2e: 4638 mov r0, r7\n" + " 30: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 32: 2784 movs r7, #132 ; 0x84\n" + " 34: 59d7 ldr r7, [r2, r7]\n" + " 36: b405 push {r0, r2}\n" + " 38: 2102 movs r1, #2\n" + " 3a: 47b8 blx r7\n" + " 3c: 4607 mov r7, r0\n" + " 3e: bc05 pop {r0, r2}\n" + " 40: 2f00 cmp r7, #0\n" + " 42: d105 bne.n 0x50\n" + " 44: 6fd7 ldr r7, [r2, #124] ; 0x7c\n" + " 46: 222a movs r2, #42 ; 0x2a\n" + " 48: 9e05 ldr r6, [sp, #20]\n" + " 4a: 9705 str r7, [sp, #20]\n" + " 4c: 46b6 mov lr, r6\n" + " 4e: bdf2 pop {r1, r4, r5, r6, r7, pc}" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -2224,55 +2221,55 @@ call_fun_test() -> " 24: 2403 movs r4, #3\n" " 26: 4025 ands r5, r4\n" " 28: 2d02 cmp r5, #2\n" - " 2a: d00c beq.n 0x46\n" + " 2a: d00b beq.n 0x44\n" " 2c: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" " 2e: b082 sub sp, #8\n" " 30: 9600 str r6, [sp, #0]\n" " 32: 9902 ldr r1, [sp, #8]\n" " 34: 222e movs r2, #46 ; 0x2e\n" - " 36: 4b01 ldr r3, [pc, #4] ; (0x3c)\n" - " 38: e002 b.n 0x40\n" - " 3a: 0000 movs r0, r0\n" - " 3c: 018b lsls r3, r1, #6\n" + " 36: 4b02 ldr r3, [pc, #8] ; (0x40)\n" + " 38: 47b8 blx r7\n" + " 3a: b002 add sp, #8\n" + " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 3e: 0000 movs r0, r0\n" - " 40: 47b8 blx r7\n" - " 42: b002 add sp, #8\n" - " 44: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 46: 2503 movs r5, #3\n" - " 48: 43ae bics r6, r5\n" - " 4a: 6836 ldr r6, [r6, #0]\n" - " 4c: 4635 mov r5, r6\n" - " 4e: 243f movs r4, #63 ; 0x3f\n" - " 50: 4025 ands r5, r4\n" - " 52: 2d14 cmp r5, #20\n" - " 54: d00b beq.n 0x6e\n" - " 56: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" - " 58: b082 sub sp, #8\n" - " 5a: 9600 str r6, [sp, #0]\n" - " 5c: 9902 ldr r1, [sp, #8]\n" - " 5e: 2258 movs r2, #88 ; 0x58\n" - " 60: 4b00 ldr r3, [pc, #0] ; (0x64)\n" - " 62: e001 b.n 0x68\n" - " 64: 018b lsls r3, r1, #6\n" + " 40: 018b lsls r3, r1, #6\n" + " 42: 0000 movs r0, r0\n" + " 44: 2503 movs r5, #3\n" + " 46: 43ae bics r6, r5\n" + " 48: 6836 ldr r6, [r6, #0]\n" + " 4a: 4635 mov r5, r6\n" + " 4c: 243f movs r4, #63 ; 0x3f\n" + " 4e: 4025 ands r5, r4\n" + " 50: 2d14 cmp r5, #20\n" + " 52: d00b beq.n 0x6c\n" + " 54: 6cd7 ldr r7, [r2, #76] ; 0x4c\n" + " 56: b082 sub sp, #8\n" + " 58: 9600 str r6, [sp, #0]\n" + " 5a: 9902 ldr r1, [sp, #8]\n" + " 5c: 2256 movs r2, #86 ; 0x56\n" + " 5e: 4b02 ldr r3, [pc, #8] ; (0x68)\n" + " 60: 47b8 blx r7\n" + " 62: b002 add sp, #8\n" + " 64: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" " 66: 0000 movs r0, r0\n" - " 68: 47b8 blx r7\n" - " 6a: b002 add sp, #8\n" - " 6c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 6e: 9d00 ldr r5, [sp, #0]\n" - " 70: 682e ldr r6, 
[r5, #0]\n" - " 72: 6836 ldr r6, [r6, #0]\n" - " 74: 0636 lsls r6, r6, #24\n" - " 76: 4d05 ldr r5, [pc, #20] ; (0x8c)\n" - " 78: 432e orrs r6, r5\n" - " 7a: 65c6 str r6, [r0, #92] ; 0x5c\n" - " 7c: 2680 movs r6, #128 ; 0x80\n" - " 7e: 5996 ldr r6, [r2, r6]\n" - " 80: 463a mov r2, r7\n" - " 82: 2300 movs r3, #0\n" - " 84: 9f05 ldr r7, [sp, #20]\n" - " 86: 9605 str r6, [sp, #20]\n" - " 88: 46be mov lr, r7\n" - " 8a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 68: 018b lsls r3, r1, #6\n" + " 6a: 0000 movs r0, r0\n" + " 6c: 9d00 ldr r5, [sp, #0]\n" + " 6e: 682e ldr r6, [r5, #0]\n" + " 70: 6836 ldr r6, [r6, #0]\n" + " 72: 0636 lsls r6, r6, #24\n" + " 74: 4d05 ldr r5, [pc, #20] ; (0x8c)\n" + " 76: 432e orrs r6, r5\n" + " 78: 65c6 str r6, [r0, #92] ; 0x5c\n" + " 7a: 2680 movs r6, #128 ; 0x80\n" + " 7c: 5996 ldr r6, [r2, r6]\n" + " 7e: 463a mov r2, r7\n" + " 80: 2300 movs r3, #0\n" + " 82: 9f05 ldr r7, [sp, #20]\n" + " 84: 9605 str r6, [sp, #20]\n" + " 86: 46be mov lr, r7\n" + " 88: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 8a: 0000 movs r0, r0\n" " 8c: 0240 lsls r0, r0, #9\n" " 8e: 0000 movs r0, r0\n" " 90: b5f2 push {r1, r4, r5, r6, r7, lr}" @@ -2281,7 +2278,8 @@ call_fun_test() -> move_to_vm_register_test0(State, Source, Dest, Dump) -> State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(dump_to_bin(Dump), Stream). 
move_to_vm_register_test_() -> @@ -2294,88 +2292,101 @@ move_to_vm_register_test_() -> ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, 0}, << " 0: 2700 movs r7, #0\n" - " 2: 6187 str r7, [r0, #24]" + " 2: 6187 str r7, [r0, #24]\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, extra}, << " 0: 2700 movs r7, #0\n" - " 2: 6587 str r7, [r0, #88] ; 0x58" + " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {ptr, r6}, << " 0: 2700 movs r7, #0\n" - " 2: 6037 str r7, [r6, #0]" + " 2: 6037 str r7, [r6, #0]\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << " 0: 2600 movs r6, #0\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 60be str r6, [r7, #8]" + " 4: 60be str r6, [r7, #8]\n" + " 6: e07b b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << " 0: 2600 movs r6, #0\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 653e str r6, [r7, #80] ; 0x50" + " 4: 653e str r6, [r7, #80] ; 0x50\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << " 0: 272a movs r7, #42 ; 0x2a\n" - " 2: 6187 str r7, [r0, #24]" + " 2: 6187 str r7, [r0, #24]\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, extra}, << " 0: 272a movs r7, #42 ; 0x2a\n" - " 2: 6587 str r7, [r0, #88] ; 0x58" + " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 4: e07c b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 2}, << " 0: 262a movs r6, #42 ; 0x2a\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 60be str r6, [r7, #8]" + " 4: 60be str r6, [r7, #8]\n" + " 6: e07b b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << " 0: 262a movs r6, #42 ; 0x2a\n" " 2: 6947 ldr r7, [r0, #20]\n" - " 4: 653e str r6, [r7, #80] ; 0x50" + " 4: 653e str r6, [r7, #80] ; 0x50\n" + " 6: e07b b.n 
0x100" >>) end), %% Test: Immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 99, {ptr, r3}, << " 0: 2763 movs r7, #99 ; 0x63\n" - " 2: 601f str r7, [r3, #0]" + " 2: 601f str r7, [r3, #0]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << " 0: 69c7 ldr r7, [r0, #28]\n" - " 2: 6207 str r7, [r0, #32]" + " 2: 6207 str r7, [r0, #32]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to ptr ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r1}, << " 0: 69c7 ldr r7, [r0, #28]\n" - " 2: 600f str r7, [r1, #0]" + " 2: 600f str r7, [r1, #0]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: ptr to x_reg ?_test(begin move_to_vm_register_test0(State0, {ptr, r4}, {x_reg, 3}, << " 0: 6827 ldr r7, [r4, #0]\n" - " 2: 6247 str r7, [r0, #36] ; 0x24" + " 2: 6247 str r7, [r0, #36] ; 0x24\n" + " 4: e07c b.n 0x100" >>) end), %% Test: x_reg to y_reg @@ -2383,7 +2394,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << " 0: 6987 ldr r7, [r0, #24]\n" " 2: 6946 ldr r6, [r0, #20]\n" - " 4: 6077 str r7, [r6, #4]" + " 4: 6077 str r7, [r6, #4]\n" + " 6: e07b b.n 0x100" >>) end), %% Test: y_reg to x_reg @@ -2391,7 +2403,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << " 0: 6946 ldr r6, [r0, #20]\n" " 2: 6837 ldr r7, [r6, #0]\n" - " 4: 6247 str r7, [r0, #36] ; 0x24" + " 4: 6247 str r7, [r0, #36] ; 0x24\n" + " 6: e07b b.n 0x100" >>) end), %% Test: y_reg to y_reg @@ -2399,41 +2412,47 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << " 0: 6946 ldr r6, [r0, #20]\n" " 2: 6877 ldr r7, [r6, #4]\n" - " 4: 6247 str r7, [r0, #36] ; 0x24" + " 4: 6247 str r7, [r0, #36] ; 0x24\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Native register to x_reg ?_test(begin move_to_vm_register_test0(State0, r5, {x_reg, 0}, << - " 0: 6185 str r5, [r0, #24]" + " 0: 6185 str r5, [r0, #24]\n" 
+ " 2: e07d b.n 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, r6, {x_reg, extra}, << - " 0: 6586 str r6, [r0, #88] ; 0x58" + " 0: 6586 str r6, [r0, #88] ; 0x58\n" + " 2: e07d b.n 0x100" >>) end), %% Test: Native register to ptr ?_test(begin move_to_vm_register_test0(State0, r4, {ptr, r3}, << - " 0: 601c str r4, [r3, #0]" + " 0: 601c str r4, [r3, #0]\n" + " 2: e07d b.n 0x100" >>) end), %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, r1, {y_reg, 0}, << " 0: 6947 ldr r7, [r0, #20]\n" - " 2: 6039 str r1, [r7, #0]" + " 2: 6039 str r1, [r7, #0]\n" + " 4: e07c b.n 0x100" >>) end), %% Test: Large immediate to x_reg (32-bit literal pool, aligned case) ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6187 str r7, [r0, #24]" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6187 str r7, [r0, #24]\n" + " 4: e07c b.n 0x100\n" + " 6: 0000 movs r0, r0\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), %% Test: Large immediate to x_reg (32-bit literal pool, unaligned case) @@ -2442,55 +2461,57 @@ move_to_vm_register_test_() -> State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}), %% Then do large immediate which should handle unaligned case State2 = ?BACKEND:move_to_vm_register(State1, 16#12345678, {x_reg, 0}), - Stream = ?BACKEND:stream(State2), + State3 = ?BACKEND:jump_to_offset(State2, 16#100), + Stream = ?BACKEND:stream(State3), Expected = dump_to_bin(<< " 0: 6019 str r1, [r3, #0]\n" " 2: 4f01 ldr r7, [pc, #4] ; (0x8)\n" - " 4: e002 b.n 0xc\n" - " 6: 0000 movs r0, r0\n" + " 4: 6187 str r7, [r0, #24]\n" + " 6: e07b b.n 0x100\n" " 8: 5678 ldrsb r0, [r7, r1]\n" - " a: 1234 asrs r4, r6, #8\n" - " c: 6187 str r7, [r0, #24]" + " a: 1234 asrs r4, r6, #8" >>), ?assertEqual(Expected, Stream) end), ?_test(begin move_to_vm_register_test0(State0, 
16#12345678, {x_reg, extra}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6587 str r7, [r0, #88] ; 0x58" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6587 str r7, [r0, #88] ; 0x58\n" + " 4: e07c b.n 0x100\n" + " 6: 0000 movs r0, r0\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6946 ldr r6, [r0, #20]\n" - " a: 60b7 str r7, [r6, #8]" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6946 ldr r6, [r0, #20]\n" + " 4: 60b7 str r7, [r6, #8]\n" + " 6: e07b b.n 0x100\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 6946 ldr r6, [r0, #20]\n" - " a: 6537 str r7, [r6, #80] ; 0x50" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 6946 ldr r6, [r0, #20]\n" + " 4: 6537 str r7, [r6, #80] ; 0x50\n" + " 6: e07b b.n 0x100\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), %% Test: Large immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {ptr, r3}, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 5678 ldrsb r0, [r7, r1]\n" - " 6: 1234 asrs r4, r6, #8\n" - " 8: 601f str r7, [r3, #0]" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 601f str r7, [r3, #0]\n" + " 4: e07c b.n 0x100\n" + " 6: 0000 movs r0, r0\n" + " 8: 5678 ldrsb r0, [r7, r1]\n" + " a: 1234 asrs r4, r6, #8" >>) end), %% Test: x_reg to y_reg (high index) @@ -2498,7 +2519,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << " 0: 6d47 ldr r7, [r0, #84] 
; 0x54\n" " 2: 6946 ldr r6, [r0, #20]\n" - " 4: 67f7 str r7, [r6, #124] ; 0x7c" + " 4: 67f7 str r7, [r6, #124] ; 0x7c\n" + " 6: e07b b.n 0x100" >>) end), %% Test: y_reg to x_reg (high index) @@ -2506,7 +2528,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << " 0: 6946 ldr r6, [r0, #20]\n" " 2: 6ff7 ldr r7, [r6, #124] ; 0x7c\n" - " 4: 6547 str r7, [r0, #84] ; 0x54" + " 4: 6547 str r7, [r0, #84] ; 0x54\n" + " 6: e07b b.n 0x100" >>) end), %% Test: Large y_reg index (32) that exceeds str immediate offset limit @@ -2516,7 +2539,8 @@ move_to_vm_register_test_() -> " 2: 6947 ldr r7, [r0, #20]\n" " 4: 2580 movs r5, #128 ; 0x80\n" " 6: 443d add r5, r7\n" - " 8: 602e str r6, [r5, #0]" + " 8: 602e str r6, [r5, #0]\n" + " a: e079 b.n 0x100" >>) end), %% Test: Negative immediate to x_reg @@ -2524,7 +2548,8 @@ move_to_vm_register_test_() -> move_to_vm_register_test0(State0, -1, {x_reg, 0}, << " 0: 2701 movs r7, #1\n" " 2: 427f negs r7, r7\n" - " 4: 6187 str r7, [r0, #24]" + " 4: 6187 str r7, [r0, #24]\n" + " 6: e07b b.n 0x100" >>) end) ] @@ -2787,11 +2812,12 @@ move_to_native_register_test_() -> %% move_to_native_register/2: -256 (boundary case, should use literal pool) ?_test(begin {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256), - Stream = ?BACKEND:stream(State1), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(r7, Reg), Dump = << " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" + " 2: e07d b.n 0x100\n" " 4: ff00 ffff vmaxnm.f32 , q8, " >>, ?assertEqual(dump_to_bin(Dump), Stream) @@ -2895,7 +2921,9 @@ move_to_native_register_test_() -> add_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:add(State0, Reg, Imm), - Stream = ?BACKEND:stream(State1), + % Force emission of literal pool + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(dump_to_bin(Dump), Stream). 
add_test_() -> @@ -2907,21 +2935,24 @@ add_test_() -> [ ?_test(begin add_test0(State0, r2, 2, << - " 0: 3202 adds r2, #2" + " 0: 3202 adds r2, #2\n" + " 2: e07d b.n 0x100" >>) end), ?_test(begin add_test0(State0, r2, 256, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 0100 lsls r0, r0, #4\n" + " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n" + " 2: 19d2 adds r2, r2, r7\n" + " 4: e07c b.n 0x100\n" " 6: 0000 movs r0, r0\n" - " 8: 19d2 adds r2, r2, r7" + " 8: 0100 lsls r0, r0, #4\n" + " a: 0000 movs r0, r0" >>) end), ?_test(begin add_test0(State0, r2, r3, << - " 0: 18d2 adds r2, r2, r3" + " 0: 18d2 adds r2, r2, r3\n" + " 2: e07d b.n 0x100" >>) end) ] @@ -2929,7 +2960,9 @@ add_test_() -> sub_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:sub(State0, Reg, Imm), - Stream = ?BACKEND:stream(State1), + % Force emission of literal pool + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), ?assertEqual(dump_to_bin(Dump), Stream). sub_test_() -> @@ -2941,21 +2974,24 @@ sub_test_() -> [ ?_test(begin sub_test0(State0, r2, 2, << - " 0: 3a02 subs r2, #2" + " 0: 3a02 subs r2, #2\n" + " 2: e07d b.n 0x100" >>) end), ?_test(begin sub_test0(State0, r2, 256, << - " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n" - " 2: e001 b.n 0x8\n" - " 4: 0100 lsls r0, r0, #4\n" + " 0: 4f01 ldr r7, [pc, #4] @ (0x8)\n" + " 2: 1bd2 subs r2, r2, r7\n" + " 4: e07c b.n 0x100\n" " 6: 0000 movs r0, r0\n" - " 8: 1bd2 subs r2, r2, r7" + " 8: 0100 lsls r0, r0, #4\n" + " a: 0000 movs r0, r0" >>) end), ?_test(begin sub_test0(State0, r2, r3, << - " 0: 1ad2 subs r2, r2, r3" + " 0: 1ad2 subs r2, r2, r3\n" + " 2: e07d b.n 0x100" >>) end) ] @@ -3256,23 +3292,36 @@ alloc_boxed_integer_fragment_large_test() -> {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ ctx, {avm_int64_t, 16#123456789ABCDEF0} ]), + % Add a call primitive last to emit literal pool + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, 
jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg} + ]), ?assertEqual(r7, ResultReg), - Stream = ?BACKEND:stream(State1), + Stream = ?BACKEND:stream(State2), Dump = << - " 0: 6bd7 ldr r7, [r2, #60] ; 0x3c\n" + " 0: 6bd7 ldr r7, [r2, #60] @ 0x3c\n" " 2: b405 push {r0, r2}\n" - " 4: 4a00 ldr r2, [pc, #0] ; (0x8)\n" - " 6: e001 b.n 0xc\n" - " 8: def0 udf #240 ; 0xf0\n" - " a: 9abc ldr r2, [sp, #752] ; 0x2f0\n" - " c: 4b00 ldr r3, [pc, #0] ; (0x10)\n" - " e: e001 b.n 0x14\n" - " 10: 5678 ldrsb r0, [r7, r1]\n" - " 12: 1234 asrs r4, r6, #8\n" - " 14: 47b8 blx r7\n" - " 16: 4607 mov r7, r0\n" - " 18: bc05 pop {r0, r2}" + " 4: 4a06 ldr r2, [pc, #24] @ (0x20)\n" + " 6: 4b07 ldr r3, [pc, #28] @ (0x24)\n" + " 8: 47b8 blx r7\n" + " a: 4607 mov r7, r0\n" + " c: bc05 pop {r0, r2}\n" + " e: 6cd6 ldr r6, [r2, #76] @ 0x4c\n" + " 10: b082 sub sp, #8\n" + " 12: 9700 str r7, [sp, #0]\n" + " 14: 9902 ldr r1, [sp, #8]\n" + " 16: 2210 movs r2, #16\n" + " 18: 4b03 ldr r3, [pc, #12] @ (0x28)\n" + " 1a: 47b0 blx r6\n" + " 1c: b002 add sp, #8\n" + " 1e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 20: def0 udf #240 @ 0xf0\n" + " 22: 9abc ldr r2, [sp, #752] @ 0x2f0\n" + " 24: 5678 ldrsb r0, [r7, r1]\n" + " 26: 1234 asrs r4, r6, #8\n" + " 28: 028b lsls r3, r1, #10\n" + " 2a: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
From cf8a7328d84d8ec7ef7ef921c55c21114b4acae0 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 17 Oct 2025 22:05:16 +0200 Subject: [PATCH 04/28] JIT: Add flushing of stream and backend Signed-off-by: Paul Guyot --- libs/estdlib/src/code_server.erl | 12 +++---- libs/jit/src/jit.erl | 7 ++-- libs/jit/src/jit_aarch64.erl | 11 ++++++ libs/jit/src/jit_armv6m.erl | 11 ++++++ libs/jit/src/jit_x86_64.erl | 11 ++++++ .../generic_unix/lib/jit_stream_mmap.c | 35 ++++++++++++++++--- 6 files changed, 73 insertions(+), 14 deletions(-) diff --git a/libs/estdlib/src/code_server.erl b/libs/estdlib/src/code_server.erl index 69aa359327..eee061efba 100644 --- a/libs/estdlib/src/code_server.erl +++ b/libs/estdlib/src/code_server.erl @@ -164,8 +164,8 @@ load(Module) -> code_server:literal_resolver(Module, Index) end, TypeResolver = fun(Index) -> code_server:type_resolver(Module, Index) end, - Stream0 = jit:stream(jit_mmap_size(byte_size(Code))), - {BackendModule, BackendState0} = jit:backend(Stream0), + {StreamModule, Stream0} = jit:stream(jit_mmap_size(byte_size(Code))), + {BackendModule, BackendState0} = jit:backend(StreamModule, Stream0), {LabelsCount, BackendState1} = jit:compile( Code, AtomResolver, @@ -174,12 +174,12 @@ load(Module) -> BackendModule, BackendState0 ), - BackendState2 = BackendModule:flush(BackendState1), - Stream1 = BackendModule:stream(BackendState2), - code_server:set_native_code(Module, LabelsCount, Stream1), + Stream1 = BackendModule:stream(BackendState1), + Stream2 = StreamModule:flush(Stream1), + code_server:set_native_code(Module, LabelsCount, Stream2), End = erlang:system_time(millisecond), io:format("~B ms (bytecode: ~B bytes, native code: ~B bytes)\n", [ - End - Start, byte_size(Code), BackendModule:offset(BackendState2) + End - Start, byte_size(Code), BackendModule:offset(BackendState1) ]) catch T:V:S -> diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index f4b57768bb..823a6b9f97 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl 
@@ -22,7 +22,7 @@ -export([ stream/1, - backend/1, + backend/2, beam_chunk_header/3, compile/6 ]). @@ -148,7 +148,8 @@ compile( }, {State1, MSt2} = first_pass(Opcodes, MMod, MSt1, State0), MSt3 = second_pass(MMod, MSt2, State1), - {LabelsCount, MSt3}; + MSt4 = MMod:flush(MSt3), + {LabelsCount, MSt4}; compile( <<16:32, 0:32, OpcodeMax:32, _LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>>, _AtomResolver, @@ -3852,7 +3853,7 @@ variant() -> %% @doc Instantiate backend for this platform %% @return A tuple with the backend module and the backend state for this platform -backend({StreamModule, Stream}) -> +backend(StreamModule, Stream) -> BackendModule = ?MODULE:backend_module(), Variant = ?MODULE:variant(), BackendState = BackendModule:new(Variant, StreamModule, Stream), diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 48f4202bee..fefa004d6d 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -25,6 +25,7 @@ new/3, stream/1, offset/1, + flush/1, debugger/1, used_regs/1, available_regs/1, @@ -259,6 +260,16 @@ stream(#state{stream = Stream}) -> offset(#state{stream_module = StreamModule, stream = Stream}) -> StreamModule:offset(Stream). +%%----------------------------------------------------------------------------- +%% @doc Flush the current state (unused on aarch64) +%% @end +%% @param State current backend state +%% @return The flushed state +%%----------------------------------------------------------------------------- +-spec flush(state()) -> state(). +flush(#state{} = State) -> + State. + %%----------------------------------------------------------------------------- %% @doc Emit a debugger of breakpoint instruction. This is used for debugging %% and not in production. 
diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 9e602c57d8..8429f8cfbd 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -25,6 +25,7 @@ new/3, stream/1, offset/1, + flush/1, debugger/1, used_regs/1, available_regs/1, @@ -273,6 +274,16 @@ stream(#state{stream = Stream}) -> offset(#state{stream_module = StreamModule, stream = Stream}) -> StreamModule:offset(Stream). +%%----------------------------------------------------------------------------- +%% @doc Flush the current state, e.g. literal pools +%% @end +%% @param State current backend state +%% @return The flushed state +%%----------------------------------------------------------------------------- +-spec flush(state()) -> state(). +flush(#state{} = State) -> + flush_literal_pool(State). + %%----------------------------------------------------------------------------- %% @doc Emit a debugger of breakpoint instruction. This is used for debugging %% and not in production. diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index cb3e5ae8a3..0c722952b3 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -25,6 +25,7 @@ new/3, stream/1, offset/1, + flush/1, debugger/1, used_regs/1, available_regs/1, @@ -244,6 +245,16 @@ stream(#state{stream = Stream}) -> offset(#state{stream_module = StreamModule, stream = Stream}) -> StreamModule:offset(Stream). +%%----------------------------------------------------------------------------- +%% @doc Flush the current state (unused on x86-64) +%% @end +%% @param State current backend state +%% @return The flushed state +%%----------------------------------------------------------------------------- +-spec flush(state()) -> state(). +flush(#state{} = State) -> + State. + %%----------------------------------------------------------------------------- %% @doc Emit a debugger of breakpoint instruction. This is used for debugging %% and not in production. 
diff --git a/src/platforms/generic_unix/lib/jit_stream_mmap.c b/src/platforms/generic_unix/lib/jit_stream_mmap.c index f246a9791d..096cfe7faf 100644 --- a/src/platforms/generic_unix/lib/jit_stream_mmap.c +++ b/src/platforms/generic_unix/lib/jit_stream_mmap.c @@ -193,6 +193,28 @@ static term nif_jit_stream_mmap_read(Context *ctx, int argc, term argv[]) return term_from_literal_binary(js_obj->stream_base + offset, len, &ctx->heap, ctx->global); } +static term nif_jit_stream_mmap_flush(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_mmap_resource_type, &js_obj_ptr))) { + RAISE_ERROR(BADARG_ATOM); + } + struct JITStreamMMap *js_obj = (struct JITStreamMMap *) js_obj_ptr; + if (IS_NULL_PTR(js_obj->stream_base)) { + RAISE_ERROR(BADARG_ATOM); + } + +#if defined(__APPLE__) + sys_icache_invalidate(js_obj->stream_base, js_obj->stream_size); +#elif defined(__GNUC__) + __builtin___clear_cache(js_obj->stream_base, js_obj->stream_base + js_obj->stream_size); +#endif + + return argv[0]; +} + static term nif_jit_stream_module(Context *ctx, int argc, term argv[]) { UNUSED(argc); @@ -226,6 +248,10 @@ static const struct Nif jit_stream_mmap_read_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_jit_stream_mmap_read }; +static const struct Nif jit_stream_mmap_flush_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_mmap_flush +}; ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) { @@ -239,11 +265,6 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) return NULL; } -#if defined(__APPLE__) - sys_icache_invalidate(js_obj->stream_base, js_obj->stream_size); -#elif defined(__GNUC__) - __builtin___clear_cache(js_obj->stream_base, js_obj->stream_base + js_obj->stream_size); -#endif #if JIT_ARCH_TARGET == JIT_ARCH_ARMV6M // Set thumb bit for armv6m ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) 
js_obj->stream_base + 1; @@ -251,6 +272,7 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) js_obj->stream_base; #endif + // Prevent module from being unmapped by dtor js_obj->stream_base = NULL; return result; } @@ -291,6 +313,9 @@ const struct Nif *jit_stream_mmap_get_nif(const char *nifname) if (strcmp("read/3", rest) == 0) { return &jit_stream_mmap_read_nif; } + if (strcmp("flush/1", rest) == 0) { + return &jit_stream_mmap_flush_nif; + } } return NULL; } From 32d03861a11d1138b7f797c7621316f3e6889a57 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 17 Oct 2025 22:07:53 +0200 Subject: [PATCH 05/28] armv6m: fix all replacements placeholders to FF This is required as flash chips are programmed by setting bits from 1 to 0, and erased by setting all bits to 1, so placeholders set to FF can be replaced without erasing a whole block. Signed-off-by: Paul Guyot --- libs/jit/src/jit_armv6m.erl | 190 +++++++++++++--------------- tests/libs/jit/jit_armv6m_tests.erl | 58 ++++----- 2 files changed, 113 insertions(+), 135 deletions(-) diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 8429f8cfbd..040b0a668b 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -78,7 +78,8 @@ -include("primitives.hrl"). -include("term.hrl"). --define(ASSERT(Expr), true = Expr). +%-define(ASSERT(Expr), true = Expr). +-define(ASSERT(Expr), ok). %% ARMv6-M AAPCS32 ABI: r0-r3 are used for argument passing and return value. %% r0-r1 form a double-word for 64-bit returns, additional args passed on stack. 
@@ -395,7 +396,7 @@ jump_table0( I3 = jit_armv6m_asm:add(pc, r3), I4 = jit_armv6m_asm:nop(), - JumpEntry = <>, + JumpEntry = <>, Stream1 = StreamModule:append(Stream0, JumpEntry), % Add relocation for the data entry so update_branches/2 can patch the jump target @@ -850,7 +851,7 @@ branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> branch_to_label_code( #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false ) -> - {CodeBlock, SequenceSize} = + SequenceSize = if Offset rem 4 =:= 0 -> % Aligned @@ -862,7 +863,7 @@ branch_to_label_code( % Placeholder offset I5 = <<0:32/little>>, Seq = <>, - {Seq, byte_size(Seq)}; + byte_size(Seq); true -> % Unaligned I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}), @@ -871,16 +872,17 @@ branch_to_label_code( % Placeholder offset I4 = <<0:32/little>>, Seq = <>, - {Seq, byte_size(Seq)} + byte_size(Seq) end, % Add relocation entry + CodeBlock = binary:copy(<<16#FF>>, SequenceSize), Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; branch_to_label_code( #state{available_regs = [], branches = Branches} = State0, Offset, Label, false ) -> - {CodeBlock, SequenceSize} = + SequenceSize = if Offset rem 4 =/= 0 -> % Unaligned @@ -898,7 +900,7 @@ branch_to_label_code( Seq = <>, - {Seq, byte_size(Seq)}; + byte_size(Seq); true -> % Aligned I1 = jit_armv6m_asm:push([r0]), @@ -912,9 +914,10 @@ branch_to_label_code( I7 = <<0:32/little>>, Seq = <>, - {Seq, byte_size(Seq)} + byte_size(Seq) end, % Add relocation entry + CodeBlock = binary:copy(<<16#FF>>, SequenceSize), Reloc = {Label, Offset, {far_branch, SequenceSize, ?IP_REG}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; @@ -998,7 +1001,8 @@ if_else_block( Stream2 = State2#state.stream, %% Emit unconditional branch to skip the else block (will be replaced) ElseJumpOffset = StreamModule:offset(Stream2), - ElseJumpInstr = jit_armv6m_asm:b(0), 
+ ?ASSERT(byte_size(jit_armv6m_asm:b(0)) =:= 2), + ElseJumpInstr = <<16#FFFF:16>>, Stream3 = StreamModule:append(Stream2, ElseJumpInstr), %% Else block starts here. OffsetAfter = StreamModule:offset(Stream3), @@ -1031,24 +1035,22 @@ if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, { %% Compare register with 0 I1 = jit_armv6m_asm:cmp(Reg, 0), %% Branch if positive (N flag clear) - I2 = jit_armv6m_asm:bcc(pl, 0), - Stream1 = StreamModule:append(Stream0, <>), + CC = pl, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(pl, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = State0#state{stream = Stream1}, - {State1, pl, byte_size(I1)}; + {State1, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', Val} ) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 -> I1 = jit_armv6m_asm:cmp(Reg, Val), % ge = greater than or equal - I2 = jit_armv6m_asm:bcc(ge, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + CC = ge, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = State0#state{stream = Stream1}, - {State1, ge, byte_size(I1)}; + {State1, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, available_regs = [Temp | _]} = State0, {Reg, '<', Val} @@ -1057,14 +1059,11 @@ if_block_cond( Stream0 = State1#state.stream, I1 = jit_armv6m_asm:cmp(Reg, Temp), % ge = greater than or equal - I2 = jit_armv6m_asm:bcc(ge, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + CC = ge, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State2 = State1#state{stream = Stream1}, - {State2, ge, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '<', RegB} @@ -1076,15 +1075,12 @@ if_block_cond( end, 
I1 = jit_armv6m_asm:cmp(Reg, RegB), % ge = greater than or equal - I2 = jit_armv6m_asm:bcc(ge, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + CC = ge, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, ge, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0} ) -> @@ -1096,11 +1092,12 @@ if_block_cond( %% Compare register with 0 I1 = jit_armv6m_asm:cmp(Reg, 0), %% Branch if not equal - I2 = jit_armv6m_asm:bcc(ne, 0), - Stream1 = StreamModule:append(Stream0, <>), + CC = ne, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, ne, byte_size(I1)}; + {State2, CC, byte_size(I1)}; %% Delegate (int) forms to regular forms since we only have 32-bit words if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) -> if_block_cond(State, {RegOrTuple, '==', 0}); @@ -1116,15 +1113,12 @@ if_block_cond( RegOrTuple -> RegOrTuple end, I1 = jit_armv6m_asm:cmp(Reg, Val), - I2 = jit_armv6m_asm:bcc(eq, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, eq, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) -> if_block_cond(State, {RegOrTuple, '!=', Val}); if_block_cond( @@ -1137,28 +1131,25 @@ if_block_cond( RegOrTuple -> RegOrTuple end, I1 = jit_armv6m_asm:cmp(Reg, Val), - I2 = jit_armv6m_asm:bcc(ne, 0), - Code = << - I1/binary, - 
I2/binary - >>, - Stream1 = StreamModule:append(Stream0, Code), + CC = ne, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, ne, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {{free, RegA}, '==', {free, RegB}} ) -> % Compare two free registers: cmp RegA, RegB; beq I1 = jit_armv6m_asm:cmp(RegA, RegB), - Stream1 = StreamModule:append(Stream0, I1), - I2 = jit_armv6m_asm:bcc(ne, 0), - Stream2 = StreamModule:append(Stream1, I2), - State1 = State0#state{stream = Stream2}, + CC = ne, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), + State1 = State0#state{stream = Stream1}, State2 = if_block_free_reg({free, RegA}, State1), State3 = if_block_free_reg({free, RegB}, State2), - {State3, ne, byte_size(I1)}; + {State3, CC, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, {RegOrTuple, '==', Val} @@ -1173,15 +1164,12 @@ if_block_cond( Stream1 = State1#state.stream, Offset1 = StreamModule:offset(Stream1), I1 = jit_armv6m_asm:cmp(Reg, Temp), - I2 = jit_armv6m_asm:bcc(ne, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream2 = StreamModule:append(Stream1, Code), + CC = ne, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream2 = StreamModule:append(Stream1, <>), State2 = if_block_free_reg(RegOrTuple, State1), State3 = State2#state{stream = Stream2}, - {State3, ne, Offset1 - Offset0 + byte_size(I1)}; + {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, {RegOrTuple, '!=', Val} @@ -1196,15 +1184,12 @@ if_block_cond( Stream1 = State1#state.stream, Offset1 = StreamModule:offset(Stream1), I1 = jit_armv6m_asm:cmp(Reg, 
Temp), - I2 = jit_armv6m_asm:bcc(eq, 0), - Code = << - I1/binary, - I2/binary - >>, - Stream2 = StreamModule:append(Stream1, Code), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream2 = StreamModule:append(Stream1, <>), State2 = if_block_free_reg(RegOrTuple, State1), State3 = State2#state{stream = Stream2}, - {State3, eq, Offset1 - Offset0 + byte_size(I1)}; + {State3, CC, Offset1 - Offset0 + byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1221,12 +1206,12 @@ if_block_cond( % Test bit 0: shift bit 0 to MSB and branch if positive (bit was 0/false) I1 = jit_armv6m_asm:lsls(Temp, Reg, 31), % branch if negative (bit was 1/true) - I2 = jit_armv6m_asm:bcc(mi, 0), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), + CC = mi, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, mi, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1243,12 +1228,12 @@ if_block_cond( % Test bit 0: shift bit 0 to MSB and branch if negative (bit was 1/true) I1 = jit_armv6m_asm:lsls(Temp, Reg, 31), % branch if positive (bit was 0/false) - I2 = jit_armv6m_asm:bcc(pl, 0), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), + CC = pl, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, - {State2, pl, byte_size(I1)}; + {State2, CC, byte_size(I1)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1277,8 +1262,8 @@ if_block_cond( TestCode1 = jit_armv6m_asm:tst(Reg, Temp), {<>, eq} end, - I2 = jit_armv6m_asm:bcc(BranchCond, 0), - Code = <>, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(BranchCond, 0)) =:= 2), + Code = <>, Stream1 = StreamModule:append(Stream0, Code), State1 = 
if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, @@ -1295,10 +1280,11 @@ if_block_cond( I1 = jit_armv6m_asm:mvns(Temp, Reg), % 32 - 4 I2 = jit_armv6m_asm:lsls(Temp, Temp, 28), - I3 = jit_armv6m_asm:bcc(eq, 0), - Stream1 = StreamModule:append(Stream0, <>), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = State0#state{stream = Stream1}, - {State1, eq, byte_size(I1) + byte_size(I2)}; + {State1, CC, byte_size(I1) + byte_size(I2)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1310,11 +1296,12 @@ if_block_cond( I1 = jit_armv6m_asm:mvns(Reg, Reg), % 32 - 4 I2 = jit_armv6m_asm:lsls(Reg, Reg, 28), - I3 = jit_armv6m_asm:bcc(eq, 0), - Stream1 = StreamModule:append(Stream0, <>), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream1 = StreamModule:append(Stream0, <>), State1 = State0#state{stream = Stream1}, State2 = if_block_free_reg(RegTuple, State1), - {State2, eq, byte_size(I1) + byte_size(I2)}; + {State2, CC, byte_size(I1) + byte_size(I2)}; if_block_cond( #state{ stream_module = StreamModule, @@ -1334,10 +1321,11 @@ if_block_cond( I2 = jit_armv6m_asm:cmp(Temp, Val), Stream3 = StreamModule:append(Stream2, I2), OffsetAfter = StreamModule:offset(Stream3), - I3 = jit_armv6m_asm:bcc(eq, 0), - Stream4 = StreamModule:append(Stream3, I3), + CC = eq, + ?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream4 = StreamModule:append(Stream3, <<16#FFFF:16>>), State3 = State2#state{stream = Stream4, available_regs = [Temp | State2#state.available_regs]}, - {State3, eq, OffsetAfter - OffsetBefore}; + {State3, CC, OffsetAfter - OffsetBefore}; if_block_cond( #state{ stream_module = StreamModule, @@ -1353,11 +1341,12 @@ if_block_cond( I2 = jit_armv6m_asm:cmp(Reg, Val), Stream2 = StreamModule:append(Stream1, I2), OffsetAfter = StreamModule:offset(Stream2), - I3 = jit_armv6m_asm:bcc(eq, 0), - Stream3 = StreamModule:append(Stream2, I3), + CC = eq, + 
?ASSERT(byte_size(jit_armv6m_asm:bcc(CC, 0)) =:= 2), + Stream3 = StreamModule:append(Stream2, <<16#FFFF:16>>), State3 = State1#state{stream = Stream3}, State4 = if_block_free_reg(RegTuple, State3), - {State4, eq, OffsetAfter - OffsetBefore}. + {State4, CC, OffsetAfter - OffsetBefore}. -spec if_block_free_reg(armv6m_register() | {free, armv6m_register()}, state()) -> state(). if_block_free_reg({free, Reg}, State0) -> @@ -1812,7 +1801,7 @@ set_registers_args0( set_registers_args0( State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset ) -> - false = lists:member(?CTX_REG, ArgsRegs), + ?ASSERT(not lists:member(?CTX_REG, ArgsRegs)), State1 = set_registers_args1(State, Arg, ?CTX_REG, StackOffset), set_registers_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); set_registers_args0( @@ -2477,7 +2466,8 @@ set_continuation_to_offset( ) -> OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), - I1 = jit_armv6m_asm:adr(Temp, 4), + ?ASSERT(byte_size(jit_armv6m_asm:adr(Temp, 4)) =:= 2), + I1 = <<16#FFFF:16>>, Reloc = {OffsetRef, Offset, {adr, Temp}}, % Set thumb bit (LSB = 1) by adding 1 to the 4-byte aligned address I2 = jit_armv6m_asm:adds(Temp, Temp, 1), @@ -2652,8 +2642,8 @@ mov_immediate( #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State, Reg, Val ) -> LdrInstructionAddr = StreamModule:offset(Stream0), - I1 = jit_armv6m_asm:ldr(Reg, {pc, 0}), - Stream1 = StreamModule:append(Stream0, <>), + ?ASSERT(byte_size(jit_armv6m_asm:ldr(Reg, {pc, 0})) =:= 2), + Stream1 = StreamModule:append(Stream0, <<16#FFFF:16>>), State#state{stream = Stream1, literal_pool = [{LdrInstructionAddr, Reg, Val} | LP]}. 
flush_literal_pool(#state{literal_pool = []} = State) -> @@ -2792,10 +2782,12 @@ decrement_reductions_and_maybe_schedule_next( Stream1 = StreamModule:append(Stream0, <>), BNEOffset = StreamModule:offset(Stream1), % Branch if reduction count is not zero - I4 = jit_armv6m_asm:bcc(ne, 0), + ?ASSERT(byte_size(jit_armv6m_asm:bcc(ne, 0)) =:= 2), + I4 = <<16#FFFF:16>>, % Set continuation to the next instruction ADROffset = BNEOffset + byte_size(I4), - I5 = jit_armv6m_asm:adr(Temp, 4), + ?ASSERT(byte_size(jit_armv6m_asm:adr(Temp, 4)) =:= 2), + I5 = <<16#FFFF:16>>, I6 = jit_armv6m_asm:adds(Temp, Temp, 1), I7 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)), % Append the instructions to the stream @@ -2928,8 +2920,8 @@ set_cp(State0) -> Offset = StreamModule:offset(Stream0), % build cp with module_index << 24 I1 = jit_armv6m_asm:lsls(Reg, Reg, 24), - % Emit a single nop as placeholder for offset load instruction - I2 = jit_armv6m_asm:nop(), + % Placeholder for offset load instruction + I2 = <<16#FFFF:16>>, MOVOffset = Offset + byte_size(I1), % OR the module index with the offset (loaded in temp register) I3 = jit_armv6m_asm:orrs(Reg, TempReg), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 4f78d06163..10b577cc74 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -32,7 +32,7 @@ -define(BACKEND, jit_armv6m). 
% disassembly obtained with: -% arm-elf-objdump -b binary -D dump.bin -M arm +% arm-elf-objdump -D -b binary -marm --disassembler-options=force-thumb -z call_primitive_0_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -542,7 +542,7 @@ if_block_test_() -> " 2: 69c6 ldr r6, [r0, #28]\n" " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n" " 6: da01 bge.n 0xc\n" - " 8: dafe bge.n 0x8\n" + " 8: ffff ; to be rewritten\n" " a: 3602 adds r6, #2\n" " c: e078 b.n 0x100\n" " e: 0000 movs r0, r0\n" @@ -1925,61 +1925,47 @@ wait_timeout_test() -> wait_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), - State1 = ?BACKEND:jump_table(State0, 5), + State1 = ?BACKEND:jump_table(State0, 2), State2 = ?BACKEND:add_label(State1, 1), Label = 2, State3 = ?BACKEND:set_continuation_to_label(State2, Label), State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State5 = ?BACKEND:add_label(State4, 2), + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:update_branches(State6), - Stream = ?BACKEND:stream(State4), + Stream = ?BACKEND:stream(State7), Dump = << " 0: 4b01 ldr r3, [pc, #4] ; (0x8)\n" " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 4: 449f add pc, r3\n" " 6: 46c0 nop ; (mov r8, r8)\n" - " 8: 0000 movs r0, r0\n" + " 8: 0034 movs r4, r6\n" " a: 0000 movs r0, r0\n" " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n" " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 10: 449f add pc, r3\n" " 12: 46c0 nop ; (mov r8, r8)\n" - " 14: 0000 movs r0, r0\n" + " 14: 0010 movs r0, r2\n" " 16: 0000 movs r0, r0\n" " 18: 4b01 ldr r3, [pc, #4] ; (0x20)\n" " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" " 1c: 449f add pc, r3\n" " 1e: 46c0 nop ; (mov r8, r8)\n" - " 20: 0000 movs r0, r0\n" + " 20: 001c movs r4, r3\n" " 22: 0000 movs r0, r0\n" - " 24: 4b01 ldr r3, [pc, #4] ; (0x2c)\n" - " 26: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 28: 449f add pc, r3\n" - " 2a: 46c0 nop ; (mov r8, r8)\n" - " 2c: 0000 
movs r0, r0\n" - " 2e: 0000 movs r0, r0\n" - " 30: 4b01 ldr r3, [pc, #4] ; (0x38)\n" - " 32: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 34: 449f add pc, r3\n" - " 36: 46c0 nop ; (mov r8, r8)\n" - " 38: 0000 movs r0, r0\n" - " 3a: 0000 movs r0, r0\n" - " 3c: 4b01 ldr r3, [pc, #4] ; (0x44)\n" - " 3e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" - " 40: 449f add pc, r3\n" - " 42: 46c0 nop ; (mov r8, r8)\n" - " 44: 0000 movs r0, r0\n" - " 46: 0000 movs r0, r0\n" - " 48: a700 add r7, pc, #0 ; (adr r7, 0x4c)\n" - " 4a: 2633 movs r6, #51 ; 0x33\n" - " 4c: 4276 negs r6, r6\n" - " 4e: 19f6 adds r6, r6, r7\n" - " 50: 9f00 ldr r7, [sp, #0]\n" - " 52: 607e str r6, [r7, #4]\n" - " 54: 6f57 ldr r7, [r2, #116] ; 0x74\n" - " 56: 9e05 ldr r6, [sp, #20]\n" - " 58: 9705 str r7, [sp, #20]\n" - " 5a: 46b6 mov lr, r6\n" - " 5c: bdf2 pop {r1, r4, r5, r6, r7, pc}" + " 24: a700 add r7, pc, #0 ; (adr r7, 0x28)\n" + " 26: 260f movs r6, #15\n" + " 28: 4276 negs r6, r6\n" + " 2a: 19f6 adds r6, r6, r7\n" + " 2c: 9f00 ldr r7, [sp, #0]\n" + " 2e: 607e str r6, [r7, #4]\n" + " 30: 6f57 ldr r7, [r2, #116] ; 0x74\n" + " 32: 9e05 ldr r6, [sp, #20]\n" + " 34: 9705 str r7, [sp, #20]\n" + " 36: 46b6 mov lr, r6\n" + " 38: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 3a: 46c0 nop ; (mov r8, r8)" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
From 53c6a92351e08718cad9c27cfd53fdbd0b3a1221 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 17 Oct 2025 22:18:08 +0200 Subject: [PATCH 06/28] Update avmpack API to return end section Signed-off-by: Paul Guyot --- src/libAtomVM/avmpack.c | 5 +++-- src/libAtomVM/avmpack.h | 4 +++- src/libAtomVM/nifs.c | 2 +- src/platforms/emscripten/src/main.c | 2 +- src/platforms/esp32/main/main.c | 2 +- src/platforms/generic_unix/main.c | 2 +- src/platforms/rp2/src/main.c | 5 ++++- src/platforms/stm32/src/main.c | 2 +- 8 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/libAtomVM/avmpack.c b/src/libAtomVM/avmpack.c index 075dc94966..624fa82493 100644 --- a/src/libAtomVM/avmpack.c +++ b/src/libAtomVM/avmpack.c @@ -35,6 +35,7 @@ static inline int pad(int size) bool avmpack_is_valid(const void *avmpack_binary, uint32_t size) { + // "#!/usr/bin/env AtomVM" const unsigned char pack_header[AVMPACK_SIZE] = { 0x23, 0x21, 0x2f, 0x75, 0x73, 0x72, 0x2f, 0x62, @@ -51,7 +52,7 @@ bool avmpack_is_valid(const void *avmpack_binary, uint32_t size) return memcmp(avmpack_binary, pack_header, AVMPACK_SIZE) == 0; } -int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, const void **ptr, uint32_t *size, const char **name) +int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, uint32_t flags_val, const void **ptr, uint32_t *size, const char **name) { int offset = AVMPACK_SIZE; const uint32_t *flags; @@ -60,7 +61,7 @@ int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask const uint32_t *sizes = ((const uint32_t *) (avmpack_binary)) + offset / sizeof(uint32_t); flags = ((const uint32_t *) (avmpack_binary)) + 1 + offset / sizeof(uint32_t); - if ((ENDIAN_SWAP_32(*flags) & flags_mask) == flags_mask) { + if ((ENDIAN_SWAP_32(*flags) & flags_mask) == flags_val) { const char *found_section_name = (const char *) (sizes + 3); int section_name_len = pad(strlen(found_section_name) + 1); diff --git 
a/src/libAtomVM/avmpack.h b/src/libAtomVM/avmpack.h index 85ce9562d9..a68e6e3378 100644 --- a/src/libAtomVM/avmpack.h +++ b/src/libAtomVM/avmpack.h @@ -39,6 +39,7 @@ extern "C" { #define END_OF_FILE 0 #define BEAM_START_FLAG 1 #define BEAM_CODE_FLAG 2 +#define END_OF_FILE_MASK 255 struct AVMPackData; @@ -104,12 +105,13 @@ typedef void *(*avmpack_fold_fun)(void *accum, const void *section_ptr, uint32_t * @details Finds an AVM Pack section that has certain flags set and returns a pointer to it, its size and its name. * @param avmpack_binary a pointer to valid AVM Pack file data. * @param flags_mask that will be matched against file sections. + * @param flags_value that will be matched against file sections. * @param ptr will point to the found file section. * @param size will be set to the file section size that has been found, if the section has not been found it will not be updated. * @param name the section name, as defined in the module header. * @returns 1 if the file section has been found, 0 otherwise. */ -int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, const void **ptr, uint32_t *size, const char **name); +int avmpack_find_section_by_flag(const void *avmpack_binary, uint32_t flags_mask, uint32_t flags_value, const void **ptr, uint32_t *size, const char **name); /** * @brief Finds an AVM Pack section that has certain name. 
diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index e3d76eba41..328216b1c3 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -4806,7 +4806,7 @@ static term nif_atomvm_get_start_beam(Context *ctx, int argc, term argv[]) uint32_t size; const void *beam; const char *module_name; - if (!avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, &beam, &size, &module_name)) { + if (!avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, BEAM_START_FLAG, &beam, &size, &module_name)) { synclist_unlock(&ctx->global->avmpack_data); if (UNLIKELY(memory_ensure_free(ctx, TUPLE_SIZE(2)) != MEMORY_GC_OK)) { RAISE_ERROR(OUT_OF_MEMORY_ATOM); diff --git a/src/platforms/emscripten/src/main.c b/src/platforms/emscripten/src/main.c index 27e02c3a67..c8c00dc07a 100644 --- a/src/platforms/emscripten/src/main.c +++ b/src/platforms/emscripten/src/main.c @@ -59,7 +59,7 @@ static int load_module(const char *path) const void *startup_beam = NULL; uint32_t startup_beam_size; const char *startup_module_name; - avmpack_find_section_by_flag(avmpack_data->data, 1, &startup_beam, &startup_beam_size, &startup_module_name); + avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name); if (startup_beam) { avmpack_data->in_use = true; main_module = module_new_from_iff_binary(global, startup_beam, startup_beam_size); diff --git a/src/platforms/esp32/main/main.c b/src/platforms/esp32/main/main.c index bc25c82c64..ca6d02e2f3 100644 --- a/src/platforms/esp32/main/main.c +++ b/src/platforms/esp32/main/main.c @@ -98,7 +98,7 @@ void app_main() ESP_LOGE(TAG, "Invalid startup avmpack. 
size=%u", size); AVM_ABORT(); } - if (!avmpack_find_section_by_flag(startup_avm, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { + if (!avmpack_find_section_by_flag(startup_avm, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { ESP_LOGE(TAG, "Error: Failed to locate start module in startup partition. (Did you flash a library by mistake?)"); AVM_ABORT(); } diff --git a/src/platforms/generic_unix/main.c b/src/platforms/generic_unix/main.c index f45fd7f14f..6908aa50f8 100644 --- a/src/platforms/generic_unix/main.c +++ b/src/platforms/generic_unix/main.c @@ -104,7 +104,7 @@ int main(int argc, char **argv) const void *startup_beam = NULL; const char *startup_module_name; uint32_t startup_beam_size; - avmpack_find_section_by_flag(avmpack_data->data, 1, &startup_beam, &startup_beam_size, &startup_module_name); + avmpack_find_section_by_flag(avmpack_data->data, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name); if (startup_beam) { avmpack_data->in_use = true; diff --git a/src/platforms/rp2/src/main.c b/src/platforms/rp2/src/main.c index e25e1398d0..0733c85e63 100644 --- a/src/platforms/rp2/src/main.c +++ b/src/platforms/rp2/src/main.c @@ -87,9 +87,12 @@ static int app_main() if (!avmpack_is_valid(MAIN_AVM, XIP_SRAM_BASE - (uintptr_t) MAIN_AVM)) { sleep_ms(5000); fprintf(stderr, "Fatal error: invalid main.avm packbeam\n"); + if (avmpack_is_valid(LIB_AVM, (uintptr_t) MAIN_AVM - (uintptr_t) LIB_AVM)) { + fprintf(stderr, "Lib avm packbeam is valid, though\n"); + } AVM_ABORT(); } - if (!avmpack_find_section_by_flag(MAIN_AVM, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { + if (!avmpack_find_section_by_flag(MAIN_AVM, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { sleep_ms(5000); fprintf(stderr, "Fatal error: Failed to locate start module in main.avm packbeam. 
(Did you flash a library by mistake?)"); AVM_ABORT(); diff --git a/src/platforms/stm32/src/main.c b/src/platforms/stm32/src/main.c index 7febe37717..689aa925f1 100644 --- a/src/platforms/stm32/src/main.c +++ b/src/platforms/stm32/src/main.c @@ -248,7 +248,7 @@ int main() port_driver_init_all(glb); nif_collection_init_all(glb); - if (!avmpack_is_valid(flashed_avm, size) || !avmpack_find_section_by_flag(flashed_avm, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { + if (!avmpack_is_valid(flashed_avm, size) || !avmpack_find_section_by_flag(flashed_avm, BEAM_START_FLAG, BEAM_START_FLAG, &startup_beam, &startup_beam_size, &startup_module_name)) { AVM_LOGE(TAG, "Invalid AVM Pack"); AVM_ABORT(); } From 480fa7f25bcb3866836c7741055cfdfbbd0b20f9 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 17 Oct 2025 22:21:03 +0200 Subject: [PATCH 07/28] pico: add support for flash-based JIT stream Introduce jit_stream_flash.c common implementation that leverages (common) flash behavior that can be written from 1 to 0. 
Signed-off-by: Paul Guyot --- .github/workflows/build-and-test.yaml | 13 + doc/src/atomvm-internals.md | 23 +- src/libAtomVM/jit_stream_flash.c | 932 ++++++++++++++++++ src/libAtomVM/jit_stream_flash.h | 129 +++ src/libAtomVM/module.c | 7 + src/libAtomVM/nifs.c | 6 +- src/libAtomVM/sys.h | 27 + src/platforms/esp32/partitions.csv | 5 +- src/platforms/generic_unix/lib/sys.c | 20 + src/platforms/rp2/src/CMakeLists.txt | 4 + src/platforms/rp2/src/lib/CMakeLists.txt | 13 +- .../rp2/src/lib/jit_stream_flash_platform.c | 117 +++ .../rp2/src/lib/jit_stream_flash_platform.h | 40 + src/platforms/rp2/src/lib/smp.c | 3 + tests/CMakeLists.txt | 12 + .../jit_stream_flash_platform.h | 26 +- tests/test-jit_stream_flash.c | 858 ++++++++++++++++ 17 files changed, 2219 insertions(+), 16 deletions(-) create mode 100644 src/libAtomVM/jit_stream_flash.c create mode 100644 src/libAtomVM/jit_stream_flash.h create mode 100644 src/platforms/rp2/src/lib/jit_stream_flash_platform.c create mode 100644 src/platforms/rp2/src/lib/jit_stream_flash_platform.h rename src/platforms/rp2/src/lib/jit_stream_flash.c => tests/jit_stream_flash_platform.h (69%) create mode 100644 tests/test-jit_stream_flash.c diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 4c2c066994..610db683b1 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -526,6 +526,19 @@ jobs: ulimit -c unlimited ./tests/test-heap + - name: "Test: test-jit_stream_flash with valgrind" + if: matrix.library-arch == '' + working-directory: build + run: | + ulimit -c unlimited + valgrind --error-exitcode=1 ./tests/test-jit_stream_flash + + - name: "Test: test-jit_stream_flash" + working-directory: build + run: | + ulimit -c unlimited + ./tests/test-jit_stream_flash + - name: "Test: test-mailbox with valgrind" if: matrix.library-arch == '' working-directory: build diff --git a/doc/src/atomvm-internals.md b/doc/src/atomvm-internals.md index 
60e2919b02..62ad888a71 100644 --- a/doc/src/atomvm-internals.md +++ b/doc/src/atomvm-internals.md @@ -137,7 +137,7 @@ Following BEAM, there are two flavors of the emulator: jit and emu, but eventual - Native: the VM only runs native code and all code must be precompiled on the desktop using the JIT compiler (which effectively is a AOT or Ahead-of-Time compiler). In this mode, it is not necessary to bundle the jit compiler on the embedded target. - Hybrid: the VM can run native code as well as emulated BEAM code and some code is precompiled on the desktop. -JIT is available on some platforms (currently only x86_64 and aarch64) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted. +JIT is available on some platforms (currently only x86_64, aarch64 and armv6m) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted. Modules can include precompiled code in a dedicated beam chunk with name 'avmN'. The chunk can contain native code for several architectures, however it may only contain native code for a given version of the native interface. Current version is 1. This native code is executed by the jit-flavor of the emulator as well as the emu flavor if execution of precompiled is enabled. @@ -158,6 +158,27 @@ A backend implementation is required for each architecture. The backend is calle A stream implementation is responsible for streaming the machine code, especially in the context of low memory. Two implementations currently exist: `jit_stream_binary` that streams assembly code to an Erlang binary, suitable for tests and precompilation on the desktop, and `jit_stream_mmap` that streams assembly code in an `mmap(2)` allocated page, suitable for JIT compilation on Unix. 
+### Embedded JIT and Native + +On embedded devices, Native mode means the code is precompiled on the desktop and executed natively on the device. This currently works on all ARMv6M devices (Pico and STM32). + +The default partition scheme on all platforms is optimized for the Emulated VM which is larger than the JIT or Native VM, and for the Emulated atomvmlib (with no native code for estdlib and no jit library) which is smaller than the JIT atomvmlib (that includes native code for estdlib and jit library). + +JIT mode means the Erlang bytecode is compiled to native code directly on the device. This actually is possible on Raspberry Pi Pico by using the flash to store the native code. The first time the code is executed, it is compiled and streamed to flash, and for next runs (including at a future boot), the native code is directly executed. + +To achieve embedded JIT, it is required to flash the device with the JIT compiler for armv6m which is part of the jit library. This library is quite large, so for Pico boards that come with 2MB of flash, it is required to remove jit modules for other backends. It is also required to change the way code is partitioned. + +For example, it is possible to have the following offsets defined in `src/platforms/rp2/src/main.c`: + +``` +#define LIB_AVM ((void *) 0x10060000) +#define MAIN_AVM ((void *) 0x101B0000) +``` + +To fit in the lib partition, all networking modules should also be removed (the Pico doesn't have any networking capacity). + +After the first run, compiled modules in flash are used unless there is a version mismatch or the application avm or the library avm have been updated on the device. AVM packages end with a section called "end" (0x656E64). When the JIT compiler flashes native code, it changes this name to "END" (0x454E44), by effectively clearing 3 bits in the flash, which is possible without erasing any flash block. Any rewrite of these avm packages will overwrite the section names to "end". 
+ ## The Scheduler In SMP builds, AtomVM runs one scheduler thread per core. Scheduler threads are actually started on demand. The number of scheduler threads can be queried with [`erlang:system_info/1`](./apidocs/erlang/estdlib/erlang.md#system_info1) and be modified with [`erlang:system_flag/2`](./apidocs/erlang/estdlib/erlang.md#system_flag2). All scheduler threads are considered equal and there is no notion of main thread except when shutting down (main thread is shut down last). diff --git a/src/libAtomVM/jit_stream_flash.c b/src/libAtomVM/jit_stream_flash.c new file mode 100644 index 0000000000..cbe4aaee1e --- /dev/null +++ b/src/libAtomVM/jit_stream_flash.c @@ -0,0 +1,932 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "jit_stream_flash.h" + +#include "avmpack.h" +#include "context.h" +#include "defaultatoms.h" +#include "erl_nif.h" +#include "erl_nif_priv.h" +#include "globalcontext.h" +#include "jit.h" +#include "module.h" +#include "nifs.h" +#include "platform_defaultatoms.h" +#include "sys.h" +#include "term.h" + +#include +#include +#include +#include + +// #define ENABLE_TRACE +#include "trace.h" + +#define JIT_ENTRY_MAGIC 0x4A74 + +#ifdef ENABLE_TRACE +// Simple CRC32 for verification +static uint32_t crc32(const uint8_t *data, size_t len) +{ + uint32_t crc = 0xFFFFFFFF; + for (size_t i = 0; i < len; i++) { + crc ^= data[i]; + for (int j = 0; j < 8; j++) { + crc = (crc >> 1) ^ (0xEDB88320 & -(crc & 1)); + } + } + return ~crc; +} +#endif + +/** + * @brief JIT entry header stored in flash + * + * Each compiled module has an entry with this header followed by the native code. + */ +struct JITEntry +{ + uint16_t magic; ///< Magic number (JIT_ENTRY_MAGIC) or 0xFFFF for free space + uint16_t version; ///< Module version + uint32_t code; ///< Pointer to original BEAM code (32-bit for flash storage) + uint32_t labels; ///< Number of labels + uint32_t size; ///< Size of native code in bytes +} __attribute__((packed)); + +_Static_assert(sizeof(struct JITEntry) == 16, "sizeof(struct JITEntry) must be 16"); + +/** + * @brief JIT stream flash state + * + * Maintains the state for writing JIT code to flash with page buffering. 
+ */ +struct JITStreamFlash +{ + struct JITEntry *jit_entry; ///< Pointer to current JIT entry in flash + uintptr_t page_base_addr; ///< Base address of current page + uint8_t page_buffer[FLASH_PAGE_SIZE]; ///< Page buffer for writing + uint8_t page_offset; ///< Current offset within page + struct JSFlashPlatformContext *pf_ctx; ///< Platform-specific context +}; + +static ErlNifResourceType *jit_stream_flash_resource_type; +static void jit_stream_flash_dtor(ErlNifEnv *caller_env, void *obj); +static bool jit_stream_flash_replace_at_addr(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data, size_t len); + +const ErlNifResourceTypeInit jit_stream_flash_resource_type_init = { + .members = 1, + .dtor = jit_stream_flash_dtor +}; + +static struct JITEntry *jit_entry_next(struct JITEntry *jit_entry) +{ + uintptr_t next_entry_addr = ((uintptr_t) jit_entry) + sizeof(struct JITEntry) + jit_entry->size; + // Align to 4 bytes boundaries + next_entry_addr = (next_entry_addr + 3) & ~3; + + TRACE("jit_entry_next: jit_entry = %p, return %p\n", (void *) jit_entry, (void *) next_entry_addr); + + return (struct JITEntry *) next_entry_addr; +} + +/** + * @brief Check if a sector needs to be erased + * + * Scans the entire sector to check if it contains any non-0xFF bytes. + * Uses word-by-word comparison for efficiency since sectors are aligned. 
+ * + * @param sector_addr Address of the sector (must be sector-aligned) + * @return true if sector needs erasing, false if already erased + */ +static bool jit_stream_flash_sector_needs_erase(uintptr_t sector_addr) +{ + const uintptr_t *sector_ptr = (const uintptr_t *) sector_addr; + const uintptr_t erased_pattern = ~((uintptr_t) 0); // All bits set to 1 (0xFF...FF) + size_t num_words = FLASH_SECTOR_SIZE / sizeof(uintptr_t); + + // Check if entire sector is all 0xFF by comparing word-by-word + for (size_t i = 0; i < num_words; i++) { + if (sector_ptr[i] != erased_pattern) { + return true; + } + } + + return false; +} + +static struct JITEntry *globalcontext_find_first_jit_entry(GlobalContext *global, bool *is_valid) +{ + const void *max_end_offset = NULL; + uint32_t end_size; + const void *end_offset; + const char *end_name; + bool valid_cache = true; + + struct ListHead *item; + struct ListHead *avmpack_data = synclist_rdlock(&global->avmpack_data); + LIST_FOR_EACH (item, avmpack_data) { + struct AVMPackData *avmpack_data = GET_LIST_ENTRY(item, struct AVMPackData, avmpack_head); + avmpack_find_section_by_flag(avmpack_data->data, END_OF_FILE_MASK, END_OF_FILE, &end_offset, &end_size, &end_name); + valid_cache = valid_cache && (strcmp(end_name, "END") == 0); + + if (end_offset > max_end_offset) { + max_end_offset = end_offset; + } + } + synclist_unlock(&global->avmpack_data); + + uintptr_t max_end_offset_page = ((((uintptr_t) max_end_offset) - 1) & ~(FLASH_SECTOR_SIZE - 1)); + *is_valid = valid_cache; + + TRACE("globalcontext_find_first_jit_entry: return %p\n", (void *) (max_end_offset_page + FLASH_SECTOR_SIZE)); + + return (struct JITEntry *) (max_end_offset_page + FLASH_SECTOR_SIZE); +} + +static void globalcontext_set_cache_valid(GlobalContext *global) +{ + TRACE("globalcontext_set_cache_valid\n"); + + uint32_t end_size; + const void *end_offset; + const char *end_name; + bool valid_cache; + + // Create platform context for flash operations + struct 
JSFlashPlatformContext *pf_ctx = jit_stream_flash_platform_init(); + if (IS_NULL_PTR(pf_ctx)) { + fprintf(stderr, "Failed to initialize platform flash context\n"); + return; + } + + do { + valid_cache = true; + struct ListHead *item; + struct ListHead *avmpack_data = synclist_rdlock(&global->avmpack_data); + LIST_FOR_EACH (item, avmpack_data) { + struct AVMPackData *avmpack_data = GET_LIST_ENTRY(item, struct AVMPackData, avmpack_head); + avmpack_find_section_by_flag(avmpack_data->data, END_OF_FILE_MASK, END_OF_FILE, &end_offset, &end_size, &end_name); + if (strcmp(end_name, "END")) { + valid_cache = false; + break; + } + } + synclist_unlock(&global->avmpack_data); + if (!valid_cache) { + // Replace "end" with "END" - this is a 3-byte string replacement + const uint8_t end_str[] = "END"; + if (!jit_stream_flash_replace_at_addr(pf_ctx, (uintptr_t) end_name, end_str, 3)) { + fprintf(stderr, "Failed to update cache validity marker from 'end' to 'END'\n"); + break; + } + } + } while (!valid_cache); + + jit_stream_flash_platform_destroy(pf_ctx); +} + +static struct JITEntry *globalcontext_find_last_jit_entry(GlobalContext *global) +{ + bool is_valid; + struct JITEntry *jit_entry = globalcontext_find_first_jit_entry(global, &is_valid); + if (!is_valid) { + TRACE("globalcontext_find_last_jit_entry, cache not valid, returning NULL\n"); + return NULL; + } + + // Find the last valid entry + struct JITEntry *last_valid = jit_entry; + while (jit_entry->magic == JIT_ENTRY_MAGIC) { + last_valid = jit_entry; + jit_entry = jit_entry_next(jit_entry); + } + TRACE("globalcontext_find_last_jit_entry, returning last valid entry at %p\n", (void *) last_valid); + return last_valid; +} + +static bool jit_stream_flash_flush_page(struct JITStreamFlash *js) +{ + // Write the page + // Note: sector is already erased by nif_jit_stream_flash_new (first sector) + // or jit_stream_flash_append (subsequent sectors when crossing boundaries) + if (!jit_stream_flash_platform_write_page(js->pf_ctx, 
js->page_base_addr, js->page_buffer)) { + fprintf(stderr, "Failed to write page at address 0x%lx\n", (unsigned long) js->page_base_addr); + return false; + } + + return true; +} + +static bool jit_stream_flash_finalize_entry(struct JSFlashPlatformContext *pf_ctx, struct JITEntry *jit_entry, uint16_t magic, uint16_t version, uint32_t code, uint32_t labels) +{ + uintptr_t entry_addr = (uintptr_t) jit_entry; + uint8_t page_buffer[FLASH_PAGE_SIZE]; + uintptr_t page_base_addr = entry_addr & ~(FLASH_PAGE_SIZE - 1); + + // Read current page contents + memcpy(page_buffer, (const uint8_t *) page_base_addr, FLASH_PAGE_SIZE); + + // Calculate offset within page + size_t entry_offset = entry_addr - page_base_addr; + struct JITEntry *updated_entry = (struct JITEntry *) (page_buffer + entry_offset); + + // Update fields + updated_entry->magic = magic; + updated_entry->version = version; + updated_entry->code = code; + updated_entry->labels = labels; + + // Write back to flash + if (!jit_stream_flash_platform_write_page(pf_ctx, page_base_addr, page_buffer)) { + fprintf(stderr, "Failed to finalize entry at address 0x%lx\n", (unsigned long) page_base_addr); + return false; + } + + return true; +} + +// Replace data in flash at the given absolute address +// Returns true on success, false if validation fails (trying to set bits 0→1) +static bool jit_stream_flash_replace_at_addr(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data, size_t len) +{ + uintptr_t replace_start = addr; + uintptr_t replace_end = replace_start + len; + + // Iterate over all pages that need to be updated + uintptr_t current_page_addr = replace_start & ~(FLASH_PAGE_SIZE - 1); + size_t data_offset = 0; + + while (current_page_addr < replace_end) { + // Calculate the range within this page that needs to be replaced + uintptr_t page_start_offset = 0; + uintptr_t page_end_offset = FLASH_PAGE_SIZE; + + if (current_page_addr < replace_start) { + page_start_offset = replace_start - 
current_page_addr; + } + + if (current_page_addr + FLASH_PAGE_SIZE > replace_end) { + page_end_offset = replace_end - current_page_addr; + } + + size_t copy_len = page_end_offset - page_start_offset; + + // Prepare page buffer + uint8_t page_buffer[FLASH_PAGE_SIZE]; + uintptr_t page_base_addr = current_page_addr; + + // Read current page contents + memcpy(page_buffer, (const uint8_t *) page_base_addr, FLASH_PAGE_SIZE); + + // Verify that we're only clearing bits (1→0), not setting them (0→1) + const uint8_t *flash_ptr = (const uint8_t *) page_base_addr; + for (size_t i = 0; i < copy_len; i++) { + uint8_t flash_byte = flash_ptr[page_start_offset + i]; + uint8_t new_byte = data[data_offset + i]; + + // Check if we're trying to set any bits (0→1) + if ((new_byte & ~flash_byte) != 0) { + fprintf(stderr, "\n=== FLASH REPLACE VALIDATION FAILED ===\n"); + fprintf(stderr, "Attempting to set bits (0→1) without erase!\n"); + fprintf(stderr, "Page address: 0x%lx\n", (unsigned long) page_base_addr); + fprintf(stderr, "Offset in page: %zu, flash byte: 0x%02hhx, new byte: 0x%02hhx\n", + page_start_offset + i, flash_byte, new_byte); + fprintf(stderr, "Bits being set (0→1): 0x%02hhx\n", (new_byte & ~flash_byte)); + fprintf(stderr, "Replace address: 0x%lx, len=%zu\n", (unsigned long) addr, len); + fprintf(stderr, "========================================\n\n"); + return false; + } + } + + // Update with new data + memcpy(page_buffer + page_start_offset, data + data_offset, copy_len); + + // Write back to flash + if (!jit_stream_flash_platform_write_page(pf_ctx, page_base_addr, page_buffer)) { + fprintf(stderr, "Failed to replace data at address 0x%lx\n", (unsigned long) page_base_addr); + return false; + } + + data_offset += copy_len; + current_page_addr += FLASH_PAGE_SIZE; + } + + return true; +} + +static bool jit_stream_flash_append(struct JITStreamFlash *js, const uint8_t *buffer, size_t count) +{ + while (count > 0) { + // Validate flash constraints: can only write to erased 
(0xFF) bytes + uint8_t current_byte = js->page_buffer[js->page_offset]; + uint8_t new_byte = *buffer; + if ((~current_byte & new_byte) != 0) { + // Trying to set bits from 0→1 without erase + fprintf(stderr, "\n=== JIT STREAM FLASH APPEND ERROR ===\n"); + fprintf(stderr, "Attempting to write 0x%02x over 0x%02x at page offset %u\n", + new_byte, current_byte, js->page_offset); + fprintf(stderr, "Page base address: 0x%lx\n", (unsigned long) js->page_base_addr); + fprintf(stderr, "Flash address: 0x%lx\n", (unsigned long) (js->page_base_addr + js->page_offset)); + fprintf(stderr, "Bits being set 0→1: 0x%02x\n", (~current_byte & new_byte)); + fprintf(stderr, "This indicates the sector was not properly erased!\n"); + fprintf(stderr, "=====================================\n\n"); + return false; + } + + js->page_buffer[js->page_offset] = *buffer; + if (js->page_offset == (FLASH_PAGE_SIZE - 1)) { + if (!jit_stream_flash_flush_page(js)) { + fprintf(stderr, "jit_stream_flash_flush_page failed\n"); + return false; + } + // Move to the next page after flushing + uintptr_t previous_sector = js->page_base_addr & ~(FLASH_SECTOR_SIZE - 1); + js->page_base_addr += FLASH_PAGE_SIZE; + js->page_offset = 0; + uintptr_t new_sector = js->page_base_addr & ~(FLASH_SECTOR_SIZE - 1); + + // Check if we've entered a new sector and erase if needed + if (new_sector != previous_sector) { + if (jit_stream_flash_sector_needs_erase(new_sector)) { + TRACE("jit_stream_flash_append -- erasing new sector at %lx\n", (unsigned long) new_sector); + if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, new_sector)) { + fprintf(stderr, "Failed to erase new sector at address 0x%lx\n", (unsigned long) new_sector); + return false; + } + } + } + + // Read the new page contents into the buffer + memcpy(js->page_buffer, (const uint8_t *) js->page_base_addr, FLASH_PAGE_SIZE); + } else { + js->page_offset++; + } + buffer++; + count--; + } + return true; +} + +static term nif_jit_stream_flash_new(Context *ctx, int 
argc, term argv[]) +{ + UNUSED(argc); + UNUSED(argv); + + struct JITEntry *last_valid_entry = globalcontext_find_last_jit_entry(ctx->global); + struct JITEntry *new_entry; + + if (last_valid_entry == NULL) { + // No valid entries, get the first position + bool is_valid; + new_entry = globalcontext_find_first_jit_entry(ctx->global, &is_valid); + } else { + // Get position after last valid entry + new_entry = jit_entry_next(last_valid_entry); + } + + // Return a resource object + struct JITStreamFlash *js = enif_alloc_resource(jit_stream_flash_resource_type, sizeof(struct JITStreamFlash)); + if (IS_NULL_PTR(js)) { + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } + + // Initialize platform context + js->pf_ctx = jit_stream_flash_platform_init(); + if (IS_NULL_PTR(js->pf_ctx)) { + fprintf(stderr, "Failed to initialize platform flash context\n"); + enif_release_resource(js); + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } + + js->jit_entry = new_entry; + js->page_base_addr = (uintptr_t) new_entry & ~(FLASH_PAGE_SIZE - 1); + + // Handle sector erasing for the sector where JIT entry starts + uintptr_t new_entry_addr = (uintptr_t) new_entry; + uintptr_t new_entry_sector = new_entry_addr & ~(FLASH_SECTOR_SIZE - 1); + uintptr_t sector_end = new_entry_sector + FLASH_SECTOR_SIZE; + + // Check if there's stale data from entry position to end of sector + bool has_stale_data = false; + const uintptr_t *check_ptr = (const uintptr_t *) new_entry_addr; + const uintptr_t *check_end = (const uintptr_t *) sector_end; + const uintptr_t erased_pattern = ~((uintptr_t) 0); + + while (check_ptr < check_end) { + if (*check_ptr != erased_pattern) { + has_stale_data = true; + break; + } + check_ptr++; + } + + if (has_stale_data) { + // There's stale data (from failed compilation) - need to erase but preserve data before entry + size_t preserve_size = new_entry_addr - new_entry_sector; + + if (preserve_size > 0) { + // Allocate buffer for the sector + uint8_t *sector_buffer = malloc(FLASH_SECTOR_SIZE); + if 
(IS_NULL_PTR(sector_buffer)) { + fprintf(stderr, "Failed to allocate sector buffer\n"); + jit_stream_flash_platform_destroy(js->pf_ctx); + enif_release_resource(js); + RAISE_ERROR(OUT_OF_MEMORY_ATOM); + } + + // Copy data to preserve (before the entry) + memcpy(sector_buffer, (const uint8_t *) new_entry_sector, preserve_size); + + // Fill rest with 0xFF (erased state) + memset(sector_buffer + preserve_size, 0xFF, FLASH_SECTOR_SIZE - preserve_size); + + // Erase the sector + TRACE("nif_jit_stream_flash_new -- erasing sector with stale data at %lx (preserving %zu bytes)\n", + (unsigned long) new_entry_sector, preserve_size); + if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, new_entry_sector)) { + fprintf(stderr, "Failed to erase sector with stale data\n"); + free(sector_buffer); + jit_stream_flash_platform_destroy(js->pf_ctx); + enif_release_resource(js); + RAISE_ERROR(BADARG_ATOM); + } + + // Write back the preserved data page by page + for (size_t page_offset = 0; page_offset < preserve_size; page_offset += FLASH_PAGE_SIZE) { + if (!jit_stream_flash_platform_write_page(js->pf_ctx, new_entry_sector + page_offset, + sector_buffer + page_offset)) { + fprintf(stderr, "Failed to write back preserved data\n"); + free(sector_buffer); + jit_stream_flash_platform_destroy(js->pf_ctx); + enif_release_resource(js); + RAISE_ERROR(BADARG_ATOM); + } + } + + free(sector_buffer); + } else { + // Entry is at sector boundary, just erase + TRACE("nif_jit_stream_flash_new -- erasing sector with stale data at %lx\n", + (unsigned long) new_entry_sector); + if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, new_entry_sector)) { + fprintf(stderr, "Failed to erase sector for new JIT entry\n"); + jit_stream_flash_platform_destroy(js->pf_ctx); + enif_release_resource(js); + RAISE_ERROR(BADARG_ATOM); + } + } + } else { + TRACE("nif_jit_stream_flash_new -- sector at %lx is clean (no stale data)\n", + (unsigned long) new_entry_sector); + } + + // Now handle the sector where JIT 
entry ends (if different from start sector)
+    uintptr_t entry_end = new_entry_addr + sizeof(struct JITEntry);
+    uintptr_t entry_end_sector = entry_end & ~(FLASH_SECTOR_SIZE - 1);
+
+    if (entry_end_sector != new_entry_sector) {
+        // Entry spans two sectors - erase the end sector if needed
+        if (jit_stream_flash_sector_needs_erase(entry_end_sector)) {
+            TRACE("nif_jit_stream_flash_new -- erasing end sector at %lx\n",
+                (unsigned long) entry_end_sector);
+            if (!jit_stream_flash_platform_erase_sector(js->pf_ctx, entry_end_sector)) {
+                fprintf(stderr, "Failed to erase end sector for new JIT entry\n");
+                jit_stream_flash_platform_destroy(js->pf_ctx);
+                // jit_stream_flash_dtor also destroys a non-NULL pf_ctx. Assuming it is the
+                // registered destructor and runs when the last reference is released below,
+                // clear the pointer so the context is not destroyed a second time.
+                js->pf_ctx = NULL;
+                enif_release_resource(js);
+                RAISE_ERROR(BADARG_ATOM);
+            }
+        }
+    }
+
+    // Mirror the page that contains the entry start; appends go through this buffer
+    memcpy(js->page_buffer, (const uint8_t *) js->page_base_addr, FLASH_PAGE_SIZE);
+    js->page_offset = (uintptr_t) new_entry & (FLASH_PAGE_SIZE - 1);
+
+    TRACE("nif_jit_stream_flash_new entry is %p, page_offset is %lx\n", (void *) new_entry, (unsigned long) js->page_offset);
+
+    // Append the first bytes, which may flush the page
+    // The placeholder header is all ones, i.e. every header byte stays in the erased state
+    struct JITEntry header;
+    header.magic = 0xFFFF;
+    header.version = 0xFFFF;
+    header.code = 0xFFFFFFFF;
+    header.labels = 0xFFFFFFFF;
+    header.size = 0xFFFFFFFF;
+    if (!jit_stream_flash_append(js, (const uint8_t *) &header, sizeof(header))) {
+        jit_stream_flash_platform_destroy(js->pf_ctx);
+        js->pf_ctx = NULL; // avoid a second destroy from the resource destructor
+        enif_release_resource(js);
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    term obj = enif_make_resource(erl_nif_env_from_context(ctx), js);
+    enif_release_resource(js); // decrement refcount after enif_alloc_resource
+    return obj;
+}
+
+// offset/1: returns the current stream position as an integer, counted from the
+// first byte after the JITEntry header. Raises badarg if argv[0] is not a
+// jit_stream_flash resource.
+static term nif_jit_stream_flash_offset(Context *ctx, int argc, term argv[])
+{
+    UNUSED(argc);
+
+    void *js_obj_ptr;
+    if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+    struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr;
+
+    uintptr_t current_addr = js_obj->page_base_addr + js_obj->page_offset;
+
uintptr_t base_addr = ((uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry)); + + int offset = current_addr - base_addr; + + return term_from_int(offset); +} + +static term nif_jit_stream_flash_append(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + VALIDATE_VALUE(argv[1], term_is_binary); + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) { + RAISE_ERROR(BADARG_ATOM); + } + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr; + + size_t binary_size = term_binary_size(argv[1]); + const uint8_t *binary_data = (const uint8_t *) term_binary_data(argv[1]); + + if (!jit_stream_flash_append(js_obj, binary_data, binary_size)) { + RAISE_ERROR(BADARG_ATOM); + } + + return argv[0]; +} + +static term nif_jit_stream_flash_replace(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + + VALIDATE_VALUE(argv[1], term_is_integer); + VALIDATE_VALUE(argv[2], term_is_binary); + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) { + RAISE_ERROR(BADARG_ATOM); + } + + size_t binary_size = term_binary_size(argv[2]); + const uint8_t *binary_data = (const uint8_t *) term_binary_data(argv[2]); + avm_int_t offset = term_to_int(argv[1]); + + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr; + + uintptr_t base_addr = ((uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry)); + uintptr_t replace_start = base_addr + offset; + uintptr_t replace_end = replace_start + binary_size; + + // Iterate over all pages that need to be updated + uintptr_t current_page_addr = replace_start & ~(FLASH_PAGE_SIZE - 1); + size_t binary_offset = 0; + + while (current_page_addr < replace_end) { + // Calculate the range within this page that needs to be replaced + uintptr_t page_start_offset = 0; + uintptr_t page_end_offset = FLASH_PAGE_SIZE; + + if (current_page_addr < replace_start) { + 
page_start_offset = replace_start - current_page_addr;
+        }
+
+        if (current_page_addr + FLASH_PAGE_SIZE > replace_end) {
+            page_end_offset = replace_end - current_page_addr;
+        }
+
+        // Number of bytes of the replacement that fall inside this page
+        size_t copy_len = page_end_offset - page_start_offset;
+
+        // Check if this is the current buffer page
+        if (current_page_addr == js_obj->page_base_addr) {
+            // Update current buffer directly
+            // (nothing is written to flash here; the buffer is only modified in RAM)
+            memcpy(js_obj->page_buffer + page_start_offset, binary_data + binary_offset, copy_len);
+        } else {
+            // This is an already-flushed page, need to update flash
+            if (!jit_stream_flash_replace_at_addr(js_obj->pf_ctx, current_page_addr + page_start_offset,
+                    binary_data + binary_offset,
+                    copy_len)) {
+                RAISE_ERROR(BADARG_ATOM);
+            }
+        }
+
+        binary_offset += copy_len;
+        current_page_addr += FLASH_PAGE_SIZE;
+    }
+
+    return argv[0];
+}
+
+// read/3: returns a binary with argv[2] bytes of the stream starting at offset
+// argv[1] (counted from the first byte after the JITEntry header).
+// Raises badarg when an argument has the wrong type, when len <= 0 or offset < 0,
+// or when offset + len goes past the current stream position; raises
+// out_of_memory when heap space for the result cannot be ensured.
+static term nif_jit_stream_flash_read(Context *ctx, int argc, term argv[])
+{
+    UNUSED(argc);
+
+    VALIDATE_VALUE(argv[1], term_is_integer);
+    VALIDATE_VALUE(argv[2], term_is_integer);
+    void *js_obj_ptr;
+    if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+    struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr;
+
+    avm_int_t offset = term_to_int(argv[1]);
+    avm_int_t len = term_to_int(argv[2]);
+
+    // Validate parameters
+    // (a zero-length read is rejected as well)
+    if (UNLIKELY(len <= 0 || offset < 0)) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    // Calculate current stream position
+    uintptr_t current_addr = js_obj->page_base_addr + js_obj->page_offset;
+    uintptr_t base_addr = ((uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry));
+    size_t stream_offset = current_addr - base_addr;
+
+    // Check if read is within bounds
+    if (UNLIKELY((size_t) (offset + len) > stream_offset)) {
+        RAISE_ERROR(BADARG_ATOM);
+    }
+
+    if (UNLIKELY(memory_ensure_free_opt(ctx, TERM_BINARY_HEAP_SIZE(len), MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
+        RAISE_ERROR(OUT_OF_MEMORY_ATOM);
+    }
+
+    // NOTE(review): the data is taken straight from the flash address, while bytes of
+    // the page currently being filled are held in js_obj->page_buffer until that page
+    // is flushed. A range that touches the current page presumably returns the
+    // pre-write flash contents -- confirm callers only read flushed pages, or serve
+    // that part from page_buffer.
+    uintptr_t read_addr = base_addr +
offset; + return term_from_literal_binary((const uint8_t *) read_addr, len, &ctx->heap, ctx->global); +} + +static term nif_jit_stream_flash_flush(Context *ctx, int argc, term argv[]) +{ + UNUSED(ctx); + UNUSED(argc); + + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), argv[0], jit_stream_flash_resource_type, &js_obj_ptr))) { + RAISE_ERROR(BADARG_ATOM); + } + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr; + + // Calculate the size BEFORE flushing + uintptr_t current_addr = js_obj->page_base_addr + js_obj->page_offset; + uintptr_t code_start = (uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry); + uint32_t code_size = current_addr - code_start; + + // Check if the size field is in the current unflushed page buffer or in an already-flushed page + uintptr_t size_field_addr = (uintptr_t) &js_obj->jit_entry->size; + uintptr_t size_field_page = size_field_addr & ~(FLASH_PAGE_SIZE - 1); + + if (size_field_page == js_obj->page_base_addr) { + // Size field is in the current buffer, update it directly before flushing + size_t offset_in_page = size_field_addr - js_obj->page_base_addr; + memcpy(js_obj->page_buffer + offset_in_page, &code_size, sizeof(uint32_t)); + } else { + // Size field is in an already-flushed page, use replace + if (!jit_stream_flash_replace_at_addr(js_obj->pf_ctx, size_field_addr, + (const uint8_t *) &code_size, + sizeof(uint32_t))) { + RAISE_ERROR(BADARG_ATOM); + } + } + + // Flush the final page + if (!jit_stream_flash_flush_page(js_obj)) { + fprintf(stderr, "jit_stream_flash_flush_page failed\n"); + RAISE_ERROR(BADARG_ATOM); + } + + return argv[0]; +} + +static term nif_jit_stream_module(Context *ctx, int argc, term argv[]) +{ + UNUSED(argc); + UNUSED(argv); + + return globalcontext_make_atom(ctx->global, ATOM_STR("\x10", "jit_stream_flash")); +} + +static const struct Nif jit_stream_module_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_module +}; +static const struct 
Nif jit_stream_flash_new_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_new +}; +static const struct Nif jit_stream_flash_offset_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_offset +}; +static const struct Nif jit_stream_flash_append_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_append +}; +static const struct Nif jit_stream_flash_replace_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_replace +}; +static const struct Nif jit_stream_flash_read_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_read +}; +static const struct Nif jit_stream_flash_flush_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_stream_flash_flush +}; + +ModuleNativeEntryPoint jit_stream_flash_entry_point(Context *ctx, term jit_stream) +{ + void *js_obj_ptr; + if (UNLIKELY(!enif_get_resource(erl_nif_env_from_context(ctx), jit_stream, jit_stream_flash_resource_type, &js_obj_ptr))) { + return NULL; + } + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) js_obj_ptr; + + uintptr_t base_addr = ((uintptr_t) js_obj->jit_entry + sizeof(struct JITEntry)); + + // Convert to executable address (handles DBUS→IBUS, Thumb bit, etc.) 
+ base_addr = jit_stream_flash_platform_ptr_to_executable(base_addr); + + return (ModuleNativeEntryPoint) base_addr; +} + +static void jit_stream_flash_dtor(ErlNifEnv *caller_env, void *obj) +{ + UNUSED(caller_env); + struct JITStreamFlash *js_obj = (struct JITStreamFlash *) obj; + if (js_obj->pf_ctx) { + jit_stream_flash_platform_destroy(js_obj->pf_ctx); + } +} + +const struct Nif *jit_stream_flash_get_nif(const char *nifname) +{ + if (strcmp("jit:stream_module/0", nifname) == 0) { + return &jit_stream_module_nif; + } + if (strncmp("jit_stream_flash:", nifname, 17) == 0) { + const char *rest = nifname + 17; + if (strcmp("new/1", rest) == 0) { + return &jit_stream_flash_new_nif; + } + if (strcmp("offset/1", rest) == 0) { + return &jit_stream_flash_offset_nif; + } + if (strcmp("append/2", rest) == 0) { + return &jit_stream_flash_append_nif; + } + if (strcmp("replace/3", rest) == 0) { + return &jit_stream_flash_replace_nif; + } + if (strcmp("read/3", rest) == 0) { + return &jit_stream_flash_read_nif; + } + if (strcmp("flush/1", rest) == 0) { + return &jit_stream_flash_flush_nif; + } + } + return NULL; +} + +void jit_stream_flash_init(GlobalContext *global) +{ + ErlNifEnv env; + erl_nif_env_partial_init_from_globalcontext(&env, global); + jit_stream_flash_resource_type = enif_init_resource_type(&env, "jit_stream_flash", &jit_stream_flash_resource_type_init, ERL_NIF_RT_CREATE, NULL); +} + +void globalcontext_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels) +{ + bool is_valid; + (void) globalcontext_find_first_jit_entry(global, &is_valid); + + struct JSFlashPlatformContext *pf_ctx = jit_stream_flash_platform_init(); + if (IS_NULL_PTR(pf_ctx)) { + fprintf(stderr, "Failed to initialize platform flash context\n"); + return; + } + + // Reverse the executable address transformation to get data address + // Platform-specific: Thumb (clear bit 0), RISC-V (IBUS→DBUS conversion) + uintptr_t data_addr 
= jit_stream_flash_platform_executable_to_ptr((uintptr_t) entry_point); + + struct JITEntry *jit_entry = (struct JITEntry *) (data_addr - sizeof(struct JITEntry)); + uintptr_t code = (uintptr_t) mod->code; + + // Finalize the entry + if (!jit_stream_flash_finalize_entry(pf_ctx, jit_entry, JIT_ENTRY_MAGIC, version, (uint32_t) code, labels)) { + fprintf(stderr, "jit_stream_flash_finalize_entry failed\n"); + jit_stream_flash_platform_destroy(pf_ctx); + return; + } + +#ifdef ENABLE_TRACE + // Compute CRC of entire module for verification + uint32_t module_crc = crc32((const uint8_t *) jit_entry, sizeof(struct JITEntry) + jit_entry->size); + TRACE("After finalize - jit_entry=%p CRC32=0x%08x (entry+code size=%u)\n", + (void *) jit_entry, (unsigned int) module_crc, (unsigned int) (sizeof(struct JITEntry) + jit_entry->size)); +#endif + + // Erase next sector if it's completely after the current module + struct JITEntry *current_entry = (struct JITEntry *) (data_addr - sizeof(struct JITEntry)); + struct JITEntry *next_entry = jit_entry_next(current_entry); + uintptr_t next_entry_addr = (uintptr_t) next_entry; + uintptr_t next_sector = next_entry_addr & ~(FLASH_SECTOR_SIZE - 1); + + // Calculate the sector where the current module ENDS (not where it starts) + uintptr_t current_module_end = (uintptr_t) current_entry + sizeof(struct JITEntry) + current_entry->size; + uintptr_t current_end_sector = current_module_end & ~(FLASH_SECTOR_SIZE - 1); + + // Only erase next sector if it's completely after the current module's end + // This prevents erasing a sector that contains the tail of the current module + if (next_sector > current_end_sector) { + // Next entry is in a sector completely after current module, erase it if it has stale data + if (next_entry->magic != 0xFFFF) { + TRACE("globalcontext_set_cache_native_code -- NOT erasing new sector at %lx\n", (unsigned long) next_sector); + if (!jit_stream_flash_platform_erase_sector(pf_ctx, next_sector)) { + fprintf(stderr, 
"jit_stream_flash_platform_erase_sector failed\n"); + jit_stream_flash_platform_destroy(pf_ctx); + return; + } + } else { + TRACE("globalcontext_set_cache_native_code -- NOT erasing new sector at %lx\n", (unsigned long) next_sector); + } + } + + if (!is_valid) { + // Mark that cache entry is valid by replacing end with END in installed AVM + globalcontext_set_cache_valid(global); + } + + jit_stream_flash_platform_destroy(pf_ctx); +} + +// Implementation of jit_stream_entry_point, sys_get_cache_native_code and +// sys_set_cache_native_code using this jit_stream +#ifndef TEST_JIT_STREAM_FLASH +ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) +{ + return jit_stream_flash_entry_point(ctx, jit_stream); +} + +bool sys_get_cache_native_code(GlobalContext *global, Module *mod, uint16_t *version, ModuleNativeEntryPoint *entry_point, uint32_t *labels) +{ + bool is_valid; + struct JITEntry *jit_entry = globalcontext_find_first_jit_entry(global, &is_valid); + if (!is_valid) { + return false; + } + uintptr_t code = (uintptr_t) mod->code; + while (jit_entry->magic == JIT_ENTRY_MAGIC) { + if (jit_entry->code == (uint32_t) code) { + *version = jit_entry->version; + uintptr_t ep_addr = (uintptr_t) jit_entry + sizeof(struct JITEntry); + ep_addr = jit_stream_flash_platform_ptr_to_executable(ep_addr); + *entry_point = (ModuleNativeEntryPoint) ep_addr; + *labels = jit_entry->labels; + +#ifdef ENABLE_TRACE + // Compute CRC of entire module for verification + uint32_t module_crc = crc32((const uint8_t *) jit_entry, sizeof(struct JITEntry) + jit_entry->size); + TRACE("Loading from cache - jit_entry=%p CRC32=0x%08x (entry+code size=%u)\n", + (void *) jit_entry, (unsigned int) module_crc, (unsigned int) (sizeof(struct JITEntry) + jit_entry->size)); +#endif + + return true; + } + jit_entry = jit_entry_next(jit_entry); + } + return false; +} + +void sys_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, 
uint32_t labels) +{ + globalcontext_set_cache_native_code(global, mod, version, entry_point, labels); +} +#endif + +#endif // AVM_NO_JIT diff --git a/src/libAtomVM/jit_stream_flash.h b/src/libAtomVM/jit_stream_flash.h new file mode 100644 index 0000000000..30644189ea --- /dev/null +++ b/src/libAtomVM/jit_stream_flash.h @@ -0,0 +1,129 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +/** + * @file jit_stream_flash.h + * @brief JIT code caching in flash memory - common implementation + */ + +#ifndef _JIT_STREAM_FLASH_H_ +#define _JIT_STREAM_FLASH_H_ + +#include "globalcontext.h" +#include "jit_stream_flash_platform.h" +#include "module.h" + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @brief Platform-specific flash context (opaque) + */ +struct JSFlashPlatformContext; + +/** + * @brief Initialize JIT stream flash subsystem + * + * @param global Global context + */ +void jit_stream_flash_init(GlobalContext *global); + +/** + * @brief Get NIF for jit_stream_flash operations + * + * @param nifname NIF name + * @return NIF pointer or NULL + */ +const struct Nif *jit_stream_flash_get_nif(const char *nifname); + +/** + * @brief Get entry point from jit_stream_flash. 
+ * Called by `jit_stream_entry_point` + * + * @param ctx Context + * @param jit_stream JIT stream term + * @return Entry point or NULL + */ +ModuleNativeEntryPoint jit_stream_flash_entry_point(Context *ctx, term jit_stream); + +/** + * @brief Finalize flash operation by marking an entry point as valid for + * a given module. This is called by `sys_set_cache_native_code`. + * + * @param global Global context + * @param mod Module + * @param version Module version + * @param entry_point Entry point + * @param labels Number of labels + */ +void globalcontext_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels); + +/** + * @brief Initialize platform flash context + * @return Platform flash context, or NULL on error + */ +struct JSFlashPlatformContext *jit_stream_flash_platform_init(void); + +/** + * @brief Destroy platform flash context + * @param pf_ctx Platform flash context to destroy + */ +void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *pf_ctx); + +/** + * @brief Erase a flash sector at the given address + * @param pf_ctx Platform flash context + * @param addr Virtual address of the sector to erase (must be sector-aligned) + * @return true on success, false on error + */ +bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr); + +/** + * @brief Write a page to flash + * @param pf_ctx Platform flash context + * @param addr Virtual address to write to (must be page-aligned) + * @param data Data to write (must be FLASH_PAGE_SIZE bytes) + * @return true on success, false on error + */ +bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data); + +/** + * @brief Convert data bus address to instruction bus address + * @param addr Data bus address + * @return Instruction bus address (executable pointer) + */ +uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t 
addr); + +/** + * @brief Convert instruction bus address to data bus address + * @param addr Instruction bus address (executable pointer) + * @return Data bus address + */ +uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr); + +#ifdef __cplusplus +} +#endif + +#endif // _JIT_STREAM_FLASH_H_ diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index 108d5027d8..64ff0569a6 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -353,6 +353,13 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary fprintf(stderr, "Native code chunk found but no compatible architecture or variant found\n"); } } + } else { + ModuleNativeEntryPoint module_entry_point; + uint32_t labels; + uint16_t version; + if (sys_get_cache_native_code(global, mod, &version, &module_entry_point, &labels) && version == JIT_FORMAT_VERSION) { + module_set_native_code(mod, labels, module_entry_point); + } } #endif diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 328216b1c3..dc87df2250 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -5633,6 +5633,8 @@ static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[]) VALIDATE_VALUE(argv[0], term_is_atom); VALIDATE_VALUE(argv[1], term_is_integer); + avm_int_t labels_count = term_to_int(argv[1]); + term module_name = argv[0]; Module *mod = globalcontext_get_module(ctx->global, term_to_atom_index(module_name)); if (IS_NULL_PTR(mod)) { @@ -5646,10 +5648,12 @@ static term nif_code_server_set_native_code(Context *ctx, int argc, term argv[]) SMP_MODULE_LOCK(mod); if (mod->native_code == NULL) { - module_set_native_code(mod, term_to_int(argv[1]), entry_point); + module_set_native_code(mod, labels_count, entry_point); } SMP_MODULE_UNLOCK(mod); + sys_set_cache_native_code(ctx->global, mod, JIT_FORMAT_VERSION, entry_point, labels_count); + return OK_ATOM; } #endif diff --git a/src/libAtomVM/sys.h b/src/libAtomVM/sys.h index 0735d86ed3..8ad701c3cb 100644 --- 
a/src/libAtomVM/sys.h +++ b/src/libAtomVM/sys.h @@ -296,6 +296,33 @@ void sys_free_platform(GlobalContext *global); */ ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset); +/** + * @brief Get the cache (typically on flash) of native code for a given module + * + * @details If module is found in cache, return a pointer to the entry point. + * Only implemented on platforms with JIT. Implementations on flash typically + * check if the jit cache is valid (for lib or for app) and use the pointer to + * code as a key. + * @param global the global context + * @param mod module to return the cache native code for + * @param version version of the cache entry (for compatibility with the VM) + * @param entry_point entry point to the module, if found + * @param labels number of labels + * @return \c true if the cache entry was found + */ +bool sys_get_cache_native_code(GlobalContext *global, Module *mod, uint16_t *version, ModuleNativeEntryPoint *entry_point, uint32_t *labels); + +/** + * @brief Add native code to cache for a given module + * + * @param global the global context + * @param mod module to add the native code for + * @param version version of the native code + * @param entry_point entry point to the module + * @param labels number of labels + */ +void sys_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels); + #ifdef __cplusplus } #endif diff --git a/src/platforms/esp32/partitions.csv b/src/platforms/esp32/partitions.csv index 95c1cf74bc..d313cbdc81 100644 --- a/src/platforms/esp32/partitions.csv +++ b/src/platforms/esp32/partitions.csv @@ -7,6 +7,5 @@ # Note: if you change the phy_init or app partition offset, make sure to change the offset in Kconfig.projbuild nvs, data, nvs, 0x9000, 0x6000, phy_init, data, phy, 0xf000, 0x1000, -factory, app, factory, 0x10000, 0x1C0000, -boot.avm, data, phy, 0x1D0000, 0x40000, -main.avm, data, phy, 
0x210000, 0x100000 +factory, app, factory, 0x10000, 0x160000, +main.avm, data, phy, 0x170000, 0x290000, diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c index 099164dd89..eedbe060b4 100644 --- a/src/platforms/generic_unix/lib/sys.c +++ b/src/platforms/generic_unix/lib/sys.c @@ -853,4 +853,24 @@ ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t si return (ModuleNativeEntryPoint) (native_code + offset); #endif } + +bool sys_get_cache_native_code(GlobalContext *global, Module *mod, uint16_t *version, ModuleNativeEntryPoint *entry_point, uint32_t *labels) +{ + UNUSED(global); + UNUSED(mod); + UNUSED(version); + UNUSED(entry_point); + UNUSED(labels); + return false; +} + +void sys_set_cache_native_code(GlobalContext *global, Module *mod, uint16_t version, ModuleNativeEntryPoint entry_point, uint32_t labels) +{ + UNUSED(global); + UNUSED(mod); + UNUSED(version); + UNUSED(entry_point); + UNUSED(labels); +} + #endif diff --git a/src/platforms/rp2/src/CMakeLists.txt b/src/platforms/rp2/src/CMakeLists.txt index c79433551e..f4ef67cd8f 100644 --- a/src/platforms/rp2/src/CMakeLists.txt +++ b/src/platforms/rp2/src/CMakeLists.txt @@ -55,6 +55,10 @@ else() target_compile_definitions(AtomVM PRIVATE PICO_STDIO_USB_CONNECT_WAIT_TIMEOUT_MS=20000) endif() +if (AVM_DISABLE_SMP) + target_compile_definitions(AtomVM PRIVATE PICO_FLASH_ASSUME_CORE1_SAFE) +endif() + if (AVM_WAIT_BOOTSEL_ON_EXIT) target_compile_definitions(AtomVM PRIVATE WAIT_BOOTSEL_ON_EXIT) endif() diff --git a/src/platforms/rp2/src/lib/CMakeLists.txt b/src/platforms/rp2/src/lib/CMakeLists.txt index 3cc69b56a3..b9e594c9df 100644 --- a/src/platforms/rp2/src/lib/CMakeLists.txt +++ b/src/platforms/rp2/src/lib/CMakeLists.txt @@ -31,7 +31,6 @@ set(HEADER_FILES set(SOURCE_FILES gpiodriver.c - jit_stream_flash.c networkdriver.c otp_crypto_platform.c platform_defaultatoms.c @@ -110,4 +109,16 @@ if (PICO_CYW43_SUPPORTED) 
define_if_function_exists(libAtomVM${PLATFORM_LIB_SUFFIX} gethostname "unistd.h" PRIVATE HAVE_GETHOSTNAME) endif() +if (NOT AVM_DISABLE_JIT) + target_sources( + libAtomVM${PLATFORM_LIB_SUFFIX} + PRIVATE + jit_stream_flash_platform.c + ../../../../libAtomVM/jit_stream_flash.c + jit_stream_flash_platform.h + ../../../../libAtomVM/jit_stream_flash.h + ) + target_link_options(libAtomVM${PLATFORM_LIB_SUFFIX} PUBLIC "SHELL:-Wl,-u -Wl,jit_stream_flash_get_nif") +endif() + target_link_options(libAtomVM${PLATFORM_LIB_SUFFIX} PUBLIC "SHELL:-Wl,-u -Wl,gpio_nif -Wl,-u -Wl,otp_crypto_nif") diff --git a/src/platforms/rp2/src/lib/jit_stream_flash_platform.c b/src/platforms/rp2/src/lib/jit_stream_flash_platform.c new file mode 100644 index 0000000000..e8a17c3537 --- /dev/null +++ b/src/platforms/rp2/src/lib/jit_stream_flash_platform.c @@ -0,0 +1,117 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#ifndef AVM_NO_JIT
+
+#include "jit_stream_flash.h"
+
+// NOTE(review): the header names of the four system includes below are missing in
+// this copy of the patch; restore them from the original. The code in this file
+// needs the declarations of flash_range_erase/flash_range_program,
+// flash_safe_execute and fprintf.
+#include
+#include
+#include
+#include
+
+#include "rp2_sys.h"
+
+// Helper structures for flash_safe_execute
+struct EraseParams
+{
+    uintptr_t addr;
+};
+
+struct WriteParams
+{
+    uintptr_t addr;
+    const uint8_t *data;
+    size_t len;
+};
+
+// Callback for flash_safe_execute: erases one sector. params->addr is an
+// XIP-mapped address; subtracting XIP_BASE gives the offset passed to the flash API.
+static void __not_in_flash_func(do_erase_sector)(void *params_ptr)
+{
+    struct EraseParams *params = (struct EraseParams *) params_ptr;
+    flash_range_erase(params->addr - XIP_BASE, FLASH_SECTOR_SIZE);
+}
+
+// Callback for flash_safe_execute: programs params->len bytes from params->data
+// at the flash offset derived from the XIP-mapped params->addr.
+static void __not_in_flash_func(do_write_page)(void *params_ptr)
+{
+    struct WriteParams *params = (struct WriteParams *) params_ptr;
+    flash_range_program(params->addr - XIP_BASE, params->data, params->len);
+}
+
+// No per-context state is kept on this platform: return a non-NULL dummy so that
+// callers' NULL checks treat initialization as successful.
+struct JSFlashPlatformContext *jit_stream_flash_platform_init(void)
+{
+    return (struct JSFlashPlatformContext *) 1;
+}
+
+// Nothing to release for the dummy context.
+void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *pf_ctx)
+{
+    UNUSED(pf_ctx);
+}
+
+// Erases the sector at addr through flash_safe_execute.
+// Returns false (after logging the error code) if flash_safe_execute does not return PICO_OK.
+bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr)
+{
+    UNUSED(pf_ctx);
+
+    struct EraseParams params = {
+        .addr = addr
+    };
+
+    // Pass the parameter block by address; the callback receives it as void *
+    int r = flash_safe_execute(do_erase_sector, &params, UINT32_MAX);
+    if (UNLIKELY(r != PICO_OK)) {
+        fprintf(stderr, "flash_safe_execute (erase) failed with error %d\n", r);
+        return false;
+    }
+
+    return true;
+}
+
+// Programs FLASH_PAGE_SIZE bytes from data at addr through flash_safe_execute.
+// Returns false (after logging the error code) if flash_safe_execute does not return PICO_OK.
+bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *pf_ctx, uintptr_t addr, const uint8_t *data)
+{
+    UNUSED(pf_ctx);
+
+    struct WriteParams params = {
+        .addr = addr,
+        .data = data,
+        .len = FLASH_PAGE_SIZE
+    };
+
+    // Pass the parameter block by address; the callback receives it as void *
+    int r = flash_safe_execute(do_write_page, &params, UINT32_MAX);
+    if (UNLIKELY(r != PICO_OK)) {
+        fprintf(stderr, "flash_safe_execute (write) failed with error %d\n", r);
+        return false;
+    }
+
+    return true;
+}
+
+uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr)
+{
+    // Set Thumb bit
+    return addr | 0x1;
+}
+
+uintptr_t
jit_stream_flash_platform_executable_to_ptr(uintptr_t addr) +{ + // Clear Thumb bit + return addr & ~0x1UL; +} + +REGISTER_NIF_COLLECTION(jit_stream_flash, jit_stream_flash_init, NULL, jit_stream_flash_get_nif) + +#endif // AVM_NO_JIT diff --git a/src/platforms/rp2/src/lib/jit_stream_flash_platform.h b/src/platforms/rp2/src/lib/jit_stream_flash_platform.h new file mode 100644 index 0000000000..3ee8c660bd --- /dev/null +++ b/src/platforms/rp2/src/lib/jit_stream_flash_platform.h @@ -0,0 +1,40 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef _JIT_STREAM_FLASH_PLATFORM_H_ +#define _JIT_STREAM_FLASH_PLATFORM_H_ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// RP2040 flash constants (W25Q16JV chip) +#define FLASH_SECTOR_SIZE 4096 +#define FLASH_PAGE_SIZE 256 + +#ifdef __cplusplus +} +#endif + +#endif // _JIT_STREAM_FLASH_PLATFORM_H_ diff --git a/src/platforms/rp2/src/lib/smp.c b/src/platforms/rp2/src/lib/smp.c index 946b066305..44251e8913 100644 --- a/src/platforms/rp2/src/lib/smp.c +++ b/src/platforms/rp2/src/lib/smp.c @@ -57,14 +57,17 @@ static void scheduler_core1_entry_point(void) { _Static_assert(sizeof(uintptr_t) == sizeof(uint32_t), "Expected pointers to be 32 bits"); uint32_t ctx_int = multicore_fifo_pop_blocking(); + multicore_lockout_victim_init(); int result = scheduler_entry_point((GlobalContext *) ctx_int); UNUSED(result); + multicore_lockout_victim_deinit(); } void smp_scheduler_start(GlobalContext *ctx) { multicore_launch_core1(scheduler_core1_entry_point); multicore_fifo_push_blocking((uint32_t) ctx); + multicore_lockout_victim_init(); } bool smp_is_main_thread(GlobalContext *glb) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 42ef857dda..c7652f4b65 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -24,12 +24,14 @@ project (tests) add_executable(test-erlang test.c) add_executable(test-enif test-enif.c) add_executable(test-heap test-heap.c) +add_executable(test-jit_stream_flash test-jit_stream_flash.c ../src/libAtomVM/jit_stream_flash.c) add_executable(test-mailbox test-mailbox.c) add_executable(test-structs test-structs.c) target_compile_features(test-erlang PUBLIC c_std_11) target_compile_features(test-enif PUBLIC c_std_11) target_compile_features(test-heap PUBLIC c_std_11) +target_compile_features(test-jit_stream_flash PUBLIC c_std_11) target_compile_features(test-mailbox PUBLIC c_std_11) target_compile_features(test-structs PUBLIC c_std_11) @@ 
-37,6 +39,7 @@ if(CMAKE_COMPILER_IS_GNUCC) target_compile_options(test-erlang PUBLIC -Wall -pedantic -Wextra -ggdb) target_compile_options(test-enif PUBLIC -Wall -pedantic -Wextra -ggdb) target_compile_options(test-heap PUBLIC -Wall -pedantic -Wextra -ggdb) + target_compile_options(test-jit_stream_flash PUBLIC -Wall -pedantic -Wextra -ggdb) target_compile_options(test-mailbox PUBLIC -Wall -pedantic -Wextra -ggdb) target_compile_options(test-structs PUBLIC -Wall -pedantic -Wextra -ggdb) endif() @@ -50,6 +53,7 @@ if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") target_link_libraries(test-erlang PRIVATE ${LIBRT}) target_link_libraries(test-enif PRIVATE ${LIBRT}) target_link_libraries(test-heap PRIVATE ${LIBRT}) + target_link_libraries(test-jit_stream_flash PRIVATE ${LIBRT}) target_link_libraries(test-mailbox PRIVATE ${LIBRT}) target_link_libraries(test-structs PRIVATE ${LIBRT}) else() @@ -63,6 +67,7 @@ if (MbedTLS_FOUND) target_link_libraries(test-erlang PRIVATE MbedTLS::mbedtls) target_link_libraries(test-enif PRIVATE MbedTLS::mbedtls) target_link_libraries(test-heap PRIVATE MbedTLS::mbedtls) + target_link_libraries(test-jit_stream_flash PRIVATE MbedTLS::mbedtls) target_link_libraries(test-mailbox PRIVATE MbedTLS::mbedtls) target_link_libraries(test-structs PRIVATE MbedTLS::mbedtls) endif() @@ -79,6 +84,7 @@ if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR target_include_directories(test-erlang PRIVATE ../src/platforms/generic_unix/lib) target_include_directories(test-enif PRIVATE ../src/platforms/generic_unix/lib) target_include_directories(test-heap PRIVATE ../src/platforms/generic_unix/lib) + target_include_directories(test-jit_stream_flash PRIVATE ../src/platforms/generic_unix/lib) target_include_directories(test-mailbox PRIVATE ../src/platforms/generic_unix/lib) target_include_directories(test-structs PRIVATE ../src/platforms/generic_unix/lib) else() @@ -88,11 +94,15 @@ endif() target_include_directories(test-erlang PRIVATE ../src/libAtomVM) 
target_include_directories(test-enif PRIVATE ../src/libAtomVM) target_include_directories(test-heap PRIVATE ../src/libAtomVM) +target_include_directories(test-jit_stream_flash PRIVATE ../src/libAtomVM ${CMAKE_CURRENT_SOURCE_DIR}) target_include_directories(test-mailbox PRIVATE ../src/libAtomVM) target_include_directories(test-structs PRIVATE ../src/libAtomVM) target_link_libraries(test-erlang PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) target_link_libraries(test-enif PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) target_link_libraries(test-heap PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) +# test-jit_stream_flash includes jit_stream_flash.c and provides its own mock platform implementation +target_compile_definitions(test-jit_stream_flash PRIVATE TEST_JIT_STREAM_FLASH) +target_link_libraries(test-jit_stream_flash PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) target_link_libraries(test-mailbox PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) target_link_libraries(test-structs PRIVATE libAtomVM libAtomVM${PLATFORM_LIB_SUFFIX}) @@ -120,11 +130,13 @@ if (COVERAGE) append_coverage_compiler_flags_to_target(test-erlang) append_coverage_compiler_flags_to_target(test-enif) append_coverage_compiler_flags_to_target(test-heap) + append_coverage_compiler_flags_to_target(test-jit_stream_flash) append_coverage_compiler_flags_to_target(test-mailbox) append_coverage_compiler_flags_to_target(test-structs) append_coverage_linker_flags_to_target(test-erlang) append_coverage_linker_flags_to_target(test-enif) append_coverage_linker_flags_to_target(test-heap) + append_coverage_linker_flags_to_target(test-jit_stream_flash) append_coverage_linker_flags_to_target(test-mailbox) append_coverage_linker_flags_to_target(test-structs) if (CMAKE_COMPILER_IS_GNUCC) diff --git a/src/platforms/rp2/src/lib/jit_stream_flash.c b/tests/jit_stream_flash_platform.h similarity index 69% rename from src/platforms/rp2/src/lib/jit_stream_flash.c rename to 
tests/jit_stream_flash_platform.h index 77dfcca908..b9e39dd36b 100644 --- a/src/platforms/rp2/src/lib/jit_stream_flash.c +++ b/tests/jit_stream_flash_platform.h @@ -18,17 +18,23 @@ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later */ -#ifndef AVM_NO_JIT +#ifndef _JIT_STREAM_FLASH_PLATFORM_H_ +#define _JIT_STREAM_FLASH_PLATFORM_H_ -#include "context.h" -#include "jit.h" -#include "term.h" +#include +#include +#include -ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) -{ - UNUSED(ctx); - UNUSED(jit_stream); - return NULL; -} +#ifdef __cplusplus +extern "C" { +#endif + +// Host test flash constants +#define FLASH_SECTOR_SIZE 4096 +#define FLASH_PAGE_SIZE 256 +#ifdef __cplusplus +} #endif + +#endif // _JIT_STREAM_FLASH_PLATFORM_H_ diff --git a/tests/test-jit_stream_flash.c b/tests/test-jit_stream_flash.c new file mode 100644 index 0000000000..d35b565584 --- /dev/null +++ b/tests/test-jit_stream_flash.c @@ -0,0 +1,858 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#include +#include +#include +#include +#include +#include + +#include "avmpack.h" +#include "context.h" +#include "globalcontext.h" +#include "jit_stream_flash.h" +#include "jit_stream_flash_platform.h" +#include "scheduler.h" +#include "synclist.h" +#include "term.h" +#include "utils.h" + +// Mock flash memory - simulate 64KB of flash +#define MOCK_FLASH_SIZE (64 * 1024) +// Align to sector boundary for proper flash simulation +static uint8_t mock_flash[MOCK_FLASH_SIZE] __attribute__((aligned(FLASH_SECTOR_SIZE))); + +// JIT entry header (copied from jit_stream_flash.c for testing) +struct JITEntry +{ + uint16_t magic; + uint16_t version; + uint32_t code; + uint32_t labels; + uint32_t size; +}; + +// CRC32 for verification (copied from jit_stream_flash.c) +static uint32_t crc32(const uint8_t *data, size_t len) +{ + uint32_t crc = 0xFFFFFFFF; + for (size_t i = 0; i < len; i++) { + crc ^= data[i]; + for (int j = 0; j < 8; j++) { + crc = (crc >> 1) ^ (0xEDB88320 & -(crc & 1)); + } + } + return ~crc; +} + +// Platform context (opaque) +struct JSFlashPlatformContext +{ + uintptr_t base_addr; +}; + +// Forward declarations of mock platform functions +struct JSFlashPlatformContext *jit_stream_flash_platform_init(void); +void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *ctx); +bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *ctx, uintptr_t addr); +bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, uintptr_t addr, const uint8_t *data); +uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr); + +// Mock platform implementation +struct JSFlashPlatformContext *jit_stream_flash_platform_init(void) +{ + struct JSFlashPlatformContext *ctx = malloc(sizeof(struct JSFlashPlatformContext)); + if (!ctx) { + return NULL; + } + + // DO NOT erase flash here - it should persist across multiple stream creations + // Flash 
initialization happens once at test startup + + ctx->base_addr = (uintptr_t) mock_flash; + return ctx; +} + +void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *ctx) +{ + free(ctx); +} + +bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *ctx, uintptr_t addr) +{ + assert(ctx); + + // Check alignment + if ((addr - ctx->base_addr) % FLASH_SECTOR_SIZE != 0) { + fprintf(stderr, "Erase address 0x%lx not sector-aligned\n", (unsigned long) addr); + return false; + } + + size_t offset = addr - ctx->base_addr; + if (offset >= MOCK_FLASH_SIZE) { + fprintf(stderr, "Erase address 0x%lx out of bounds\n", (unsigned long) addr); + return false; + } + + // Erase the sector + memset(&mock_flash[offset], 0xFF, FLASH_SECTOR_SIZE); + + return true; +} + +bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, uintptr_t addr, const uint8_t *data) +{ + assert(ctx); + + // Check alignment + if ((addr - ctx->base_addr) % FLASH_PAGE_SIZE != 0) { + fprintf(stderr, "Write address 0x%lx not page-aligned (base_addr=0x%lx, offset=0x%lx)\n", + (unsigned long) addr, (unsigned long) ctx->base_addr, + (unsigned long) (addr - ctx->base_addr)); + return false; + } + + size_t offset = addr - ctx->base_addr; + if (offset + FLASH_PAGE_SIZE > MOCK_FLASH_SIZE) { + fprintf(stderr, "Write at offset 0x%zx would exceed flash bounds\n", offset); + return false; + } + + // Validate write - flash can only transition bits from 1→0 without erase + for (size_t i = 0; i < FLASH_PAGE_SIZE; i++) { + uint8_t current = mock_flash[offset + i]; + uint8_t new_val = data[i]; + + // Check if we're trying to set any bits from 0→1 + if ((~current & new_val) != 0) { + fprintf(stderr, "FLASH VALIDATION ERROR at offset 0x%zx:\n", offset + i); + fprintf(stderr, " Attempting to set bits 0→1 without erase\n"); + fprintf(stderr, " Current: 0x%02x, New: 0x%02x, Invalid bits: 0x%02x\n", + current, new_val, ~current & new_val); + return false; + } + } + + // Write the page 
+ memcpy(&mock_flash[offset], data, FLASH_PAGE_SIZE); + + return true; +} + +uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr) +{ + // For host testing, no conversion needed + return addr; +} + +uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr) +{ + // For host testing, no conversion needed + return addr; +} + +// Create a minimal AVM pack for testing +static uint8_t create_minimal_avmpack(void) +{ + // Create a minimal AVM pack with an "end" section + uint8_t *pack = mock_flash + 0x100; // Place pack at offset 0x100 + + // AVM Pack header: "#!/usr/bin/env AtomVM\n" (23 bytes) + padding to 24 bytes + const char header_str[] = "#!/usr/bin/env AtomVM\n"; + memcpy(pack, header_str, 23); + pack[23] = 0; // Padding to align to 4 bytes + + // Section header for "end" section + uint8_t *section = pack + 24; + uint32_t *sec_header = (uint32_t *) section; + + // Section format: size (4) + flags (4) + reserved (4) + name (null-terminated) + // Write size in big-endian (total section size including header) + uint32_t section_size = 4 + 4 + 4 + 4; // size + flags + reserved + "end\0" + sec_header[0] = __builtin_bswap32(section_size); + + // Write flags in big-endian + uint32_t flags = END_OF_FILE; + sec_header[1] = __builtin_bswap32(flags); + + // Write reserved field (seems to be 0) + sec_header[2] = 0; + + // Write null-terminated name starting at offset 12 + memcpy(section + 12, "end", 4); // includes null terminator + + return 0; +} + +// Register AVM pack with global context +static void register_test_avmpack(GlobalContext *glb) +{ + create_minimal_avmpack(); + + // Create AVMPackData + struct ConstAVMPack *pack = malloc(sizeof(struct ConstAVMPack)); + avmpack_data_init(&pack->base, &const_avm_pack_info); + pack->base.data = mock_flash + 0x100; + pack->base.in_use = true; + + // Add to global context's avmpack list + synclist_append(&glb->avmpack_data, &pack->base.avmpack_head); +} + +// Test helper: create binary term with proper 
GC rooting +static term make_binary_rooted(Context *ctx, const uint8_t *data, size_t len, term *roots, int num_roots) +{ + if (UNLIKELY(memory_ensure_free_with_roots(ctx, term_binary_heap_size(len), num_roots, roots, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { + return term_invalid_term(); + } + return term_from_literal_binary(data, len, &ctx->heap, ctx->global); +} + +// Test helper: get NIF function +typedef term (*nif_function)(Context *ctx, int argc, term argv[]); + +static nif_function get_nif(const char *name) +{ + const struct Nif *nif = jit_stream_flash_get_nif(name); + if (!nif || nif->base.type != NIFFunctionType) { + return NULL; + } + return nif->nif_ptr; +} + +// Test 1: Basic append and flush +void test_basic_append_flush(void) +{ + fprintf(stderr, "\n=== Test: Basic Append and Flush ===\n"); + + // Reset flash for this test + memset(mock_flash, 0x00, MOCK_FLASH_SIZE); + memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM + + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + + register_test_avmpack(glb); + jit_stream_flash_init(glb); + + nif_function new_nif = get_nif("jit_stream_flash:new/1"); + nif_function append_nif = get_nif("jit_stream_flash:append/2"); + nif_function flush_nif = get_nif("jit_stream_flash:flush/1"); + + assert(new_nif != NULL); + assert(append_nif != NULL); + assert(flush_nif != NULL); + + // Create stream + term argv[3]; + argv[0] = term_from_int(10); // label count + term stream = new_nif(ctx, 1, argv); + assert(term_is_binary(stream)); // Resource is a binary + + // Append some data - root the stream during binary allocation + uint8_t data[100]; + memset(data, 0xAA, sizeof(data)); + argv[0] = stream; + argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); // Root argv[0] (stream) + stream = append_nif(ctx, 2, argv); // Update stream in case GC moved it + assert(stream == argv[0]); // Should return the stream + + // Flush + argv[0] = stream; + stream = flush_nif(ctx, 1, 
argv); // Update stream + assert(stream == argv[0]); + + scheduler_terminate(ctx); + globalcontext_destroy(glb); + + fprintf(stderr, "PASS: Basic append and flush\n"); +} + +// Test 2: Multiple appends crossing page boundaries +void test_multiple_appends(void) +{ + fprintf(stderr, "\n=== Test: Multiple Appends Crossing Pages ===\n"); + + // Reset flash for this test + memset(mock_flash, 0x00, MOCK_FLASH_SIZE); + memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM + + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + + register_test_avmpack(glb); + jit_stream_flash_init(glb); + + nif_function new_nif = get_nif("jit_stream_flash:new/1"); + nif_function append_nif = get_nif("jit_stream_flash:append/2"); + nif_function flush_nif = get_nif("jit_stream_flash:flush/1"); + + // Create stream + term argv[3]; + argv[0] = term_from_int(10); + term stream = new_nif(ctx, 1, argv); + + // Append multiple chunks to cross page boundaries + for (int i = 0; i < 10; i++) { + uint8_t data[100]; + memset(data, 0xA0 + i, sizeof(data)); + argv[0] = stream; + argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); + stream = append_nif(ctx, 2, argv); + argv[0] = stream; // Update for next iteration + } + + // Flush + argv[0] = stream; + flush_nif(ctx, 1, argv); + + scheduler_terminate(ctx); + globalcontext_destroy(glb); + + fprintf(stderr, "PASS: Multiple appends crossing pages\n"); +} + +// Test 3: Replace operation +void test_replace(void) +{ + fprintf(stderr, "\n=== Test: Replace Operation ===\n"); + + // Reset flash for this test + memset(mock_flash, 0x00, MOCK_FLASH_SIZE); + memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM + + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + + register_test_avmpack(glb); + jit_stream_flash_init(glb); + + nif_function new_nif = get_nif("jit_stream_flash:new/1"); + nif_function append_nif = get_nif("jit_stream_flash:append/2"); + 
nif_function replace_nif = get_nif("jit_stream_flash:replace/3"); + nif_function flush_nif = get_nif("jit_stream_flash:flush/1"); + + // Create stream + term argv[3]; + argv[0] = term_from_int(10); + term stream = new_nif(ctx, 1, argv); + + // Append initial data + uint8_t data[200]; + memset(data, 0xAA, sizeof(data)); + argv[0] = stream; + argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); + stream = append_nif(ctx, 2, argv); // Update stream + + // Replace some bytes in the middle + uint8_t replace_data[] = { 0x11, 0x22, 0x33, 0x44 }; + argv[0] = stream; + argv[1] = term_from_int(50); // offset + argv[2] = make_binary_rooted(ctx, replace_data, sizeof(replace_data), &argv[0], 1); + stream = replace_nif(ctx, 3, argv); // Update stream + + // Flush + argv[0] = stream; + stream = flush_nif(ctx, 1, argv); // Update stream + + scheduler_terminate(ctx); + globalcontext_destroy(glb); + + fprintf(stderr, "PASS: Replace operation\n"); +} + +// Test 4: Second module bug scenario - this is the critical test! 
+void test_second_module_bug(void) +{ + fprintf(stderr, "\n=== Test: Second Module Bug Scenario (THE ACTUAL BUG) ===\n"); + + // Reset flash for this test + memset(mock_flash, 0x00, MOCK_FLASH_SIZE); + memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM + + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + + register_test_avmpack(glb); + jit_stream_flash_init(glb); + + nif_function new_nif = get_nif("jit_stream_flash:new/1"); + nif_function append_nif = get_nif("jit_stream_flash:append/2"); + nif_function flush_nif = get_nif("jit_stream_flash:flush/1"); + + // Simulate first module compilation - fill most of first sector + fprintf(stderr, "Simulating first module compilation...\n"); + term argv[3]; + argv[0] = term_from_int(100); + term stream1 = new_nif(ctx, 1, argv); + + // Write 3.5KB of code (leaves 0.5KB in first sector) + for (int i = 0; i < 35; i++) { + uint8_t data[100]; + memset(data, 0xA0 + (i % 16), sizeof(data)); + argv[0] = stream1; + argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); + stream1 = append_nif(ctx, 2, argv); // Update stream1 + } + + argv[0] = stream1; + + stream1 = flush_nif(ctx, 1, argv); // Update stream1 + + fprintf(stderr, "First module compiled and flushed\n"); + + // Finalize the first module to mark it as valid and prepare for the second + ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1); + Module fake_mod1; + fake_mod1.code = (CodeChunk *) 0x12345678; // Fake code pointer for testing + + globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 100); + + // Now simulate second module - this should trigger the bug + // The bug was: when creating a new stream, if we're in a new sector + // that hasn't been erased, we need to erase it before writing + fprintf(stderr, "\nSimulating second module compilation...\n"); + argv[0] = term_from_int(50); + term stream2 = new_nif(ctx, 1, argv); + + // Append data - this will cross into next sector 
+ for (int i = 0; i < 20; i++) { + uint8_t data[100]; + memset(data, 0xB0 + (i % 16), sizeof(data)); + argv[0] = stream2; + argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); + stream2 = append_nif(ctx, 2, argv); // Update stream2 + } + + argv[0] = stream2; + stream2 = flush_nif(ctx, 1, argv); // Update stream2 + + fprintf(stderr, "Second module compiled and flushed successfully!\n"); + + // Finalize the second module + ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2); + Module fake_mod2; + fake_mod2.code = (CodeChunk *) 0x87654321; // Fake code pointer for testing + globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 50); + + scheduler_terminate(ctx); + globalcontext_destroy(glb); + + fprintf(stderr, "PASS: Second module bug scenario - bug is FIXED!\n"); +} + +void test_magic_0xffff_but_garbage_bug(void) +{ + fprintf(stderr, "\n=== Test: Magic is 0xFFFF but Sector Has Garbage ===\n"); + + // Simulate ESP32 scenario where first JIT entry is at start of sector + // and magic happens to be 0xFFFF but rest has garbage + memset(mock_flash, 0x00, MOCK_FLASH_SIZE); + memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first sector with AVM + + // Set magic to 0xFFFF at start of sector 1, but rest is garbage (0x97) + uint16_t *magic_ptr = (uint16_t *) (mock_flash + 0x1000); + *magic_ptr = 0xFFFF; + // Fill rest of sector with garbage + for (size_t i = 2; i < FLASH_SECTOR_SIZE; i++) { + mock_flash[0x1000 + i] = 0x97; + } + + fprintf(stderr, "Sector 1: magic=0xFFFF at offset 0, but rest has garbage (0x97)\n"); + + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + + register_test_avmpack(glb); + jit_stream_flash_init(glb); + + nif_function new_nif = get_nif("jit_stream_flash:new/1"); + nif_function append_nif = get_nif("jit_stream_flash:append/2"); + nif_function flush_nif = get_nif("jit_stream_flash:flush/1"); + + // Compile a small module - should detect garbage and erase + fprintf(stderr, 
"Compiling module (should detect garbage despite magic=0xFFFF)...\n"); + term argv[3]; + argv[0] = term_from_int(100); + term stream1 = new_nif(ctx, 1, argv); + + // Append some data + uint8_t data[100]; + memset(data, 0xAA, sizeof(data)); + argv[0] = stream1; + argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); + stream1 = append_nif(ctx, 2, argv); + + argv[0] = stream1; + stream1 = flush_nif(ctx, 1, argv); + fprintf(stderr, "Module compiled successfully!\n"); + + scheduler_terminate(ctx); + globalcontext_destroy(glb); + + fprintf(stderr, "PASS: Magic 0xFFFF but garbage test\n"); +} + +void test_garbage_flash_bug(void) +{ + fprintf(stderr, "\n=== Test: Garbage Flash Bug - JIT Sectors Not Erased After AVM Flash ===\n"); + + // Reset flash for this test + memset(mock_flash, 0x00, MOCK_FLASH_SIZE); + memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM + + fprintf(stderr, "Flash state: Sector 0 erased (0xFF), sectors 1+ have garbage (0x00)\n"); + + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + + register_test_avmpack(glb); + jit_stream_flash_init(glb); + + nif_function new_nif = get_nif("jit_stream_flash:new/1"); + nif_function append_nif = get_nif("jit_stream_flash:append/2"); + nif_function flush_nif = get_nif("jit_stream_flash:flush/1"); + + // Compile first module that spans two sectors (like benchmark: 8254 bytes) + fprintf(stderr, "Compiling first module spanning sectors 1-2 (8254 bytes)...\n"); + term argv[3]; + argv[0] = term_from_int(100); + term stream1 = new_nif(ctx, 1, argv); + + // Write 82 blocks of 100 bytes = 8200 bytes + 16 byte header = 8216 bytes + for (int i = 0; i < 82; i++) { + uint8_t data[100]; + memset(data, 0xAA, sizeof(data)); + argv[0] = stream1; + argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); + stream1 = append_nif(ctx, 2, argv); + } + + argv[0] = stream1; + stream1 = flush_nif(ctx, 1, argv); + fprintf(stderr, "First module compiled and flushed\n"); 
+ + ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1); + Module fake_mod1; + fake_mod1.code = (CodeChunk *) 0x12345678; + globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 100); + + scheduler_terminate(ctx); + globalcontext_destroy(glb); + + fprintf(stderr, "PASS: Garbage flash bug test\n"); +} + +void test_esp32_crash_bug(void) +{ + fprintf(stderr, "\n=== Test: ESP32 Crash Bug - Module Spanning Multiple Sectors ===\n"); + + // Reset flash for this test + memset(mock_flash, 0x00, MOCK_FLASH_SIZE); + memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); // first page with AVM + + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + + register_test_avmpack(glb); + jit_stream_flash_init(glb); + + nif_function new_nif = get_nif("jit_stream_flash:new/1"); + nif_function append_nif = get_nif("jit_stream_flash:append/2"); + nif_function flush_nif = get_nif("jit_stream_flash:flush/1"); + + // Simulate first module like ESP32 benchmark: ~8254 bytes + // This will span sectors 0, 1, and part of sector 2 + fprintf(stderr, "First module: writing ~8254 bytes (spans 3 sectors)...\n"); + term argv[3]; + argv[0] = term_from_int(100); + term stream1 = new_nif(ctx, 1, argv); + + // Write 82 blocks of 100 bytes = 8200 bytes + 16 byte header = 8216 bytes + for (int i = 0; i < 82; i++) { + uint8_t data[100]; + memset(data, 0xAA, sizeof(data)); + argv[0] = stream1; + argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); + stream1 = append_nif(ctx, 2, argv); + } + + argv[0] = stream1; + stream1 = flush_nif(ctx, 1, argv); + fprintf(stderr, "First module flushed\n"); + + ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1); + Module fake_mod1; + fake_mod1.code = (CodeChunk *) 0x12345678; + globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 100); + + // Second module like ESP32 pingpong: ~6690 bytes + // This will start in sector 2 (which already has tail of first module!) 
+ fprintf(stderr, "Second module: writing ~6690 bytes...\n"); + argv[0] = term_from_int(50); + term stream2 = new_nif(ctx, 1, argv); + + // Write 67 blocks of 100 bytes = 6700 bytes + for (int i = 0; i < 67; i++) { + uint8_t data[100]; + memset(data, 0xBB, sizeof(data)); + argv[0] = stream2; + argv[1] = make_binary_rooted(ctx, data, sizeof(data), &argv[0], 1); + stream2 = append_nif(ctx, 2, argv); + } + + argv[0] = stream2; + stream2 = flush_nif(ctx, 1, argv); + fprintf(stderr, "Second module flushed\n"); + + ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2); + Module fake_mod2; + fake_mod2.code = (CodeChunk *) 0x87654321; + globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 50); + + scheduler_terminate(ctx); + globalcontext_destroy(glb); + + fprintf(stderr, "PASS: ESP32 crash bug test\n"); +} + +// Test for the tail corruption bug: when first module extends into next sector, +// creating the second module should NOT erase the sector containing the first module's tail +static void test_tail_corruption_bug(void) +{ + fprintf(stderr, "\n=== Test: Tail Corruption Bug - Module Tail in Next Sector ===\n"); + + // Initialize flash: sector 0 erased (AVM), rest is garbage + memset(mock_flash, 0x00, MOCK_FLASH_SIZE); + memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); + + create_minimal_avmpack(); + + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + + register_test_avmpack(glb); + jit_stream_flash_init(glb); + + nif_function new_nif = get_nif("jit_stream_flash:new/1"); + nif_function append_nif = get_nif("jit_stream_flash:append/2"); + nif_function flush_nif = get_nif("jit_stream_flash:flush/1"); + + // Create first module that will extend into the next sector + // Module size: 8270 bytes (like benchmark on ESP32) + // Entry header: 16 bytes at 0x0 in sector 0x1000 + // Native code: 8254 bytes, extends from sector 0x1000 into sector 0x2000 + // Module ends at: 0x1000 + 16 + 8254 = 0x304E (in sector 0x2000) + 
// Next entry would be at: 0x3050 (also in sector 0x2000) + + term argv[3]; + argv[0] = term_from_int(10); + term stream1 = new_nif(ctx, 1, argv); + + // Append 8254 bytes of native code + uint8_t code1[8254]; + memset(code1, 0xAB, sizeof(code1)); + argv[0] = stream1; + argv[1] = make_binary_rooted(ctx, code1, sizeof(code1), &argv[0], 1); + stream1 = append_nif(ctx, 2, argv); + + argv[0] = stream1; + term stream1_flushed = flush_nif(ctx, 1, argv); + ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1_flushed); + + Module fake_mod1; + fake_mod1.code = (CodeChunk *) 0x12345678; + globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 30); + + // Compute CRC of first module for verification + uintptr_t data_addr1 = jit_stream_flash_platform_executable_to_ptr((uintptr_t) entry1); + struct JITEntry *jit_entry1 = (struct JITEntry *) (data_addr1 - sizeof(struct JITEntry)); + uint32_t crc1_after_finalize = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size); + fprintf(stderr, "First module: entry=%p size=%u CRC=0x%08x\n", + (void *) jit_entry1, (unsigned int) jit_entry1->size, (unsigned int) crc1_after_finalize); + + // Verify first module extends into sector 0x2000 + uintptr_t entry1_addr = (uintptr_t) jit_entry1; + uintptr_t entry1_end = entry1_addr + sizeof(struct JITEntry) + jit_entry1->size; + uintptr_t entry1_sector = entry1_addr & ~(FLASH_SECTOR_SIZE - 1); + uintptr_t entry1_end_sector = entry1_end & ~(FLASH_SECTOR_SIZE - 1); + fprintf(stderr, "First module: starts in sector 0x%lx, ends at 0x%lx (sector 0x%lx)\n", + (unsigned long) entry1_sector, (unsigned long) entry1_end, + (unsigned long) entry1_end_sector); + + if (entry1_sector == entry1_end_sector) { + fprintf(stderr, "FAIL: Test setup error - first module should span sectors\n"); + exit(1); + } + + // Create second module - THIS SHOULD NOT CORRUPT THE FIRST MODULE + argv[0] = term_from_int(10); + term stream2 = new_nif(ctx, 1, argv); + + uint8_t 
code2[100]; + memset(code2, 0xCD, sizeof(code2)); + argv[0] = stream2; + argv[1] = make_binary_rooted(ctx, code2, sizeof(code2), &argv[0], 1); + stream2 = append_nif(ctx, 2, argv); + + argv[0] = stream2; + term stream2_flushed = flush_nif(ctx, 1, argv); + ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2_flushed); + + Module fake_mod2; + fake_mod2.code = (CodeChunk *) 0x87654321; + globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 20); + + // Verify first module's CRC is still intact + uint32_t crc1_after_second = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size); + fprintf(stderr, "First module after second: CRC=0x%08x (expected 0x%08x)\n", + (unsigned int) crc1_after_second, (unsigned int) crc1_after_finalize); + + if (crc1_after_second != crc1_after_finalize) { + fprintf(stderr, "FAIL: First module corrupted after creating second module!\n"); + fprintf(stderr, "Expected CRC: 0x%08x, Got: 0x%08x\n", + (unsigned int) crc1_after_finalize, (unsigned int) crc1_after_second); + exit(1); + } + + scheduler_terminate(ctx); + globalcontext_destroy(glb); + + fprintf(stderr, "PASS: Tail corruption bug test\n"); +} + +// Test 9: Stale data cleanup after failed compilation +static void test_stale_data_cleanup(void) +{ + fprintf(stderr, "\n=== Test: Stale Data Cleanup After Failed Compilation ===\n"); + + // Initialize flash: sector 0 erased (AVM), rest is garbage + memset(mock_flash, 0x00, MOCK_FLASH_SIZE); + memset(&mock_flash[0], 0xFF, FLASH_SECTOR_SIZE); + + create_minimal_avmpack(); + + GlobalContext *glb = globalcontext_new(); + Context *ctx = context_new(glb); + + register_test_avmpack(glb); + jit_stream_flash_init(glb); + + nif_function new_nif = get_nif("jit_stream_flash:new/1"); + nif_function append_nif = get_nif("jit_stream_flash:append/2"); + nif_function flush_nif = get_nif("jit_stream_flash:flush/1"); + + // Create first module and finalize it (small, stays in first sector after AVM) + term 
argv[3]; + argv[0] = term_from_int(10); + term stream1 = new_nif(ctx, 1, argv); + + uint8_t code1[500]; + memset(code1, 0xAA, sizeof(code1)); + argv[0] = stream1; + argv[1] = make_binary_rooted(ctx, code1, sizeof(code1), &argv[0], 1); + stream1 = append_nif(ctx, 2, argv); + + argv[0] = stream1; + term stream1_flushed = flush_nif(ctx, 1, argv); + ModuleNativeEntryPoint entry1 = jit_stream_flash_entry_point(ctx, stream1_flushed); + + Module fake_mod1; + fake_mod1.code = (CodeChunk *) 0x12345678; + globalcontext_set_cache_native_code(glb, &fake_mod1, 1, entry1, 30); + + // Compute CRC of first module + uintptr_t data_addr1 = jit_stream_flash_platform_executable_to_ptr((uintptr_t) entry1); + struct JITEntry *jit_entry1 = (struct JITEntry *) (data_addr1 - sizeof(struct JITEntry)); + uint32_t crc1_original = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size); + fprintf(stderr, "First module: CRC=0x%08x, size=%u bytes\n", + (unsigned int) crc1_original, (unsigned int) jit_entry1->size); + + // Start creating a second module but DON'T finalize (simulate crash/OOM) + argv[0] = term_from_int(10); + term stream2_attempt1 = new_nif(ctx, 1, argv); + + uint8_t code2[200]; + memset(code2, 0xBB, sizeof(code2)); + argv[0] = stream2_attempt1; + argv[1] = make_binary_rooted(ctx, code2, sizeof(code2), &argv[0], 1); + stream2_attempt1 = append_nif(ctx, 2, argv); + + // DON'T flush or finalize - this simulates a failed compilation + // Now there's stale data in flash after the first module + + fprintf(stderr, "Simulated failed compilation - stale data left in flash\n"); + + // Try to create the second module again - should detect and clean up stale data + argv[0] = term_from_int(10); + term stream2_attempt2 = new_nif(ctx, 1, argv); + + memset(code2, 0xCC, sizeof(code2)); + argv[0] = stream2_attempt2; + argv[1] = make_binary_rooted(ctx, code2, sizeof(code2), &argv[0], 1); + stream2_attempt2 = append_nif(ctx, 2, argv); + + argv[0] = stream2_attempt2; + term 
stream2_flushed = flush_nif(ctx, 1, argv); + ModuleNativeEntryPoint entry2 = jit_stream_flash_entry_point(ctx, stream2_flushed); + + Module fake_mod2; + fake_mod2.code = (CodeChunk *) 0x87654321; + globalcontext_set_cache_native_code(glb, &fake_mod2, 1, entry2, 20); + + fprintf(stderr, "Second module successfully created after cleanup\n"); + + // Verify first module's CRC is still intact + uint32_t crc1_after_cleanup = crc32((const uint8_t *) jit_entry1, sizeof(struct JITEntry) + jit_entry1->size); + fprintf(stderr, "First module after cleanup: CRC=0x%08x (expected 0x%08x)\n", + (unsigned int) crc1_after_cleanup, (unsigned int) crc1_original); + + if (crc1_after_cleanup != crc1_original) { + fprintf(stderr, "FAIL: First module corrupted during stale data cleanup!\n"); + exit(1); + } + + scheduler_terminate(ctx); + globalcontext_destroy(glb); + + fprintf(stderr, "PASS: Stale data cleanup test\n"); +} + +int main(int argc, char **argv) +{ + UNUSED(argc); + UNUSED(argv); + + fprintf(stderr, "Starting jit_stream_flash tests...\n"); + + test_basic_append_flush(); + test_multiple_appends(); + test_replace(); + test_second_module_bug(); + test_magic_0xffff_but_garbage_bug(); + test_garbage_flash_bug(); + test_esp32_crash_bug(); + test_tail_corruption_bug(); + test_stale_data_cleanup(); + + fprintf(stderr, "\nAll tests passed!\n"); + return EXIT_SUCCESS; +} From c9b2bfaff19634075b34aa97b26f85cf579354ef Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 4 Oct 2025 22:42:54 +0200 Subject: [PATCH 08/28] riscv32: initial commit of asm module Signed-off-by: Paul Guyot --- libs/jit/src/CMakeLists.txt | 1 + libs/jit/src/jit_riscv32_asm.erl | 1000 ++++++++++++++++++++++ tests/libs/jit/CMakeLists.txt | 1 + tests/libs/jit/jit_riscv32_asm_tests.erl | 553 ++++++++++++ tests/libs/jit/jit_tests_common.erl | 7 +- tests/libs/jit/tests.erl | 1 + 6 files changed, 1562 insertions(+), 1 deletion(-) create mode 100644 libs/jit/src/jit_riscv32_asm.erl create mode 100644 
tests/libs/jit/jit_riscv32_asm_tests.erl diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 7aad016575..586223b4bc 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -31,6 +31,7 @@ set(ERLANG_MODULES jit_aarch64_asm jit_armv6m jit_armv6m_asm + jit_riscv32_asm jit_x86_64 jit_x86_64_asm ) diff --git a/libs/jit/src/jit_riscv32_asm.erl b/libs/jit/src/jit_riscv32_asm.erl new file mode 100644 index 0000000000..64d42c10ad --- /dev/null +++ b/libs/jit/src/jit_riscv32_asm.erl @@ -0,0 +1,1000 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_asm). 
+ +-export([ + % R-type arithmetic and logical instructions + add/3, + sub/3, + and_/3, + or_/2, + or_/3, + xor_/3, + sll/3, + srl/3, + sra/3, + slt/3, + sltu/3, + % I-type immediate instructions + addi/3, + andi/3, + ori/3, + xori/3, + slli/3, + srli/3, + srai/3, + slti/3, + sltiu/3, + % Load instructions + lw/2, + lw/3, + lh/2, + lh/3, + lhu/2, + lhu/3, + lb/2, + lb/3, + lbu/2, + lbu/3, + % Store instructions + sw/2, + sw/3, + sh/2, + sh/3, + sb/2, + sb/3, + % Branch instructions + beq/3, + bne/3, + blt/3, + bge/3, + bltu/3, + bgeu/3, + % Jump instructions + jal/2, + jalr/3, + jalr/2, + % Upper immediate instructions + lui/2, + auipc/2, + % Pseudo-instructions + nop/0, + li/2, + mv/2, + not_/2, + neg/2, + j/1, + jr/1, + ret/0, + call/2, + % M extension (multiply/divide) + mul/3, + % System instructions + bkpt/1, + ebreak/0 +]). + +-export_type([ + riscv_register/0 +]). + +%% RISC-V 32-bit (RV32I) Assembler +%% +%% This module provides an assembler for the RISC-V 32-bit instruction set. +%% It generates binary machine code for RISC-V instructions following the +%% RV32I base integer instruction set architecture. 
%%
%% RISC-V Register Set (32 registers):
%%   x0  (zero)  - Hardwired zero (reads as 0, writes ignored)
%%   x1  (ra)    - Return address
%%   x2  (sp)    - Stack pointer
%%   x3  (gp)    - Global pointer
%%   x4  (tp)    - Thread pointer
%%   x5  (t0)    - Temporary register 0
%%   x6  (t1)    - Temporary register 1
%%   x7  (t2)    - Temporary register 2
%%   x8  (s0/fp) - Saved register 0 / Frame pointer
%%   x9  (s1)    - Saved register 1
%%   x10 (a0)    - Function argument 0 / Return value 0
%%   x11 (a1)    - Function argument 1 / Return value 1
%%   x12 (a2)    - Function argument 2
%%   x13 (a3)    - Function argument 3
%%   x14 (a4)    - Function argument 4
%%   x15 (a5)    - Function argument 5
%%   x16 (a6)    - Function argument 6
%%   x17 (a7)    - Function argument 7
%%   x18 (s2)    - Saved register 2
%%   x19 (s3)    - Saved register 3
%%   x20 (s4)    - Saved register 4
%%   x21 (s5)    - Saved register 5
%%   x22 (s6)    - Saved register 6
%%   x23 (s7)    - Saved register 7
%%   x24 (s8)    - Saved register 8
%%   x25 (s9)    - Saved register 9
%%   x26 (s10)   - Saved register 10
%%   x27 (s11)   - Saved register 11
%%   x28 (t3)    - Temporary register 3
%%   x29 (t4)    - Temporary register 4
%%   x30 (t5)    - Temporary register 5
%%   x31 (t6)    - Temporary register 6
%%
%% RISC-V Calling Convention (ILP32):
%%   - Arguments: a0-a7 (x10-x17)
%%   - Return values: a0-a1 (x10-x11)
%%   - Caller-saved: ra, t0-t6, a0-a7
%%   - Callee-saved: s0-s11, sp
%%   - Stack grows downward
%%   - Stack must be 16-byte aligned at function call boundaries
%%
%% Instruction Encoding:
%%   All RV32I instructions are 32 bits (4 bytes).
%%   Each instruction word is stored in little-endian byte order.
%%
%% See: RISC-V Instruction Set Manual, Volume I: User-Level ISA
%% https://riscv.org/technical/specifications/
%% https://github.com/riscv/riscv-isa-manual/

%% ABI register names accepted by the instruction builders of this module.
%% `s0' and `fp' are two names for the same register (x8).
-type riscv_register() ::
    zero
    | ra
    | sp
    | gp
    | tp
    | t0
    | t1
    | t2
    | s0
    | fp
    | s1
    | a0
    | a1
    | a2
    | a3
    | a4
    | a5
    | a6
    | a7
    | s2
    | s3
    | s4
    | s5
    | s6
    | s7
    | s8
    | s9
    | s10
    | s11
    | t3
    | t4
    | t5
    | t6.

%%-----------------------------------------------------------------------------
%% Helper functions
%%-----------------------------------------------------------------------------

%% Convert register atoms to register numbers (0-31).
%% Any atom that is not a known ABI name fails with function_clause.
-spec reg_to_num(riscv_register()) -> 0..31.
% ABI names
reg_to_num(zero) -> 0;
reg_to_num(ra) -> 1;
reg_to_num(sp) -> 2;
reg_to_num(gp) -> 3;
reg_to_num(tp) -> 4;
reg_to_num(t0) -> 5;
reg_to_num(t1) -> 6;
reg_to_num(t2) -> 7;
reg_to_num(s0) -> 8;
reg_to_num(fp) -> 8;
reg_to_num(s1) -> 9;
reg_to_num(a0) -> 10;
reg_to_num(a1) -> 11;
reg_to_num(a2) -> 12;
reg_to_num(a3) -> 13;
reg_to_num(a4) -> 14;
reg_to_num(a5) -> 15;
reg_to_num(a6) -> 16;
reg_to_num(a7) -> 17;
reg_to_num(s2) -> 18;
reg_to_num(s3) -> 19;
reg_to_num(s4) -> 20;
reg_to_num(s5) -> 21;
reg_to_num(s6) -> 22;
reg_to_num(s7) -> 23;
reg_to_num(s8) -> 24;
reg_to_num(s9) -> 25;
reg_to_num(s10) -> 26;
reg_to_num(s11) -> 27;
reg_to_num(t3) -> 28;
reg_to_num(t4) -> 29;
reg_to_num(t5) -> 30;
reg_to_num(t6) -> 31.

%%-----------------------------------------------------------------------------
%% R-type instruction encoding
%%-----------------------------------------------------------------------------

%% R-type instruction format:
%%         funct7 (7) | rs2 (5) | rs1 (5) | funct3 (3) | rd (5) | opcode (7)
%%   Bits: 31-25        24-20     19-15     14-12        11-7     6-0
%%
%% Returns the instruction as a 4-byte binary in little-endian byte order.

-spec encode_r_type(
    Opcode :: 0..127,
    Rd :: riscv_register(),
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Funct7 :: 0..127
) -> binary().
encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7) ->
    RdNum = reg_to_num(Rd),
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    Instr =
        (Funct7 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (RdNum bsl 7) bor
            Opcode,
    % Emit the 32-bit word least-significant byte first.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% R-type arithmetic and logical instructions
%%-----------------------------------------------------------------------------

%% ADD - Add
%% rd = rs1 + rs2
-spec add(riscv_register(), riscv_register(), riscv_register()) -> binary().
add(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#00).

%% SUB - Subtract
%% rd = rs1 - rs2
-spec sub(riscv_register(), riscv_register(), riscv_register()) -> binary().
sub(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0100000
    encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20).

%% AND - Bitwise AND
%% rd = rs1 & rs2
-spec and_(riscv_register(), riscv_register(), riscv_register()) -> binary().
and_(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 111, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00).

%% OR - Bitwise OR
%% rd = rs1 | rs2
-spec or_(riscv_register(), riscv_register(), riscv_register()) -> binary().
or_(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 110, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#6, Rs1, Rs2, 16#00).

%% OR - Bitwise OR (in-place)
%% rd = rd | rs
-spec or_(riscv_register(), riscv_register()) -> binary().
or_(Rd, Rs) ->
    or_(Rd, Rd, Rs).

%% XOR - Bitwise XOR
%% rd = rs1 ^ rs2
-spec xor_(riscv_register(), riscv_register(), riscv_register()) -> binary().
xor_(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 100, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#4, Rs1, Rs2, 16#00).
%% SLL - Shift Left Logical
%% rd = rs1 << rs2[4:0]
-spec sll(riscv_register(), riscv_register(), riscv_register()) -> binary().
sll(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 001, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#1, Rs1, Rs2, 16#00).

%% SRL - Shift Right Logical
%% rd = rs1 >> rs2[4:0] (zero-extend)
-spec srl(riscv_register(), riscv_register(), riscv_register()) -> binary().
srl(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 101, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#5, Rs1, Rs2, 16#00).

%% SRA - Shift Right Arithmetic
%% rd = rs1 >> rs2[4:0] (sign-extend)
-spec sra(riscv_register(), riscv_register(), riscv_register()) -> binary().
sra(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 101, Funct7: 0100000
    encode_r_type(16#33, Rd, 16#5, Rs1, Rs2, 16#20).

%% SLT - Set Less Than
%% rd = (rs1 < rs2) ? 1 : 0 (signed)
-spec slt(riscv_register(), riscv_register(), riscv_register()) -> binary().
slt(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 010, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#2, Rs1, Rs2, 16#00).

%% SLTU - Set Less Than Unsigned
%% rd = (rs1 < rs2) ? 1 : 0 (unsigned)
-spec sltu(riscv_register(), riscv_register(), riscv_register()) -> binary().
sltu(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 011, Funct7: 0000000
    encode_r_type(16#33, Rd, 16#3, Rs1, Rs2, 16#00).

%%-----------------------------------------------------------------------------
%% I-type instruction encoding
%%-----------------------------------------------------------------------------

%% I-type instruction format:
%%         imm[11:0] (12) | rs1 (5) | funct3 (3) | rd (5) | opcode (7)
%%   Bits: 31-20            19-15     14-12        11-7     6-0
%%
%% Returns the instruction as a 4-byte binary in little-endian byte order.
%% Range checking of Imm is the responsibility of the callers.

-spec encode_i_type(
    Opcode :: 0..127,
    Rd :: riscv_register(),
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Imm :: integer()
) -> binary().
encode_i_type(Opcode, Rd, Funct3, Rs1, Imm) ->
    RdNum = reg_to_num(Rd),
    Rs1Num = reg_to_num(Rs1),
    % Keep only the low 12 bits: a negative Imm becomes its 12-bit
    % two's-complement bit pattern.
    ImmMasked = Imm band 16#FFF,
    Instr =
        (ImmMasked bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (RdNum bsl 7) bor
            Opcode,
    % Emit the 32-bit word least-significant byte first.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% I-type immediate arithmetic and logical instructions
%%-----------------------------------------------------------------------------

%% ADDI - Add Immediate
%% rd = rs1 + imm
%% Raises {immediate_out_of_range, Imm, -2048, 2047} outside the 12-bit range.
-spec addi(riscv_register(), riscv_register(), integer()) -> binary().
addi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
    % Opcode: 0010011 (0x13), Funct3: 000
    encode_i_type(16#13, Rd, 16#0, Rs1, Imm);
addi(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% ANDI - AND Immediate
%% rd = rs1 & imm
%% Raises {immediate_out_of_range, Imm, -2048, 2047} outside the 12-bit range.
-spec andi(riscv_register(), riscv_register(), integer()) -> binary().
andi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
    % Opcode: 0010011 (0x13), Funct3: 111
    encode_i_type(16#13, Rd, 16#7, Rs1, Imm);
andi(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% ORI - OR Immediate
%% rd = rs1 | imm
%% Raises {immediate_out_of_range, Imm, -2048, 2047} outside the 12-bit range.
-spec ori(riscv_register(), riscv_register(), integer()) -> binary().
ori(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
    % Opcode: 0010011 (0x13), Funct3: 110
    encode_i_type(16#13, Rd, 16#6, Rs1, Imm);
ori(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% XORI - XOR Immediate
%% rd = rs1 ^ imm
%% Raises {immediate_out_of_range, Imm, -2048, 2047} outside the 12-bit range.
-spec xori(riscv_register(), riscv_register(), integer()) -> binary().
xori(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
    % Opcode: 0010011 (0x13), Funct3: 100
    encode_i_type(16#13, Rd, 16#4, Rs1, Imm);
xori(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% SLTI - Set Less Than Immediate
%% rd = (rs1 < imm) ? 1 : 0 (signed)
%% Raises {immediate_out_of_range, Imm, -2048, 2047} outside the 12-bit range.
-spec slti(riscv_register(), riscv_register(), integer()) -> binary().
slti(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
    % Opcode: 0010011 (0x13), Funct3: 010
    encode_i_type(16#13, Rd, 16#2, Rs1, Imm);
slti(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%% SLTIU - Set Less Than Immediate Unsigned
%% rd = (rs1 < imm) ? 1 : 0 (unsigned)
%% Raises {immediate_out_of_range, Imm, -2048, 2047} outside the 12-bit range.
-spec sltiu(riscv_register(), riscv_register(), integer()) -> binary().
sltiu(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
    % Opcode: 0010011 (0x13), Funct3: 011
    encode_i_type(16#13, Rd, 16#3, Rs1, Imm);
sltiu(_Rd, _Rs1, Imm) ->
    error({immediate_out_of_range, Imm, -2048, 2047}).

%%-----------------------------------------------------------------------------
%% I-type immediate shift instructions
%%-----------------------------------------------------------------------------

%% SLLI - Shift Left Logical Immediate
%% rd = rs1 << shamt
%% Raises {shift_amount_out_of_range, Shamt, 0, 31} for any other shift amount.
-spec slli(riscv_register(), riscv_register(), 0..31) -> binary().
slli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
    % Opcode: 0010011 (0x13), Funct3: 001, Imm[11:5] = 0000000
    encode_i_type(16#13, Rd, 16#1, Rs1, Shamt);
slli(_Rd, _Rs1, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 31}).

%% SRLI - Shift Right Logical Immediate
%% rd = rs1 >> shamt (zero-extend)
%% Raises {shift_amount_out_of_range, Shamt, 0, 31} for any other shift amount.
-spec srli(riscv_register(), riscv_register(), 0..31) -> binary().
srli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
    % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0000000
    encode_i_type(16#13, Rd, 16#5, Rs1, Shamt);
srli(_Rd, _Rs1, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 31}).

%% SRAI - Shift Right Arithmetic Immediate
%% rd = rs1 >> shamt (sign-extend)
%% Raises {shift_amount_out_of_range, Shamt, 0, 31} for any other shift amount.
-spec srai(riscv_register(), riscv_register(), 0..31) -> binary().
srai(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
    % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0100000
    % The encoding uses bit 30 (Imm[10]) to distinguish SRAI from SRLI
    ImmWithBit30 = Shamt bor (1 bsl 10),
    encode_i_type(16#13, Rd, 16#5, Rs1, ImmWithBit30);
srai(_Rd, _Rs1, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 31}).

%%-----------------------------------------------------------------------------
%% Load instructions (I-type)
%%-----------------------------------------------------------------------------
%%
%% Each load has two forms:
%%   op(Rd, {Base, Offset}) or op(Rd, Base)  - assembler-like operand order
%%   op(Rd, Base, Offset)                    - explicit offset
%% Offsets outside -2048..2047 raise {offset_out_of_range, Offset, -2048, 2047}.

%% LW - Load Word
%% rd = mem[rs1 + offset] (32-bit)
-spec lw(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
lw(Rd, {Rs1, Offset}) ->
    lw(Rd, Rs1, Offset);
lw(Rd, Rs1) when is_atom(Rs1) ->
    lw(Rd, Rs1, 0).

-spec lw(riscv_register(), riscv_register(), integer()) -> binary().
lw(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 010
    encode_i_type(16#03, Rd, 16#2, Rs1, Offset);
lw(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% LH - Load Halfword (sign-extended)
%% rd = sign_extend(mem[rs1 + offset][15:0])
-spec lh(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
lh(Rd, {Rs1, Offset}) ->
    lh(Rd, Rs1, Offset);
lh(Rd, Rs1) when is_atom(Rs1) ->
    lh(Rd, Rs1, 0).

-spec lh(riscv_register(), riscv_register(), integer()) -> binary().
lh(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 001
    encode_i_type(16#03, Rd, 16#1, Rs1, Offset);
lh(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% LHU - Load Halfword Unsigned (zero-extended)
%% rd = zero_extend(mem[rs1 + offset][15:0])
-spec lhu(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
lhu(Rd, {Rs1, Offset}) ->
    lhu(Rd, Rs1, Offset);
lhu(Rd, Rs1) when is_atom(Rs1) ->
    lhu(Rd, Rs1, 0).

-spec lhu(riscv_register(), riscv_register(), integer()) -> binary().
lhu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 101
    encode_i_type(16#03, Rd, 16#5, Rs1, Offset);
lhu(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% LB - Load Byte (sign-extended)
%% rd = sign_extend(mem[rs1 + offset][7:0])
-spec lb(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
lb(Rd, {Rs1, Offset}) ->
    lb(Rd, Rs1, Offset);
lb(Rd, Rs1) when is_atom(Rs1) ->
    lb(Rd, Rs1, 0).

-spec lb(riscv_register(), riscv_register(), integer()) -> binary().
lb(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 000
    encode_i_type(16#03, Rd, 16#0, Rs1, Offset);
lb(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% LBU - Load Byte Unsigned (zero-extended)
%% rd = zero_extend(mem[rs1 + offset][7:0])
-spec lbu(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
lbu(Rd, {Rs1, Offset}) ->
    lbu(Rd, Rs1, Offset);
lbu(Rd, Rs1) when is_atom(Rs1) ->
    lbu(Rd, Rs1, 0).

-spec lbu(riscv_register(), riscv_register(), integer()) -> binary().
lbu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 100
    encode_i_type(16#03, Rd, 16#4, Rs1, Offset);
lbu(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).
%%-----------------------------------------------------------------------------
%% S-type instruction encoding (for stores)
%%-----------------------------------------------------------------------------

%% S-type instruction format:
%%         imm[11:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:0] (5) | opcode (7)
%%   Bits: 31-25           24-20     19-15     14-12        11-7           6-0
%%
%% Rs1 is the base address register, Rs2 the register whose value is stored.
%% Returns the instruction as a 4-byte binary in little-endian byte order.
%% Range checking of Imm is the responsibility of the callers.

-spec encode_s_type(
    Opcode :: 0..127,
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Imm :: integer()
) -> binary().
encode_s_type(Opcode, Funct3, Rs1, Rs2, Imm) ->
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    % Split immediate: imm[11:5] goes to bits 31-25, imm[4:0] goes to bits 11-7
    ImmMasked = Imm band 16#FFF,
    Imm11_5 = (ImmMasked bsr 5) band 16#7F,
    Imm4_0 = ImmMasked band 16#1F,
    Instr =
        (Imm11_5 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (Imm4_0 bsl 7) bor
            Opcode,
    % Emit the 32-bit word least-significant byte first.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Store instructions (S-type)
%%-----------------------------------------------------------------------------
%%
%% Note the differing operand order of the two forms:
%%   op(Source, {Base, Offset}) or op(Source, Base)  - assembler-like order
%%   op(Base, Source, Offset)                        - explicit offset
%% Offsets outside -2048..2047 raise {offset_out_of_range, Offset, -2048, 2047}.

%% SW - Store Word
%% mem[rs1 + offset] = rs2[31:0]
-spec sw(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
sw(Rs2, {Rs1, Offset}) ->
    sw(Rs1, Rs2, Offset);
sw(Rs2, Rs1) when is_atom(Rs1) ->
    sw(Rs1, Rs2, 0).

-spec sw(riscv_register(), riscv_register(), integer()) -> binary().
sw(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 010
    encode_s_type(16#23, 16#2, Rs1, Rs2, Offset);
sw(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% SH - Store Halfword
%% mem[rs1 + offset][15:0] = rs2[15:0]
-spec sh(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
sh(Rs2, {Rs1, Offset}) ->
    sh(Rs1, Rs2, Offset);
sh(Rs2, Rs1) when is_atom(Rs1) ->
    sh(Rs1, Rs2, 0).
%% SH - Store Halfword (explicit offset form)
%% Operands are (Base, Source, Offset): mem[Base + Offset][15:0] = Source[15:0]
%% Raises {offset_out_of_range, Offset, -2048, 2047} outside the 12-bit range.
-spec sh(riscv_register(), riscv_register(), integer()) -> binary().
sh(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 001
    encode_s_type(16#23, 16#1, Rs1, Rs2, Offset);
sh(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% SB - Store Byte
%% mem[rs1 + offset][7:0] = rs2[7:0]
%% Two-argument form takes (Source, {Base, Offset}) or (Source, Base).
-spec sb(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
sb(Rs2, {Rs1, Offset}) ->
    sb(Rs1, Rs2, Offset);
sb(Rs2, Rs1) when is_atom(Rs1) ->
    sb(Rs1, Rs2, 0).

%% Three-argument form takes (Base, Source, Offset).
%% Raises {offset_out_of_range, Offset, -2048, 2047} outside the 12-bit range.
-spec sb(riscv_register(), riscv_register(), integer()) -> binary().
sb(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 000
    encode_s_type(16#23, 16#0, Rs1, Rs2, Offset);
sb(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%%-----------------------------------------------------------------------------
%% B-type instruction encoding (for branches)
%%-----------------------------------------------------------------------------

%% B-type instruction format:
%%         imm[12|10:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:1|11] (5) | opcode (7)
%%   Bits: 31-25              24-20     19-15     14-12        11-7              6-0
%%
%% The immediate is split across the instruction and represents a signed offset
%% in multiples of 2 bytes (must be 2-byte aligned).
%% Range: -4096 to +4094 bytes (about ±4 KiB)
%%
%% Returns the instruction as a 4-byte binary in little-endian byte order.
%% Alignment and range are checked by the callers, not here; bit 0 of the
%% offset is not encoded.

-spec encode_b_type(
    Opcode :: 0..127,
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Offset :: integer()
) -> binary().
encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset) ->
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    % Offset must be 2-byte aligned and in range [-4096, 4094]
    % Extract bits: imm[12], imm[10:5], imm[4:1], imm[11]
    OffsetMasked = Offset band 16#1FFF,
    % imm[12] -> bit 31
    Imm12 = (OffsetMasked bsr 12) band 1,
    % imm[10:5] -> bits 30-25
    Imm10_5 = (OffsetMasked bsr 5) band 16#3F,
    % imm[4:1] -> bits 11-8
    Imm4_1 = (OffsetMasked bsr 1) band 16#F,
    % imm[11] -> bit 7
    Imm11 = (OffsetMasked bsr 11) band 1,
    Instr =
        (Imm12 bsl 31) bor
            (Imm10_5 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (Imm4_1 bsl 8) bor
            (Imm11 bsl 7) bor
            Opcode,
    % Emit the 32-bit word least-significant byte first.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Branch instructions (B-type)
%%-----------------------------------------------------------------------------
%%
%% Every branch raises {offset_not_aligned, Offset, 2} for an odd offset and
%% {offset_out_of_range, Offset, -4096, 4094} for an even offset out of range.

%% BEQ - Branch if Equal
%% if (rs1 == rs2) pc += offset
-spec beq(riscv_register(), riscv_register(), integer()) -> binary().
beq(Rs1, Rs2, Offset) when
    Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0
->
    % Opcode: 1100011 (0x63), Funct3: 000
    encode_b_type(16#63, 16#0, Rs1, Rs2, Offset);
beq(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
beq(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BNE - Branch if Not Equal
%% if (rs1 != rs2) pc += offset
-spec bne(riscv_register(), riscv_register(), integer()) -> binary().
bne(Rs1, Rs2, Offset) when
    Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0
->
    % Opcode: 1100011 (0x63), Funct3: 001
    encode_b_type(16#63, 16#1, Rs1, Rs2, Offset);
bne(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bne(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).
+ +%% BLT - Branch if Less Than (signed) +%% if (rs1 < rs2) pc += offset +-spec blt(riscv_register(), riscv_register(), integer()) -> binary(). +blt(Rs1, Rs2, Offset) when + Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 +-> + % Opcode: 1100011 (0x63), Funct3: 100 + encode_b_type(16#63, 16#4, Rs1, Rs2, Offset); +blt(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +blt(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -4096, 4094}). + +%% BGE - Branch if Greater or Equal (signed) +%% if (rs1 >= rs2) pc += offset +-spec bge(riscv_register(), riscv_register(), integer()) -> binary(). +bge(Rs1, Rs2, Offset) when + Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 +-> + % Opcode: 1100011 (0x63), Funct3: 101 + encode_b_type(16#63, 16#5, Rs1, Rs2, Offset); +bge(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +bge(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -4096, 4094}). + +%% BLTU - Branch if Less Than Unsigned +%% if (rs1 < rs2) pc += offset (unsigned) +-spec bltu(riscv_register(), riscv_register(), integer()) -> binary(). +bltu(Rs1, Rs2, Offset) when + Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 +-> + % Opcode: 1100011 (0x63), Funct3: 110 + encode_b_type(16#63, 16#6, Rs1, Rs2, Offset); +bltu(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 -> + error({offset_not_aligned, Offset, 2}); +bltu(_Rs1, _Rs2, Offset) -> + error({offset_out_of_range, Offset, -4096, 4094}). + +%% BGEU - Branch if Greater or Equal Unsigned +%% if (rs1 >= rs2) pc += offset (unsigned) +-spec bgeu(riscv_register(), riscv_register(), integer()) -> binary(). 
+bgeu(Rs1, Rs2, Offset) when
+    Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0
+->
+    % Opcode: 1100011 (0x63), Funct3: 111
+    encode_b_type(16#63, 16#7, Rs1, Rs2, Offset);
+bgeu(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
+    % Odd offsets are rejected before the range is considered
+    error({offset_not_aligned, Offset, 2});
+bgeu(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -4096, 4094}).
+
+%%-----------------------------------------------------------------------------
+%% J-type instruction encoding (for JAL)
+%%-----------------------------------------------------------------------------
+
+%% J-type instruction format (JAL):
+%% imm[20|10:1|11|19:12] (20) | rd (5) | opcode (7)
+%% Bits: 31-12                  11-7     6-0
+%%
+%% The immediate represents a signed, 2-byte aligned byte offset.
+%% Range accepted by jal/2: [-1048576, 1048574] (about ±1 MiB).
+%% This function does not validate Offset; it only keeps the low 21 bits.
+
+-spec encode_j_type(
+    Opcode :: 0..127, Rd :: riscv_register(), Offset :: integer()
+) -> binary().
+encode_j_type(Opcode, Rd, Offset) ->
+    RdNum = reg_to_num(Rd),
+    % Masking to 21 bits yields the two's-complement pattern of negative
+    % offsets, from which the scattered immediate fields are extracted.
+    % Extract immediate bits: imm[20], imm[10:1], imm[11], imm[19:12]
+    OffsetMasked = Offset band 16#1FFFFF,
+    % imm[20] -> bit 31
+    Imm20 = (OffsetMasked bsr 20) band 1,
+    % imm[10:1] -> bits 30-21
+    Imm10_1 = (OffsetMasked bsr 1) band 16#3FF,
+    % imm[11] -> bit 20
+    Imm11 = (OffsetMasked bsr 11) band 1,
+    % imm[19:12] -> bits 19-12
+    Imm19_12 = (OffsetMasked bsr 12) band 16#FF,
+    Instr =
+        (Imm20 bsl 31) bor
+            (Imm10_1 bsl 21) bor
+            (Imm11 bsl 20) bor
+            (Imm19_12 bsl 12) bor
+            (RdNum bsl 7) bor
+            Opcode,
+    % Emit the 32-bit instruction word in little-endian byte order.
+    <<Instr:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% U-type instruction encoding (for LUI, AUIPC)
+%%-----------------------------------------------------------------------------
+
+%% U-type instruction format:
+%% imm[31:12] (20) | rd (5) | opcode (7)
+%% Bits: 31-12       11-7     6-0
+
+-spec encode_u_type(
+    Opcode :: 0..127, Rd :: riscv_register(), Imm :: integer()
+) -> binary().
+encode_u_type(Opcode, Rd, Imm) ->
+    RdNum = reg_to_num(Rd),
+    % Imm is the full 32-bit value whose bits 31:12 go into the instruction
+    % (lui/2 and auipc/2 pass their 20-bit argument shifted left by 12).
+    % Upper 20 bits of immediate
+    ImmUpper = (Imm bsr 12) band 16#FFFFF,
+    Instr = (ImmUpper bsl 12) bor (RdNum bsl 7) bor Opcode,
+    % Emit the 32-bit instruction word in little-endian byte order.
+    <<Instr:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% Jump and link instructions
+%%-----------------------------------------------------------------------------
+
+%% JAL - Jump and Link
+%% rd = pc + 4; pc += offset
+%% Offset must be even and within [-1048576, 1048574]. An odd offset raises
+%% error({offset_not_aligned, Offset, 2}); any other rejected offset raises
+%% error({offset_out_of_range, Offset, -1048576, 1048574}).
+-spec jal(riscv_register(), integer()) -> binary().
+jal(Rd, Offset) when
+    Offset >= -1048576, Offset =< 1048574, (Offset rem 2) =:= 0
+->
+    % Opcode: 1101111 (0x6F)
+    encode_j_type(16#6F, Rd, Offset);
+jal(_Rd, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+jal(_Rd, Offset) ->
+    error({offset_out_of_range, Offset, -1048576, 1048574}).
+
+%% JALR - Jump and Link Register
+%% rd = pc + 4; pc = (rs1 + offset) & ~1
+%% Offset is a signed 12-bit immediate; values outside [-2048, 2047] raise
+%% error({offset_out_of_range, Offset, -2048, 2047}).
+-spec jalr(riscv_register(), riscv_register(), integer()) -> binary().
+jalr(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 1100111 (0x67), Funct3: 000
+    encode_i_type(16#67, Rd, 16#0, Rs1, Offset);
+jalr(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% JALR - Jump and Link Register (no offset)
+%% rd = pc + 4; pc = rs1 & ~1
+-spec jalr(riscv_register(), riscv_register()) -> binary().
+jalr(Rd, Rs1) ->
+    jalr(Rd, Rs1, 0).
+
+%%-----------------------------------------------------------------------------
+%% Upper immediate instructions
+%%-----------------------------------------------------------------------------
+
+%% LUI - Load Upper Immediate
+%% rd = imm << 12
+%% Imm is the signed 20-bit field value (not the shifted 32-bit value);
+%% values outside [-16#80000, 16#7FFFF] raise
+%% error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).
+-spec lui(riscv_register(), integer()) -> binary().
+lui(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF ->
+    % Opcode: 0110111 (0x37)
+    encode_u_type(16#37, Rd, Imm bsl 12);
+lui(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).
+
+%% AUIPC - Add Upper Immediate to PC
+%% rd = pc + (imm << 12)
+%% Imm is the signed 20-bit field value; values outside
+%% [-16#80000, 16#7FFFF] raise
+%% error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).
+-spec auipc(riscv_register(), integer()) -> binary().
+auipc(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF ->
+    % Opcode: 0010111 (0x17)
+    encode_u_type(16#17, Rd, Imm bsl 12);
+auipc(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).
+
+%%-----------------------------------------------------------------------------
+%% Pseudo-instructions
+%%-----------------------------------------------------------------------------
+%% These are convenience instructions that map to actual RV32I instructions
+
+%% NOP - No Operation
+%% Expands to: addi x0, x0, 0
+-spec nop() -> binary().
+nop() ->
+    addi(zero, zero, 0).
+
+%% LI - Load Immediate
+%% Load a 32-bit immediate value into a register
+%% For small immediates (-2048 to 2047): addi rd, x0, imm (4 bytes)
+%% For larger immediates: lui + addi sequence (8 bytes)
+%% Values outside [-16#80000000, 16#7FFFFFFF] raise
+%% error({immediate_out_of_range, Imm, -16#80000000, 16#7FFFFFFF}).
+-spec li(riscv_register(), integer()) -> binary().
+li(Rd, Imm) when Imm >= -2048, Imm =< 2047 ->
+    % Small immediate: addi rd, x0, imm
+    addi(Rd, zero, Imm);
+li(Rd, Imm) when Imm >= -16#80000000, Imm =< 16#7FFFFFFF ->
+    % Large immediate: lui + addi
+    % Split into upper 20 bits and lower 12 bits
+    % Need to account for sign extension of lower 12 bits
+    Lower = Imm band 16#FFF,
+    % If lower 12 bits has sign bit set, we need to add 1 to upper
+    UpperRaw =
+        if
+            Lower >= 16#800 ->
+                (Imm bsr 12) + 1;
+            true ->
+                Imm bsr 12
+        end,
+    % Mask to 20 bits first, then sign extend if needed
+    % (lui/2 only accepts the signed range -16#80000..16#7FFFF)
+    UpperMasked = UpperRaw band 16#FFFFF,
+    Upper =
+        if
+            UpperMasked band 16#80000 =/= 0 ->
+                % Bit 19 is set, so this is negative in 20-bit representation
+                % Sign extend from 20 bits
+                UpperMasked - 16#100000;
+            true ->
+                % Positive value
+                UpperMasked
+        end,
+    % Sign extend lower 12 bits
+    LowerSigned =
+        if
+            Lower >= 16#800 -> Lower - 16#1000;
+            true -> Lower
+        end,
+    LuiInstr = lui(Rd, Upper),
+    AddiInstr = addi(Rd, Rd, LowerSigned),
+    % lui first, then addi on the same register
+    <<LuiInstr/binary, AddiInstr/binary>>;
+li(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -16#80000000,
16#7FFFFFFF}).
+
+%% MV - Move (copy register)
+%% Expands to: addi rd, rs, 0
+-spec mv(riscv_register(), riscv_register()) -> binary().
+mv(Rd, Rs) ->
+    addi(Rd, Rs, 0).
+
+%% NOT - Bitwise NOT
+%% Expands to: xori rd, rs, -1
+-spec not_(riscv_register(), riscv_register()) -> binary().
+not_(Rd, Rs) ->
+    xori(Rd, Rs, -1).
+
+%% NEG - Negate (two's complement)
+%% Expands to: sub rd, x0, rs
+-spec neg(riscv_register(), riscv_register()) -> binary().
+neg(Rd, Rs) ->
+    sub(Rd, zero, Rs).
+
+%% J - Unconditional Jump
+%% Expands to: jal x0, offset
+%% Offset validation (alignment and range) is performed by jal/2.
+-spec j(integer()) -> binary().
+j(Offset) ->
+    jal(zero, Offset).
+
+%% JR - Jump Register
+%% Expands to: jalr x0, rs, 0
+-spec jr(riscv_register()) -> binary().
+jr(Rs) ->
+    jalr(zero, Rs, 0).
+
+%% RET - Return from subroutine
+%% Expands to: jalr x0, ra, 0
+-spec ret() -> binary().
+ret() ->
+    jalr(zero, ra, 0).
+
+%% CALL - Call function (far call using AUIPC + JALR)
+%% This is a two-instruction sequence for calling functions beyond JAL range
+%% Expands to: auipc rd, upper20; jalr ra, rd, lower12
+%% Rd is the scratch register that receives the pc-relative base address;
+%% the return address is always written to ra.
+%% Offset is a signed pc-relative byte offset and may be negative; values
+%% outside [-16#80000000, 16#7FFFFFFF] raise
+%% error({offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}).
+-spec call(riscv_register(), integer()) -> binary().
+call(Rd, Offset) when Offset >= -16#80000000, Offset =< 16#7FFFFFFF ->
+    % Split offset into upper 20 bits and lower 12 bits
+    Lower = Offset band 16#FFF,
+    % If lower 12 bits has sign bit set, we need to add 1 to upper
+    UpperMasked =
+        if
+            Lower >= 16#800 ->
+                ((Offset bsr 12) + 1) band 16#FFFFF;
+            true ->
+                (Offset bsr 12) band 16#FFFFF
+        end,
+    % auipc/2 only accepts the signed range -16#80000..16#7FFFF, so the
+    % masked 20-bit value must be sign extended (same approach as li/2).
+    % Without this, negative offsets were rejected with
+    % immediate_out_of_range.
+    Upper =
+        if
+            UpperMasked band 16#80000 =/= 0 ->
+                UpperMasked - 16#100000;
+            true ->
+                UpperMasked
+        end,
+    % Sign extend lower 12 bits
+    LowerSigned =
+        if
+            Lower >= 16#800 -> Lower - 16#1000;
+            true -> Lower
+        end,
+    AuipcInstr = auipc(Rd, Upper),
+    JalrInstr = jalr(ra, Rd, LowerSigned),
+    % auipc first, then jalr through the register it just set
+    <<AuipcInstr/binary, JalrInstr/binary>>;
+call(_Rd, Offset) ->
+    error({offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}).
+
+%% EBREAK - Environment Breakpoint
+%% Causes a breakpoint exception to be raised.
+%% This is the RISC-V equivalent of ARM's BKPT instruction.
+%% Encoding: 0x00100073
+-spec ebreak() -> binary().
+ebreak() -> + <<16#73, 16#00, 16#10, 16#00>>. + +%% BKPT - Breakpoint (for ARM compatibility) +%% In RISC-V, this is implemented as EBREAK. +%% The immediate parameter is ignored for compatibility with ARM. +-spec bkpt(integer()) -> binary(). +bkpt(_Imm) -> + ebreak(). + +%% MUL - Multiply (RV32M extension) +%% Multiplies rs1 by rs2 and places the lower 32 bits in rd +%% Format: mul rd, rs1, rs2 +%% Encoding: R-type with opcode=0x33, funct3=0x0, funct7=0x01 +-spec mul(riscv_register(), riscv_register(), riscv_register()) -> binary(). +mul(Rd, Rs1, Rs2) -> + % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000001 + encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#01). diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 26ab6b4ecc..9bc1c8c78b 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -30,6 +30,7 @@ set(ERLANG_MODULES jit_aarch64_asm_tests jit_armv6m_tests jit_armv6m_asm_tests + jit_riscv32_asm_tests jit_x86_64_tests jit_x86_64_asm_tests ) diff --git a/tests/libs/jit/jit_riscv32_asm_tests.erl b/tests/libs/jit/jit_riscv32_asm_tests.erl new file mode 100644 index 0000000000..94e4942db5 --- /dev/null +++ b/tests/libs/jit/jit_riscv32_asm_tests.erl @@ -0,0 +1,553 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_asm_tests). 
+ +-include_lib("eunit/include/eunit.hrl"). + +-define(_assertAsmEqual(Bin, Str, Value), + ?_assertEqual(jit_tests_common:asm(riscv32, Bin, Str), Value) +). + +%%----------------------------------------------------------------------------- +%% R-type arithmetic and logical instruction tests +%%----------------------------------------------------------------------------- + +add_test_() -> + [ + ?_assertAsmEqual( + <<16#00628533:32/little>>, "add a0, t0, t1", jit_riscv32_asm:add(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00a585b3:32/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0) + ), + ?_assertAsmEqual( + <<16#01e787b3:32/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5) + ) + ]. + +sub_test_() -> + [ + ?_assertAsmEqual( + <<16#40628533:32/little>>, "sub a0, t0, t1", jit_riscv32_asm:sub(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#40a585b3:32/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0) + ), + ?_assertAsmEqual( + <<16#41e787b3:32/little>>, "sub a5, a5, t5", jit_riscv32_asm:sub(a5, a5, t5) + ) + ]. + +and_test_() -> + [ + ?_assertAsmEqual( + <<16#0062f533:32/little>>, "and a0, t0, t1", jit_riscv32_asm:and_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5f5b3:32/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2) + ) + ]. + +or_test_() -> + [ + ?_assertAsmEqual( + <<16#0062e533:32/little>>, "or a0, t0, t1", jit_riscv32_asm:or_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5e5b3:32/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2) + ) + ]. + +xor_test_() -> + [ + ?_assertAsmEqual( + <<16#0062c533:32/little>>, "xor a0, t0, t1", jit_riscv32_asm:xor_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5c5b3:32/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2) + ) + ]. + +sll_test_() -> + [ + ?_assertAsmEqual( + <<16#00629533:32/little>>, "sll a0, t0, t1", jit_riscv32_asm:sll(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c59633:32/little>>, "sll a2, a1, a2", jit_riscv32_asm:sll(a2, a1, a2) + ) + ]. 
+ +srl_test_() -> + [ + ?_assertAsmEqual( + <<16#0062d533:32/little>>, "srl a0, t0, t1", jit_riscv32_asm:srl(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5d633:32/little>>, "srl a2, a1, a2", jit_riscv32_asm:srl(a2, a1, a2) + ) + ]. + +sra_test_() -> + [ + ?_assertAsmEqual( + <<16#4062d533:32/little>>, "sra a0, t0, t1", jit_riscv32_asm:sra(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#40c5d633:32/little>>, "sra a2, a1, a2", jit_riscv32_asm:sra(a2, a1, a2) + ) + ]. + +slt_test_() -> + [ + ?_assertAsmEqual( + <<16#0062a533:32/little>>, "slt a0, t0, t1", jit_riscv32_asm:slt(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5a633:32/little>>, "slt a2, a1, a2", jit_riscv32_asm:slt(a2, a1, a2) + ) + ]. + +sltu_test_() -> + [ + ?_assertAsmEqual( + <<16#0062b533:32/little>>, "sltu a0, t0, t1", jit_riscv32_asm:sltu(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#00c5b633:32/little>>, "sltu a2, a1, a2", jit_riscv32_asm:sltu(a2, a1, a2) + ) + ]. + +%%----------------------------------------------------------------------------- +%% I-type immediate instruction tests +%%----------------------------------------------------------------------------- + +addi_test_() -> + [ + ?_assertAsmEqual( + <<16#01428513:32/little>>, "addi a0, t0, 20", jit_riscv32_asm:addi(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#fff58593:32/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1) + ), + ?_assertAsmEqual( + <<16#7ff00513:32/little>>, "addi a0, zero, 2047", jit_riscv32_asm:addi(a0, zero, 2047) + ), + ?_assertAsmEqual( + <<16#80000593:32/little>>, "addi a1, zero, -2048", jit_riscv32_asm:addi(a1, zero, -2048) + ) + ]. + +andi_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2f513:32/little>>, "andi a0, t0, 255", jit_riscv32_asm:andi(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#00f5f593:32/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15) + ) + ]. 
+ +ori_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2e513:32/little>>, "ori a0, t0, 255", jit_riscv32_asm:ori(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#00f5e593:32/little>>, "ori a1, a1, 15", jit_riscv32_asm:ori(a1, a1, 15) + ) + ]. + +xori_test_() -> + [ + ?_assertAsmEqual( + <<16#0ff2c513:32/little>>, "xori a0, t0, 255", jit_riscv32_asm:xori(a0, t0, 255) + ), + ?_assertAsmEqual( + <<16#fff5c593:32/little>>, "xori a1, a1, -1", jit_riscv32_asm:xori(a1, a1, -1) + ) + ]. + +slli_test_() -> + [ + ?_assertAsmEqual( + <<16#00329513:32/little>>, "slli a0, t0, 3", jit_riscv32_asm:slli(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#01f59593:32/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31) + ), + ?_assertAsmEqual( + <<16#00051513:32/little>>, "slli a0, a0, 0", jit_riscv32_asm:slli(a0, a0, 0) + ) + ]. + +srli_test_() -> + [ + ?_assertAsmEqual( + <<16#0032d513:32/little>>, "srli a0, t0, 3", jit_riscv32_asm:srli(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#01f5d593:32/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31) + ) + ]. + +srai_test_() -> + [ + ?_assertAsmEqual( + <<16#4032d513:32/little>>, "srai a0, t0, 3", jit_riscv32_asm:srai(a0, t0, 3) + ), + ?_assertAsmEqual( + <<16#41f5d593:32/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31) + ) + ]. + +slti_test_() -> + [ + ?_assertAsmEqual( + <<16#0142a513:32/little>>, "slti a0, t0, 20", jit_riscv32_asm:slti(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#fff5a593:32/little>>, "slti a1, a1, -1", jit_riscv32_asm:slti(a1, a1, -1) + ) + ]. + +sltiu_test_() -> + [ + ?_assertAsmEqual( + <<16#0142b513:32/little>>, "sltiu a0, t0, 20", jit_riscv32_asm:sltiu(a0, t0, 20) + ), + ?_assertAsmEqual( + <<16#00153513:32/little>>, "sltiu a0, a0, 1", jit_riscv32_asm:sltiu(a0, a0, 1) + ) + ]. 
+ +%%----------------------------------------------------------------------------- +%% Load instruction tests +%%----------------------------------------------------------------------------- + +lw_test_() -> + [ + ?_assertAsmEqual(<<16#00052503:32/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)), + ?_assertAsmEqual(<<16#00052503:32/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)), + ?_assertAsmEqual(<<16#00452583:32/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)), + ?_assertAsmEqual( + <<16#ffc52503:32/little>>, "lw a0, -4(a0)", jit_riscv32_asm:lw(a0, a0, -4) + ), + ?_assertAsmEqual( + <<16#7ff52503:32/little>>, "lw a0, 2047(a0)", jit_riscv32_asm:lw(a0, a0, 2047) + ) + ]. + +lh_test_() -> + [ + ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0, 0)), + ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0)), + ?_assertAsmEqual(<<16#00251583:32/little>>, "lh a1, 2(a0)", jit_riscv32_asm:lh(a1, a0, 2)) + ]. + +lhu_test_() -> + [ + ?_assertAsmEqual( + <<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0, 0) + ), + ?_assertAsmEqual(<<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0)), + ?_assertAsmEqual(<<16#00255583:32/little>>, "lhu a1, 2(a0)", jit_riscv32_asm:lhu(a1, a0, 2)) + ]. + +lb_test_() -> + [ + ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0, 0)), + ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0)), + ?_assertAsmEqual(<<16#00150583:32/little>>, "lb a1, 1(a0)", jit_riscv32_asm:lb(a1, a0, 1)) + ]. + +lbu_test_() -> + [ + ?_assertAsmEqual( + <<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0, 0) + ), + ?_assertAsmEqual(<<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0)), + ?_assertAsmEqual(<<16#00154583:32/little>>, "lbu a1, 1(a0)", jit_riscv32_asm:lbu(a1, a0, 1)) + ]. 
+ +%%----------------------------------------------------------------------------- +%% Store instruction tests +%%----------------------------------------------------------------------------- + +sw_test_() -> + [ + ?_assertAsmEqual(<<16#00b52023:32/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)), + ?_assertAsmEqual(<<16#00b52023:32/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)), + ?_assertAsmEqual(<<16#00b52223:32/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)), + ?_assertAsmEqual(<<16#feb52e23:32/little>>, "sw a1, -4(a0)", jit_riscv32_asm:sw(a0, a1, -4)) + ]. + +sh_test_() -> + [ + ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a0, a1, 0)), + ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a1, a0)), + ?_assertAsmEqual(<<16#00b51123:32/little>>, "sh a1, 2(a0)", jit_riscv32_asm:sh(a0, a1, 2)) + ]. + +sb_test_() -> + [ + ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a0, a1, 0)), + ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a1, a0)), + ?_assertAsmEqual(<<16#00b500a3:32/little>>, "sb a1, 1(a0)", jit_riscv32_asm:sb(a0, a1, 1)) + ]. + +%%----------------------------------------------------------------------------- +%% Branch instruction tests +%%----------------------------------------------------------------------------- + +beq_test_() -> + [ + ?_assertAsmEqual( + <<16#00628463:32/little>>, "beq t0, t1, .+8", jit_riscv32_asm:beq(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb50ee3:32/little>>, "beq a0, a1, .-4", jit_riscv32_asm:beq(a0, a1, -4) + ), + ?_assertAsmEqual( + <<16#00050063:32/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0) + ) + ]. + +bne_test_() -> + [ + ?_assertAsmEqual( + <<16#00629463:32/little>>, "bne t0, t1, .+8", jit_riscv32_asm:bne(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb51ee3:32/little>>, "bne a0, a1, .-4", jit_riscv32_asm:bne(a0, a1, -4) + ) + ]. 
+ +blt_test_() -> + [ + ?_assertAsmEqual( + <<16#0062c463:32/little>>, "blt t0, t1, .+8", jit_riscv32_asm:blt(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb54ee3:32/little>>, "blt a0, a1, .-4", jit_riscv32_asm:blt(a0, a1, -4) + ) + ]. + +bge_test_() -> + [ + ?_assertAsmEqual( + <<16#0062d463:32/little>>, "bge t0, t1, .+8", jit_riscv32_asm:bge(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb55ee3:32/little>>, "bge a0, a1, .-4", jit_riscv32_asm:bge(a0, a1, -4) + ) + ]. + +bltu_test_() -> + [ + ?_assertAsmEqual( + <<16#0062e463:32/little>>, "bltu t0, t1, .+8", jit_riscv32_asm:bltu(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb56ee3:32/little>>, "bltu a0, a1, .-4", jit_riscv32_asm:bltu(a0, a1, -4) + ) + ]. + +bgeu_test_() -> + [ + ?_assertAsmEqual( + <<16#0062f463:32/little>>, "bgeu t0, t1, .+8", jit_riscv32_asm:bgeu(t0, t1, 8) + ), + ?_assertAsmEqual( + <<16#feb57ee3:32/little>>, "bgeu a0, a1, .-4", jit_riscv32_asm:bgeu(a0, a1, -4) + ) + ]. + +%%----------------------------------------------------------------------------- +%% Jump instruction tests +%%----------------------------------------------------------------------------- + +jal_test_() -> + [ + ?_assertAsmEqual( + <<16#008000ef:32/little>>, "jal .+8", jit_riscv32_asm:jal(ra, 8) + ), + ?_assertAsmEqual( + <<16#ffdff0ef:32/little>>, "jal .-4", jit_riscv32_asm:jal(ra, -4) + ), + ?_assertAsmEqual( + <<16#00000517:32/little, 16#000500e7:32/little>>, + "auipc a0, 0\njalr a0", + jit_riscv32_asm:call(a0, 0) + ), + ?_assertAsmEqual( + <<16#00002517:32/little, 16#800500e7:32/little>>, + "auipc a0, 0x2\njalr -2048(a0)", + jit_riscv32_asm:call(a0, 16#1800) + ) + ]. + +jalr_test_() -> + [ + ?_assertAsmEqual(<<16#000500e7:32/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0, 0)), + ?_assertAsmEqual(<<16#000500e7:32/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0)), + ?_assertAsmEqual(<<16#004500e7:32/little>>, "jalr 4(a0)", jit_riscv32_asm:jalr(ra, a0, 4)) + ]. 
+ +%%----------------------------------------------------------------------------- +%% Upper immediate instruction tests +%%----------------------------------------------------------------------------- + +lui_test_() -> + [ + ?_assertAsmEqual(<<16#000125b7:32/little>>, "lui a1, 18", jit_riscv32_asm:lui(a1, 18)), + ?_assertAsmEqual(<<16#00001537:32/little>>, "lui a0, 1", jit_riscv32_asm:lui(a0, 1)), + ?_assertAsmEqual(<<16#fffff5b7:32/little>>, "lui a1, 0xfffff", jit_riscv32_asm:lui(a1, -1)) + ]. + +auipc_test_() -> + [ + ?_assertAsmEqual(<<16#00012597:32/little>>, "auipc a1, 18", jit_riscv32_asm:auipc(a1, 18)), + ?_assertAsmEqual(<<16#00001517:32/little>>, "auipc a0, 1", jit_riscv32_asm:auipc(a0, 1)) + ]. + +%%----------------------------------------------------------------------------- +%% Pseudo-instruction tests +%%----------------------------------------------------------------------------- + +nop_test_() -> + [ + ?_assertAsmEqual(<<16#00000013:32/little>>, "nop", jit_riscv32_asm:nop()) + ]. + +li_small_test_() -> + [ + ?_assertAsmEqual(<<16#00a00513:32/little>>, "li a0, 10", jit_riscv32_asm:li(a0, 10)), + ?_assertAsmEqual(<<16#fff00513:32/little>>, "li a0, -1", jit_riscv32_asm:li(a0, -1)), + ?_assertAsmEqual(<<16#7ff00513:32/little>>, "li a0, 2047", jit_riscv32_asm:li(a0, 2047)) + ]. + +li_large_test_() -> + [ + % 0x12345 = 74565 - requires lui + addi + ?_assertAsmEqual( + <<16#00012537:32/little, 16#34550513:32/little>>, + "lui a0, 0x12\naddi a0, a0, 0x345", + jit_riscv32_asm:li(a0, 16#12345) + ), + % 0x80000000 = -2147483648 (minimum 32-bit signed) + ?_assertAsmEqual( + <<16#800005b7:32/little, 16#00058593:32/little>>, + "lui a1, 0x80000\naddi a1, a1, 0", + jit_riscv32_asm:li(a1, -16#80000000) + ), + % 0x7FFFFFFF = 2147483647 (maximum 32-bit signed) + ?_assertAsmEqual( + <<16#80000537:32/little, 16#fff50513:32/little>>, + "lui a0, 0x80000\naddi a0, a0, -1", + jit_riscv32_asm:li(a0, 16#7FFFFFFF) + ) + ]. 
+ +mv_test_() -> + [ + ?_assertAsmEqual(<<16#00050513:32/little>>, "mv a0, a0", jit_riscv32_asm:mv(a0, a0)), + ?_assertAsmEqual(<<16#00058593:32/little>>, "mv a1, a1", jit_riscv32_asm:mv(a1, a1)) + ]. + +not_test_() -> + [ + ?_assertAsmEqual(<<16#fff54513:32/little>>, "not a0, a0", jit_riscv32_asm:not_(a0, a0)), + ?_assertAsmEqual(<<16#fff5c593:32/little>>, "not a1, a1", jit_riscv32_asm:not_(a1, a1)) + ]. + +neg_test_() -> + [ + ?_assertAsmEqual(<<16#40a00533:32/little>>, "neg a0, a0", jit_riscv32_asm:neg(a0, a0)), + ?_assertAsmEqual(<<16#40b005b3:32/little>>, "neg a1, a1", jit_riscv32_asm:neg(a1, a1)) + ]. + +j_test_() -> + [ + ?_assertAsmEqual( + <<16#0080006f:32/little>>, "j .+8", jit_riscv32_asm:j(8) + ), + ?_assertAsmEqual( + <<16#ffdff06f:32/little>>, "j .-4", jit_riscv32_asm:j(-4) + ) + ]. + +jr_test_() -> + [ + ?_assertAsmEqual(<<16#00050067:32/little>>, "jr a0", jit_riscv32_asm:jr(a0)), + ?_assertAsmEqual(<<16#00028067:32/little>>, "jr t0", jit_riscv32_asm:jr(t0)) + ]. + +ret_test_() -> + [ + ?_assertAsmEqual(<<16#00008067:32/little>>, "ret", jit_riscv32_asm:ret()) + ]. + +%%----------------------------------------------------------------------------- +%% M Extension (Multiply/Divide) instruction tests +%%----------------------------------------------------------------------------- + +mul_test_() -> + [ + ?_assertAsmEqual( + <<16#02f50533:32/little>>, "mul a0, a0, a5", jit_riscv32_asm:mul(a0, a0, a5) + ), + ?_assertAsmEqual( + <<16#03f60633:32/little>>, "mul a2, a2, t6", jit_riscv32_asm:mul(a2, a2, t6) + ), + ?_assertAsmEqual( + <<16#026585b3:32/little>>, "mul a1, a1, t1", jit_riscv32_asm:mul(a1, a1, t1) + ), + ?_assertAsmEqual( + <<16#02d282b3:32/little>>, "mul t0, t0, a3", jit_riscv32_asm:mul(t0, t0, a3) + ) + ]. 
+ +%%----------------------------------------------------------------------------- +%% System instruction tests +%%----------------------------------------------------------------------------- + +ebreak_test_() -> + [ + ?_assertAsmEqual( + <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:ebreak() + ) + ]. + +bkpt_test_() -> + [ + % bkpt is an ARM compatibility wrapper that generates ebreak + % The immediate parameter is ignored + ?_assertAsmEqual( + <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(0) + ), + ?_assertAsmEqual( + <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(42) + ), + ?_assertAsmEqual( + <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(255) + ) + ]. diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl index cfabfcf15f..7117ee5f69 100644 --- a/tests/libs/jit/jit_tests_common.erl +++ b/tests/libs/jit/jit_tests_common.erl @@ -77,6 +77,7 @@ asm(Arch, Bin, Str) -> find_binutils(Arch) -> ArchStr = atom_to_list(Arch), BinutilsList = [ + {ArchStr ++ "-esp-elf-as", ArchStr ++ "-esp-elf-objdump"}, {ArchStr ++ "-elf-as", ArchStr ++ "-elf-objdump"}, {ArchStr ++ "-none-eabi-as", ArchStr ++ "-none-eabi-objdump"}, {ArchStr ++ "-linux-gnu-as", ArchStr ++ "-linux-gnu-objdump"} @@ -104,6 +105,8 @@ get_asm_header(arm) -> get_asm_header(aarch64) -> ".text\n"; get_asm_header(x86_64) -> + ".text\n"; +get_asm_header(riscv32) -> ".text\n". %% Get architecture-specific assembler flags @@ -113,7 +116,9 @@ get_as_flags(arm) -> get_as_flags(aarch64) -> ""; get_as_flags(x86_64) -> - "--64". + "--64"; +get_as_flags(riscv32) -> + "-march=rv32ima". %% Parse objdump output lines and extract binary data -spec asm_lines([binary()], binary(), atom()) -> binary(). 
diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index ff272f6eac..a289a01a6f 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -31,6 +31,7 @@ start() -> jit_aarch64_asm_tests, jit_armv6m_tests, jit_armv6m_asm_tests, + jit_riscv32_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests ]). From 5ca3702c15b6474da1271fb893e7ac67750b798d Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 5 Oct 2025 09:28:20 +0200 Subject: [PATCH 09/28] riscv32: initial backend implementation Signed-off-by: Paul Guyot --- libs/jit/include/jit.hrl | 1 + libs/jit/src/CMakeLists.txt | 1 + libs/jit/src/jit_precompile.erl | 1 + libs/jit/src/jit_riscv32.erl | 3051 +++++++++++++++ src/libAtomVM/defaultatoms.def | 1 + src/libAtomVM/jit.c | 62 +- src/libAtomVM/jit.h | 6 + src/libAtomVM/module.c | 3 + src/libAtomVM/nifs.c | 2 + src/libAtomVM/opcodesswitch.h | 2 +- src/platforms/esp32/CMakeLists.txt | 18 +- .../esp32/components/avm_sys/CMakeLists.txt | 1 + .../components/avm_sys/jit_stream_flash.c | 34 + src/platforms/esp32/components/avm_sys/sys.c | 25 + .../esp32/components/libatomvm/CMakeLists.txt | 2 +- src/platforms/esp32/main/Kconfig.projbuild | 6 + src/platforms/esp32/test/CMakeLists.txt | 12 +- .../test/main/test_erl_sources/CMakeLists.txt | 87 +- tests/libs/jit/CMakeLists.txt | 1 + tests/libs/jit/jit_riscv32_tests.erl | 3419 +++++++++++++++++ tests/libs/jit/tests.erl | 1 + 21 files changed, 6671 insertions(+), 65 deletions(-) create mode 100644 libs/jit/src/jit_riscv32.erl create mode 100644 src/platforms/esp32/components/avm_sys/jit_stream_flash.c create mode 100644 tests/libs/jit/jit_riscv32_tests.erl diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index b006c5f34f..81ff1c42c2 100644 --- a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -23,6 +23,7 @@ -define(JIT_ARCH_X86_64, 1). -define(JIT_ARCH_AARCH64, 2). -define(JIT_ARCH_ARMV6M, 3). +-define(JIT_ARCH_RISCV32, 4). -define(JIT_VARIANT_PIC, 1). 
-define(JIT_VARIANT_FLOAT32, 2). diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index 586223b4bc..ae62643c30 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -31,6 +31,7 @@ set(ERLANG_MODULES jit_aarch64_asm jit_armv6m jit_armv6m_asm + jit_riscv32 jit_riscv32_asm jit_x86_64 jit_x86_64_asm diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index cd9646790d..930b79dc37 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -93,6 +93,7 @@ compile(Target, Dir, Path) -> "x86_64" -> ?JIT_ARCH_X86_64; "aarch64" -> ?JIT_ARCH_AARCH64; "armv6m" -> ?JIT_ARCH_ARMV6M; + "riscv32" -> ?JIT_ARCH_RISCV32; _ -> error({unsupported_target, Target}) end, diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl new file mode 100644 index 0000000000..56887fb064 --- /dev/null +++ b/libs/jit/src/jit_riscv32.erl @@ -0,0 +1,3051 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32). 
+ +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + flush/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/1, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + jump_to_continuation/2, + jump_to_offset/2, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + continuation_entry_point/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/2, + add_label/2, + add_label/3 +]). + +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2 +]). +-endif. + +-compile([warnings_as_errors]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). + +-define(ASSERT(Expr), true = Expr). + +%% RISC-V32 ILP32 ABI: a0-a7 are used for argument passing (8 registers). +%% a0-a1 are used for return values (a0 for 32-bit, a0-a1 for 64-bit returns). +%% s0-s11 are callee-saved registers (must be preserved across calls). +%% t0-t6 are caller-saved temporary registers. +%% sp is the stack pointer. +%% ra is the return address register. +%% zero (x0) is hardwired to constant 0. +%% This implementation uses RV32IMC (base + multiply/compressed extensions). 
+%% +%% See: RISC-V Calling Convention +%% https://riscv.org/wp-content/uploads/2024/12/riscv-calling.pdf +%% +%% Registers used by the JIT backend (RISC-V32): +%% - Argument/return: a0-a7 (up to 8 args in registers) +%% - Callee-saved: s0-s11 (must preserve) +%% - Temporaries: t0-t6 (caller-saved) +%% - Stack pointer: sp +%% - Return address: ra +%% - Zero register: zero (always 0) +%% - Available for JIT scratch: t0-t6 (7 temp registers) +%% +%% Note: RISC-V32 instructions are fixed 32-bit with uniform encoding, +%% allowing access to all 32 registers. +%% +%% For more details, refer to the RISC-V ILP32 Procedure Call Standard. + +-type riscv32_register() :: + a0 + | a1 + | a2 + | a3 + | a4 + | a5 + | a6 + | a7 + | t0 + | t1 + | t2 + | t3 + | t4 + | t5 + | t6 + | s0 + | s1 + | s2 + | s3 + | s4 + | s5 + | s6 + | s7 + | s8 + | s9 + | s10 + | s11 + | sp + | ra. + +-define(IS_GPR(Reg), + (Reg =:= a0 orelse Reg =:= a1 orelse Reg =:= a2 orelse Reg =:= a3 orelse Reg =:= a4 orelse + Reg =:= a5 orelse Reg =:= a6 orelse Reg =:= a7 orelse Reg =:= t0 orelse Reg =:= t1 orelse + Reg =:= t2 orelse Reg =:= t3 orelse Reg =:= t4 orelse Reg =:= t5 orelse Reg =:= t6 orelse + Reg =:= s0 orelse Reg =:= s1 orelse Reg =:= s2 orelse Reg =:= s3 orelse Reg =:= s4 orelse + Reg =:= s5 orelse Reg =:= s6 orelse Reg =:= s7 orelse Reg =:= s8 orelse Reg =:= s9 orelse + Reg =:= s10 orelse Reg =:= s11 orelse Reg =:= sp orelse Reg =:= ra) +). + +-type stream() :: any(). + +-record(state, { + stream_module :: module(), + stream :: stream(), + offset :: non_neg_integer(), + branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + available_regs :: [riscv32_register()], + used_regs :: [riscv32_register()], + labels :: [{integer() | reference(), integer()}], + variant :: non_neg_integer(), + literal_pool :: [{non_neg_integer(), riscv32_register(), non_neg_integer()}] +}). + +-type state() :: #state{}. +-type immediate() :: non_neg_integer(). 
+-type vm_register() :: + {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, riscv32_register()}. +-type value() :: immediate() | vm_register() | riscv32_register() | {ptr, riscv32_register()}. +-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}. + +-type maybe_free_riscv32_register() :: + {free, riscv32_register()} | riscv32_register(). + +-type condition() :: + {riscv32_register(), '<', integer()} + | {maybe_free_riscv32_register(), '<', riscv32_register()} + | {maybe_free_riscv32_register(), '==', integer()} + | {maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} + | {'(int)', maybe_free_riscv32_register(), '==', integer()} + | {'(int)', maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} + | {'(bool)', maybe_free_riscv32_register(), '==', false} + | {'(bool)', maybe_free_riscv32_register(), '!=', false} + | {maybe_free_riscv32_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, riscv32_register()}, '==', {free, riscv32_register()}}. + +% Context offsets (32-bit architecture) +% ctx->e is 0x14 +% ctx->x is 0x18 +-define(CTX_REG, a0). +-define(NATIVE_INTERFACE_REG, a2). +-define(Y_REGS, {?CTX_REG, 16#14}). +-define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). +-define(CP, {?CTX_REG, 16#5C}). +-define(FP_REGS, {?CTX_REG, 16#60}). +-define(BS, {?CTX_REG, 16#64}). +-define(BS_OFFSET, {?CTX_REG, 16#68}). +% JITSTATE is in a1 register (no prolog, following aarch64 model) +-define(JITSTATE_REG, a1). +% Return address register (like LR in AArch64) +-define(RA_REG, ra). +-define(JITSTATE_MODULE_OFFSET, 0). +-define(JITSTATE_CONTINUATION_OFFSET, 16#4). +-define(JITSTATE_REDUCTIONCOUNT_OFFSET, 16#8). +-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}). +-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}). + +-define(JUMP_TABLE_ENTRY_SIZE, 8). + +%% RISC-V32 register mappings + +%% Use t3 as temporary for some operations +-define(IP_REG, t3). 
+ +-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). +-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). +-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). +-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). +-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X), + is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000 +). + +%% RISC-V32 ILP32 ABI register allocation: +%% - a0: context pointer (reserved, see ?CTX_REG) +%% - a1: jit state pointer (reserved, see ?JITSTATE_REG) +%% - a2: native interface pointer (reserved, see ?NATIVE_INTERFACE_REG) +%% - a0-a7: argument registers when calling native functions +%% (see ?PARAMETER_REGS, up to 8 params) +%% - t0-t6: temporaries, caller-saved, available for JIT use +%% - s0-s11: callee-saved (would need to be saved/restored) +-define(AVAILABLE_REGS, [t6, t5, t4, t3, t2, t1, t0]). +-define(PARAMETER_REGS, [a0, a1, a2, a3, a4, a5, a6, a7]). +-define(SCRATCH_REGS, [t6, t5, t4, t2, t1, t0]). + +%%----------------------------------------------------------------------------- +%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. +%% sizeof(uintptr_t) +%% +%% C code equivalent is: +%% #if UINTPTR_MAX == UINT32_MAX +%% #define TERM_BYTES 4 +%% #elif UINTPTR_MAX == UINT64_MAX +%% #define TERM_BYTES 8 +%% #else +%% #error "Term size must be either 32 bit or 64 bit." +%% #endif +%% +%% @end +%% @return Word size in bytes (always 4 for this backend) +%%----------------------------------------------------------------------------- +-spec word_size() -> 4 | 8. +word_size() -> 4. + +%%----------------------------------------------------------------------------- +%% @doc Create a new backend state for provided variant, module and stream.
%% @end
%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC)
%% @param StreamModule module to stream instructions
%% @param Stream stream state
%% @return New backend state
%%-----------------------------------------------------------------------------
-spec new(any(), module(), stream()) -> state().
new(Variant, StreamModule, Stream) ->
    #state{
        stream_module = StreamModule,
        stream = Stream,
        branches = [],
        % Stream offset at creation time; jump_to_continuation/2 uses it as the
        % base of the native code when computing absolute addresses.
        offset = StreamModule:offset(Stream),
        available_regs = ?AVAILABLE_REGS,
        used_regs = [],
        labels = [],
        variant = Variant,
        literal_pool = []
    }.

%%-----------------------------------------------------------------------------
%% @doc Access the stream object.
%% @end
%% @param State current backend state
%% @return The stream object
%%-----------------------------------------------------------------------------
-spec stream(state()) -> stream().
stream(#state{stream = Stream}) ->
    Stream.

%%-----------------------------------------------------------------------------
%% @doc Get the current offset in the stream
%% @end
%% @param State current backend state
%% @return The current offset
%%-----------------------------------------------------------------------------
-spec offset(state()) -> non_neg_integer().
offset(#state{stream_module = StreamModule, stream = Stream}) ->
    StreamModule:offset(Stream).

%%-----------------------------------------------------------------------------
%% @doc Flush the stream.
%% @end
%% @param State current backend state
%% @return The new state (the backend state holding the flushed stream)
%%-----------------------------------------------------------------------------
% The function returns the updated #state{} record, not the raw stream, so the
% spec must say state().
-spec flush(state()) -> state().
flush(#state{stream_module = StreamModule, stream = Stream0} = State) ->
    Stream1 = StreamModule:flush(Stream0),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a debugger or breakpoint instruction.
%% This is used for debugging
%% and not in production.
%% @end
%% @param State current backend state
%% @return The updated backend state
%%-----------------------------------------------------------------------------
-spec debugger(state()) -> state().
debugger(#state{stream_module = Mod, stream = Stream} = State) ->
    % Append a single breakpoint instruction to the stream.
    Breakpoint = jit_riscv32_asm:bkpt(0),
    State#state{stream = Mod:append(Stream, Breakpoint)}.

%%-----------------------------------------------------------------------------
%% @doc Return the list of currently used native registers. This is used for
%% debugging and not in production.
%% @end
%% @param State current backend state
%% @return The list of used registers
%%-----------------------------------------------------------------------------
-spec used_regs(state()) -> [riscv32_register()].
used_regs(#state{} = State) ->
    State#state.used_regs.

%%-----------------------------------------------------------------------------
%% @doc Return the list of currently available native scratch registers. This
%% is used for debugging and not in production.
%% @end
%% @param State current backend state
%% @return The list of available registers
%%-----------------------------------------------------------------------------
-spec available_regs(state()) -> [riscv32_register()].
available_regs(#state{} = State) ->
    State#state.available_regs.

%%-----------------------------------------------------------------------------
%% @doc Free native registers. The passed list of registers can contain
%% registers, pointer to registers or other values that are ignored.
%% @end
%% @param State current backend state
%% @param Regs list of registers or other values
%% @return The updated backend state
%%-----------------------------------------------------------------------------
-spec free_native_registers(state(), [value()]) -> state().
+free_native_registers(State, []) -> + State; +free_native_registers(State, [Reg | Rest]) -> + State1 = free_native_register(State, Reg), + free_native_registers(State1, Rest). + +-spec free_native_register(state(), value()) -> state(). +free_native_register( + #state{available_regs = Available0, used_regs = Used0} = State, + Reg +) when + is_atom(Reg) +-> + {Available1, Used1} = free_reg(Available0, Used0, Reg), + State#state{available_regs = Available1, used_regs = Used1}; +free_native_register(State, {ptr, Reg}) -> + free_native_register(State, Reg); +free_native_register(State, _Other) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Assert that all native scratch registers are available. This is used +%% for debugging and not in production. +%% @end +%% @param State current backend state +%% @return ok +%%----------------------------------------------------------------------------- +-spec assert_all_native_free(state()) -> ok. +assert_all_native_free(#state{ + available_regs = ?AVAILABLE_REGS, used_regs = [] +}) -> + ok. + +%%----------------------------------------------------------------------------- +%% @doc Emit the jump table at the beginning of the module. Branches will be +%% updated afterwards with update_branches/1. Emit branches for labels from +%% 0 (special entry for lines and labels information) to LabelsCount included +%% (special entry for OP_INT_CALL_END). +%% +%% On this platform, each jump table entry is 8 bytes (?JUMP_TABLE_ENTRY_SIZE): +%% an AUIPC/JALR pair through a3, emitted with zero immediates and patched +%% later with the PC-relative offset of the label. +%% ``` +%% auipc a3, hi20(offset_to_label) +%% jalr zero, a3, lo12(offset_to_label) +%% ``` +%% +%% @end +%% @param State current backend state +%% @param LabelsCount number of labels in the module. +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec jump_table(state(), pos_integer()) -> state(). +jump_table(State, LabelsCount) -> + jump_table0(State, 0, LabelsCount).

% Emit jump table entries N..LabelsCount (inclusive), one placeholder
% AUIPC + JALR pair per label, and record a relocation for each entry.
jump_table0(State, N, LabelsCount) when N > LabelsCount ->
    State;
jump_table0(
    #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State,
    N,
    LabelsCount
) ->
    % Create jump table entry: AUIPC + JALR (8 bytes total)
    % This will be patched later in update_branches/1
    Offset = StreamModule:offset(Stream0),
    % Placeholder: Load PC + upper20 bits
    I1 = jit_riscv32_asm:auipc(a3, 0),
    % Placeholder: Jump to a3 + lower12 bits
    I2 = jit_riscv32_asm:jalr(zero, a3, 0),

    JumpEntry = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, JumpEntry),

    % Record the offset of the entry (i.e. of the AUIPC); update_branches/1
    % rewrites both the AUIPC and the JALR from that single offset.
    Reloc = {N, Offset, jump_table_auipc_jalr},
    UpdatedState = State#state{stream = Stream1, branches = [Reloc | Branches]},

    jump_table0(UpdatedState, N + 1, LabelsCount).

%%-----------------------------------------------------------------------------
%% @doc Rewrite stream to update all branches for labels.
%% @end
%% @param State current backend state
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec update_branches(state()) -> state().
update_branches(#state{branches = []} = State) ->
    State;
update_branches(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        branches = [{Label, Offset, Type} | BranchesT],
        labels = Labels
    } = State
) ->
    % Every recorded relocation must refer to a known label; crash otherwise.
    {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
    % Distance from the relocated instruction to the label.
    Rel = LabelOffset - Offset,
    NewInstr =
        case Type of
            {adr, Reg} when Rel rem 4 =:= 0 -> pc_relative_address(Reg, Rel);
            % NOTE(review): the +2 adjustment for half-word misaligned distances
            % looks inherited from another backend -- confirm it is wanted here.
            {adr, Reg} when Rel rem 4 =:= 2 -> pc_relative_address(Reg, Rel + 2);
            {far_branch, Size, TempReg} ->
                % Check if branch can now be optimized to near branch
                if
                    Rel >= -1048576 andalso Rel =< 1048574 andalso (Rel rem 2) =:= 0 ->
                        % RISC-V jal has ±1MB range
                        % Optimize to near branch: jal + nops to fill original size
                        DirectBranch = jit_riscv32_asm:jal(zero, Rel),
                        % Fill remaining bytes with NOPs (RISC-V instructions are 4 bytes)
                        NopCount = (Size - 4) div 4,
                        Nops = <<
                            <<(jit_riscv32_asm:nop())/binary>>
                         || _ <- lists:seq(1, NopCount)
                        >>,
                        <<DirectBranch/binary, Nops/binary>>;
                    true ->
                        % Keep far branch sequence: auipc + lw + jalr + data
                        % RISC-V far branch is always 16 bytes
                        case Size of
                            16 ->
                                % 16-byte sequence: auipc + lw + jalr + data
                                I1 = jit_riscv32_asm:auipc(TempReg, 0),
                                I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8),
                                I3 = jit_riscv32_asm:jalr(zero, TempReg, 0),
                                % Calculate absolute target address
                                TargetAddress = LabelOffset,
                                I4 = <<TargetAddress:32/little>>,
                                <<I1/binary, I2/binary, I3/binary, I4/binary>>
                        end
                end;
            jump_table_auipc_jalr ->
                % Calculate PC-relative offset from AUIPC instruction to target
                % AUIPC is at Offset, JALR is at Offset+4
                % Target is at LabelOffset
                % Offset from AUIPC PC to target
                PCRelOffset = LabelOffset - Offset,

                % Split into upper 20 bits and lower 12 bits
                % RISC-V encodes: target = PC + (upper20 << 12) + sign_ext(lower12)
                % If lower12 >= 0x800, it's negative when sign-extended, so add 1 to upper
                Upper20 = (PCRelOffset + 16#800) bsr 12,
                Lower12 = PCRelOffset band 16#FFF,
                % Sign-extend lower 12 bits for JALR immediate
                Lower12Signed =
                    if
                        Lower12 >= 16#800 -> Lower12 - 16#1000;
                        true -> Lower12
                    end,

                % Encode AUIPC and JALR with computed offsets
                I1 = jit_riscv32_asm:auipc(a3, Upper20),
                I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed),
                <<I1/binary, I2/binary>>
        end,
    Stream1 = StreamModule:replace(Stream0, Offset, NewInstr),
    update_branches(State#state{stream = Stream1, branches = BranchesT}).

%%-----------------------------------------------------------------------------
%% @doc Generate code to load a primitive function pointer into a register
%% @param Primitive index to the primitive to call
%% @param TargetReg register to load the function pointer into
%% @return Binary instruction sequence
%%-----------------------------------------------------------------------------
-spec load_primitive_ptr(non_neg_integer(), riscv32_register()) -> binary().
load_primitive_ptr(Primitive, TargetReg) ->
    case Primitive of
        % Small indexes (including 0): single load with an immediate
        % displacement from the native interface pointer.
        N when N * 4 =< 124 ->
            jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, N * 4);
        N when N * 4 < 256 ->
            % Can encode N * 4 directly in li instruction
            I1 = jit_riscv32_asm:li(TargetReg, N * 4),
            I2 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG),
            I3 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0),
            <<I1/binary, I2/binary, I3/binary>>;
        N ->
            % For very large primitive numbers, load N and shift left by 2 (multiply by 4)
            I1 = jit_riscv32_asm:li(TargetReg, N),
            I2 = jit_riscv32_asm:slli(TargetReg, TargetReg, 2),
            I3 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG),
            I4 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0),
            <<I1/binary, I2/binary, I3/binary, I4/binary>>
    end.

%%-----------------------------------------------------------------------------
%% @doc Emit a call (call with return) to a primitive with arguments. This
%% function converts arguments and pass them following the backend ABI
%% convention. It also saves scratch registers we need to preserve.
+%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Tuple of the updated backend state and a native register, as +%% returned by call_func_ptr/3 +%%----------------------------------------------------------------------------- +-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), riscv32_register()}. +call_primitive( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [TempReg | RestRegs], + used_regs = UsedRegs + } = State, + Primitive, + Args +) -> + % Load the primitive's function pointer into the first available scratch + % register; it is handed to call_func_ptr/3 as {free, TempReg}. + PrepCall = load_primitive_ptr(Primitive, TempReg), + Stream1 = StreamModule:append(Stream0, PrepCall), + StateCall = State#state{ + stream = Stream1, + available_regs = RestRegs, + used_regs = [TempReg | UsedRegs] + }, + call_func_ptr(StateCall, {free, TempReg}, Args); +call_primitive( + #state{available_regs = []} = State, + Primitive, + Args +) -> + % No scratch register left: let call_func_ptr/3 resolve the primitive itself. + call_func_ptr(State, {primitive, Primitive}, Args). + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump (call without return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention.
+%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +call_primitive_last( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + Primitive, + Args +) -> + % We need a register for the function pointer that should not be used as a parameter + % Since we're not returning, we can use all scratch registers except + % registers used for parameters + ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + ArgsRegs = args_regs(Args), + % `--` is right-associative: this is ?AVAILABLE_REGS -- (ArgsRegs -- ParamRegs) + ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, + [Temp | AvailableRegs1] = ScratchRegs, + UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + PrepCall = load_primitive_ptr(Primitive, Temp), + Stream1 = StreamModule:append(Stream0, PrepCall), + + State1 = State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + + % Preprocess offset special arg + Args1 = lists:map( + fun(Arg) -> + case Arg of + offset -> StreamModule:offset(Stream1); + _ -> Arg + end + end, + Args + ), + + % On RISC-V, up to 8 arguments fit in registers (a0-a7) + % Always use tail call when calling primitives in tail position + % NOTE(review): only argument lists of the form [_, jit_state | _] are + % handled; any other shape raises case_clause. + State4 = + case Args1 of + [FirstArg, jit_state | ArgsT] -> + % Use tail call + ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT], + State2 = set_registers_args(State1, ArgsForTailCall, 0), + tail_call_with_jit_state_registers_only(State2, Temp) + end, + % Nothing runs after the tail call, so all scratch registers are released + State5 = State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}, + flush_literal_pool(State5). + +%%----------------------------------------------------------------------------- +%% @doc Tail call to address in register. +%% RA is preserved across regular calls (call_func_ptr saves/restores it), +%% so when the called C primitive returns, it returns to opcodesswitch.h.
%% @end
%% @param State current backend state
%% @param Reg register containing the target address
%% @return Updated backend state
%%-----------------------------------------------------------------------------
tail_call_with_jit_state_registers_only(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    Reg
) ->
    % Jump to address in register (tail call)
    I1 = jit_riscv32_asm:jr(Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a return of a value if it's not equal to ctx.
%% This logic is used to break out to the scheduler, typically after signal
%% messages have been processed.
%% @end
%% @param State current backend state
%% @param Reg register to compare to (should be {free, Reg} as it's always freed)
%% @return Updated backend state
%%-----------------------------------------------------------------------------
return_if_not_equal_to_ctx(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    {free, Reg}
) ->
    % RISC-V doesn't have a separate cmp instruction, use beq directly
    I2 =
        case Reg of
            % Return value is already in a0
            a0 -> <<>>;
            % Move to a0 (return register)
            _ -> jit_riscv32_asm:mv(a0, Reg)
        end,
    I3 = jit_riscv32_asm:ret(),
    % Branch if equal (skip the return).
    % Branch offsets are relative to the address of the branch instruction
    % itself (this is how if_block/3 and update_branches/1 compute them), so
    % the distance to the first instruction after the return sequence must
    % include the size of the beq as well as the mv/ret it skips.
    BranchSize = byte_size(jit_riscv32_asm:beq(Reg, ?CTX_REG, 0)),
    SkipOffset = BranchSize + byte_size(I2) + byte_size(I3),
    I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, SkipOffset),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    {AvailableRegs1, UsedRegs1} = free_reg(
        AvailableRegs0, UsedRegs0, Reg
    ),
    State#state{
        stream = Stream1,
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1
    }.

%%-----------------------------------------------------------------------------
%% @doc Emit a jump to a label. The offset of the relocation is saved and will
%% be updated with `update_branches/1`.
%% @end
%% @param State current backend state
%% @param Label to jump to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_label(#state{stream_module = Mod, stream = Stream, labels = Labels} = State, Label) ->
    % `false` when the label has not been emitted yet (forward reference).
    Known = lists:keyfind(Label, 1, Labels),
    Here = Mod:offset(Stream),
    {Tracked, Code} = branch_to_label_code(State, Here, Label, Known),
    % The literal pool is flushed right after the unconditional jump.
    flush_literal_pool(Tracked#state{stream = Mod:append(Stream, Code)}).

% Emit an unconditional jump to a known stream offset, then flush the literal pool.
jump_to_offset(#state{stream_module = Mod, stream = Stream} = State, TargetOffset) ->
    Here = Mod:offset(Stream),
    Code = branch_to_offset_code(State, Here, TargetOffset),
    flush_literal_pool(State#state{stream = Mod:append(Stream, Code)}).

%%-----------------------------------------------------------------------------
%% @doc Jump to address in continuation pointer register
%% Calculate absolute address and jump to it.
%% @end
%% @param State current backend state
%% @param {free, OffsetReg} register containing the offset value
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_continuation(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        offset = BaseOffset
    } = State0,
    {free, OffsetReg}
) ->
    % Calculate absolute address: native_code_base + target_offset
    % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset)
    CurrentStreamOffset = StreamModule:offset(Stream0),
    NetOffset = BaseOffset - CurrentStreamOffset,

    % Get native code base address into temporary register
    I1 = pc_relative_address(Temp, NetOffset),
    % Add target offset to get final absolute address
    I2 = jit_riscv32_asm:add(Temp, Temp, OffsetReg),
    % Indirect branch to the calculated absolute address
    I3 = jit_riscv32_asm:jr(Temp),

    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    % Free all registers since this is a tail jump
    State0#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}.

% Build the code for an unconditional branch from stream offset `Offset` to the
% already known stream offset `TargetOffset`. Returns the instruction binary;
% the state is only read to pick a scratch register for the far form.
branch_to_offset_code(_State, Offset, TargetOffset) when
    TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044
->
    % Near branch: use direct J instruction
    Rel = TargetOffset - Offset,
    jit_riscv32_asm:j(Rel);
branch_to_offset_code(
    #state{available_regs = [TempReg | _]}, _Offset, TargetOffset
) ->
    % Far branch: use auipc + lw + jalr sequence (RISC-V)
    % This creates a PC-relative load sequence - always 16 bytes (4-byte aligned)
    % NOTE(review): the literal is appended after three instructions, yet the
    % load uses displacement 8 from the auipc; if each instruction is 4 bytes
    % the literal sits at +12. Also the literal is a stream offset, not an
    % address. Confirm against jit_riscv32_asm encodings before relying on
    % the far form.

    % TempReg = PC
    I1 = jit_riscv32_asm:auipc(TempReg, 0),
    % TempReg = *(PC+8)
    I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8),
    % Jump to TempReg
    I3 = jit_riscv32_asm:jalr(zero, TempReg, 0),
    % The literal value is the absolute target offset
    I4 = <<TargetOffset:32/little>>,
    <<I1/binary, I2/binary, I3/binary, I4/binary>>.

% Build the code for an unconditional branch to `Label`.
% The last argument is the result of looking the label up in the label table:
% `{Label, LabelOffset}` when the label is already known, `false` otherwise.
% Returns `{State, CodeBlock}`; for unknown labels a relocation is recorded in
% the state so that update_branches/1 can patch the placeholder later.
branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) ->
    CodeBlock = branch_to_offset_code(State, Offset, LabelOffset),
    {State, CodeBlock};
branch_to_label_code(#state{available_regs = [TempReg | _]} = State, Offset, Label, false) ->
    far_branch_to_label_placeholder(State, Offset, Label, TempReg);
branch_to_label_code(#state{available_regs = []} = State, Offset, Label, false) ->
    % RISC-V: Use t6 as scratch (caller-saved, safe to clobber)
    far_branch_to_label_placeholder(State, Offset, Label, t6);
branch_to_label_code(#state{available_regs = []}, _Offset, _Label, LabelLookup) ->
    error({no_available_registers, LabelLookup}).

% Emit the far branch placeholder for a label whose offset is not known yet
% and record the matching relocation.
far_branch_to_label_placeholder(#state{branches = Branches} = State0, Offset, Label, TempReg) ->
    % RISC-V: Far branch sequence - always 16 bytes (4-byte aligned)

    % Load PC into temp
    I1 = jit_riscv32_asm:auipc(TempReg, 0),
    % Load offset from PC+8
    I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8),
    % Jump to address
    I3 = jit_riscv32_asm:jalr(zero, TempReg, 0),
    % Placeholder offset
    I4 = <<0:32/little>>,
    CodeBlock = <<I1/binary, I2/binary, I3/binary, I4/binary>>,
    SequenceSize = byte_size(CodeBlock),
    % Add relocation entry
    Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}},
    State1 = State0#state{branches = [Reloc | Branches]},
    {State1, CodeBlock}.

%%-----------------------------------------------------------------------------
%% @doc Emit an if block, i.e. emit a test of a condition and conditionally
%% execute a block.
+%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockFn function to emit the block that may be executed +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state(). +if_block( + #state{stream_module = StreamModule} = State0, + {'and', CondList}, + BlockFn +) -> + %% Emit one placeholder branch per condition and remember where each + %% branch instruction sits so it can be patched once the block is emitted + {Replacements, State1} = lists:foldl( + fun(Cond, {AccReplacements, AccState}) -> + Offset = StreamModule:offset(AccState#state.stream), + {NewAccState, BranchInfo, ReplaceDelta} = if_block_cond(AccState, Cond), + {[{Offset + ReplaceDelta, BranchInfo} | AccReplacements], NewAccState} + end, + {[], State0}, + CondList + ), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + %% Patch every recorded branch to jump to the end of the block + Stream3 = lists:foldl( + fun({ReplacementOffset, {BranchFunc, Reg, Operand}}, AccStream) -> + BranchOffset = OffsetAfter - ReplacementOffset, + NewBranchInstr = apply(jit_riscv32_asm, BranchFunc, [Reg, Operand, BranchOffset]), + StreamModule:replace(AccStream, ReplacementOffset, NewBranchInstr) + end, + Stream2, + Replacements + ), + merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs); +if_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, {BranchFunc, Reg, Operand}, BranchInstrDelta} = if_block_cond(State0, Cond), + State2 = BlockFn(State1), + Stream2 = State2#state.stream, + OffsetAfter = StreamModule:offset(Stream2), + %% Patch the conditional branch instruction to jump to the end of the block + BranchInstrOffset = Offset + BranchInstrDelta, + BranchOffset = OffsetAfter - BranchInstrOffset, + NewBranchInstr = apply(jit_riscv32_asm, BranchFunc, [Reg, Operand, BranchOffset]), + Stream3 = StreamModule:replace(Stream2, BranchInstrOffset, NewBranchInstr), +
merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if else block, i.e. emit a test of a condition and +%% conditionally execute a block or another block. +%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockTrueFn function to emit the block that is executed if condition is true +%% @param BlockFalseFn function to emit the block that is executed if condition is false +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) -> + state(). +if_else_block( + #state{stream_module = StreamModule, stream = Stream0} = State0, + Cond, + BlockTrueFn, + BlockFalseFn +) -> + Offset = StreamModule:offset(Stream0), + {State1, {BranchFunc, Reg, Operand}, BranchInstrDelta} = if_block_cond(State0, Cond), + BranchInstrOffset = Offset + BranchInstrDelta, + State2 = BlockTrueFn(State1), + Stream2 = State2#state.stream, + %% Emit unconditional branch to skip the else block (will be replaced) + ElseJumpOffset = StreamModule:offset(Stream2), + ElseJumpInstr = jit_riscv32_asm:j(0), + Stream3 = StreamModule:append(Stream2, ElseJumpInstr), + %% Else block starts here.
+ OffsetAfter = StreamModule:offset(Stream3), + %% Patch the conditional branch to jump to the else block + ElseBranchOffset = OffsetAfter - BranchInstrOffset, + NewBranchInstr = apply(jit_riscv32_asm, BranchFunc, [Reg, Operand, ElseBranchOffset]), + Stream4 = StreamModule:replace(Stream3, BranchInstrOffset, NewBranchInstr), + %% Build the else block + %% It starts from the register allocation left by the condition (State1), + %% not from the one left by the true block + StateElse = State2#state{ + stream = Stream4, + used_regs = State1#state.used_regs, + available_regs = State1#state.available_regs + }, + State3 = BlockFalseFn(StateElse), + Stream5 = State3#state.stream, + OffsetFinal = StreamModule:offset(Stream5), + %% Patch the unconditional branch to jump to the end + FinalJumpOffset = OffsetFinal - ElseJumpOffset, + NewElseJumpInstr = jit_riscv32_asm:j(FinalJumpOffset), + Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr), + merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs). + +-spec if_block_cond(state(), condition()) -> + { + state(), + {beq | bne | blt | bge, atom(), atom() | integer()}, + non_neg_integer() + }.
%%-----------------------------------------------------------------------------
%% @doc Emit the conditional branch guarding an if block.
%% The emitted branch is the inverse of the requested condition: it is taken
%% when the condition does not hold. Every branch is assembled with a zero
%% offset; the returned `{BranchFunc, Reg, Operand}' triple carries what is
%% needed to re-assemble it with the final offset.
%% @end
%% @param State0 current backend state
%% @param Cond condition tuple. In the clauses that accept it, a register
%%        wrapped as `{free, Reg}' is released with if_block_free_reg/2.
%% @return `{State, {BranchFunc, Reg, Operand}, BranchDelta}' where BranchDelta
%%         is the number of bytes emitted by this call before the branch
%%         instruction itself (0 when the branch is the first instruction).
%%-----------------------------------------------------------------------------
if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) ->
    %% RISC-V: bge Reg, zero, offset (branch if Reg >= 0, i.e., NOT negative/NOT less than 0)
    BranchInstr = jit_riscv32_asm:bge(Reg, zero, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = State0#state{stream = Stream1},
    {State1, {bge, Reg, zero}, 0};
%% Small immediate (0..255). Emits the same sequence as the generic integer
%% clause that follows; the only difference is that the temp register is taken
%% in the body instead of the clause head.
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {Reg, '<', Val}
) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 ->
    % RISC-V: bge Reg, Temp, offset (branch if Reg >= Val, i.e., NOT less than)
    % Load immediate into a temp register for comparison
    [Temp | _] = State0#state.available_regs,
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = jit_riscv32_asm:bge(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = State1#state{stream = Stream2},
    {State2, {bge, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {Reg, '<', Val}
) when is_atom(Reg), is_integer(Val) ->
    % RISC-V: bge Reg, Temp, offset (branch if Reg >= Temp, i.e., NOT less than)
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = jit_riscv32_asm:bge(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = State1#state{stream = Stream2},
    {State2, {bge, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {RegOrTuple, '<', RegB}
) when is_atom(RegB) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    % RISC-V: bge Reg, RegB, offset (branch if Reg >= RegB, i.e., NOT less than)
    BranchInstr = jit_riscv32_asm:bge(Reg, RegB, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {bge, Reg, RegB}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: bne Reg, zero, offset (branch if Reg != 0, i.e., NOT equal to 0)
    BranchInstr = jit_riscv32_asm:bne(Reg, zero, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {bne, Reg, zero}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {RegOrTuple, '==', RegB}
) when is_atom(RegB) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: bne Reg, RegB, offset (branch if Reg != RegB, i.e., NOT equal)
    BranchInstr = jit_riscv32_asm:bne(Reg, RegB, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {bne, Reg, RegB}, 0};
%% Delegate (int) forms to regular forms since we only have 32-bit words
if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) ->
    if_block_cond(State, {RegOrTuple, '==', 0});
if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '==', Val});
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '!=', Val}
) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Load immediate into temp, then beq Reg, Temp, offset
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = jit_riscv32_asm:beq(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {beq, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {RegOrTuple, '!=', Val}
) when ?IS_GPR(Val) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: beq Reg, Val, offset (branch if Reg == Val, i.e., NOT not-equal)
    BranchInstr = jit_riscv32_asm:beq(Reg, Val, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {beq, Reg, Val}, 0};
if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '!=', Val});
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '==', Val}
) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Load immediate into temp, then bne Reg, Temp, offset
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = jit_riscv32_asm:bne(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {bne, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {{free, RegA}, '==', {free, RegB}}
) ->
    %% RISC-V: bne RegA, RegB, offset (branch if RegA != RegB, i.e., NOT equal)
    BranchInstr = jit_riscv32_asm:bne(RegA, RegB, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = State0#state{stream = Stream1},
    %% Both operands were handed over as free: release both
    State2 = if_block_free_reg({free, RegA}, State1),
    State3 = if_block_free_reg({free, RegB}, State2),
    {State3, {bne, RegA, RegB}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '==', Val}
) when is_integer(Val) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    %% RISC-V: bne Reg, Temp, offset (branch if Reg != Temp, i.e., NOT equal)
    BranchInstr = jit_riscv32_asm:bne(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {bne, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '!=', Val}
) when is_integer(Val) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    %% RISC-V: beq Reg, Temp, offset (branch if Reg == Temp, i.e., NOT not-equal)
    BranchInstr = jit_riscv32_asm:beq(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {beq, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {'(bool)', RegOrTuple, '==', false}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Test bit 0 by shifting to MSB, then branch if negative (bit was 1, NOT false)
    I1 = jit_riscv32_asm:slli(Temp, Reg, 31),
    Stream1 = StreamModule:append(Stream0, I1),
    BranchInstr = jit_riscv32_asm:blt(Temp, zero, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream2},
    {State2, {blt, Temp, zero}, byte_size(I1)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {'(bool)', RegOrTuple, '!=', false}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Test bit 0 by shifting to MSB, then branch if non-negative (bit was 0, NOT true)
    I1 = jit_riscv32_asm:slli(Temp, Reg, 31),
    Stream1 = StreamModule:append(Stream0, I1),
    BranchInstr = jit_riscv32_asm:bge(Temp, zero, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream2},
    {State2, {bge, Temp, zero}, byte_size(I1)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {RegOrTuple, '&', Val, '!=', 0}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Test bits using ANDI or li+and
    TestCode =
        if
            Val >= -2048 andalso Val =< 2047 ->
                %% Can use ANDI instruction directly
                jit_riscv32_asm:andi(Temp, Reg, Val);
            true ->
                %% Need to load immediate into temp register first
                TestCode0 = jit_riscv32_asm:li(Temp, Val),
                TestCode1 = jit_riscv32_asm:and_(Temp, Reg, Temp),
                <<TestCode0/binary, TestCode1/binary>>
        end,
    OffsetBefore = StreamModule:offset(Stream0),
    Stream1 = StreamModule:append(Stream0, TestCode),
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    %% Branch if result is zero (no bits set, NOT != 0)
    BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream2},
    {State2, {beq, Temp, zero}, BranchDelta};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {Reg, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
    %% Inverting Reg and shifting left by 28 keeps only the inverted low four
    %% bits: the result is zero exactly when all four were set in Reg.
    I1 = jit_riscv32_asm:not_(Temp, Reg),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 28),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = State0#state{stream = Stream2},
    {State1, {beq, Temp, zero}, byte_size(I1) + byte_size(I2)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State0,
    {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
    %% Same test as the clause above, computed in place since Reg is free.
    I1 = jit_riscv32_asm:not_(Reg, Reg),
    I2 = jit_riscv32_asm:slli(Reg, Reg, 28),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    BranchInstr = jit_riscv32_asm:beq(Reg, zero, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = State0#state{stream = Stream2},
    State2 = if_block_free_reg(RegTuple, State1),
    {State2, {beq, Reg, zero}, byte_size(I1) + byte_size(I2)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | AT]
    } = State0,
    {Reg, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg) ->
    %% RISC-V: AND with mask, then compare with value
    %% Reg must be preserved, so the masking is done on a copy in Temp.
    OffsetBefore = StreamModule:offset(Stream0),
    I1 = jit_riscv32_asm:mv(Temp, Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    State1 = State0#state{stream = Stream1},
    %% NOTE(review): and_/3 is used here as returning the updated state;
    %% confirm against its definition.
    State2 = and_(State1#state{available_regs = AT}, Temp, Mask),
    Stream2 = State2#state.stream,
    %% Compare Temp with Val and branch if equal (NOT != Val)
    case Val of
        0 ->
            %% Optimize comparison with zero
            %% (the dedicated `!= 0' clause above matches first, so this
            %% branch is not expected to be reached)
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0),
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            State3 = State2#state{
                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
            },
            {State3, {beq, Temp, zero}, BranchDelta};
        _ when ?IS_GPR(Val) ->
            %% Val is a register
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Temp, Val, 0),
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            State3 = State2#state{
                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
            },
            {State3, {beq, Temp, Val}, BranchDelta};
        _ ->
            %% Val is an immediate - need second temp register
            %% Reuse the mask register for the comparison value
            [MaskReg | AT2] = AT,
            State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val),
            Stream3 = State3#state.stream,
            BranchDelta = StreamModule:offset(Stream3) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Temp, MaskReg, 0),
            Stream4 = StreamModule:append(Stream3, BranchInstr),
            State4 = State3#state{
                stream = Stream4, available_regs = [Temp, MaskReg | State3#state.available_regs]
            },
            {State4, {beq, Temp, MaskReg}, BranchDelta}
    end;
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailRegs
    } = State0,
    {{free, Reg} = RegTuple, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg) ->
    %% RISC-V: AND with mask, then compare with value
    OffsetBefore = StreamModule:offset(Stream0),
    %% NOTE(review): and_/3 is used here as returning the updated state;
    %% confirm against its definition.
    State1 = and_(State0, Reg, Mask),
    Stream1 = State1#state.stream,
    %% Compare Reg with Val and branch if equal (NOT != Val)
    case Val of
        0 ->
            %% Optimize comparison with zero
            %% (the dedicated `!= 0' clause above matches first, so this
            %% branch is not expected to be reached)
            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Reg, zero, 0),
            Stream2 = StreamModule:append(Stream1, BranchInstr),
            State2 = State1#state{stream = Stream2},
            State3 = if_block_free_reg(RegTuple, State2),
            {State3, {beq, Reg, zero}, BranchDelta};
        _ when ?IS_GPR(Val) ->
            %% Val is a register
            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Reg, Val, 0),
            Stream2 = StreamModule:append(Stream1, BranchInstr),
            State2 = State1#state{stream = Stream2},
            State3 = if_block_free_reg(RegTuple, State2),
            {State3, {beq, Reg, Val}, BranchDelta};
        _ ->
            %% Val is an immediate - need temp register
            %% Reuse the mask register for the comparison value
            [MaskReg | AT] = State1#state.available_regs,
            State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val),
            Stream2 = State2#state.stream,
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Reg, MaskReg, 0),
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            %% Restore the available registers seen on entry before freeing Reg
            State3 = State2#state{stream = Stream3, available_regs = AvailRegs},
            State4 = if_block_free_reg(RegTuple, State3),
            {State4, {beq, Reg, MaskReg}, BranchDelta}
    end.

%% Release a condition operand: `{free, Reg}' is handed to free_reg/3 and the
%% resulting lists are stored in the state; a bare register leaves the state
%% untouched.
-spec if_block_free_reg(riscv32_register() | {free, riscv32_register()}, state()) -> state().
if_block_free_reg({free, Reg}, State0) ->
    #state{available_regs = AvR0, used_regs = UR0} = State0,
    {AvR1, UR1} = free_reg(AvR0, UR0, Reg),
    State0#state{
        available_regs = AvR1,
        used_regs = UR1
    };
if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) ->
    State0.

%% Make sure every register of the given list is marked as used in the state:
%% registers not yet in used_regs are added to it and removed from
%% available_regs; registers already used are left alone.
-spec merge_used_regs(state(), [riscv32_register()]) -> state().
merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [
    Reg | T
]) ->
    case lists:member(Reg, UR0) of
        true ->
            merge_used_regs(State, T);
        false ->
            AvR1 = lists:delete(Reg, AvR0),
            UR1 = [Reg | UR0],
            merge_used_regs(
                State#state{used_regs = UR1, available_regs = AvR1}, T
            )
    end;
merge_used_regs(State, []) ->
    State.
%%-----------------------------------------------------------------------------
%% @doc Emit a shift register right by a fixed number of bits, effectively
%% dividing it by 2^Shift. The shift is logical (srli).
%% When the register is passed as `{free, Reg}' it is shifted in place;
%% otherwise the result goes to the first available register, which is then
%% marked as used, and the source register is left untouched.
%% @end
%% @param State current state
%% @param Reg register to shift, possibly wrapped as `{free, Reg}'
%% @param Shift number of bits to shift
%% @return tuple of the new state and the register holding the result
%%-----------------------------------------------------------------------------
-spec shift_right(#state{}, maybe_free_riscv32_register(), non_neg_integer()) ->
    {#state{}, riscv32_register()}.
shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    I = jit_riscv32_asm:srli(Reg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    {State#state{stream = Stream1}, Reg};
shift_right(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [ResultReg | T],
        used_regs = UR
    } = State,
    Reg,
    Shift
) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    I = jit_riscv32_asm:srli(ResultReg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}.

%%-----------------------------------------------------------------------------
%% @doc Emit a shift register left by a fixed number of bits, effectively
%% multiplying it by 2^Shift. The register is shifted in place.
%% @end
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return new state
%%-----------------------------------------------------------------------------
-spec shift_left(#state{}, riscv32_register(), non_neg_integer()) -> #state{}.
shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when
    is_atom(Reg)
->
    I = jit_riscv32_asm:slli(Reg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a call to a function pointer with arguments. This function converts
%% arguments and passes them following the backend ABI convention.
%% The return address register, the context, jit state and native interface
%% registers and every register still in use are saved on the stack around the
%% call. The value returned in a0 is made available in the returned register.
%% @end
%% @param State current backend state
%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex}
%% @param Args arguments to pass to the function
%% @return Updated backend state and return register
%%-----------------------------------------------------------------------------
-spec call_func_ptr(state(), {free, riscv32_register()} | {primitive, non_neg_integer()}, [arg()]) ->
    {state(), riscv32_register()}.
call_func_ptr(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State0,
    FuncPtrTuple,
    Args
) ->
    % Registers handed over as free (function pointer included) need not
    % survive the call
    FreeRegs = lists:flatmap(
        fun
            ({free, {ptr, Reg}}) -> [Reg];
            ({free, Reg}) when is_atom(Reg) -> [Reg];
            (_) -> []
        end,
        [FuncPtrTuple | Args]
    ),
    UsedRegs1 = UsedRegs0 -- FreeRegs,
    % Save RA (like AArch64 saves LR) so it's preserved across jalr calls
    SavedRegs = [?RA_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1],

    % Calculate available registers
    % (the freed registers that belong to ?AVAILABLE_REGS become available)
    FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS),
    AvailableRegs1 = FreeGPRegs ++ AvailableRegs0,

    % Calculate stack space: round up to 16-byte boundary for RISC-V ABI
    NumRegs = length(SavedRegs),
    StackBytes = NumRegs * 4,
    AlignedStackBytes = ((StackBytes + 15) div 16) * 16,

    Stream1 = push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0),

    % Set up arguments following RISC-V ILP32 calling convention
    % Arguments are passed in a0-a7 (up to 8 register arguments)
    % The `offset' pseudo-argument is replaced by the stream offset reached
    % after the registers were pushed.
    Args1 = lists:map(
        fun(Arg) ->
            case Arg of
                offset -> StreamModule:offset(Stream1);
                _ -> Arg
            end
        end,
        Args
    ),

    RegArgs0 = Args1,
    RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0),

    % We pushed registers to stack, so we can use these registers we saved
    % and the currently available registers
    SetArgsRegsOnlyAvailableArgs = (UsedRegs1 -- RegArgsRegs) ++ AvailableRegs0,
    State1 = State0#state{
        available_regs = SetArgsRegsOnlyAvailableArgs,
        used_regs = ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs,
        stream = Stream1
    },

    ParameterRegs = parameter_regs(RegArgs0),
    {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} =
        case FuncPtrTuple of
            {free, FuncPtrReg0} ->
                % If FuncPtrReg is in parameter regs, we must swap it with a free reg.
                case lists:member(FuncPtrReg0, ParameterRegs) of
                    true ->
                        case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of
                            [] ->
                                % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0
                                % that is not in ParameterRegs
                                [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs,
                                [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs,
                                % First move the argument out of FuncPtrReg1,
                                % then move the function pointer into it
                                MovInstr1 = jit_riscv32_asm:mv(NewArgReg, FuncPtrReg1),
                                MovInstr2 = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0),
                                SetArgsAvailableArgs1 =
                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
                                        [FuncPtrReg0],
                                RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg),
                                {
                                    StreamModule:append(
                                        State1#state.stream,
                                        <<MovInstr1/binary, MovInstr2/binary>>
                                    ),
                                    SetArgsAvailableArgs1,
                                    FuncPtrReg1,
                                    RegArgs1
                                };
                            [FuncPtrReg1 | _] ->
                                MovInstr = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0),
                                SetArgsAvailableArgs1 =
                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
                                        [FuncPtrReg0],
                                {
                                    StreamModule:append(State1#state.stream, MovInstr),
                                    SetArgsAvailableArgs1,
                                    FuncPtrReg1,
                                    RegArgs0
                                }
                        end;
                    false ->
                        SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
                        {State1#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0}
                end;
            {primitive, Primitive} ->
                % Load the primitive pointer in a register that is not a
                % parameter register
                [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs,
                SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
                PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0),
                Stream2 = StreamModule:append(State1#state.stream, PrepCall),
                {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0}
        end,

    State3 = State1#state{
        available_regs = SetArgsAvailableRegs,
        used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs,
        stream = Stream3
    },

    StackOffset = AlignedStackBytes,
    State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset),
    Stream4 = State4#state.stream,

    % Call the function pointer (using JALR for call with return)
    Call = jit_riscv32_asm:jalr(ra, FuncPtrReg, 0),
    Stream5 = StreamModule:append(Stream4, Call),

    % For result, we need a free register (including FuncPtrReg).
    % If none are available (all registers were pushed to the stack),
    % we write the result to the stack position of FuncPtrReg
    {Stream6, UsedRegs2} =
        case length(SavedRegs) of
            N when N >= 7 andalso element(1, FuncPtrTuple) =:= free ->
                % We use original FuncPtrReg then as we know it's available.
                ResultReg = element(2, FuncPtrTuple),
                StoreResult =
                    case index_of(ResultReg, SavedRegs) of
                        RegIndex when RegIndex >= 0 ->
                            % The register has a stack slot (index * 4 bytes):
                            % overwrite it so that popping restores the result
                            jit_riscv32_asm:sw(sp, a0, RegIndex * 4);
                        _NotSaved ->
                            % The freed function pointer register was removed
                            % from UsedRegs1 and thus has no stack slot: popping
                            % will not touch it, so a plain move is enough.
                            % (Using the -1 index would store below sp and
                            % lose the result.)
                            jit_riscv32_asm:mv(ResultReg, a0)
                    end,
                {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]};
            _ ->
                % Use any free that is not in SavedRegs
                [ResultReg | _] = AvailableRegs1 -- SavedRegs,
                MoveResult = jit_riscv32_asm:mv(ResultReg, a0),
                {StreamModule:append(Stream5, MoveResult), [ResultReg | UsedRegs1]}
        end,

    Stream8 = pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream6),

    AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1),
    % Keep only registers of ?AVAILABLE_REGS, in the order of ?AVAILABLE_REGS
    AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2),
    {
        State4#state{
            stream = Stream8,
            available_regs = AvailableRegs3,
            used_regs = UsedRegs2
        },
        ResultReg
    }.

%% Native register referenced by an argument, as a list of zero or one element.
arg_to_reg_list({free, {ptr, Reg}}) -> [Reg];
arg_to_reg_list({free, Reg}) when is_atom(Reg) -> [Reg];
arg_to_reg_list(Reg) when is_atom(Reg) -> [Reg];
arg_to_reg_list(_) -> [].

%% Zero-based position of Item in List, or -1 when it is not a member.
index_of(Item, List) -> index_of(Item, List, 0).
%% Worker for index_of/2: Index is the position of the head of the list.
%% Returns -1 once the list is exhausted without finding Item.
index_of(_Item, [], _Index) ->
    -1;
index_of(Item, [Head | Tail], Index) ->
    case Head =:= Item of
        true -> Index;
        false -> index_of(Item, Tail, Index + 1)
    end.

%% Emit code saving SavedRegs on the stack: sp is first lowered by
%% AlignedStackBytes, then each register is stored at consecutive 4-byte
%% offsets starting at 0(sp), in list order. Nothing is emitted for an empty
%% list.
push_registers([], _AlignedStackBytes, _StreamModule, Stream) ->
    Stream;
push_registers([_ | _] = SavedRegs, AlignedStackBytes, StreamModule, Stream) ->
    Reserve = jit_riscv32_asm:addi(sp, sp, -AlignedStackBytes),
    Slots = lists:seq(0, length(SavedRegs) - 1),
    lists:foldl(
        fun({Slot, Reg}, Acc) ->
            StreamModule:append(Acc, jit_riscv32_asm:sw(sp, Reg, Slot * 4))
        end,
        StreamModule:append(Stream, Reserve),
        lists:zip(Slots, SavedRegs)
    ).

%% Emit code restoring SavedRegs from the stack, mirroring push_registers/4:
%% each register is loaded from its 4-byte slot, then sp is raised by
%% AlignedStackBytes. Nothing is emitted for an empty list.
pop_registers([], _AlignedStackBytes, _StreamModule, Stream) ->
    Stream;
pop_registers([_ | _] = SavedRegs, AlignedStackBytes, StreamModule, Stream) ->
    Slots = lists:seq(0, length(SavedRegs) - 1),
    Restored = lists:foldl(
        fun({Slot, Reg}, Acc) ->
            StreamModule:append(Acc, jit_riscv32_asm:lw(Reg, sp, Slot * 4))
        end,
        Stream,
        lists:zip(Slots, SavedRegs)
    ),
    Release = jit_riscv32_asm:addi(sp, sp, AlignedStackBytes),
    StreamModule:append(Restored, Release).

%% Convenience entry point: derive the parameter registers from the arguments
%% and delegate to set_registers_args/4.
set_registers_args(State0, Args, StackOffset) ->
    set_registers_args(State0, Args, parameter_regs(Args), StackOffset).
%% Emit the code moving Args into ParamRegs and update register bookkeeping.
%% Scratch registers that are neither parameter registers, nor referenced by
%% an argument, nor in use are handed to set_registers_args0/6 as temporaries.
%% In the returned state, the parameter registers are marked as used, arguments
%% passed as `{free, _}' are released, and available_regs holds the remaining
%% registers of ?AVAILABLE_REGS.
set_registers_args(
    #state{used_regs = UsedRegs} = State0,
    Args,
    ParamRegs,
    StackOffset
) ->
    ArgsRegs = args_regs(Args),
    AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs,
    State1 = set_registers_args0(
        State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset
    ),
    % Registers passed as free arguments are no longer in use after the moves
    NewUsedRegs = lists:foldl(
        fun
            ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed);
            ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed);
            (_, AccUsed) -> AccUsed
        end,
        UsedRegs,
        Args
    ),
    State1#state{
        % `--' is right-associative: without the parentheses this would be
        % ?AVAILABLE_REGS -- (ParamRegs -- NewUsedRegs) and registers still
        % in use would be reported as available.
        available_regs = (?AVAILABLE_REGS -- ParamRegs) -- NewUsedRegs,
        used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs)
    }.

%% Parameter registers assigned to Args, taken in order from ?PARAMETER_REGS.
%% A 64-bit argument contributes two registers to the result.
parameter_regs(Args) ->
    parameter_regs0(Args, ?PARAMETER_REGS, []).

% 64-bit arguments are placed in a register pair starting at an even register
% number: a0/a1 or a2/a3, skipping a1 when it is the next free register.
% Only pairs starting at a0, a1 or a2 are handled; a 64-bit argument at any
% other position falls through to the last clause and gets a single register.
% NOTE(review): the original comment states that ILP32 requires this
% double-word alignment; confirm against the RISC-V psABI that it applies to
% non-variadic arguments as well.
parameter_regs0([], _, Acc) ->
    lists:reverse(Acc);
parameter_regs0([{avm_int64_t, _} | T], [a0, a1 | Rest], Acc) ->
    parameter_regs0(T, Rest, [a1, a0 | Acc]);
parameter_regs0([{avm_int64_t, _} | T], [a1, a2, a3 | Rest], Acc) ->
    parameter_regs0(T, Rest, [a3, a2 | Acc]);
parameter_regs0([{avm_int64_t, _} | T], [a2, a3 | Rest], Acc) ->
    parameter_regs0(T, Rest, [a3, a2 | Acc]);
parameter_regs0([_Other | T], [Reg | Rest], Acc) ->
    parameter_regs0(T, Rest, [Reg | Acc]).

%% Replace the first argument that is Reg1 or `{free, Reg1}' with Reg2.
%% The `free' wrapper is not kept on the replacement. There is no clause for
%% an exhausted list: the call fails with function_clause if no argument
%% matches.
replace_reg(Args, Reg1, Reg2) ->
    replace_reg0(Args, Reg1, Reg2, []).

replace_reg0([Reg | T], Reg, Replacement, Acc) ->
    lists:reverse(Acc, [Replacement | T]);
replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) ->
    lists:reverse(Acc, [Replacement | T]);
replace_reg0([Other | T], Reg, Replacement, Acc) ->
    replace_reg0(T, Reg, Replacement, [Other | Acc]).
%% Walk the arguments, the registers they reference and the parameter
%% registers in lockstep, emitting one move per argument through
%% set_registers_args1/4. Scratch holds registers usable to park the content
%% of a parameter register that a later argument still reads.
set_registers_args0(State, [], [], [], _Scratch, _StackOffset) ->
    State;
% A free argument is set like the value it wraps
set_registers_args0(State, [{free, Inner} | MoreArgs], ArgsRegs, ParamRegs, Scratch, StackOffset) ->
    set_registers_args0(State, [Inner | MoreArgs], ArgsRegs, ParamRegs, Scratch, StackOffset);
% ctx going to the ctx register: nothing to emit
set_registers_args0(
    State, [ctx | MoreArgs], [?CTX_REG | MoreArgsRegs], [?CTX_REG | MoreParams], Scratch, StackOffset
) ->
    set_registers_args0(State, MoreArgs, MoreArgsRegs, MoreParams, Scratch, StackOffset);
% A 64-bit integer literal is split into two 32-bit arguments, low word first
set_registers_args0(
    State,
    [{avm_int64_t, Value} | MoreArgs],
    ArgsRegs,
    ParamRegs,
    Scratch,
    StackOffset
) when is_integer(Value) ->
    % Each word is expressed as a signed 32-bit value for the li instruction
    ToSigned32 = fun
        (Word) when Word > 16#7FFFFFFF -> Word - 16#100000000;
        (Word) -> Word
    end,
    Low = ToSigned32(Value band 16#FFFFFFFF),
    High = ToSigned32((Value bsr 32) band 16#FFFFFFFF),
    set_registers_args0(
        State, [Low, High | MoreArgs], [imm | ArgsRegs], ParamRegs, Scratch, StackOffset
    );
% The ctx register is needed to reach x_reg/y_reg/fp_reg, so it is never
% parked in a scratch register: no remaining argument may still read it.
set_registers_args0(
    State, [Arg | MoreArgs], [_ArgReg | MoreArgsRegs], [?CTX_REG | MoreParams], Scratch, StackOffset
) ->
    false = lists:member(?CTX_REG, MoreArgsRegs),
    NextState = set_registers_args1(State, Arg, ?CTX_REG, StackOffset),
    set_registers_args0(NextState, MoreArgs, MoreArgsRegs, MoreParams, Scratch, StackOffset);
set_registers_args0(
    #state{stream_module = StreamModule} = State,
    [Arg | MoreArgs],
    [_ArgReg | MoreArgsRegs],
    [Target | MoreParams],
    Scratch,
    StackOffset
) ->
    case lists:member(Target, MoreArgsRegs) of
        true ->
            % A later argument still reads Target: copy it to a scratch
            % register first and make the remaining arguments read the copy
            [Spare | MoreScratch] = Scratch,
            Park = jit_riscv32_asm:mv(Spare, Target),
            Parked = State#state{stream = StreamModule:append(State#state.stream, Park)},
            NextState = set_registers_args1(Parked, Arg, Target, StackOffset),
            set_registers_args0(
                NextState,
                replace_reg(MoreArgs, Target, Spare),
                MoreArgsRegs,
                MoreParams,
                MoreScratch,
                StackOffset
            );
        false ->
            NextState = set_registers_args1(State, Arg, Target, StackOffset),
            set_registers_args0(
                NextState, MoreArgs, MoreArgsRegs, MoreParams, Scratch, StackOffset
            )
    end.

%% Emit the code setting a single parameter register from one argument.
% Argument already sits in its parameter register
set_registers_args1(State, Same, Same, _StackOffset) ->
    State;
% jit_state is always in a1, so a move is only needed for another register
set_registers_args1(State, jit_state, a1, _StackOffset) ->
    State;
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    jit_state,
    Target,
    _StackOffset
) ->
    Move = jit_riscv32_asm:mv(Target, a1),
    State#state{stream = StreamModule:append(Stream, Move)};
% For tail calls, jit_state is already in a1
set_registers_args1(State, jit_state_tail_call, a1, _StackOffset) ->
    State;
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    {x_reg, extra},
    Target,
    _StackOffset
) ->
    {Base, Offset} = ?X_REG(?MAX_REG),
    Load = jit_riscv32_asm:lw(Target, Base, Offset),
    State#state{stream = StreamModule:append(Stream, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    {x_reg, X},
    Target,
    _StackOffset
) ->
    {Base, Offset} = ?X_REG(X),
    Load = jit_riscv32_asm:lw(Target, Base, Offset),
    State#state{stream = StreamModule:append(Stream, Load)};
% Dereference a pointer held in a native register
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    {ptr, Source},
    Target,
    _StackOffset
) ->
    Load = jit_riscv32_asm:lw(Target, Source, 0),
    State#state{stream = StreamModule:append(Stream, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream, available_regs = Available} = State,
    {y_reg, Y},
    Target,
    _StackOffset
) ->
    Load = ldr_y_reg(Target, Y, Available),
    State#state{stream = StreamModule:append(Stream, Load)};
% Native register to parameter register
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    Source,
    Target,
    _StackOffset
) when ?IS_GPR(Source) ->
    Move = jit_riscv32_asm:mv(Target, Source),
    State#state{stream = StreamModule:append(Stream, Move)};
% Immediate value
set_registers_args1(State, Value, Target, _StackOffset) when
    ?IS_SIGNED_OR_UNSIGNED_INT32_T(Value)
->
    mov_immediate(State, Target, Value).

%%-----------------------------------------------------------------------------
%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg)
%% from an immediate, a native register or another vm register.
%% @end
%% @param State current backend state
%% @param Src value to move to vm register
%% @param Dest vm register to move to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) ->
    state().
% Native register to VM register: a single store (sw) for x registers and
% pointers, the str_y_reg/4 sequence for y registers.
move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:sw(BaseReg, Src, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) ->
    {BaseReg, Off} = ?X_REG(X),
    I1 = jit_riscv32_asm:sw(BaseReg, Src, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) ->
    I1 = jit_riscv32_asm:sw(Reg, Src, 0),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when
    is_atom(Src)
->
    Code = str_y_reg(Src, Y, Temp1, AT),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
    State0#state{stream = Stream1};
% Small integer (0..255) to y_reg: load it in Temp2 with li, then store Temp2
% with str_y_reg/4, which gets Temp1 as its own temporary.
move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when
    is_integer(N), N >= 0, N =< 255
->
    I1 = jit_riscv32_asm:li(Temp2, N),
    YCode = str_y_reg(Temp2, Y, Temp1, AT),
    % The load must come first: the store sequence reads Temp2
    Stream1 = (State0#state.stream_module):append(
        State0#state.stream, <<I1/binary, YCode/binary>>
    ),
    State0#state{stream = Stream1};
% Small integer (0..255) to any other destination: load it in a temp with li,
% then store the temp as a native register. The temp is withheld from the
% available registers during the store and given back afterwards.
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
    is_integer(N), N >= 0, N =< 255
->
    I1 = jit_riscv32_asm:li(Temp, N),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
%% Any other integer: materialize it in a temp with mov_immediate/3, then
%% store the temp as a native register.
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
    is_integer(N)
->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N),
    State2 = move_to_vm_register(State1, Temp, Dest),
    State2#state{available_regs = AR0};
% Source is a VM register: load it in a temp, then store the temp as a native
% register.
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:lw(Temp, BaseReg, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) ->
    {XReg, X_REGOffset} = ?X_REG(X),
    I1 = jit_riscv32_asm:lw(Temp, XReg, X_REGOffset),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) ->
    I1 = jit_riscv32_asm:lw(Temp, Reg, 0),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) ->
    Code = ldr_y_reg(Temp, Y, AT),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
% term_to_float: copy the float payload found after the first word pointed to
% by Reg into floating point register F, then free Reg.
move_to_vm_register(
    #state{
        stream_module = StreamModule,
        available_regs = [Temp1, Temp2 | _],
        stream = Stream0,
        variant = Variant
    } =
        State0,
    {free, {ptr, Reg, 1}},
    {fp_reg, F}
) ->
    {BaseReg, Off} = ?FP_REGS,
    % Temp1: base of the fp registers array; Temp2: first payload word
    I1 = jit_riscv32_asm:lw(Temp1, BaseReg, Off),
    I2 = jit_riscv32_asm:lw(Temp2, Reg, 4),
    case Variant band ?JIT_VARIANT_FLOAT32 of
        0 ->
            % Double precision: write both 32-bit parts
            I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8),
            I4 = jit_riscv32_asm:lw(Temp2, Reg, 8),
            I5 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8 + 4),
            Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
        _ ->
            % Single precision: write only first 32-bit part
            I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 4),
            Code = <<I1/binary, I2/binary, I3/binary>>
    end,
    Stream1 = StreamModule:append(Stream0, Code),
    State1 = free_native_register(State0, Reg),
    State1#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a move of an array element (reg[x]) to a vm or a native register.
%% @end
%% @param State current backend state
%% @param Reg base register of the array
%% @param Index index in the array, as an integer or a native register
%% @param Dest vm or native register to move to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec move_array_element(
    state(),
    riscv32_register(),
    non_neg_integer() | riscv32_register(),
    vm_register() | riscv32_register()
) -> state().
%% Clauses are grouped by the shape of Index:
%%   * integer Index: the element is read at byte offset Index * 4 from Reg;
%%   * {free, IndexReg}: the address is computed in IndexReg itself
%%     (Reg + (IndexReg bsl 2)); IndexReg is clobbered and then released.
%% Dest may be an x register, a {ptr, Reg} location, a y register or (for an
%% integer Index only) a native register.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Reg,
    Index,
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
    % Load into a scratch register, then store into the x register slot.
    I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4),
    {BaseReg, Off} = ?X_REG(X),
    I2 = jit_riscv32_asm:sw(BaseReg, Temp, Off),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Reg,
    Index,
    {ptr, Dest}
) when is_atom(Reg) andalso is_integer(Index) ->
    % Store through the pointer held in Dest.
    I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4),
    I2 = jit_riscv32_asm:sw(Dest, Temp, 0),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} =
        State,
    Reg,
    Index,
    {y_reg, Y}
) when is_atom(Reg) andalso is_integer(Index) ->
    % Temp2 carries the value; Temp1 and the remaining free registers are
    % handed to str_y_reg as scratch.
    I1 = jit_riscv32_asm:lw(Temp2, Reg, Index * 4),
    YCode = str_y_reg(Temp2, Y, Temp1, AT),
    Code = <<I1/binary, YCode/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1};
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} =
        State,
    {free, Reg},
    Index,
    {y_reg, Y}
) when is_integer(Index) ->
    % The base register may be overwritten, so it doubles as the value register.
    I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4),
    YCode = str_y_reg(Reg, Y, Temp, AT),
    Code = <<I1/binary, YCode/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1};
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest
) when is_atom(Dest) andalso is_integer(Index) ->
    % Native destination: a single load is enough.
    I1 = jit_riscv32_asm:lw(Dest, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(IndexReg) ->
    % IndexReg := Reg + IndexReg * 4, then IndexReg := [IndexReg]
    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
    {BaseReg, Off} = ?X_REG(X),
    I4 = jit_riscv32_asm:sw(BaseReg, IndexReg, Off),
    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
    Stream1 = StreamModule:append(
        Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>
    ),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    };
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {ptr, PtrReg}
) when is_atom(IndexReg) ->
    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
    I4 = jit_riscv32_asm:sw(PtrReg, IndexReg, 0),
    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
    Stream1 = StreamModule:append(
        Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>
    ),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    };
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | AT] = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {y_reg, Y}
) when is_atom(IndexReg) ->
    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
    % Store sequence for the y register, using Temp (and AT) as scratch.
    I4 = str_y_reg(IndexReg, Y, Temp, AT),
    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
    Stream1 = StreamModule:append(
        Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>
    ),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    }.

%% @doc Load the array element Reg[Index] (4-byte elements) into a native
%% register and return that register.
%% With `{free, Reg}' the base register is reused for the result; otherwise a
%% register is taken from the available list and marked as used.
-spec get_array_element(
    state(), riscv32_register() | {free, riscv32_register()}, non_neg_integer()
) ->
    {state(), riscv32_register()}.
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    {free, Reg},
    Index
) ->
    I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
    {State#state{stream = Stream1}, Reg};
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [ElemReg | AvailableT],
        used_regs = UsedRegs0
    } = State,
    Reg,
    Index
) ->
    I1 = jit_riscv32_asm:lw(ElemReg, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
    {
        State#state{
            stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0]
        },
        ElemReg
    }.

%% @doc Move an integer, a vm or native register to reg[x].
%% The index is either an integer or a native register holding the index.
-spec move_to_array_element(
    state(),
    integer() | vm_register() | riscv32_register(),
    riscv32_register(),
    non_neg_integer() | riscv32_register()
) -> state().
%% Store a value into Reg[Index] (4-byte elements).
%%   * native value, integer index: a single store at offset Index * 4;
%%   * native value, register index: the address is built in a scratch register
%%     so that neither Reg nor IndexReg is modified;
%%   * any other value: it is first copied into a native register, which is
%%     released once the store has been emitted.
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    ValueReg,
    Reg,
    Index
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:sw(Reg, ValueReg, Index * 4),
    Stream1 = StreamModule:append(Stream0, I1),
    State0#state{stream = Stream1};
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    ValueReg,
    Reg,
    IndexReg
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) ->
    % Temp := Reg + IndexReg * 4
    I1 = jit_riscv32_asm:mv(Temp, IndexReg),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
    I3 = jit_riscv32_asm:add(Temp, Reg, Temp),
    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
    Stream1 = StreamModule:append(
        Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>
    ),
    State0#state{stream = Stream1};
move_to_array_element(
    State0,
    Value,
    Reg,
    Index
) ->
    {State1, Temp} = copy_to_native_register(State0, Value),
    State2 = move_to_array_element(State1, Temp, Reg, Index),
    free_native_register(State2, Temp).

%% Store a value into BaseReg[IndexReg + Offset] (4-byte elements).
%% Offset is expressed in elements: the register clauses below add it to the
%% index before scaling by 4.
move_to_array_element(
    State,
    Value,
    BaseReg,
    IndexReg,
    Offset
) when is_integer(IndexReg) andalso is_integer(Offset) ->
    % Constant index: fold the offset into the index and use the 4-arity form.
    % The previous guard (`Offset div 8 =:= 0') only matched |Offset| < 8 and
    % then added `Offset div 8', i.e. always 0, silently dropping the offset.
    move_to_array_element(State, Value, BaseReg, IndexReg + Offset);
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    ValueReg,
    BaseReg,
    IndexReg,
    Offset
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
    % Temp := BaseReg + (IndexReg + Offset) * 4
    I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
    I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp),
    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
    Stream1 = StreamModule:append(
        Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>
    ),
    State#state{stream = Stream1};
move_to_array_element(
    State0,
    Value,
    BaseReg,
    IndexReg,
    Offset
) ->
    % Value is not in a native register yet: copy it first, then pick the
    % scratch register from what is still available.
    {State1, ValueReg} = copy_to_native_register(State0, Value),
    [Temp | _] = State1#state.available_regs,
    I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
    I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp),
    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
    Stream1 = (State1#state.stream_module):append(
        State1#state.stream, <<I1/binary, I2/binary, I3/binary, I4/binary>>
    ),
    State2 = State1#state{stream = Stream1},
    free_native_register(State2, ValueReg).

-spec move_to_native_register(state(), value() | cp) -> {state(), riscv32_register()}.
%% Two-argument form: bring a value into a native register and return it.
%% A value that already lives in a native register is returned as is, and
%% {ptr, Reg} is dereferenced in place; every other source takes the first
%% available register and marks it as used. An fp register takes two registers
%% and is returned as {fp, RegA, RegB}.
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    cp
) ->
    {BaseReg, Off} = ?CP,
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
move_to_native_register(State, Reg) when is_atom(Reg) ->
    {State, Reg};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}
) when is_atom(Reg) ->
    % Dereference in place: Reg := [Reg]
    I1 = jit_riscv32_asm:lw(Reg, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1}, Reg};
move_to_native_register(
    #state{
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State0,
    Imm
) when
    is_integer(Imm)
->
    State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT},
    {move_to_native_register(State1, Imm, Reg), Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {x_reg, extra}
) ->
    % The extra x register is stored in slot ?MAX_REG.
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {x_reg, X}
) when
    X < ?MAX_REG
->
    {BaseReg, Offset} = ?X_REG(X),
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Offset),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {y_reg, Y}
) ->
    Code = ldr_y_reg(Reg, Y, AvailT),
    Stream1 = StreamModule:append(Stream0, Code),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [RegA, RegB | AvailT],
        used_regs = Used
    } = State,
    {fp_reg, F}
) ->
    % RegB first holds the fp registers base pointer, then the word at
    % F * 8 + 4; RegA receives the word at F * 8.
    {BaseReg, Off} = ?FP_REGS,
    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off),
    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8),
    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {
        State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]},
        {fp, RegA, RegB}
    }.

%% Three-argument form: load a value into the given destination register.
%% Register bookkeeping is left to the caller.
-spec move_to_native_register(state(), value(), riscv32_register()) -> state().
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst
) when is_atom(RegSrc) ->
    I = jit_riscv32_asm:mv(RegDst, RegSrc),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1};
move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) ->
    mov_immediate(State, RegDst, ValSrc);
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst
) when ?IS_GPR(Reg) ->
    I1 = jit_riscv32_asm:lw(RegDst, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst
) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:lw(RegDst, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst
) when
    X < ?MAX_REG
->
    {XReg, X_REGOffset} = ?X_REG(X),
    I1 = jit_riscv32_asm:lw(RegDst, XReg, X_REGOffset),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = AT} = State,
    {y_reg, Y},
    RegDst
) ->
    Code = ldr_y_reg(RegDst, Y, AT),
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    {fp_reg, F},
    {fp, RegA, RegB}
) ->
    {BaseReg, Off} = ?FP_REGS,
    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off),
    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8),
    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

%% Like move_to_native_register/2, except that a value that is already in a
%% native register (or behind a {ptr, Reg}) is copied into a fresh register so
%% that the source register is left untouched.
-spec copy_to_native_register(state(), value()) -> {state(), riscv32_register()}.
copy_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [SaveReg | AvailT],
        used_regs = Used
    } = State,
    Reg
) when is_atom(Reg) ->
    I1 = jit_riscv32_asm:mv(SaveReg, Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg};
copy_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [SaveReg | AvailT],
        used_regs = Used
    } = State,
    {ptr, Reg}
) when is_atom(Reg) ->
    I1 = jit_riscv32_asm:lw(SaveReg, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg};
copy_to_native_register(State, Reg) ->
    move_to_native_register(State, Reg).

%% Copy a y register into the cp slot, going through a scratch register that
%% is not reserved afterwards.
move_to_cp(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | AvailT]} = State,
    {y_reg, Y}
) ->
    I1 = ldr_y_reg(Reg, Y, AvailT),
    {BaseReg, Off} = ?CP,
    I2 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

%% Add Offset words (Offset * 4 bytes) to the value stored in the ?Y_REGS
%% slot, using the first available register as scratch.
increment_sp(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State,
    Offset
) ->
    {BaseReg1, Off1} = ?Y_REGS,
    I1 = jit_riscv32_asm:lw(Reg, BaseReg1, Off1),
    I2 = jit_riscv32_asm:addi(Reg, Reg, Offset * 4),
    {BaseReg2, Off2} = ?Y_REGS,
    I3 = jit_riscv32_asm:sw(BaseReg2, Reg, Off2),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

%% Store the address of Label as the continuation in jit_state.
%% The address is not known yet, so a relocation is recorded in `branches'.
set_continuation_to_label(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        branches = Branches
    } = State,
    Label
) ->
    % Similar to AArch64: use pc_relative_address with a relocation that will be
    % resolved to point directly to the label's actual address (not the jump table entry)
    Offset = StreamModule:offset(Stream0),
    % Emit placeholder for pc_relative_address (auipc + addi)
    % The relocation will replace these with the correct offset
    I1 = pc_relative_address(Temp, 4),
    Reloc = {Label, Offset, {adr, Temp}},
    % Store continuation (jit_state is in a1)
    I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1, branches = [Reloc | Branches]}.

%% @doc Set the continuation to a given offset.
%% Return a reference so the offset will be updated with update_branches.
%% This is only used with OP_WAIT_TIMEOUT and the offset is after the current
%% code and not too far, so a pc-relative address (see pc_relative_address/2)
%% is sufficient.
set_continuation_to_offset(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        branches = Branches
    } = State
) ->
    % The target is identified by a fresh reference; a relocation is recorded
    % so the placeholder below is patched once the offset is known.
    OffsetRef = make_ref(),
    Offset = StreamModule:offset(Stream0),
    I1 = pc_relative_address(Temp, 4),
    Reloc = {OffsetRef, Offset, {adr, Temp}},
    % Store continuation (jit_state is in a1)
    I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}.

%% @doc Implement a continuation entry point.
%% Nothing is emitted on this backend: the state is returned unchanged.
-spec continuation_entry_point(#state{}) -> #state{}.
continuation_entry_point(State) ->
    State.

%% Load the word stored at offset 0 of the module pointer held in jit_state
%% into a newly reserved native register, and return that register.
get_module_index(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailableT],
        used_regs = UsedRegs0
    } = State
) ->
    % Load module from jit_state (which is in a1)
    I1 = jit_riscv32_asm:lw(Reg, ?JITSTATE_REG, ?JITSTATE_MODULE_OFFSET),
    I2 = jit_riscv32_asm:lw(Reg, Reg, 0),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {
        State#state{
            stream = Stream1,
            available_regs = AvailableT,
            used_regs = [Reg | UsedRegs0]
        },
        Reg
    }.

%% @doc Perform an AND of a register with an immediate.
%% The JIT currently calls this with ?TERM_PRIMARY_CLEAR_MASK (-4) to clear
%% bits, with ?TERM_BOXED_TAG_MASK (0x3F) and with 16#FFFFFF.
%% RISC-V has no bit-clear instruction, so small negative masks are applied
%% with a not + and sequence.
and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) ->
    % Keep the low 24 bits: shift left then logical shift right by 8.
    I1 = jit_riscv32_asm:slli(Reg, Reg, 8),
    I2 = jit_riscv32_asm:srli(Reg, Reg, 8),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State0#state{stream = Stream1};
and_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) when Val < 0 andalso Val >= -256 ->
    % Small negative mask: load the complement, invert it, then and.
    % Temp is hidden from mov_immediate and handed back afterwards.
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
    Stream1 = State1#state.stream,
    % RISC-V doesn't have bics, use not + and
    I1 = jit_riscv32_asm:not_(Temp, Temp),
    I2 = jit_riscv32_asm:and_(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
    State1#state{available_regs = [Temp | AT], stream = Stream2};
and_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:and_(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2};
and_(
    #state{stream_module = StreamModule, available_regs = []} = State0,
    Reg,
    Val
) when Val < 0 andalso Val >= -256 ->
    % No available registers: borrow a0 as scratch, parking its value in
    % ?IP_REG (t3) for the duration of the sequence.
    % NOTE(review): if Reg is a0 itself, the final restore overwrites the
    % result - confirm callers never pass a0 when no register is available.
    Stream0 = State0#state.stream,
    % Save a0
    Save = jit_riscv32_asm:mv(?IP_REG, a0),
    Stream1 = StreamModule:append(Stream0, Save),
    % Load the complement of the mask into a0
    State1 = mov_immediate(State0#state{stream = Stream1}, a0, bnot (Val)),
    Stream2 = State1#state.stream,
    % not + and (no bit-clear instruction on RISC-V)
    I1 = jit_riscv32_asm:not_(a0, a0),
    I2 = jit_riscv32_asm:and_(Reg, Reg, a0),
    Stream3 = StreamModule:append(Stream2, <<I1/binary, I2/binary>>),
    % Restore a0
    Restore = jit_riscv32_asm:mv(a0, ?IP_REG),
    Stream4 = StreamModule:append(Stream3, Restore),
    State0#state{stream = Stream4};
and_(
    #state{stream_module = StreamModule, available_regs = []} = State0,
    Reg,
    Val
) ->
    % No available registers: same a0 / ?IP_REG (t3) dance as above, with the
    % mask loaded as is.
    Stream0 = State0#state.stream,
    % Save a0
    Save = jit_riscv32_asm:mv(?IP_REG, a0),
    Stream1 = StreamModule:append(Stream0, Save),
    % Load immediate value into a0
    State1 = mov_immediate(State0#state{stream = Stream1}, a0, Val),
    Stream2 = State1#state.stream,
    I = jit_riscv32_asm:and_(Reg, Reg, a0),
    Stream3 = StreamModule:append(Stream2, I),
    % Restore a0
    Restore = jit_riscv32_asm:mv(a0, ?IP_REG),
    Stream4 = StreamModule:append(Stream3, Restore),
    State0#state{stream = Stream4}.

%% OR a register with an immediate, loaded through a scratch register that is
%% handed back to the available list afterwards.
or_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:or_(Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.

%% Add an immediate or a register to Reg.
%% Values in 0..255 use a single addi; a register operand uses add; any other
%% value is first loaded into a scratch register.
add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when
    Val >= 0 andalso Val =< 255
->
    I = jit_riscv32_asm:addi(Reg, Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State0#state{stream = Stream1};
add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when
    is_atom(Val)
->
    I = jit_riscv32_asm:add(Reg, Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State0#state{stream = Stream1};
add(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:add(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.

%% Load an immediate into a register.
%% Both the 12-bit signed range (-16#800..16#7FF) and larger values go through
%% jit_riscv32_asm:li/2, which picks the encoding; no literal pool entry is
%% ever created here.
mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
    I = jit_riscv32_asm:li(Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1}.

%% Emit the pending literal pool, if any, and patch the load instructions that
%% refer to it.
%% NOTE(review): the non-empty clause still addresses literals relative to
%% `pc' and pads to a 4-byte boundary with a 2-byte filler, which looks
%% inherited from the Thumb backend. mov_immediate/3 above never adds to
%% `literal_pool', so presumably only the empty-pool clause is reached -
%% confirm before relying on the second clause.
flush_literal_pool(#state{literal_pool = []} = State) ->
    State;
flush_literal_pool(
    #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State
) ->
    % Align
    Offset = StreamModule:offset(Stream0),
    Stream1 =
        if
            Offset rem 4 =:= 0 -> Stream0;
            true -> StreamModule:append(Stream0, <<0:16>>)
        end,
    % Lay all values and update the load instructions
    Stream2 = lists:foldl(
        fun({LdrInstructionAddr, Reg, Val}, AccStream) ->
            LiteralPosition = StreamModule:offset(AccStream),
            LdrPC = (LdrInstructionAddr band (bnot 3)) + 4,
            LiteralOffset = LiteralPosition - LdrPC,
            LdrInstruction = jit_riscv32_asm:lw(Reg, pc, LiteralOffset),
            % Literal is laid out as a 32-bit little-endian word
            AccStream1 = StreamModule:append(AccStream, <<Val:32/little>>),
            StreamModule:replace(
                AccStream1, LdrInstructionAddr, LdrInstruction
            )
        end,
        Stream1,
        lists:reverse(LP)
    ),
    State#state{stream = Stream2, literal_pool = []}.

%% Subtract an immediate or a register from Reg.
%% Values in 0..255 use a single addi with the negated value; a register
%% operand uses sub; any other value is first loaded into a scratch register.
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
    Val >= 0 andalso Val =< 255
->
    I1 = jit_riscv32_asm:addi(Reg, Reg, -Val),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
    is_atom(Val)
->
    I = jit_riscv32_asm:sub(Reg, Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1};
sub(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:sub(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.

%% Multiply Reg by a constant.
%% Powers of two become shifts, a few small constants become shift + add/sub
%% (or a composition of two such steps), and everything else uses the mul
%% instruction with the constant loaded into a scratch register.
mul(State, _Reg, 1) ->
    State;
mul(State, Reg, 2) ->
    shift_left(State, Reg, 1);
mul(#state{available_regs = [Temp | _]} = State, Reg, 3) ->
    % Reg * 3 = (Reg bsl 1) + Reg
    I1 = jit_riscv32_asm:slli(Temp, Reg, 1),
    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 4) ->
    shift_left(State, Reg, 2);
mul(#state{available_regs = [Temp | _]} = State, Reg, 5) ->
    % Reg * 5 = (Reg bsl 2) + Reg
    I1 = jit_riscv32_asm:slli(Temp, Reg, 2),
    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State0, Reg, 6) ->
    State1 = mul(State0, Reg, 3),
    mul(State1, Reg, 2);
mul(#state{available_regs = [Temp | _]} = State, Reg, 7) ->
    % Reg * 7 = (Reg bsl 3) - Reg
    I1 = jit_riscv32_asm:slli(Temp, Reg, 3),
    I2 = jit_riscv32_asm:sub(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 8) ->
    shift_left(State, Reg, 3);
mul(#state{available_regs = [Temp | _]} = State, Reg, 9) ->
    % Reg * 9 = (Reg bsl 3) + Reg
    I1 = jit_riscv32_asm:slli(Temp, Reg, 3),
    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State0, Reg, 10) ->
    State1 = mul(State0, Reg, 5),
    mul(State1, Reg, 2);
mul(#state{available_regs = [Temp | _]} = State, Reg, 15) ->
    % Reg * 15 = (Reg bsl 4) - Reg
    I1 = jit_riscv32_asm:slli(Temp, Reg, 4),
    I2 = jit_riscv32_asm:sub(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 16) ->
    shift_left(State, Reg, 4);
mul(State, Reg, 32) ->
    shift_left(State, Reg, 5);
mul(State, Reg, 64) ->
    shift_left(State, Reg, 6);
mul(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    % General case: load the constant into a scratch register and use mul
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:mul(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}.

%%
%% Analysis of AArch64 pattern and RISC-V32 implementation:
%%
%% AArch64 layout (from call_ext_only_test):
%% 0x0-0x8: Decrement reductions, store back
%% 0xc: b.ne 0x20 ; Branch if reductions != 0 to continuation
%% 0x10-0x1c: adr/str/ldr/br sequence for scheduling next process
%% 0x20: [CONTINUATION POINT] - Actual function starts here
%%
%% RISC-V32 implementation (no prolog/epilog needed due to 32 registers):
%% 0x0-0x8: Decrement reductions, store back
%% 0xc: bne continuation ; Branch if reductions != 0 to continuation
%% 0x10-0x?: adr/sw/ldr/jalr sequence for scheduling next process
%% continuation: [actual function body]
%%
%% Key insight: With 32 registers, RISC-V32 doesn't need prolog/epilog like ARM Thumb.
%% When reductions != 0, we branch directly to continue execution.
%% When reductions == 0, we schedule the next process, and resume at the continuation point.
%%
-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().
decrement_reductions_and_maybe_schedule_next(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0
) ->
    % Load reduction count
    I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
    % Decrement reduction count
    I2 = jit_riscv32_asm:addi(Temp, Temp, -1),
    % Store back the decremented value
    I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    BNEOffset = StreamModule:offset(Stream1),
    % Branch if reduction count is not zero (placeholder target, patched below)
    I4 = jit_riscv32_asm:bne(Temp, zero, 0),
    % Set continuation to the next instruction (placeholder, patched below)
    ADROffset = BNEOffset + byte_size(I4),
    I5 = pc_relative_address(Temp, 0),
    I6 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
    % Append the instructions to the stream
    Stream2 = StreamModule:append(Stream1, <<I4/binary, I5/binary, I6/binary>>),
    State1 = State0#state{stream = Stream2},
    State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]),
    % Rewrite the branch and pc-relative address now that the offset of the
    % continuation point (right after the scheduling call) is known.
    % NOTE(review): the placeholder I5 is built with offset 0 while NewI5 uses
    % a non-zero offset; confirm pc_relative_address/2 yields the same number
    % of bytes in both cases, otherwise this replace overruns into I6.
    #state{stream = Stream3} = State2,
    NewOffset = StreamModule:offset(Stream3),
    NewI4 = jit_riscv32_asm:bne(Temp, zero, NewOffset - BNEOffset),
    NewI5 = pc_relative_address(Temp, NewOffset - ADROffset),
    Stream4 = StreamModule:replace(
        Stream3, BNEOffset, <<NewI4/binary, NewI5/binary>>
    ),
    merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs).

%% Set cp to the code following the emitted sequence, then either jump to
%% Label or schedule the next process (see call_only_or_schedule_next/2).
-spec call_or_schedule_next(state(), non_neg_integer()) -> state().
call_or_schedule_next(State0, Label) ->
    {State1, RewriteOffset, TempReg} = set_cp(State0),
    State2 = call_only_or_schedule_next(State1, Label),
    rewrite_cp_offset(State2, RewriteOffset, TempReg).

%% Decrement the reduction count; if it is not zero, branch to Label, otherwise
%% set the continuation to Label and call the schedule-next primitive.
call_only_or_schedule_next(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    Label
) ->
    % Load reduction count (jit_state is in a1)
    I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
    % Decrement reduction count
    I2 = jit_riscv32_asm:addi(Temp, Temp, -1),
    % Store back the decremented value
    I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    % Use trampoline technique: branch if zero (eq) to skip over the long branch
    % If not zero, we want to continue execution at Label
    % If zero, we want to fall through to scheduling code

    % Look up label once to avoid duplicate lookup in helper
    LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels),

    BccOffset = StreamModule:offset(Stream1),

    State4 =
        case LabelLookupResult of
            {Label, LabelOffset} ->
                % Label is known, check if we can optimize the conditional branch
                Rel = LabelOffset - BccOffset,

                if
                    Rel >= -4096 andalso Rel =< 4094 andalso (Rel rem 2) =:= 0 ->
                        % Near branch: use direct conditional branch (RISC-V has ±4KB range)

                        % Branch if NOT zero (temp != 0)
                        I4 = jit_riscv32_asm:bne(Temp, zero, Rel),
                        Stream2 = StreamModule:append(Stream1, I4),
                        State0#state{stream = Stream2};
                    true ->
                        % Far branch: use trampoline with helper
                        % Get the code block size for the far branch sequence that will follow

                        % RISC-V branch is 4 bytes
                        FarSeqOffset = BccOffset + 4,
                        {State1, FarCodeBlock} = branch_to_label_code(
                            State0, FarSeqOffset, Label, LabelLookupResult
                        ),
                        FarSeqSize = byte_size(FarCodeBlock),
                        % Skip over the far branch sequence if zero (temp == 0)
                        I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4),
                        Stream2 = StreamModule:append(Stream1, I4),
                        Stream3 = StreamModule:append(Stream2, FarCodeBlock),
                        State1#state{stream = Stream3}
                end;
            false ->
                % Label not known, get the far branch size for the skip

                % RISC-V branch is 4 bytes
                FarSeqOffset = BccOffset + 4,
                {State1, FarCodeBlock} = branch_to_label_code(State0, FarSeqOffset, Label, false),
                FarSeqSize = byte_size(FarCodeBlock),
                I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4),
                Stream2 = StreamModule:append(Stream1, I4),
                Stream3 = StreamModule:append(Stream2, FarCodeBlock),
                State1#state{stream = Stream3}
        end,
    State5 = set_continuation_to_label(State4, Label),
    call_primitive_last(State5, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).

%% Set cp to the code following the primitive call, then tail-call the
%% primitive.
call_primitive_with_cp(State0, Primitive, Args) ->
    {State1, RewriteOffset, TempReg} = set_cp(State0),
    State2 = call_primitive_last(State1, Primitive, Args),
    rewrite_cp_offset(State2, RewriteOffset, TempReg).

%% Emit code storing (module_index bsl 24) bor Offset into cp, where the offset
%% part is a placeholder (nop) to be filled in by rewrite_cp_offset/3.
%% Returns the stream offset of the placeholder and the register it must load.
-spec set_cp(state()) -> {state(), non_neg_integer(), riscv32_register()}.
set_cp(#state{available_regs = [TempReg | AvailT], used_regs = UsedRegs} = State0) ->
    % Reserve a temporary register for the offset BEFORE calling get_module_index
    % to avoid running out of available registers
    State0b = State0#state{available_regs = AvailT, used_regs = [TempReg | UsedRegs]},
    % get module index (dynamically)
    {
        #state{stream_module = StreamModule, stream = Stream0} = State1,
        Reg
    } = get_module_index(
        State0b
    ),

    Offset = StreamModule:offset(Stream0),
    % build cp with module_index << 24
    I1 = jit_riscv32_asm:slli(Reg, Reg, 24),
    % Reserve space for offset load instruction
    % li can generate 1 instruction (4 bytes) for small immediates (< 2048)
    % or 2 instructions (8 bytes) for large immediates
    % Since we use (offset bsl 2), threshold is when offset >= 512 bytes
    % NOTE(review): the reservation is sized from the stream offset here, while
    % rewrite_cp_offset/3 encodes a later offset relative to the code start;
    % confirm the li emitted there can never be longer than the space reserved.
    {I2, I3} =
        if
            Offset >= 512 ->
                % Need 2 instructions (lui + addi) for large offsets
                {jit_riscv32_asm:nop(), jit_riscv32_asm:nop()};
            true ->
                % Need 1 instruction (addi) for small offsets
                {jit_riscv32_asm:nop(), <<>>}
        end,
    MOVOffset = Offset + byte_size(I1),
    % OR the module index with the offset (loaded in temp register)
    I4 = jit_riscv32_asm:or_(Reg, TempReg),
    {BaseReg, Off} = ?CP,
    I5 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
    Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State2 = State1#state{stream = Stream1},
    State3 = free_native_register(State2, Reg),
    State4 = free_native_register(State3, TempReg),
    {State4, MOVOffset, TempReg}.

%% Patch the placeholder emitted by set_cp/1 so that TempReg is loaded with
%% the current offset (relative to the code start) shifted left by 2.
-spec rewrite_cp_offset(state(), non_neg_integer(), riscv32_register()) -> state().
rewrite_cp_offset(
    #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0,
    RewriteOffset,
    TempReg
) ->
    NewOffset = StreamModule:offset(Stream0) - CodeOffset,
    NewMoveInstr = jit_riscv32_asm:li(TempReg, NewOffset bsl 2),
    Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr),
    State0#state{stream = Stream1}.

%% Store TermReg into the ?BS slot and reset the ?BS_OFFSET slot to 0.
set_bs(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    TermReg
) ->
    {BaseReg1, Off1} = ?BS,
    I1 = jit_riscv32_asm:sw(BaseReg1, TermReg, Off1),
    I2 = jit_riscv32_asm:li(Temp, 0),
    {BaseReg2, Off2} = ?BS_OFFSET,
    I3 = jit_riscv32_asm:sw(BaseReg2, Temp, Off2),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    State0#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @param State current state
%% @param SortedLines line information, sorted by offset
%% @doc Build labels and line tables and encode a function that returns it.
%% In this case, the function returns the effective address of what immediately
%% follows.
+%% @end +%% @return New state +%%----------------------------------------------------------------------------- +return_labels_and_lines( + #state{ + stream_module = StreamModule, + stream = Stream0, + labels = Labels + } = State, + SortedLines +) -> + SortedLabels = lists:keysort(2, [ + {Label, LabelOffset} + || {Label, LabelOffset} <- Labels, is_integer(Label) + ]), + + I1 = pc_relative_address(a0, 12), + I2 = jit_riscv32_asm:ret(), + LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, + LinesTable = <<<> || {Line, Offset} <- SortedLines>>, + Stream1 = StreamModule:append( + Stream0, + <> + ), + State#state{stream = Stream1}. + +%% @doc Generate PC-relative address calculation using AUIPC + ADDI +%% This replaces the ARM-style 'adr' pseudo-instruction with native RISC-V instructions +-spec pc_relative_address(riscv32_register(), integer()) -> binary(). +pc_relative_address(Rd, 0) -> + % Simple case: just get current PC + jit_riscv32_asm:auipc(Rd, 0); +pc_relative_address(Rd, Offset) -> + % PC-relative address calculation + % Split offset into upper 20 bits and lower 12 bits + % AUIPC can represent offsets in range: (-524288 << 12) to (524287 << 12) + % Combined with ADDI: (-524288 << 12) - 2048 to (524287 << 12) + 2047 + Lower = Offset band 16#FFF, + % Sign extend lower 12 bits + LowerSigned = + if + Lower >= 16#800 -> Lower - 16#1000; + true -> Lower + end, + % Compute upper 20 bits, adjusting if lower is negative + % Use arithmetic right shift (bsr) which preserves sign in Erlang + Upper = + if + LowerSigned < 0 -> + (Offset bsr 12) + 1; + true -> + Offset bsr 12 + end, + % Validate that Upper is in valid range for AUIPC + if + Upper < -16#80000; Upper > 16#7FFFF -> + error({offset_out_of_range, Offset, Upper, -16#80000, 16#7FFFF}); + true -> + ok + end, + case {Upper, LowerSigned} of + {0, 0} -> + % Zero offset + jit_riscv32_asm:auipc(Rd, 0); + {0, _} -> + % Only lower bits needed: auipc + addi + AuipcInstr = jit_riscv32_asm:auipc(Rd, 0), + AddiInstr = 
jit_riscv32_asm:addi(Rd, Rd, LowerSigned), + <>; + {_, 0} -> + % Only upper bits needed + jit_riscv32_asm:auipc(Rd, Upper); + {_, _} -> + % Both upper and lower bits + AuipcInstr = jit_riscv32_asm:auipc(Rd, Upper), + AddiInstr = jit_riscv32_asm:addi(Rd, Rd, LowerSigned), + <> + end. + +%% Helper function to generate str instruction with y_reg offset, handling large offsets +str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 -> + % Small offset - use immediate addressing + {BaseReg, Off} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off), + I2 = jit_riscv32_asm:sw(TempReg, SrcReg, Y * 4), + <>; +str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) -> + % Large offset - use register arithmetic with second available register + Offset = Y * 4, + {BaseReg, Off} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off), + I2 = jit_riscv32_asm:li(TempReg2, Offset), + I3 = jit_riscv32_asm:add(TempReg2, TempReg2, TempReg1), + I4 = jit_riscv32_asm:sw(TempReg2, SrcReg, 0), + <>; +str_y_reg(SrcReg, Y, TempReg1, []) -> + % Large offset - no additional registers available, use IP_REG as second temp + Offset = Y * 4, + {BaseReg, Off} = ?Y_REGS, + I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off), + I2 = jit_riscv32_asm:mv(?IP_REG, TempReg1), + I3 = jit_riscv32_asm:li(TempReg1, Offset), + I4 = jit_riscv32_asm:add(TempReg1, TempReg1, ?IP_REG), + I5 = jit_riscv32_asm:sw(TempReg1, SrcReg, 0), + <>. 
+
+%% Helper function to generate a load (lw) from a y register, handling large offsets.
+%% The y_regs base pointer is loaded from ?Y_REGS first, then the word at
+%% base + Y * 4 is loaded into DstReg. The third argument is the list of
+%% available registers; when it is empty DstReg itself (and ?IP_REG for large
+%% offsets) is used as scratch. Returns the encoded instructions.
+ldr_y_reg(DstReg, Y, [TempReg | _]) when Y * 4 =< 124 ->
+    % Small offset - use immediate addressing
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(DstReg, TempReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+ldr_y_reg(DstReg, Y, [TempReg | _]) ->
+    % Large offset - use DstReg as second temp register for arithmetic
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:li(DstReg, Offset),
+    I3 = jit_riscv32_asm:add(DstReg, DstReg, TempReg),
+    I4 = jit_riscv32_asm:lw(DstReg, DstReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary>>;
+ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 ->
+    % Small offset, no registers available - use DstReg as temp
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(DstReg, DstReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+ldr_y_reg(DstReg, Y, []) ->
+    % Large offset, no registers available - park the y_regs base in IP_REG
+    % while DstReg is reused to materialize the offset
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:mv(?IP_REG, DstReg),
+    I3 = jit_riscv32_asm:li(DstReg, Offset),
+    I4 = jit_riscv32_asm:add(DstReg, DstReg, ?IP_REG),
+    I5 = jit_riscv32_asm:lw(DstReg, DstReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>.
+
+%% Move Reg from the used list back to the available list. Crashes with
+%% badmatch if Reg is not currently in UsedRegs0. Returns the updated
+%% {AvailableRegs, UsedRegs} pair.
+free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) ->
+    AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []),
+    true = lists:member(Reg, UsedRegs0),
+    UsedRegs1 = lists:delete(Reg, UsedRegs0),
+    {AvailableRegs1, UsedRegs1}.
+
+%% Insert Reg into the available list at the position it has in the reference
+%% ordering given as first argument (?AVAILABLE_REGS), walking both lists in
+%% parallel and skipping reference registers that are not currently available.
+free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) ->
+    lists:reverse(Acc, [Reg | PrevRegs0]);
+free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) ->
+    free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]);
+free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) ->
+    free_reg0(SortedT, PrevRegs, Reg, Acc).
+
+%% Map each call argument to the register it occupies, so callers can tell
+%% which registers an argument list touches. Immediates map to `imm'; x/y/fp
+%% register arguments are addressed through the context and map to ?CTX_REG.
+args_regs(Args) ->
+    lists:map(
+        fun
+            ({free, {ptr, Reg}}) -> Reg;
+            ({free, Reg}) when is_atom(Reg) -> Reg;
+            ({free, Imm}) when is_integer(Imm) -> imm;
+            (offset) -> imm;
+            (ctx) -> ?CTX_REG;
+            (jit_state) -> jit_state;
+            (jit_state_tail_call) -> jit_state;
+            (stack) -> stack;
+            (Reg) when is_atom(Reg) -> Reg;
+            (Imm) when is_integer(Imm) -> imm;
+            ({ptr, Reg}) -> Reg;
+            ({x_reg, _}) -> ?CTX_REG;
+            ({y_reg, _}) -> ?CTX_REG;
+            ({fp_reg, _}) -> ?CTX_REG;
+            ({free, {x_reg, _}}) -> ?CTX_REG;
+            ({free, {y_reg, _}}) -> ?CTX_REG;
+            ({free, {fp_reg, _}}) -> ?CTX_REG;
+            ({avm_int64_t, _}) -> imm
+        end,
+        Args
+    ).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at the current offset. If the current offset is not a
+%% multiple of 4, a nop is emitted first and the label is placed after it.
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference()) -> state().
+add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label) ->
+    Offset0 = StreamModule:offset(Stream0),
+    {State1, Offset1} =
+        if
+            Offset0 rem 4 =:= 0 ->
+                {State0, Offset0};
+            true ->
+                Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:nop()),
+                % Record the label at the real end of the stream rather than
+                % assuming the size of the padding instruction, so the label
+                % can never point inside the nop that was just emitted.
+                {State0#state{stream = Stream1}, StreamModule:offset(Stream1)}
+        end,
+    add_label(State1, Label, Offset1).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at a specific offset
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @param Offset the explicit offset for this label
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(#state{labels = Labels} = State, Label, Offset) ->
+    State#state{labels = [{Label, Offset} | Labels]}.
diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 35330fdecc..db7914438b 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -209,3 +209,4 @@ X(LOAD_ATOM, "\x4", "load") X(JIT_X86_64_ATOM, "\xA", "jit_x86_64") X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64") X(JIT_ARMV6M_ATOM, "\xA", "jit_armv6m") +X(JIT_RISCV32_ATOM, "\xB", "jit_riscv32") diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 6539f43c61..f1191ba11e 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -86,6 +86,20 @@ _Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->rema _Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 +_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in jit/src/jit_riscv32.erl"); + +_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_riscv32.erl"); + +_Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); + #else #error Unknown jit target #endif @@ -133,7 +147,7 @@ static void destroy_extended_registers(Context *ctx, unsigned int live) static void jit_trim_live_regs(Context *ctx, uint32_t live) { - TRACE("jit_trim_live_regs: 
ctx->process_id = %d, live = %d\n", ctx->process_id, live); + TRACE("jit_trim_live_regs: ctx->process_id = %" PRId32 ", live = %" PRIu32 "\n", ctx->process_id, live); if (UNLIKELY(!list_is_empty(&ctx->extended_x_regs))) { destroy_extended_registers(ctx, live); } @@ -173,8 +187,8 @@ static Context *jit_return(Context *ctx, JITState *jit_state) static Context *jit_terminate_context(Context *ctx, JITState *jit_state) { - TRACE("jit_terminate_context: ctx->process_id = %d\n", ctx->process_id); - TRACE("-- Code execution finished for %i--\n", ctx->process_id); + TRACE("jit_terminate_context: ctx->process_id = %" PRId32 "\n", ctx->process_id); + TRACE("-- Code execution finished for %" PRId32 "--\n", ctx->process_id); GlobalContext *global = ctx->global; if (ctx->leader) { scheduler_stop_all(global); @@ -186,7 +200,7 @@ static Context *jit_terminate_context(Context *ctx, JITState *jit_state) static Context *jit_handle_error(Context *ctx, JITState *jit_state, int offset) { - TRACE("jit_terminate_context: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_terminate_context: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); if (offset || term_is_invalid_term(ctx->x[2])) { ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, ctx->x[0]); } @@ -253,14 +267,14 @@ static void set_error(Context *ctx, JITState *jit_state, int offset, term error_ static Context *jit_raise_error(Context *ctx, JITState *jit_state, int offset, term error_type_atom) { - TRACE("jit_raise_error: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise_error: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); set_error(ctx, jit_state, offset, error_type_atom); return jit_handle_error(ctx, jit_state, 0); } static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int offset, term error_atom, term arg1) { - TRACE("jit_raise_error_tuple: ctx->process_id = %d, offset = %d\n", 
ctx->process_id, offset); + TRACE("jit_raise_error_tuple: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); // We can gc as we are raising if (UNLIKELY(memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, &arg1, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { set_error(ctx, jit_state, offset, OUT_OF_MEMORY_ATOM); @@ -277,7 +291,7 @@ static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int off static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term stacktrace, term exc_value) { - TRACE("jit_raise: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); ctx->x[0] = stacktrace_exception_class(stacktrace); ctx->x[1] = exc_value; ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, stacktrace); @@ -286,7 +300,7 @@ static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term st static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state) { - TRACE("jit_schedule_next_cp: ctx->process_id = %d\n", ctx->process_id); + TRACE("jit_schedule_next_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id); ctx->saved_function_ptr = jit_state->continuation; ctx->saved_module = jit_state->module; jit_state->remaining_reductions = 0; @@ -295,7 +309,7 @@ static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state) static Context *jit_schedule_wait_cp(Context *ctx, JITState *jit_state) { - TRACE("jit_schedule_wait_cp: ctx->process_id = %d\n", ctx->process_id); + TRACE("jit_schedule_wait_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id); ctx->saved_function_ptr = jit_state->continuation; ctx->saved_module = jit_state->module; jit_state->remaining_reductions = 0; @@ -433,7 +447,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int return_value = bif->bif2_ptr(ctx, 0, ctx->x[0], ctx->x[1]); break; default: - fprintf(stderr, "Invalid arity %" PRIu32 " 
for bif\n", arity); + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity); AVM_ABORT(); } PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset); @@ -463,7 +477,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int return_value = gcbif->gcbif2_ptr(ctx, 0, 0, ctx->x[0], ctx->x[1]); break; default: - fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity); AVM_ABORT(); } PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset); @@ -486,7 +500,7 @@ static term jit_module_get_atom_term_by_id(JITState *jit_state, int atom_index) static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, uint32_t heap_need, uint32_t live) { - TRACE("jit_allocate: stack_need=%u heap_need=%u live=%u\n", stack_need, heap_need, live); + TRACE("jit_allocate: ENTRY ctx=%p jit_state=%p stack_need=%" PRIu32 " heap_need=%" PRIu32 " live=%" PRIu32 "\n", (void*)ctx, (void*)jit_state, stack_need, heap_need, live); if (ctx->heap.root->next || ((ctx->heap.heap_ptr + heap_need > ctx->e - (stack_need + 1)))) { TRIM_LIVE_REGS(live); if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need + stack_need + 1, live, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { @@ -501,7 +515,7 @@ static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif) { - TRACE("jit_get_imported_bif: bif=%u\n", bif); + TRACE("jit_get_imported_bif: bif=%" PRIu32 "\n", bif); const struct ExportedFunction *exported_bif = jit_state->module->imported_funcs[bif]; const BifImpl0 result = EXPORTED_FUNCTION_TO_BIF(exported_bif)->bif0_ptr; return result; @@ -509,7 +523,7 @@ static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif) static bool jit_deallocate(Context *ctx, JITState *jit_state, uint32_t n_words) { - TRACE("jit_deallocate: n_words=%u\n", n_words); + 
TRACE("jit_deallocate: n_words=%" PRIu32 "\n", n_words); ctx->cp = ctx->e[n_words]; ctx->e += n_words + 1; // Hopefully, we only need x[0] @@ -534,7 +548,7 @@ static TermCompareResult jit_term_compare(Context *ctx, JITState *jit_state, ter static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, uint32_t live_registers) { - TRACE("jit_test_heap: heap_need=%u live_registers=%u\n", heap_need, live_registers); + TRACE("jit_test_heap: heap_need=%" PRIu32 " live_registers=%" PRIu32 "\n", heap_need, live_registers); size_t heap_free = context_avail_free_memory(ctx); // if we need more heap space than is currently free, then try to GC the needed space if (heap_free < heap_need) { @@ -548,7 +562,7 @@ static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, } else if (heap_free > heap_need * HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF) { TRIM_LIVE_REGS(live_registers); if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need * (HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF / 2), live_registers, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { - TRACE("Unable to ensure free memory. heap_need=%i\n", heap_need); + TRACE("Unable to ensure free memory. 
heap_need=%" PRIu32 "\n", heap_need); set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM); return false; } @@ -629,13 +643,13 @@ static term maybe_alloc_boxed_integer_fragment(Context *ctx, avm_int64_t value) static term jit_term_alloc_tuple(Context *ctx, uint32_t size) { - TRACE("jit_term_alloc_tuple: size=%u\n", size); + TRACE("jit_term_alloc_tuple: size=%" PRIu32 "\n", size); return term_alloc_tuple(size, &ctx->heap); } static term jit_term_alloc_fun(Context *ctx, JITState *jit_state, uint32_t fun_index, uint32_t numfree) { - TRACE("jit_term_alloc_fun: fun_index=%u numfree=%u\n", fun_index, numfree); + TRACE("jit_term_alloc_fun: fun_index=%" PRIu32 " numfree=%" PRIu32 "\n", fun_index, numfree); size_t size = numfree + BOXED_FUN_SIZE; term *boxed_func = memory_heap_alloc(&ctx->heap, size); @@ -841,7 +855,7 @@ static Context *jit_process_signal_messages(Context *ctx, JITState *jit_state) static term jit_mailbox_peek(Context *ctx) { - TRACE("jit_mailbox_peek: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_peek: ctx->process_id=%" PRId32 "\n", ctx->process_id); term out = term_invalid_term(); mailbox_peek(ctx, &out); return out; @@ -849,26 +863,26 @@ static term jit_mailbox_peek(Context *ctx) static void jit_mailbox_remove_message(Context *ctx) { - TRACE("jit_mailbox_remove_message: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_remove_message: ctx->process_id=%" PRId32 "\n", ctx->process_id); mailbox_remove_message(&ctx->mailbox, &ctx->heap); } static void jit_timeout(Context *ctx) { - TRACE("jit_timeout: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id); context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags); mailbox_reset(&ctx->mailbox); } static void jit_mailbox_next(Context *ctx) { - TRACE("jit_mailbox_next: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_next: ctx->process_id=%" PRId32 "\n", ctx->process_id); mailbox_next(&ctx->mailbox); } static void 
jit_cancel_timeout(Context *ctx) { - TRACE("jit_cancel_timeout: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_cancel_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id); if (context_get_flags(ctx, WaitingTimeout | WaitingTimeoutExpired)) { scheduler_cancel_timeout(ctx); } @@ -876,7 +890,7 @@ static void jit_cancel_timeout(Context *ctx) static void jit_clear_timeout_flag(Context *ctx) { - TRACE("jit_clear_timeout_flag: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_clear_timeout_flag: ctx->process_id=%" PRId32 "\n", ctx->process_id); context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags); } diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index af31ed3b17..1b911f200a 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -174,6 +174,7 @@ enum TrapAndLoadResult #define JIT_ARCH_X86_64 1 #define JIT_ARCH_AARCH64 2 #define JIT_ARCH_ARMV6M 3 +#define JIT_ARCH_RISCV32 4 #define JIT_VARIANT_PIC 1 #define JIT_VARIANT_FLOAT32 2 @@ -195,6 +196,11 @@ enum TrapAndLoadResult #define JIT_JUMPTABLE_ENTRY_SIZE 12 #endif +#if defined(__riscv) && (__riscv_xlen == 32) +#define JIT_ARCH_TARGET JIT_ARCH_RISCV32 +#define JIT_JUMPTABLE_ENTRY_SIZE 8 +#endif + #ifndef JIT_ARCH_TARGET #error Unknown JIT target #endif diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index 64ff0569a6..fb38679c8b 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -38,6 +38,9 @@ #include #include +// #define ENABLE_TRACE +#include "trace.h" + #ifdef WITH_ZLIB #include #endif diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index dc87df2250..f42ebf9202 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -5692,6 +5692,8 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) return JIT_AARCH64_ATOM; #elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M return JIT_ARMV6M_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 + return JIT_RISCV32_ATOM; #else #error Unknown JIT target #endif diff --git 
a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index 547dbbfe74..f18d892838 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -7476,7 +7476,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) } terminate_context: - TRACE("-- Code execution finished for %i--\n", ctx->process_id); + TRACE("-- Code execution finished for %i--\n", (int) ctx->process_id); GlobalContext *global = ctx->global; if (ctx->leader) { scheduler_stop_all(global); diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index 9dec6ec5f3..1212a15fe9 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -51,11 +51,23 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON) - project(atomvm-esp32) +# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +if(CONFIG_JIT_ENABLED) + if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") + else() + message(WARNING "JIT is not supported on ${IDF_TARGET} (Xtensa architecture)") + set(AVM_DISABLE_JIT ON) + endif() +else() + set(AVM_DISABLE_JIT ON) + message(STATUS "JIT compilation disabled") +endif() + # esp-idf does not use compile_feature but instead sets version in # c_compile_options # Ensure project is compiled with at least C11 diff --git a/src/platforms/esp32/components/avm_sys/CMakeLists.txt b/src/platforms/esp32/components/avm_sys/CMakeLists.txt index ebcedd3b57..8156bb2ac8 100644 --- a/src/platforms/esp32/components/avm_sys/CMakeLists.txt +++ b/src/platforms/esp32/components/avm_sys/CMakeLists.txt @@ -25,6 +25,7 @@ set(AVM_SYS_COMPONENT_SRCS "sys.c" "platform_nifs.c" "platform_defaultatoms.c" + "jit_stream_flash.c" "../../../../libAtomVM/inet.c" 
"../../../../libAtomVM/otp_crypto.c" "../../../../libAtomVM/otp_net.c" diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash.c new file mode 100644 index 0000000000..77dfcca908 --- /dev/null +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash.c @@ -0,0 +1,34 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "context.h" +#include "jit.h" +#include "term.h" + +ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) +{ + UNUSED(ctx); + UNUSED(jit_stream); + return NULL; +} + +#endif diff --git a/src/platforms/esp32/components/avm_sys/sys.c b/src/platforms/esp32/components/avm_sys/sys.c index 8318ae759a..ec229d70b2 100644 --- a/src/platforms/esp32/components/avm_sys/sys.c +++ b/src/platforms/esp32/components/avm_sys/sys.c @@ -807,3 +807,28 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) UNUSED(global); #endif } + +#ifndef AVM_NO_JIT +#include + +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ + UNUSED(size); + uintptr_t addr = (uintptr_t) (native_code + offset); + +#if defined(CONFIG_IDF_TARGET_ARCH_RISCV) + // On RISC-V ESP32 targets, native code in flash needs to be accessed + // through the instruction 
cache (IROM) not data cache (DROM) +#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C2) + // ESP32-C3 and C2 have separate DROM and IROM regions + if (addr >= SOC_DROM_LOW && addr < SOC_DROM_HIGH) { + // Convert from data cache address to instruction cache address + addr = addr - SOC_DROM_LOW + SOC_IROM_LOW; + } +#endif + // ESP32-C6, H2, and P4 have unified DROM/IROM, no conversion needed +#endif + + return (ModuleNativeEntryPoint) addr; +} +#endif diff --git a/src/platforms/esp32/components/libatomvm/CMakeLists.txt b/src/platforms/esp32/components/libatomvm/CMakeLists.txt index 97580dbfea..c8e3ede411 100644 --- a/src/platforms/esp32/components/libatomvm/CMakeLists.txt +++ b/src/platforms/esp32/components/libatomvm/CMakeLists.txt @@ -32,6 +32,6 @@ if (HAVE_PLATFORM_ATOMIC_H) endif() target_link_libraries(${COMPONENT_LIB} - INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init") + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init" "-u jit_stream_entry_point" "-u sys_map_native_code") target_compile_features(${COMPONENT_LIB} INTERFACE c_std_11) diff --git a/src/platforms/esp32/main/Kconfig.projbuild b/src/platforms/esp32/main/Kconfig.projbuild index 88bf92aa1a..1eba944ed7 100755 --- a/src/platforms/esp32/main/Kconfig.projbuild +++ b/src/platforms/esp32/main/Kconfig.projbuild @@ -39,5 +39,11 @@ menu "AtomVM configuration" depends on USE_USB_SERIAL help Enable TinyUSB CDC functionality if USE_USB_SERIAL is enabled. 
+ + config JIT_ENABLED + bool "Enable just in time compilation" + default n + help + Enable Just in time compilation, or just execution of precompiled native code endmenu diff --git a/src/platforms/esp32/test/CMakeLists.txt b/src/platforms/esp32/test/CMakeLists.txt index 2d97d91345..cee138d34c 100644 --- a/src/platforms/esp32/test/CMakeLists.txt +++ b/src/platforms/esp32/test/CMakeLists.txt @@ -57,8 +57,16 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON) +# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +# This must be set before project() so libAtomVM is configured correctly +if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") +else() + message(STATUS "JIT is not supported on ${IDF_TARGET} (Xtensa architecture) - using interpreter") + set(AVM_DISABLE_JIT ON) +endif() project(atomvm-esp32-test) diff --git a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt index e2d67269e8..dc4789f374 100644 --- a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt +++ b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt @@ -20,11 +20,31 @@ add_library(esp32_test_modules) +include(ExternalProject) +if(NOT AVM_DISABLE_JIT) +set(host_atomvm_jit_target "--target=jit") +else() +set(host_atomvm_jit_target "") +endif() ExternalProject_Add(HostAtomVM SOURCE_DIR ../../../../../../../../ INSTALL_COMMAND cmake -E echo "Skipping install step." + BUILD_COMMAND cmake --build . 
--target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM ) +macro(jit_precompile module_name) + if(NOT AVM_DISABLE_JIT) + add_custom_command( + OUTPUT ${AVM_JIT_TARGET_ARCH}/${module_name}.beam + COMMAND mkdir -p ${AVM_JIT_TARGET_ARCH} + && erl -pa HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/ ${module_name}.beam + DEPENDS ${module_name}.beam HostAtomVM + COMMENT "Compiling ${module_name}.beam to ${AVM_JIT_TARGET_ARCH}" + VERBATIM + ) + endif() +endmacro() + function(compile_erlang module_name) add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam" @@ -33,6 +53,7 @@ function(compile_erlang module_name) COMMENT "Compiling ${module_name}.erl" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) + jit_precompile(${module_name}) set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam") endfunction() @@ -55,46 +76,44 @@ compile_erlang(test_time_and_processes) compile_erlang(test_twdt) compile_erlang(test_tz) +set(erlang_test_beams + test_esp_partition.beam + test_file.beam + test_wifi_example.beam + test_list_to_atom.beam + test_list_to_binary.beam + test_md5.beam + test_crypto.beam + test_monotonic_time.beam + test_mount.beam + test_net.beam + test_rtc_slow.beam + test_select.beam + test_socket.beam + test_ssl.beam + test_time_and_processes.beam + test_twdt.beam + test_tz.beam +) + +if(NOT AVM_DISABLE_JIT) + set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams}) + list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/) + set(erlang_test_beams_to_package ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) + set(erlang_test_beams_depends ${erlang_test_beams} ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) +else() + set(erlang_test_beams_to_package ${erlang_test_beams}) + set(erlang_test_beams_depends ${erlang_test_beams}) 
+endif() + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/esp32_test_modules.avm" COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i esp32_test_modules.avm HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm - test_esp_partition.beam - test_file.beam - test_wifi_example.beam - test_list_to_atom.beam - test_list_to_binary.beam - test_md5.beam - test_crypto.beam - test_monotonic_time.beam - test_mount.beam - test_net.beam - test_rtc_slow.beam - test_select.beam - test_socket.beam - test_ssl.beam - test_time_and_processes.beam - test_twdt.beam - test_tz.beam + ${erlang_test_beams_to_package} DEPENDS HostAtomVM - "${CMAKE_CURRENT_BINARY_DIR}/test_esp_partition.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_wifi_example.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_file.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_atom.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_binary.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_md5.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_crypto.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_monotonic_time.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_mount.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_net.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_rtc_slow.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_select.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_socket.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_ssl.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_time_and_processes.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_twdt.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_tz.beam" + ${erlang_test_beams_depends} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} VERBATIM ) diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 9bc1c8c78b..45473d9f10 100644 --- a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -30,6 +30,7 @@ set(ERLANG_MODULES jit_aarch64_asm_tests jit_armv6m_tests jit_armv6m_asm_tests + jit_riscv32_tests jit_riscv32_asm_tests jit_x86_64_tests jit_x86_64_asm_tests diff --git 
a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl
new file mode 100644
index 0000000000..4a4fba5593
--- /dev/null
+++ b/tests/libs/jit/jit_riscv32_tests.erl
@@ -0,0 +1,3419 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32_tests).
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.
+
+-include("jit/include/jit.hrl").
+-include("jit/src/term.hrl").
+-include("jit/src/default_atoms.hrl").
+-include("jit/src/primitives.hrl").
+
+-define(BACKEND, jit_riscv32).
+
+% Disassembly obtained with a RISC-V objdump (toolchain prefix may differ), e.g.:
+% riscv32-unknown-elf-objdump -b binary -m riscv:rv32 -D dump.bin
+
+% call_primitive/3 with primitive index 0 and only ctx/jit_state arguments.
+% The pinned code loads the function pointer from 0(a2), saves ra/a0/a1/a2 on
+% the stack around the jalr, and moves the result from a0 into t6.
+call_primitive_0_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, ResultReg} = ?BACKEND:call_primitive(State0, 0, [ctx, jit_state]),
+    ?assertEqual(t6, ResultReg),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            " 0: 00062f83 lw t6,0(a2)\n"
+            " 4: ff010113 addi sp,sp,-16\n"
+            " 8: 00112023 sw ra,0(sp)\n"
+            " c: 00a12223 sw a0,4(sp)\n"
+            " 10: 00b12423 sw a1,8(sp)\n"
+            " 14: 00c12623 sw a2,12(sp)\n"
+            " 18: 000f80e7 jalr t6\n"
+            " 1c: 00050f93 mv t6,a0\n"
+            " 20: 00012083 lw ra,0(sp)\n"
+            " 24: 00412503 lw a0,4(sp)\n"
+            " 28: 00812583 lw a1,8(sp)\n"
+            " 2c: 00c12603 lw a2,12(sp)\n"
+            " 30: 01010113 addi sp,sp,16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% call_primitive/3 with primitive index 1: same save/call/restore sequence as
+% index 0, but the pointer load is pinned at 4(a2).
+call_primitive_1_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {AfterCall, Result} = ?BACKEND:call_primitive(Fresh, 1, [ctx, jit_state]),
+    ?assertEqual(t6, Result),
+    Emitted = ?BACKEND:stream(AfterCall),
+    Expected = dump_to_bin(<<
+        " 0: 00462f83 lw t6,4(a2)\n"
+        " 4: ff010113 addi sp,sp,-16\n"
+        " 8: 00112023 sw ra,0(sp)\n"
+        " c: 00a12223 sw a0,4(sp)\n"
+        " 10: 00b12423 sw a1,8(sp)\n"
+        " 14: 00c12623 sw a2,12(sp)\n"
+        " 18: 000f80e7 jalr t6\n"
+        " 1c: 00050f93 mv t6,a0\n"
+        " 20: 00012083 lw ra,0(sp)\n"
+        " 24: 00412503 lw a0,4(sp)\n"
+        " 28: 00812583 lw a1,8(sp)\n"
+        " 2c: 00c12603 lw a2,12(sp)\n"
+        " 30: 01010113 addi sp,sp,16"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+% call_primitive/3 with three immediate arguments after ctx: the pinned code
+% materialises 42, 43 and 44 into a1, a2 and a3 after the registers are saved.
+call_primitive_2_args_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {AfterCall, Result} = ?BACKEND:call_primitive(Fresh, 2, [ctx, 42, 43, 44]),
+    ?assertEqual(t6, Result),
+    Emitted = ?BACKEND:stream(AfterCall),
+    Expected = dump_to_bin(<<
+        " 0: 00862f83 lw t6,8(a2)\n"
+        " 4: ff010113 addi sp,sp,-16\n"
+        " 8: 00112023 sw ra,0(sp)\n"
+        " c: 00a12223 sw a0,4(sp)\n"
+        " 10: 00b12423 sw a1,8(sp)\n"
+        " 14: 00c12623 sw a2,12(sp)\n"
+        " 18: 02a00593 li a1,42\n"
+        " 1c: 02b00613 li a2,43\n"
+        " 20: 02c00693 li a3,44\n"
+        " 24: 000f80e7 jalr t6\n"
+        " 28: 00050f93 mv t6,a0\n"
+        " 2c: 00012083 lw ra,0(sp)\n"
+        " 30: 00412503 lw a0,4(sp)\n"
+        " 34: 00812583 lw a1,8(sp)\n"
+        " 38: 00c12603 lw a2,12(sp)\n"
+        " 3c: 01010113 addi sp,sp,16"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+% call_primitive_last/3 (tail call) with five arguments: ctx and jit_state stay
+% in place, the three immediates go to a2..a4 and control leaves with `jr`.
+call_primitive_5_args_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            " 0: 01462f83 lw t6,20(a2)\n"
+            " 4: 01000613 li a2,16\n"
+            " 8: 02000693 li a3,32\n"
+            " c: 00200713 li a4,2\n"
+            " 10: 000f8067 jr t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% call_primitive/3 with six arguments, two of them {free, Reg} native registers.
+call_primitive_6_args_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Get bin_ptr from x_reg 0 (similar to get_list_test pattern)
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    % and_/3 follows the backend contract used by jit.erl: the operand is passed
+    % as {free, Reg} and the call returns {State, ResultReg}. Matching on RegA
+    % asserts the mask is applied in place, as the pinned `and t6,t6,t5` shows.
+    {State2, RegA} = ?BACKEND:and_(State1, {free, RegA}, ?TERM_PRIMARY_CLEAR_MASK),
+    % Get another register for the last parameter to test {free, Reg} handling
+    {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}),
+    % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments
+    {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [
+        ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg}
+    ]),
+    Stream = ?BACKEND:stream(State4),
+    Dump =
+        <<
+            " 0: 01852f83 lw t6,24(a0)\n"
+            " 4: 00300f13 li t5,3\n"
+            " 8: ffff4f13 not t5,t5\n"
+            " c: 01efffb3 and t6,t6,t5\n"
+            " 10: 01c52f03 lw t5,28(a0)\n"
+            " 14: 0b800e93 li t4,184\n"
+            " 18: 00ce8eb3 add t4,t4,a2\n"
+            " 1c: 000eae83 lw t4,0(t4)\n"
+            " 20: ff010113 addi sp,sp,-16\n"
+            " 24: 00112023 sw ra,0(sp)\n"
+            " 28: 00a12223 sw a0,4(sp)\n"
+            " 2c: 00b12423 sw a1,8(sp)\n"
+            " 30: 00c12623 sw a2,12(sp)\n"
+            " 34: 000f8613 mv a2,t6\n"
+            " 38: 04000693 li a3,64\n"
+            " 3c: 00800713 li a4,8\n"
+            " 40: 000f0793 mv a5,t5\n"
+            " 44: 000e80e7 jalr t4\n"
+            " 48: 00050e93 mv t4,a0\n"
+            " 4c: 00012083 lw ra,0(sp)\n"
+            " 50: 00412503 lw a0,4(sp)\n"
+            " 54: 00812583 lw a1,8(sp)\n"
+            " 58: 00c12603 lw a2,12(sp)\n"
+            " 5c: 01010113 addi sp,sp,16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% Three PRIM_EXTENDED_REGISTER_PTR calls keep their results live in t6, t5 and
+% t4; the pinned code shows each later call also saving the registers that are
+% already allocated. PRIM_PUT_LIST then receives the values loaded through the
+% first two pointers (lw a1,0(t6) / lw a2,0(t5)) and its result is stored
+% through the third pointer (sw t3,0(t4)).
+call_primitive_extended_regs_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]),
+    {State3, RegC} = ?BACKEND:call_primitive(State2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    {State4, ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_PUT_LIST, [
+        ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}}
+    ]),
+    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {ptr, RegC}),
+    State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]),
+    % Every native register must have been released by now.
+    ?BACKEND:assert_all_native_free(State6),
+    Stream = ?BACKEND:stream(State6),
+    Dump = <<
+        " 0: 04862f83 lw t6,72(a2)\n"
+        " 4: ff010113 addi sp,sp,-16\n"
+        " 8: 00112023 sw ra,0(sp)\n"
+        " c: 00a12223 sw a0,4(sp)\n"
+        " 10: 00b12423 sw a1,8(sp)\n"
+        " 14: 00c12623 sw a2,12(sp)\n"
+        " 18: 01300593 li a1,19\n"
+        " 1c: 000f80e7 jalr t6\n"
+        " 20: 00050f93 mv t6,a0\n"
+        " 24: 00012083 lw ra,0(sp)\n"
+        " 28: 00412503 lw a0,4(sp)\n"
+        " 2c: 00812583 lw a1,8(sp)\n"
+        " 30: 00c12603 lw a2,12(sp)\n"
+        " 34: 01010113 addi sp,sp,16\n"
+        " 38: 04862f03 lw t5,72(a2)\n"
+        " 3c: fe010113 addi sp,sp,-32\n"
+        " 40: 00112023 sw ra,0(sp)\n"
+        " 44: 00a12223 sw a0,4(sp)\n"
+        " 48: 00b12423 sw a1,8(sp)\n"
+        " 4c: 00c12623 sw a2,12(sp)\n"
+        " 50: 01f12823 sw t6,16(sp)\n"
+        " 54: 01400593 li a1,20\n"
+        " 58: 000f00e7 jalr t5\n"
+        " 5c: 00050f13 mv t5,a0\n"
+        " 60: 00012083 lw ra,0(sp)\n"
+        " 64: 00412503 lw a0,4(sp)\n"
+        " 68: 00812583 lw a1,8(sp)\n"
+        " 6c: 00c12603 lw a2,12(sp)\n"
+        " 70: 01012f83 lw t6,16(sp)\n"
+        " 74: 02010113 addi sp,sp,32\n"
+        " 78: 04862e83 lw t4,72(a2)\n"
+        " 7c: fe010113 addi sp,sp,-32\n"
+        " 80: 00112023 sw ra,0(sp)\n"
+        " 84: 00a12223 sw a0,4(sp)\n"
+        " 88: 00b12423 sw a1,8(sp)\n"
+        " 8c: 00c12623 sw a2,12(sp)\n"
+        " 90: 01e12823 sw t5,16(sp)\n"
+        " 94: 01f12a23 sw t6,20(sp)\n"
+        " 98: 01300593 li a1,19\n"
+        " 9c: 000e80e7 jalr t4\n"
+        " a0: 00050e93 mv t4,a0\n"
+        " a4: 00012083 lw ra,0(sp)\n"
+        " a8: 00412503 lw a0,4(sp)\n"
+        " ac: 00812583 lw a1,8(sp)\n"
+        " b0: 00c12603 lw a2,12(sp)\n"
+        " b4: 01012f03 lw t5,16(sp)\n"
+        " b8: 01412f83 lw t6,20(sp)\n"
+        " bc: 02010113 addi sp,sp,32\n"
+        " c0: 03462e03 lw t3,52(a2)\n"
+        " c4: fe010113 addi sp,sp,-32\n"
+        " c8: 00112023 sw ra,0(sp)\n"
+        " cc: 00a12223 sw a0,4(sp)\n"
+        " d0: 00b12423 sw a1,8(sp)\n"
+        " d4: 00c12623 sw a2,12(sp)\n"
+        " d8: 01d12823 sw t4,16(sp)\n"
+        " dc: 000fa583 lw a1,0(t6)\n"
+        " e0: 000f2603 lw a2,0(t5)\n"
+        " e4: 000e00e7 jalr t3\n"
+        " e8: 00050e13 mv t3,a0\n"
+        " ec: 00012083 lw ra,0(sp)\n"
+        " f0: 00412503 lw a0,4(sp)\n"
+        " f4: 00812583 lw a1,8(sp)\n"
+        " f8: 00c12603 lw a2,12(sp)\n"
+        " fc: 01012e83 lw t4,16(sp)\n"
+        " 100: 02010113 addi sp,sp,32\n"
+        " 104: 01cea023 sw t3,0(t4)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% call_primitive/3 while five temporaries (t6..t2) are already allocated; the
+% matches on t6..t2 also pin the allocation order. Two arguments are passed as
+% {free, Reg}; the other three registers are saved and restored around the call.
+% NOTE(review): the pinned code stores the call result with `sw a0,-4(sp)`
+% instead of moving it to a register, and ResultReg is not asserted here;
+% confirm this is the intended backend behaviour.
+call_primitive_few_free_regs_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, t6} = ?BACKEND:move_to_native_register(State0, 1),
+    {State2, t5} = ?BACKEND:move_to_native_register(State1, 2),
+    {State3, t4} = ?BACKEND:move_to_native_register(State2, 3),
+    {State4, t3} = ?BACKEND:move_to_native_register(State3, 4),
+    {State5, t2} = ?BACKEND:move_to_native_register(State4, 5),
+    {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [
+        t5, t6, {free, t3}, t4, {free, t2}
+    ]),
+    State7 = ?BACKEND:free_native_registers(State6, [ResultReg, t5, t6, t4]),
+    ?BACKEND:assert_all_native_free(State7),
+    Stream = ?BACKEND:stream(State7),
+    Dump = <<
+        " 0: 00100f93 li t6,1\n"
+        " 4: 00200f13 li t5,2\n"
+        " 8: 00300e93 li t4,3\n"
+        " c: 00400e13 li t3,4\n"
+        " 10: 00500393 li t2,5\n"
+        " 14: 0e400313 li t1,228\n"
+        " 18: 00c30333 add t1,t1,a2\n"
+        " 1c: 00032303 lw t1,0(t1)\n"
+        " 20: fe010113 addi sp,sp,-32\n"
+        " 24: 00112023 sw ra,0(sp)\n"
+        " 28: 00a12223 sw a0,4(sp)\n"
+        " 2c: 00b12423 sw a1,8(sp)\n"
+        " 30: 00c12623 sw a2,12(sp)\n"
+        " 34: 01d12823 sw t4,16(sp)\n"
+        " 38: 01e12a23 sw t5,20(sp)\n"
+        " 3c: 01f12c23 sw t6,24(sp)\n"
+        " 40: 000f0513 mv a0,t5\n"
+        " 44: 000f8593 mv a1,t6\n"
+        " 48: 000e0613 mv a2,t3\n"
+        " 4c: 000e8693 mv a3,t4\n"
+        " 50: 00038713 mv a4,t2\n"
+        " 54: 000300e7 jalr t1\n"
+        " 58: fea12e23 sw a0,-4(sp)\n"
+        " 5c: 00012083 lw ra,0(sp)\n"
+        " 60: 00412503 lw a0,4(sp)\n"
+        " 64: 00812583 lw a1,8(sp)\n"
+        " 68: 00c12603 lw a2,12(sp)\n"
+        " 6c: 01012e83 lw t4,16(sp)\n"
+        " 70: 01412f03 lw t5,20(sp)\n"
+        " 74: 01812f83 lw t6,24(sp)\n"
+        " 78: 02010113 addi sp,sp,32"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% Reduction decrement followed by a PRIM_CALL_EXT tail call whose last argument
+% is -1.
+% NOTE(review): in the pinned code the address built by auipc/addi at 0x10-0x14
+% is overwritten by `lw t6,8(a2)` at 0x18 without being stored first, whereas
+% the sequence pinned for call_only_or_schedule_next stores it with
+% `sw t6,4(a1)`; confirm this is intended.
+call_ext_only_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        " 0: 0085af83 lw t6,8(a1)\n"
+        " 4: ffff8f93 addi t6,t6,-1\n"
+        " 8: 01f5a423 sw t6,8(a1)\n"
+        " c: 000f9a63 bnez t6,0x20\n"
+        " 10: 00000f97 auipc t6,0x0\n"
+        " 14: 010f8f93 addi t6,t6,16\n"
+        " 18: 00862f83 lw t6,8(a2)\n"
+        " 1c: 000f8067 jr t6\n"
+        " 20: 01062f83 lw t6,16(a2)\n"
+        " 24: 02400613 li a2,36\n"
+        " 28: 00200693 li a3,2\n"
+        " 2c: 00200713 li a4,2\n"
+        " 30: fff00793 li a5,-1\n"
+        " 34: 000f8067 jr t6"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% Tail call to PRIM_RAISE_ERROR_TUPLE with a {free, Reg} argument: the pinned
+% code moves the native register into a4 and leaves with `jr`.
+call_primitive_last_5_args_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, ValueReg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Args = [ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, ValueReg}],
+    Final = ?BACKEND:call_primitive_last(Loaded, ?PRIM_RAISE_ERROR_TUPLE, Args),
+    Emitted = ?BACKEND:stream(Final),
+    Expected = dump_to_bin(<<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 04c62f03 lw t5,76(a2)\n"
+        " 8: 00800613 li a2,8\n"
+        " c: 2cb00693 li a3,715\n"
+        " 10: 000f8713 mv a4,t6\n"
+        " 14: 000f0067 jr t5"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+% Reduction decrement followed by a PRIM_CALL_EXT tail call whose last argument
+% is 10.
+call_ext_last_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Scheduled = ?BACKEND:decrement_reductions_and_maybe_schedule_next(Fresh),
+    Final = ?BACKEND:call_primitive_last(
+        Scheduled, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]
+    ),
+    Emitted = ?BACKEND:stream(Final),
+    Expected = dump_to_bin(<<
+        " 0: 0085af83 lw t6,8(a1)\n"
+        " 4: ffff8f93 addi t6,t6,-1\n"
+        " 8: 01f5a423 sw t6,8(a1)\n"
+        " c: 000f9a63 bnez t6,0x20\n"
+        " 10: 00000f97 auipc t6,0x0\n"
+        " 14: 010f8f93 addi t6,t6,16\n"
+        " 18: 00862f83 lw t6,8(a2)\n"
+        " 1c: 000f8067 jr t6\n"
+        " 20: 01062f83 lw t6,16(a2)\n"
+        " 24: 02400613 li a2,36\n"
+        " 28: 00200693 li a3,2\n"
+        " 2c: 00200713 li a4,2\n"
+        " 30: 00a00793 li a5,10\n"
+        " 34: 000f8067 jr t6"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+% Minimal tail call: primitive index 0 with one immediate argument after
+% ctx/jit_state.
+call_primitive_last_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Final = ?BACKEND:call_primitive_last(Fresh, 0, [ctx, jit_state, 42]),
+    Emitted = ?BACKEND:stream(Final),
+    Expected = dump_to_bin(<<
+        " 0: 00062f83 lw t6,0(a2)\n"
+        " 4: 02a00613 li a2,42\n"
+        " 8: 000f8067 jr t6"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+% return_if_not_equal_to_ctx/2 applied to the result of a primitive call, once
+% directly on the result register and once on a copy of it.
+% NOTE(review): in both pinned dumps the `beq` targets the `ret` itself, so the
+% emitted sequence returns whether or not the value equals a0; confirm that
+% this matches the intended backend behaviour.
+return_if_not_equal_to_ctx_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    {State1, ResultReg} = ?BACKEND:call_primitive(
+                        State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+                            ctx, jit_state
+                        ]
+                    ),
+                    ?assertEqual(t6, ResultReg),
+                    State2 = ?BACKEND:return_if_not_equal_to_ctx(State1, {free, ResultReg}),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump =
+                        <<
+                            " 0: 05462f83 lw t6,84(a2)\n"
+                            " 4: ff010113 addi sp,sp,-16\n"
+                            " 8: 00112023 sw ra,0(sp)\n"
+                            " c: 00a12223 sw a0,4(sp)\n"
+                            " 10: 00b12423 sw a1,8(sp)\n"
+                            " 14: 00c12623 sw a2,12(sp)\n"
+                            " 18: 000f80e7 jalr t6\n"
+                            " 1c: 00050f93 mv t6,a0\n"
+                            " 20: 00012083 lw ra,0(sp)\n"
+                            " 24: 00412503 lw a0,4(sp)\n"
+                            " 28: 00812583 lw a1,8(sp)\n"
+                            " 2c: 00c12603 lw a2,12(sp)\n"
+                            " 30: 01010113 addi sp,sp,16\n"
+                            " 34: 00af8463 beq t6,a0,0x3c\n"
+                            " 38: 000f8513 mv a0,t6\n"
+                            " 3c: 00008067 ret"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    {State1, ResultReg} = ?BACKEND:call_primitive(
+                        State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+                            ctx, jit_state
+                        ]
+                    ),
+                    ?assertEqual(t6, ResultReg),
+                    {State2, OtherReg} = ?BACKEND:copy_to_native_register(State1, ResultReg),
+                    ?assertEqual(t5, OtherReg),
+                    State3 = ?BACKEND:return_if_not_equal_to_ctx(State2, {free, OtherReg}),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump =
+                        <<
+                            " 0: 05462f83 lw t6,84(a2)\n"
+                            " 4: ff010113 addi sp,sp,-16\n"
+                            " 8: 00112023 sw ra,0(sp)\n"
+                            " c: 00a12223 sw a0,4(sp)\n"
+                            " 10: 00b12423 sw a1,8(sp)\n"
+                            " 14: 00c12623 sw a2,12(sp)\n"
+                            " 18: 000f80e7 jalr t6\n"
+                            " 1c: 00050f93 mv t6,a0\n"
+                            " 20: 00012083 lw ra,0(sp)\n"
+                            " 24: 00412503 lw a0,4(sp)\n"
+                            " 28: 00812583 lw a1,8(sp)\n"
+                            " 2c: 00c12603 lw a2,12(sp)\n"
+                            " 30: 01010113 addi sp,sp,16\n"
+                            " 34: 000f8f13 mv t5,t6\n"
+                            " 38: 00af0463 beq t5,a0,0x40\n"
+                            " 3c: 000f0513 mv a0,t5\n"
+                            " 40: 00008067 ret"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+% move_to_cp/2 from {y_reg, 0}: the pinned code loads a pointer from 20(a0),
+% reads its first word and stores it at 92(a0).
+move_to_cp_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            " 0: 01452f03 lw t5,20(a0)\n"
+            " 4: 000f2f83 lw t6,0(t5)\n"
+            " 8: 05f52e23 sw t6,92(a0)"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% increment_sp/2 by 7: the pinned code adds 28 (7 * 4) to the word at 20(a0).
+increment_sp_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:increment_sp(State0, 7),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            " 0: 01452f83 lw t6,20(a0)\n"
+            " 4: 01cf8f93 addi t6,t6,28\n"
+            " 8: 01f52a23 sw t6,20(a0)"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+% if_block/3 across the supported condition shapes. The fixture loads x_reg 0
+% into RegA (t6) and x_reg 1 into RegB (t5); every case guards `add RegB, N`
+% with one condition and pins both the emitted branch (the inverse of the
+% condition, skipping the block) and the registers still in use afterwards.
+% Conditions written with {free, RegA} are expected to leave only RegB in use.
+if_block_test_() ->
+    {setup,
+        fun() ->
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+            {State2, RegA, RegB}
+        end,
+        fun({State0, RegA, RegB}) ->
+            [
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000fd463 bgez t6,0x10\n"
+                        " c: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', RegB},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01efd463 bge t6,t5,0x10\n"
+                        " c: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 02a00e93 li t4,42\n"
+                        " c: 01dfd463 bge t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 1024},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 40000e93 li t4,1024\n"
+                        " c: 01dfd463 bge t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2\n"
+                        " 14: 0ec0006f j 0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000f9463 bnez t6,0x10\n"
+                        " c: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000f9463 bnez t6,0x10\n"
+                        " c: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', -1},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: fff00e93 li t4,-1\n"
+                        " c: 01df9463 bne t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000f9463 bnez t6,0x10\n"
+                        " c: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000f9463 bnez t6,0x10\n"
+                        " c: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '!=', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 03b00e93 li t4,59\n"
+                        " c: 01df8463 beq t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '!=', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 03b00e93 li t4,59\n"
+                        " c: 01df8463 beq t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '!=', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 02a00e93 li t4,42\n"
+                        " c: 01df8463 beq t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    % Test large immediate (1995) that requires temporary register
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '!=', 1995},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 1)
+                        end
+                    ),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 7cb00e93 li t4,1995\n"
+                        " c: 01df8463 beq t6,t4,0x14\n"
+                        " 10: 001f0f13 addi t5,t5,1\n"
+                        " 14: 0ec0006f j 0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '!=', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 02a00e93 li t4,42\n"
+                        " c: 01df8463 beq t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 03b00e93 li t4,59\n"
+                        " c: 01df9463 bne t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '==', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 03b00e93 li t4,59\n"
+                        " c: 01df9463 bne t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '==', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 02a00e93 li t4,42\n"
+                        " c: 01df9463 bne t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '==', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 02a00e93 li t4,42\n"
+                        " c: 01df9463 bne t6,t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', RegA, '==', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
+                        " c: 000ec463 bltz t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', {free, RegA}, '==', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
+                        " c: 000ec463 bltz t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', RegA, '!=', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
+                        " c: 000ed463 bgez t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', {free, RegA}, '!=', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
+                        " c: 000ed463 bgez t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#7, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 007ffe93 andi t4,t6,7\n"
+                        " c: 000e8463 beqz t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#5, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 005ffe93 andi t4,t6,5\n"
+                        " c: 000e8463 beqz t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '&', 16#7, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 007ffe93 andi t4,t6,7\n"
+                        " c: 000e8463 beqz t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: ffffce93 not t4,t6\n"
+                        " c: 01ce9e93 slli t4,t4,0x1c\n"
+                        " 10: 000e8463 beqz t4,0x18\n"
+                        " 14: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: ffffcf93 not t6,t6\n"
+                        " c: 01cf9f93 slli t6,t6,0x1c\n"
+                        " 10: 000f8463 beqz t6,0x18\n"
+                        " 14: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 000f8e93 mv t4,t6\n"
+                        " c: 03f00e13 li t3,63\n"
+                        " 10: 01cefeb3 and t4,t4,t3\n"
+                        " 14: 00800e13 li t3,8\n"
+                        " 18: 01ce8463 beq t4,t3,0x20\n"
+                        " 1c: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '<', RegB},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 01efd463 bge t6,t5,0x10\n"
+                        " c: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {
+                            {free, RegA},
+                            '&',
+                            ?TERM_BOXED_TAG_MASK,
+                            '!=',
+                            ?TERM_BOXED_POSITIVE_INTEGER
+                        },
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 03f00e93 li t4,63\n"
+                        " c: 01dfffb3 and t6,t6,t4\n"
+                        " 10: 00800e93 li t4,8\n"
+                        " 14: 01df8463 beq t6,t4,0x1c\n"
+                        " 18: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                %% Test {RegA, '&', 16#3, '!=', 0} using ANDI instruction
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#3, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01c52f03 lw t5,28(a0)\n"
+                        " 8: 003ffe93 andi t4,t6,3\n"
+                        " c: 000e8463 beqz t4,0x14\n"
+                        " 10: 002f0f13 addi t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end)
+            ]
+        end}.
+
+% if_else_block/4: the "then" arm adds 2 to the second register, the "else" arm
+% adds 4. The pinned code branches to the else arm with `bne` and jumps over it
+% at the end of the then arm.
+if_else_block_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {WithFirst, Tested} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {WithBoth, Target} = ?BACKEND:move_to_native_register(WithFirst, {x_reg, 1}),
+    ThenArm = fun(BSt0) -> ?BACKEND:add(BSt0, Target, 2) end,
+    ElseArm = fun(BSt0) -> ?BACKEND:add(BSt0, Target, 4) end,
+    Final = ?BACKEND:if_else_block(WithBoth, {Tested, '==', ?TERM_NIL}, ThenArm, ElseArm),
+    Emitted = ?BACKEND:stream(Final),
+    Expected = dump_to_bin(<<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 01c52f03 lw t5,28(a0)\n"
+        " 8: 03b00e93 li t4,59\n"
+        " c: 01df9663 bne t6,t4,0x18\n"
+        " 10: 002f0f13 addi t5,t5,2\n"
+        " 14: 0080006f j 0x1c\n"
+        " 18: 004f0f13 addi t5,t5,4"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+% shift_right/3: with {free, Reg} the shift is done in place and the same
+% register comes back; with a bare Reg the result goes to a different register.
+shift_right_test_() ->
+    [
+        ?_test(begin
+            Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+            % Matching on Reg asserts that the source register is reused.
+            {Shifted, Reg} = ?BACKEND:shift_right(Loaded, {free, Reg}, 3),
+            Emitted = ?BACKEND:stream(Shifted),
+            Expected = dump_to_bin(<<
+                " 0: 01852f83 lw t6,24(a0)\n"
+                " 4: 003fdf93 srli t6,t6,0x3"
+            >>),
+            ?assertEqual(Expected, Emitted)
+        end),
+        ?_test(begin
+            Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+            {Shifted, ResultReg} = ?BACKEND:shift_right(Loaded, Reg, 3),
+            ?assertNotEqual(ResultReg, Reg),
+            Emitted = ?BACKEND:stream(Shifted),
+            Expected = dump_to_bin(<<
+                " 0: 01852f83 lw t6,24(a0)\n"
+                " 4: 003fdf13 srli t5,t6,0x3"
+            >>),
+            ?assertEqual(Expected, Emitted)
+        end)
+    ].
+
+%% shift_left/3 on a bare register: the expected code shifts the register in
+%% place (`slli t6,t6,0x3`).
+shift_left_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:shift_left(State1, Reg, 3),
+    Stream = ?BACKEND:stream(State2),
+    Dump =
+        <<
+            " 0: 01852f83 lw t6,24(a0)\n"
+            " 4: 003f9f93 slli t6,t6,0x3"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Emits a jump table, then label 1 (a call_only_or_schedule_next to label 2),
+%% label 2 and label 0 (each a call_primitive_last), and finally resolves the
+%% pending branches with update_branches/1.
+%% In the expected dump the three table entries point at 0x54 (label 0),
+%% 0x18 (label 1) and 0x4c (label 2), and the `j` at 0x28 targets label 2.
+call_only_or_schedule_next_and_label_relocation_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 2),
+    State2 = ?BACKEND:add_label(State1, 1),
+    State3 = ?BACKEND:call_only_or_schedule_next(State2, 2),
+    State4 = ?BACKEND:add_label(State3, 2),
+    State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]),
+    % OP_INT_CALL_END
+    State6 = ?BACKEND:add_label(State5, 0),
+    State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]),
+    State8 = ?BACKEND:update_branches(State7),
+    Stream = ?BACKEND:stream(State8),
+    Dump =
+        <<
+            " 0: 00000697 auipc a3,0x0\n"
+            " 4: 05468067 jr 84(a3)\n"
+            " 8: 00000697 auipc a3,0x0\n"
+            " c: 01068067 jr 16(a3)\n"
+            " 10: 00000697 auipc a3,0x0\n"
+            " 14: 03c68067 jr 60(a3)\n"
+            " 18: 0085af83 lw t6,8(a1)\n"
+            " 1c: ffff8f93 addi t6,t6,-1\n"
+            " 20: 01f5a423 sw t6,8(a1)\n"
+            " 24: 000f8a63 beqz t6,0x38\n"
+            " 28: 0240006f j 0x4c\n"
+            " 2c: 00000013 nop\n"
+            " 30: 00000013 nop\n"
+            " 34: 00000013 nop\n"
+            " 38: 00000f97 auipc t6,0x0\n"
+            " 3c: 014f8f93 addi t6,t6,20\n"
+            " 40: 01f5a223 sw t6,4(a1)\n"
+            " 44: 00862f83 lw t6,8(a2)\n"
+            " 48: 000f8067 jr t6\n"
+            " 4c: 00062f83 lw t6,0(a2)\n"
+            " 50: 000f8067 jr t6\n"
+            " 54: 00462f83 lw t6,4(a2)\n"
+            " 58: 000f8067 jr t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test with a large gap (512 bytes of padding) between the jump table and the
+%% labelled code, so that every label sits past offset 0x200.
+%% Only the tail of the stream (from 0x218) is compared.
+%% NOTE(review): this header used to say the gap forces a "mov_immediate path";
+%% the expected tail below has the same instruction shape as the small-gap
+%% test, so confirm which code path this is meant to exercise on RISC-V.
+call_only_or_schedule_next_and_label_relocation_large_gap_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 2),
+    % Add large padding by emitting many move_to_native_register operations
+    % This creates a large gap between the jump table and the actual function bodies
+    % On RISC-V each operation emits one 4-byte load, so 128 operations = 512 bytes
+    StatePadded = lists:foldl(
+        fun(_, S) ->
+            ?BACKEND:move_to_native_register(S, {x_reg, 2}, a3)
+        end,
+        State1,
+        lists:seq(1, 128)
+    ),
+    State2 = ?BACKEND:add_label(StatePadded, 1),
+    State3 = ?BACKEND:call_only_or_schedule_next(State2, 2),
+    State4 = ?BACKEND:add_label(State3, 2),
+    State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]),
+    % OP_INT_CALL_END
+    State6 = ?BACKEND:add_label(State5, 0),
+    State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]),
+    State8 = ?BACKEND:update_branches(State7),
+    Stream = ?BACKEND:stream(State8),
+    % Extract the final section starting at 0x218 (after jump table 24 bytes + 128 loads 512 bytes)
+    % RISC-V: Jump table is 3 * 8 = 24 bytes, loads are 4 bytes each (24 + 512 = 536 = 0x218)
+    Dump = <<
+        " 218: 0085af83 lw t6,8(a1)\n"
+        " 21c: ffff8f93 addi t6,t6,-1\n"
+        " 220: 01f5a423 sw t6,8(a1)\n"
+        " 224: 000f8a63 beqz t6,0x238\n"
+        " 228: 0240006f j 0x24c\n"
+        " 22c: 00000013 nop\n"
+        " 230: 00000013 nop\n"
+        " 234: 00000013 nop\n"
+        " 238: 00000f97 auipc t6,0x0\n"
+        " 23c: 014f8f93 addi t6,t6,20\n"
+        " 240: 01f5a223 sw t6,4(a1)\n"
+        " 244: 00862f83 lw t6,8(a2)\n"
+        " 248: 000f8067 jr t6\n"
+        " 24c: 00062f83 lw t6,0(a2)\n"
+        " 250: 000f8067 jr t6\n"
+        " 254: 00462f83 lw t6,4(a2)\n"
+        " 258: 000f8067 jr t6"
+    >>,
+    {_, RelevantBinary} = split_binary(Stream, 16#218),
+    ?assertEqual(dump_to_bin(Dump), RelevantBinary).
+
+%% BIF call whose argument is built from a literal that does not fit a 12-bit
+%% immediate (998238357 = 0x3b7fe895): the expected code materialises it with
+%% lui + addi. The function pointer obtained by the first primitive call stays
+%% live across the second one, so the dump also shows it being saved to and
+%% restored from 16(sp). A zero result branches into ?PRIM_HANDLE_ERROR.
+call_bif_with_large_literal_integer_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]),
+    {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 998238357]),
+    {State3, ResultReg} = ?BACKEND:call_func_ptr(State2, {free, FuncPtr}, [
+        ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg}
+    ]),
+    State4 = ?BACKEND:if_block(State3, {ResultReg, '==', 0}, fun(BSt0) ->
+        ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
+    end),
+    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {x_reg, 0}),
+    State6 = ?BACKEND:free_native_registers(State5, [ResultReg]),
+    ?BACKEND:assert_all_native_free(State6),
+    Stream = ?BACKEND:stream(State6),
+    Dump =
+        <<
+            " 0: 02062f83 lw t6,32(a2)\n"
+            " 4: ff010113 addi sp,sp,-16\n"
+            " 8: 00112023 sw ra,0(sp)\n"
+            " c: 00a12223 sw a0,4(sp)\n"
+            " 10: 00b12423 sw a1,8(sp)\n"
+            " 14: 00c12623 sw a2,12(sp)\n"
+            " 18: 00058513 mv a0,a1\n"
+            " 1c: 00200593 li a1,2\n"
+            " 20: 000f80e7 jalr t6\n"
+            " 24: 00050f93 mv t6,a0\n"
+            " 28: 00012083 lw ra,0(sp)\n"
+            " 2c: 00412503 lw a0,4(sp)\n"
+            " 30: 00812583 lw a1,8(sp)\n"
+            " 34: 00c12603 lw a2,12(sp)\n"
+            " 38: 01010113 addi sp,sp,16\n"
+            " 3c: 03c62f03 lw t5,60(a2)\n"
+            " 40: fe010113 addi sp,sp,-32\n"
+            " 44: 00112023 sw ra,0(sp)\n"
+            " 48: 00a12223 sw a0,4(sp)\n"
+            " 4c: 00b12423 sw a1,8(sp)\n"
+            " 50: 00c12623 sw a2,12(sp)\n"
+            " 54: 01f12823 sw t6,16(sp)\n"
+            " 58: 3b7ff5b7 lui a1,0x3b7ff\n"
+            " 5c: 89558593 addi a1,a1,-1899 # 0x3b7fe895\n"
+            " 60: 000f00e7 jalr t5\n"
+            " 64: 00050f13 mv t5,a0\n"
+            " 68: 00012083 lw ra,0(sp)\n"
+            " 6c: 00412503 lw a0,4(sp)\n"
+            " 70: 00812583 lw a1,8(sp)\n"
+            " 74: 00c12603 lw a2,12(sp)\n"
+            " 78: 01012f83 lw t6,16(sp)\n"
+            " 7c: 02010113 addi sp,sp,32\n"
+            " 80: ff010113 addi sp,sp,-16\n"
+            " 84: 00112023 sw ra,0(sp)\n"
+            " 88: 00a12223 sw a0,4(sp)\n"
+            " 8c: 00b12423 sw a1,8(sp)\n"
+            " 90: 00c12623 sw a2,12(sp)\n"
+            " 94: 00000593 li a1,0\n"
+            " 98: 00100613 li a2,1\n"
+            " 9c: 01852683 lw a3,24(a0)\n"
+            " a0: 000f0713 mv a4,t5\n"
+            " a4: 000f80e7 jalr t6\n"
+            " a8: 00050f93 mv t6,a0\n"
+            " ac: 00012083 lw ra,0(sp)\n"
+            " b0: 00412503 lw a0,4(sp)\n"
+            " b4: 00812583 lw a1,8(sp)\n"
+            " b8: 00c12603 lw a2,12(sp)\n"
+            " bc: 01010113 addi sp,sp,16\n"
+            " c0: 000f9863 bnez t6,0xd0\n"
+            " c4: 01862f83 lw t6,24(a2)\n"
+            " c8: 0c800613 li a2,200\n"
+            " cc: 000f8067 jr t6\n"
+            " d0: 01f52c23 sw t6,24(a0)"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% get_list pattern: mask the list term with ?TERM_PRIMARY_CLEAR_MASK (the dump
+%% shows the low two bits being cleared), then copy element 1 to {y_reg, 1} and
+%% element 0 to {y_reg, 0}.
+%% NOTE(review): and_/3 is called here with a bare register and its result is
+%% bound as a state, whereas jit.erl in this patch series calls
+%% `{St, Reg} = MMod:and_(St0, {free, Reg}, Mask)` - confirm which signature
+%% this backend implements.
+get_list_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:and_(State1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}),
+    State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}),
+    State5 = ?BACKEND:free_native_registers(State4, [Reg]),
+    ?BACKEND:assert_all_native_free(State5),
+    Stream = ?BACKEND:stream(State5),
+    Dump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 00300f13 li t5,3\n"
+        " 8: ffff4f13 not t5,t5\n"
+        " c: 01efffb3 and t6,t6,t5\n"
+        " 10: 004fae83 lw t4,4(t6)\n"
+        " 14: 01452f03 lw t5,20(a0)\n"
+        " 18: 01df2223 sw t4,4(t5)\n"
+        " 1c: 000fae83 lw t4,0(t6)\n"
+        " 20: 01452f03 lw t5,20(a0)\n"
+        " 24: 01df2023 sw t4,0(t5)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% is_integer pattern: fall through when the immediate tag equals
+%% ?TERM_INTEGER_TAG; otherwise jump to Label unless the term is boxed and its
+%% header tag equals ?TERM_BOXED_POSITIVE_INTEGER.
+%% Label is placed at 0x100 after the code is emitted, so both `j`
+%% instructions (each followed by three nops) are patched by update_branches/1.
+%% NOTE(review): and_/3 is called here with a bare register and its result is
+%% bound as a state, whereas jit.erl in this patch series calls
+%% `{St, Reg} = MMod:and_(St0, {free, Reg}, Mask)` - confirm which signature
+%% this backend implements.
+is_integer_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    Arg1 = {x_reg, 0},
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1),
+    State2 = ?BACKEND:if_block(
+        State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) ->
+            MSt1 = ?BACKEND:if_block(
+                MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
+                    ?BACKEND:jump_to_label(BSt0, Label)
+                end
+            ),
+            MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+            MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg),
+            ?BACKEND:if_block(
+                MSt3,
+                {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                fun(BSt0) ->
+                    ?BACKEND:jump_to_label(BSt0, Label)
+                end
+            )
+        end
+    ),
+    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
+    ?BACKEND:assert_all_native_free(State3),
+    State4 = ?BACKEND:add_label(State3, Label, 16#100),
+    State5 = ?BACKEND:update_branches(State4),
+    Stream = ?BACKEND:stream(State5),
+    Dump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: ffffcf13 not t5,t6\n"
+        " 8: 01cf1f13 slli t5,t5,0x1c\n"
+        " c: 040f0c63 beqz t5,0x64\n"
+        " 10: 000f8f13 mv t5,t6\n"
+        " 14: 00300e93 li t4,3\n"
+        " 18: 01df7f33 and t5,t5,t4\n"
+        " 1c: 00200e93 li t4,2\n"
+        " 20: 01df0a63 beq t5,t4,0x34\n"
+        " 24: 0dc0006f j 0x100\n"
+        " 28: 00000013 nop\n"
+        " 2c: 00000013 nop\n"
+        " 30: 00000013 nop\n"
+        " 34: 00300f13 li t5,3\n"
+        " 38: ffff4f13 not t5,t5\n"
+        " 3c: 01efffb3 and t6,t6,t5\n"
+        " 40: 000faf83 lw t6,0(t6)\n"
+        " 44: 03f00f13 li t5,63\n"
+        " 48: 01efffb3 and t6,t6,t5\n"
+        " 4c: 00800f13 li t5,8\n"
+        " 50: 01ef8a63 beq t6,t5,0x64\n"
+        " 54: 0ac0006f j 0x100\n"
+        " 58: 00000013 nop\n"
+        " 5c: 00000013 nop\n"
+        " 60: 00000013 nop"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Helper: emit "if Cond holds, jump to Label" through backend module MMod and
+%% return the updated backend state.
+cond_jump_to_label(Cond, Label, MMod, MSt0) ->
+    MMod:if_block(MSt0, Cond, fun(BSt0) ->
+        MMod:jump_to_label(BSt0, Label)
+    end).
+
+%% Keep the unoptimized version to test the and case.
+%% The final condition is an {'and', [...]} of two masked comparisons against
+%% ?TERM_BOXED_POSITIVE_INTEGER and ?TERM_BOXED_FLOAT; only the second
+%% sub-condition is given `{free, Reg}`. In the expected dump the first
+%% comparison works on a copy (t5) and the second one reuses t6 directly.
+%% NOTE(review): and_/3 is called here with a bare register and its result is
+%% bound as a state, whereas jit.erl in this patch series calls
+%% `{St, Reg} = MMod:and_(St0, {free, Reg}, Mask)` - confirm which signature
+%% this backend implements.
+is_number_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    Arg1 = {x_reg, 0},
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1),
+    State2 = ?BACKEND:if_block(
+        State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) ->
+            BSt1 = cond_jump_to_label(
+                {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0
+            ),
+            BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+            BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg),
+            cond_jump_to_label(
+                {'and', [
+                    {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                    {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT}
+                ]},
+                Label,
+                ?BACKEND,
+                BSt3
+            )
+        end
+    ),
+    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
+    ?BACKEND:assert_all_native_free(State3),
+    State4 = ?BACKEND:add_label(State3, Label, 16#100),
+    State5 = ?BACKEND:update_branches(State4),
+    Stream = ?BACKEND:stream(State5),
+    Dump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: ffffcf13 not t5,t6\n"
+        " 8: 01cf1f13 slli t5,t5,0x1c\n"
+        " c: 060f0663 beqz t5,0x78\n"
+        " 10: 000f8f13 mv t5,t6\n"
+        " 14: 00300e93 li t4,3\n"
+        " 18: 01df7f33 and t5,t5,t4\n"
+        " 1c: 00200e93 li t4,2\n"
+        " 20: 01df0a63 beq t5,t4,0x34\n"
+        " 24: 0dc0006f j 0x100\n"
+        " 28: 00000013 nop\n"
+        " 2c: 00000013 nop\n"
+        " 30: 00000013 nop\n"
+        " 34: 00300f13 li t5,3\n"
+        " 38: ffff4f13 not t5,t5\n"
+        " 3c: 01efffb3 and t6,t6,t5\n"
+        " 40: 000faf83 lw t6,0(t6)\n"
+        " 44: 000f8f13 mv t5,t6\n"
+        " 48: 03f00e93 li t4,63\n"
+        " 4c: 01df7f33 and t5,t5,t4\n"
+        " 50: 00800e93 li t4,8\n"
+        " 54: 03df0263 beq t5,t4,0x78\n"
+        " 58: 03f00f13 li t5,63\n"
+        " 5c: 01efffb3 and t6,t6,t5\n"
+        " 60: 01800f13 li t5,24\n"
+        " 64: 01ef8a63 beq t6,t5,0x78\n"
+        " 68: 0980006f j 0x100\n"
+        " 6c: 00000013 nop\n"
+        " 70: 00000013 nop\n"
+        " 74: 00000013 nop"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% is_boolean pattern: jump to Label only when the register differs from both
+%% ?TRUE_ATOM and ?FALSE_ATOM. The label is added at 0x100 after the code is
+%% emitted; the jump is expected as a patched `j 0x100` followed by three nops.
+is_boolean_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
+            ?BACKEND:jump_to_label(BSt1, Label)
+        end)
+    end),
+    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
+    ?BACKEND:assert_all_native_free(State3),
+    State4 = ?BACKEND:add_label(State3, Label, 16#100),
+    State5 = ?BACKEND:update_branches(State4),
+    Stream = ?BACKEND:stream(State5),
+    Dump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 04b00f13 li t5,75\n"
+        " 8: 01ef8e63 beq t6,t5,0x24\n"
+        " c: 00b00f13 li t5,11\n"
+        " 10: 01ef8a63 beq t6,t5,0x24\n"
+        " 14: 0ec0006f j 0x100\n"
+        " 18: 00000013 nop\n"
+        " 1c: 00000013 nop\n"
+        " 20: 00000013 nop"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Same as is_boolean_test/0 but with the label at 0x1000: the expected code is
+%% still a single patched `j` followed by three nops.
+is_boolean_far_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
+            ?BACKEND:jump_to_label(BSt1, Label)
+        end)
+    end),
+    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
+    ?BACKEND:assert_all_native_free(State3),
+    State4 = ?BACKEND:add_label(State3, Label, 16#1000),
+    State5 = ?BACKEND:update_branches(State4),
+    Stream = ?BACKEND:stream(State5),
+    Dump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 04b00f13 li t5,75\n"
+        " 8: 01ef8e63 beq t6,t5,0x24\n"
+        " c: 00b00f13 li t5,11\n"
+        " 10: 01ef8a63 beq t6,t5,0x24\n"
+        " 14: 7ed0006f j 0x1000\n"
+        " 18: 00000013 nop\n"
+        " 1c: 00000013 nop\n"
+        " 20: 00000013 nop"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Variant of the is_boolean tests where the label offset (0x1000) is
+%% registered before the jump is emitted. The expected code is an
+%% auipc / lw / jr sequence followed by a literal word, instead of `j` + nops.
+%% NOTE(review): the auipc sits at 0x14 and the load is `lw t5,8(t5)`, i.e. it
+%% reads address 0x1c (the `jr`), while the literal word is at 0x20 - confirm
+%% the literal-load offset produced by the backend.
+is_boolean_far_known_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    State1 = ?BACKEND:add_label(State0, Label, 16#1000),
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}),
+    State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
+            ?BACKEND:jump_to_label(BSt1, Label)
+        end)
+    end),
+    State4 = ?BACKEND:free_native_registers(State3, [Reg]),
+    ?BACKEND:assert_all_native_free(State4),
+    State5 = ?BACKEND:update_branches(State4),
+    Stream = ?BACKEND:stream(State5),
+    Dump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 04b00f13 li t5,75\n"
+        " 8: 01ef8e63 beq t6,t5,0x24\n"
+        " c: 00b00f13 li t5,11\n"
+        " 10: 01ef8a63 beq t6,t5,0x24\n"
+        " 14: 00000f17 auipc t5,0x0\n"
+        " 18: 008f2f03 lw t5,8(t5)\n"
+        " 1c: 000f0067 jr t5\n"
+        " 20: 00001000 .word 0x00001000"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT_TIMEOUT pattern that uses set_continuation_to_offset and continuation_entry_point
+%% The continuation reference returned by set_continuation_to_offset/1 is
+%% resolved by add_label/2 right after the tail call to ?PRIM_WAIT_TIMEOUT
+%% (offset 0x24 in the expected dump).
+%% NOTE(review): in the expected dump `beq t6,a0,0x60` targets the `ret` at
+%% 0x60, so both the equal and the not-equal path reach `ret` and the code from
+%% 0x64 on is never reached by fall-through - confirm the branch offset emitted
+%% by return_if_not_equal_to_ctx/2.
+wait_timeout_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    Label = 42,
+    {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0),
+    {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000),
+    State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [
+        ctx, jit_state, {free, TimeoutReg}, Label
+    ]),
+    State4 = ?BACKEND:add_label(State3, OffsetRef0),
+    State5 = ?BACKEND:continuation_entry_point(State4),
+    {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+        ctx, jit_state
+    ]),
+    State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}),
+    % ?WAITING_TIMEOUT_EXPIRED
+    {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
+    State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) ->
+        ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
+            ctx, jit_state, Label
+        ])
+    end),
+    State10 = ?BACKEND:update_branches(State9),
+
+    Stream = ?BACKEND:stream(State10),
+    Dump = <<
+        " 0: 00000f97 auipc t6,0x0\n"
+        " 4: 024f8f93 addi t6,t6,36\n"
+        " 8: 01f5a223 sw t6,4(a1)\n"
+        " c: 00001fb7 lui t6,0x1\n"
+        " 10: 388f8f93 addi t6,t6,904\n"
+        " 14: 07862f03 lw t5,120(a2)\n"
+        " 18: 000f8613 mv a2,t6\n"
+        " 1c: 02a00693 li a3,42\n"
+        " 20: 000f0067 jr t5\n"
+        " 24: 05462f83 lw t6,84(a2)\n"
+        " 28: ff010113 addi sp,sp,-16\n"
+        " 2c: 00112023 sw ra,0(sp)\n"
+        " 30: 00a12223 sw a0,4(sp)\n"
+        " 34: 00b12423 sw a1,8(sp)\n"
+        " 38: 00c12623 sw a2,12(sp)\n"
+        " 3c: 000f80e7 jalr t6\n"
+        " 40: 00050f93 mv t6,a0\n"
+        " 44: 00012083 lw ra,0(sp)\n"
+        " 48: 00412503 lw a0,4(sp)\n"
+        " 4c: 00812583 lw a1,8(sp)\n"
+        " 50: 00c12603 lw a2,12(sp)\n"
+        " 54: 01010113 addi sp,sp,16\n"
+        " 58: 00af8463 beq t6,a0,0x60\n"
+        " 5c: 000f8513 mv a0,t6\n"
+        " 60: 00008067 ret\n"
+        " 64: 08400f93 li t6,132\n"
+        " 68: 00cf8fb3 add t6,t6,a2\n"
+        " 6c: 000faf83 lw t6,0(t6)\n"
+        " 70: ff010113 addi sp,sp,-16\n"
+        " 74: 00112023 sw ra,0(sp)\n"
+        " 78: 00a12223 sw a0,4(sp)\n"
+        " 7c: 00b12423 sw a1,8(sp)\n"
+        " 80: 00c12623 sw a2,12(sp)\n"
+        " 84: 00200593 li a1,2\n"
+        " 88: 000f80e7 jalr t6\n"
+        " 8c: 00050f93 mv t6,a0\n"
+        " 90: 00012083 lw ra,0(sp)\n"
+        " 94: 00412503 lw a0,4(sp)\n"
+        " 98: 00812583 lw a1,8(sp)\n"
+        " 9c: 00c12603 lw a2,12(sp)\n"
+        " a0: 01010113 addi sp,sp,16\n"
+        " a4: 000f9863 bnez t6,0xb4\n"
+        " a8: 07c62f83 lw t6,124(a2)\n"
+        " ac: 02a00613 li a2,42\n"
+        " b0: 000f8067 jr t6"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT pattern that uses set_continuation_to_label
+%% update_branches/1 is not called here, so the expected dump still shows the
+%% six jump-table entries as `auipc a3,0x0` / `jr a3` with no offsets filled in.
+wait_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5),
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:set_continuation_to_label(State2, Label),
+    State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+
+    Stream = ?BACKEND:stream(State4),
+    Dump = <<
+        " 0: 00000697 auipc a3,0x0\n"
+        " 4: 00068067 jr a3\n"
+        " 8: 00000697 auipc a3,0x0\n"
+        " c: 00068067 jr a3\n"
+        " 10: 00000697 auipc a3,0x0\n"
+        " 14: 00068067 jr a3\n"
+        " 18: 00000697 auipc a3,0x0\n"
+        " 1c: 00068067 jr a3\n"
+        " 20: 00000697 auipc a3,0x0\n"
+        " 24: 00068067 jr a3\n"
+        " 28: 00000697 auipc a3,0x0\n"
+        " 2c: 00068067 jr a3\n"
+        " 30: 00000f97 auipc t6,0x0\n"
+        " 34: 004f8f93 addi t6,t6,4\n"
+        " 38: 01f5a223 sw t6,4(a1)\n"
+        " 3c: 07462f83 lw t6,116(a2)\n"
+        " 40: 000f8067 jr t6"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test return_labels_and_lines/2 function
+%% Labels are registered out of order (2 then 1); the expected dump lists
+%% label 1 before label 2.
+return_labels_and_lines_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    % Test return_labels_and_lines with some sample labels and lines
+    State1 = ?BACKEND:add_label(State0, 2, 32),
+    State2 = ?BACKEND:add_label(State1, 1, 16),
+
+    % {Line, Offset} pairs
+    SortedLines = [{10, 16}, {20, 32}],
+
+    State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines),
+    Stream = ?BACKEND:stream(State3),
+
+    % Should have generated auipc + addi + ret + labels table + lines table
+    % auipc = 4 bytes, addi = 4 bytes, ret = 4 bytes, and each table holds
+    % 2 entries of 6 bytes = 12 bytes, so at least 36 bytes in total.
+    % In the dump below each table is additionally preceded by a 2-byte
+    % halfword (presumably the entry count), which gives 40 bytes.
+    ?assert(byte_size(Stream) >= 36),
+
+    % Expected: auipc a0, 0 + addi a0, a0, 12 + ret + labels table + lines table
+    % The data tables start at offset 0xC (12), so we load PC + 12 into a0
+    % The mnemonics shown for the table halfwords are only the disassembler's
+    % reading of data.
+    Dump = <<
+        " 0: 00000517 auipc a0,0x0\n"
+        " 4: 00c50513 addi a0,a0,12\n"
+        " 8: 00008067 ret\n"
+        " c: 0200 addi s0,sp,256\n"
+        " e: 0100 addi s0,sp,128\n"
+        " 10: 0000 unimp\n"
+        " 12: 1000 addi s0,sp,32\n"
+        " 14: 0200 addi s0,sp,256\n"
+        " 16: 0000 unimp\n"
+        " 18: 2000 fld fs0,0(s0)\n"
+        " 1a: 0200 addi s0,sp,256\n"
+        " 1c: 0a00 addi s0,sp,272\n"
+        " 1e: 0000 unimp\n"
+        " 20: 1000 addi s0,sp,32\n"
+        " 22: 1400 addi s0,sp,544\n"
+        " 24: 0000 unimp\n"
+        " 26: 2000 fld fs0,0(s0)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test call_func_ptr with a {free, {x_reg, X}} argument
+%% The function pointer comes from ?PRIM_GET_IMPORTED_BIF. In the expected
+%% dump the {y_reg, 0} argument is loaded through a temporary (t5) into a3 and
+%% the {free, {x_reg, 0}} argument is loaded straight into a4.
+gc_bif2_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]),
+    {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [
+        ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}
+    ]),
+
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        " 0: 02062f83 lw t6,32(a2)\n"
+        " 4: ff010113 addi sp,sp,-16\n"
+        " 8: 00112023 sw ra,0(sp)\n"
+        " c: 00a12223 sw a0,4(sp)\n"
+        " 10: 00b12423 sw a1,8(sp)\n"
+        " 14: 00c12623 sw a2,12(sp)\n"
+        " 18: 00058513 mv a0,a1\n"
+        " 1c: 02a00593 li a1,42\n"
+        " 20: 000f80e7 jalr t6\n"
+        " 24: 00050f93 mv t6,a0\n"
+        " 28: 00012083 lw ra,0(sp)\n"
+        " 2c: 00412503 lw a0,4(sp)\n"
+        " 30: 00812583 lw a1,8(sp)\n"
+        " 34: 00c12603 lw a2,12(sp)\n"
+        " 38: 01010113 addi sp,sp,16\n"
+        " 3c: ff010113 addi sp,sp,-16\n"
+        " 40: 00112023 sw ra,0(sp)\n"
+        " 44: 00a12223 sw a0,4(sp)\n"
+        " 48: 00b12423 sw a1,8(sp)\n"
+        " 4c: 00c12623 sw a2,12(sp)\n"
+        " 50: 00000593 li a1,0\n"
+        " 54: 00300613 li a2,3\n"
+        " 58: 01452f03 lw t5,20(a0)\n"
+        " 5c: 000f2683 lw a3,0(t5)\n"
+        " 60: 01852703 lw a4,24(a0)\n"
+        " 64: 000f80e7 jalr t6\n"
+        " 68: 00050f93 mv t6,a0\n"
+        " 6c: 00012083 lw ra,0(sp)\n"
+        " 70: 00412503 lw a0,4(sp)\n"
+        " 74: 00812583 lw a1,8(sp)\n"
+        " 78: 00c12603 lw a2,12(sp)\n"
+        " 7c: 01010113 addi sp,sp,16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test case where parameter value is in a1
+%% a1 is also where the jit_state argument is passed, so the expected code
+%% first copies the value to a temporary (`mv t5,a1`) and then moves it into
+%% the third argument register (`mv a2,t5`).
+memory_ensure_free_with_roots_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [
+        ctx, jit_state, {free, a1}, 4, 1
+    ]),
+
+    Stream = ?BACKEND:stream(State1),
+    Dump = <<
+        " 0: 0b000f93 li t6,176\n"
+        " 4: 00cf8fb3 add t6,t6,a2\n"
+        " 8: 000faf83 lw t6,0(t6)\n"
+        " c: ff010113 addi sp,sp,-16\n"
+        " 10: 00112023 sw ra,0(sp)\n"
+        " 14: 00a12223 sw a0,4(sp)\n"
+        " 18: 00b12423 sw a1,8(sp)\n"
+        " 1c: 00c12623 sw a2,12(sp)\n"
+        " 20: 00058f13 mv t5,a1\n"
+        " 24: 000f0613 mv a2,t5\n"
+        " 28: 00400693 li a3,4\n"
+        " 2c: 00100713 li a4,1\n"
+        " 30: 000f80e7 jalr t6\n"
+        " 34: 00050f93 mv t6,a0\n"
+        " 38: 00012083 lw ra,0(sp)\n"
+        " 3c: 00412503 lw a0,4(sp)\n"
+        " 40: 00812583 lw a1,8(sp)\n"
+        " 44: 00c12603 lw a2,12(sp)\n"
+        " 48: 01010113 addi sp,sp,16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% call_ext pattern: decrement reductions (scheduling out when they reach
+%% zero), then tail-call primitive 4 through call_primitive_with_cp/3.
+%% NOTE(review): at 0x10-0x18 the expected dump computes an address into t6
+%% (auipc + addi) and then immediately overwrites t6 with `lw t6,8(a2)`.
+%% The sequence expected by
+%% call_only_or_schedule_next_and_label_relocation_test has a
+%% `sw t6,4(a1)` between those two steps - confirm that the store is meant to
+%% be absent here.
+call_ext_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]),
+    ?BACKEND:assert_all_native_free(State2),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        " 0: 0085af83 lw t6,8(a1)\n"
+        " 4: ffff8f93 addi t6,t6,-1\n"
+        " 8: 01f5a423 sw t6,8(a1)\n"
+        " c: 000f9a63 bnez t6,0x20\n"
+        " 10: 00000f97 auipc t6,0x0\n"
+        " 14: 010f8f93 addi t6,t6,16\n"
+        " 18: 00862f83 lw t6,8(a2)\n"
+        " 1c: 000f8067 jr t6\n"
+        " 20: 0005af03 lw t5,0(a1)\n"
+        " 24: 000f2f03 lw t5,0(t5)\n"
+        " 28: 018f1f13 slli t5,t5,0x18\n"
+        " 2c: 13000f93 li t6,304\n"
+        " 30: 01ff6f33 or t5,t5,t6\n"
+        " 34: 05e52e23 sw t5,92(a0)\n"
+        " 38: 01062f83 lw t6,16(a2)\n"
+        " 3c: 00200613 li a2,2\n"
+        " 40: 00500693 li a3,5\n"
+        " 44: fff00713 li a4,-1\n"
+        " 48: 000f8067 jr t6"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% call_fun pattern: decrement reductions, check on a copy of the register
+%% that the term is boxed and that its header tag is ?TERM_BOXED_FUN (raising
+%% a {badfun, Term} style error through ?PRIM_RAISE_ERROR_TUPLE otherwise),
+%% then tail-call ?PRIM_CALL_FUN with the original register.
+%% NOTE(review): and_/3 is called here with a bare register and its result is
+%% bound as a state, whereas jit.erl in this patch series calls
+%% `{St, Reg} = MMod:and_(St0, {free, Reg}, Mask)` - confirm which signature
+%% this backend implements.
+%% NOTE(review): at 0x10-0x18 the expected dump computes an address into t6
+%% and then overwrites it with `lw t6,8(a2)` without a `sw t6,4(a1)` in
+%% between - confirm that this is intended.
+call_fun_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    FuncReg = {x_reg, 0},
+    ArgsCount = 0,
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, FuncReg),
+    {State3, RegCopy} = ?BACKEND:copy_to_native_register(State2, Reg),
+    State4 = ?BACKEND:if_block(
+        State3, {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
+            ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
+                ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
+            ])
+        end
+    ),
+    State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK),
+    State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy),
+    State7 = ?BACKEND:if_block(
+        State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) ->
+            ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
+                ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
+            ])
+        end
+    ),
+    State8 = ?BACKEND:free_native_registers(State7, [RegCopy]),
+    State9 = ?BACKEND:call_primitive_with_cp(State8, ?PRIM_CALL_FUN, [
+        ctx, jit_state, Reg, ArgsCount
+    ]),
+    ?BACKEND:assert_all_native_free(State9),
+    Stream = ?BACKEND:stream(State9),
+    Dump = <<
+        " 0: 0085af83 lw t6,8(a1)\n"
+        " 4: ffff8f93 addi t6,t6,-1\n"
+        " 8: 01f5a423 sw t6,8(a1)\n"
+        " c: 000f9a63 bnez t6,0x20\n"
+        " 10: 00000f97 auipc t6,0x0\n"
+        " 14: 010f8f93 addi t6,t6,16\n"
+        " 18: 00862f83 lw t6,8(a2)\n"
+        " 1c: 000f8067 jr t6\n"
+        " 20: 01852f83 lw t6,24(a0)\n"
+        " 24: 000f8f13 mv t5,t6\n"
+        " 28: 000f0e93 mv t4,t5\n"
+        " 2c: 00300e13 li t3,3\n"
+        " 30: 01cefeb3 and t4,t4,t3\n"
+        " 34: 00200e13 li t3,2\n"
+        " 38: 01ce8c63 beq t4,t3,0x50\n"
+        " 3c: 04c62f83 lw t6,76(a2)\n"
+        " 40: 04000613 li a2,64\n"
+        " 44: 18b00693 li a3,395\n"
+        " 48: 000f0713 mv a4,t5\n"
+        " 4c: 000f8067 jr t6\n"
+        " 50: 00300e93 li t4,3\n"
+        " 54: fffece93 not t4,t4\n"
+        " 58: 01df7f33 and t5,t5,t4\n"
+        " 5c: 000f2f03 lw t5,0(t5)\n"
+        " 60: 000f0e93 mv t4,t5\n"
+        " 64: 03f00e13 li t3,63\n"
+        " 68: 01cefeb3 and t4,t4,t3\n"
+        " 6c: 01400e13 li t3,20\n"
+        " 70: 01ce8c63 beq t4,t3,0x88\n"
+        " 74: 04c62f83 lw t6,76(a2)\n"
+        " 78: 07800613 li a2,120\n"
+        " 7c: 18b00693 li a3,395\n"
+        " 80: 000f0713 mv a4,t5\n"
+        " 84: 000f8067 jr t6\n"
+        " 88: 0005ae83 lw t4,0(a1)\n"
+        " 8c: 000eae83 lw t4,0(t4)\n"
+        " 90: 018e9e93 slli t4,t4,0x18\n"
+        " 94: 2e000f13 li t5,736\n"
+        " 98: 01eeeeb3 or t4,t4,t5\n"
+        " 9c: 05d52e23 sw t4,92(a0)\n"
+        " a0: 08000f13 li t5,128\n"
+        " a4: 00cf0f33 add t5,t5,a2\n"
+        " a8: 000f2f03 lw t5,0(t5)\n"
+        " ac: 000f8613 mv a2,t6\n"
+        " b0: 00000693 li a3,0\n"
+        " b4: 000f0067 jr t5"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Helper: emit move_to_vm_register(Source, Dest) followed by a jump to offset
+%% 16#100, and compare the resulting stream with the disassembly in Dump.
+move_to_vm_register_test0(State, Source, Dest, Dump) ->
+    State1 = ?BACKEND:move_to_vm_register(State, Source, Dest),
+    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+    Stream = ?BACKEND:stream(State2),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% move_to_vm_register/3 for each source kind (immediate, x_reg, y_reg, ptr,
+%% native register) against each destination kind (x_reg, y_reg, ptr).
+%% Every case starts from the same fresh backend state created in setup.
+move_to_vm_register_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {x_reg, 0}, <<
+                        " 0: 00000f93 li t6,0\n"
+                        " 4: 01f52c23 sw t6,24(a0)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {x_reg, extra}, <<
+                        " 0: 00000f93 li t6,0\n"
+                        " 4: 05f52c23 sw t6,88(a0)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {ptr, t5}, <<
+                        " 0: 00000f93 li t6,0\n"
+                        " 4: 01ff2023 sw t6,0(t5)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {y_reg, 2}, <<
+                        " 0: 00000f13 li t5,0\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 01efa423 sw t5,8(t6)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {y_reg, 20}, <<
+                        " 0: 00000f13 li t5,0\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 05efa823 sw t5,80(t6)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                %% Test: Immediate to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {x_reg, 0}, <<
+                        " 0: 02a00f93 li t6,42\n"
+                        " 4: 01f52c23 sw t6,24(a0)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {x_reg, extra}, <<
+                        " 0: 02a00f93 li t6,42\n"
+                        " 4: 05f52c23 sw t6,88(a0)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 2}, <<
+                        " 0: 02a00f13 li t5,42\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 01efa423 sw t5,8(t6)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 20}, <<
+                        " 0: 02a00f13 li t5,42\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 05efa823 sw t5,80(t6)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                %% Test: Immediate to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 99, {ptr, a3}, <<
+                        " 0: 06300f93 li t6,99\n"
+                        " 4: 01f6a023 sw t6,0(a3)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                %% Test: x_reg to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, <<
+                        " 0: 01c52f83 lw t6,28(a0)\n"
+                        " 4: 03f52023 sw t6,32(a0)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                %% Test: x_reg to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, a1}, <<
+                        " 0: 01c52f83 lw t6,28(a0)\n"
+                        " 4: 01f5a023 sw t6,0(a1)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                %% Test: ptr to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {ptr, t3}, {x_reg, 3}, <<
+                        " 0: 000e2f83 lw t6,0(t3)\n"
+                        " 4: 03f52223 sw t6,36(a0)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                %% Test: x_reg to y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01452f03 lw t5,20(a0)\n"
+                        " 8: 01ff2223 sw t6,4(t5)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, <<
+                        " 0: 01452f03 lw t5,20(a0)\n"
+                        " 4: 000f2f83 lw t6,0(t5)\n"
+                        " 8: 03f52223 sw t6,36(a0)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                %% Test: y_reg (index 1) to x_reg
+                %% NOTE(review): this case used to be labelled "y_reg to y_reg"
+                %% but its destination is {x_reg, 3}; no y_reg to y_reg move is
+                %% covered in this list - confirm whether one was intended.
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, <<
+                        " 0: 01452f03 lw t5,20(a0)\n"
+                        " 4: 004f2f83 lw t6,4(t5)\n"
+                        " 8: 03f52223 sw t6,36(a0)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                %% Test: Native register to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t4, {x_reg, 0}, <<
+                        " 0: 01d52c23 sw t4,24(a0)\n"
+                        " 4: 0fc0006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t5, {x_reg, extra}, <<
+                        " 0: 05e52c23 sw t5,88(a0)\n"
+                        " 4: 0fc0006f j 0x100"
+                    >>)
+                end),
+                %% Test: Native register to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t3, {ptr, a3}, <<
+                        " 0: 01c6a023 sw t3,0(a3)\n"
+                        " 4: 0fc0006f j 0x100"
+                    >>)
+                end),
+                %% Test: Native register to y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, a1, {y_reg, 0}, <<
+                        " 0: 01452f83 lw t6,20(a0)\n"
+                        " 4: 00bfa023 sw a1,0(t6)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                %% Test: Large immediate to x_reg (uses lui + addi in RISC-V)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, <<
+                        " 0: 12345fb7 lui t6,0x12345\n"
+                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
+                        " 8: 01f52c23 sw t6,24(a0)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, <<
+                        " 0: 12345fb7 lui t6,0x12345\n"
+                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
+                        " 8: 05f52c23 sw t6,88(a0)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, <<
+                        " 0: 12345fb7 lui t6,0x12345\n"
+                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
+                        " 8: 01452f03 lw t5,20(a0)\n"
+                        " c: 01ff2423 sw t6,8(t5)\n"
+                        " 10: 0f00006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, <<
+                        " 0: 12345fb7 lui t6,0x12345\n"
+                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
+                        " 8: 01452f03 lw t5,20(a0)\n"
+                        " c: 05ff2823 sw t6,80(t5)\n"
+                        " 10: 0f00006f j 0x100"
+                    >>)
+                end),
+                %% Test: Large immediate to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {ptr, a3}, <<
+                        " 0: 12345fb7 lui t6,0x12345\n"
+                        " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n"
+                        " 8: 01f6a023 sw t6,0(a3)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                %% Test: x_reg to y_reg (high index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, <<
+                        " 0: 05452f83 lw t6,84(a0)\n"
+                        " 4: 01452f03 lw t5,20(a0)\n"
+                        " 8: 07ff2e23 sw t6,124(t5)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg (high index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, <<
+                        " 0: 01452f03 lw t5,20(a0)\n"
+                        " 4: 07cf2f83 lw t6,124(t5)\n"
+                        " 8: 05f52a23 sw t6,84(a0)\n"
+                        " c: 0f40006f j 0x100"
+                    >>)
+                end),
+                %% Test: Large y_reg index (32): the expected code computes the
+                %% store address with li + add (byte offset 128) instead of
+                %% using an immediate offset in the sw
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 32}, <<
+                        " 0: 02a00f13 li t5,42\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 08000e93 li t4,128\n"
+                        " c: 01fe8eb3 add t4,t4,t6\n"
+                        " 10: 01eea023 sw t5,0(t4)\n"
+                        " 14: 0ec0006f j 0x100"
+                    >>)
+                end),
+                %% Test: Negative immediate to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -1, {x_reg, 0}, <<
+                        " 0: fff00f93 li t6,-1\n"
+                        " 4: 01f52c23 sw t6,24(a0)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -100, {x_reg, 0}, <<
+                        " 0: f9c00f93 li t6,-100\n"
+                        " 4: 01f52c23 sw t6,24(a0)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -1000, {x_reg, 0}, <<
+                        " 0: c1800f93 li t6,-1000\n"
+                        " 4: 01f52c23 sw t6,24(a0)\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+%% Helper: emit move_array_element(Reg, Index, Dest) on State and compare the
+%% resulting stream with the disassembly in Dump.
+move_array_element_test0(State, Reg, Index, Dest, Dump) ->
+    State1 = ?BACKEND:move_array_element(State, Reg, Index, Dest),
+    Stream = ?BACKEND:stream(State1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% EUnit generator for ?BACKEND:move_array_element/4.
+%% Every case starts from a fresh backend state (built once by the setup fun)
+%% and compares the emitted stream with the expected disassembly through
+%% move_array_element_test0/5 (defined earlier in this file).
+%% The index is either an integer (turned into a word offset, index * 4 in the
+%% dumps below) or a {free, Reg} native register holding the index.
+move_array_element_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_array_element: reg[x] to x_reg
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 2, {x_reg, 0}, <<
+                        " 0: 0086af83 lw t6,8(a3)\n"
+                        " 4: 01f52c23 sw t6,24(a0)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to ptr
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 3, {ptr, t4}, <<
+                        " 0: 00c6af83 lw t6,12(a3)\n"
+                        " 4: 01fea023 sw t6,0(t4)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to y_reg
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 1, {y_reg, 2}, <<
+                        " 0: 0046af03 lw t5,4(a3)\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 01efa423 sw t5,8(t6)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to native reg (t4)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 1, t4, <<
+                        " 0: 0046ae83 lw t4,4(a3)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to y_reg (highest y_reg index still
+                %% stored with a direct offset in these tests: 31 * 4 = 124)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 7, {y_reg, 31}, <<
+                        " 0: 01c6af03 lw t5,28(a3)\n"
+                        " 4: 01452f83 lw t6,20(a0)\n"
+                        " 8: 07efae23 sw t5,124(t6)"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to x_reg (x_reg 15)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 7, {x_reg, 15}, <<
+                        " 0: 01c6af83 lw t6,28(a3)\n"
+                        " 4: 05f52a23 sw t6,84(a0)"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to x_reg
+                %% The index register is first obtained with get_array_element/3,
+                %% which accounts for the leading lw in the expected dump.
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {x_reg, 2}, <<
+                        " 0: 0106af83 lw t6,16(a3)\n"
+                        " 4: 002f9f93 slli t6,t6,0x2\n"
+                        " 8: 01f68fb3 add t6,a3,t6\n"
+                        " c: 000faf83 lw t6,0(t6)\n"
+                        " 10: 03f52023 sw t6,32(a0)"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to pointer
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {ptr, t4}, <<
+                        " 0: 0106af83 lw t6,16(a3)\n"
+                        " 4: 002f9f93 slli t6,t6,0x2\n"
+                        " 8: 01f68fb3 add t6,a3,t6\n"
+                        " c: 000faf83 lw t6,0(t6)\n"
+                        " 10: 01fea023 sw t6,0(t4)"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to y_reg
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {y_reg, 31}, <<
+                        " 0: 0106af83 lw t6,16(a3)\n"
+                        " 4: 002f9f93 slli t6,t6,0x2\n"
+                        " 8: 01f68fb3 add t6,a3,t6\n"
+                        " c: 000faf83 lw t6,0(t6)\n"
+                        " 10: 01452f03 lw t5,20(a0)\n"
+                        " 14: 07ff2e23 sw t6,124(t5)"
+                    >>)
+                end),
+                %% move_array_element with integer index and x_reg destination
+                %% The base register is allocated by move_to_native_register/2,
+                %% so the scratch register used for the element is t5, not t6.
+                ?_test(begin
+                    {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+                    move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 008faf03 lw t5,8(t6)\n"
+                        " 8: 03e52623 sw t5,44(a0)"
+                    >>)
+                end)
+            ]
+        end}.
+
+%% EUnit generator for ?BACKEND:get_array_element/3.
+%% Checks both the emitted load and the native register returned to the caller.
+get_array_element_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% get_array_element: reg[x] to new native reg
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, t3, 4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 010e2f83 lw t6,16(t3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual(t6, Reg)
+                end)
+            ]
+        end}.
+
+%% EUnit generator for ?BACKEND:move_to_array_element/4 and /5.
+%% /4 stores a value at Base[Index]; /5 takes an additional offset argument.
+%% The index is either an integer or a native register.
+move_to_array_element_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_to_array_element/4: x_reg to reg[x]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01f6a423 sw t6,8(a3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: x_reg to reg[reg]
+                %% The index register t3 is copied to a temp before scaling, so
+                %% t3 itself is left untouched by the expected code.
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 000e0f13 mv t5,t3\n"
+                        " 8: 002f1f13 slli t5,t5,0x2\n"
+                        " c: 01e68f33 add t5,a3,t5\n"
+                        " 10: 01ff2023 sw t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: ptr to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {ptr, t6}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 000faf83 lw t6,0(t6)\n"
+                        " 4: 000e0f13 mv t5,t3\n"
+                        " 8: 002f1f13 slli t5,t5,0x2\n"
+                        " c: 01e68f33 add t5,a3,t5\n"
+                        " 10: 01ff2023 sw t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: y_reg to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01452f03 lw t5,20(a0)\n"
+                        " 4: 008f2f83 lw t6,8(t5)\n"
+                        " 8: 000e0f13 mv t5,t3\n"
+                        " c: 002f1f13 slli t5,t5,0x2\n"
+                        " 10: 01e68f33 add t5,a3,t5\n"
+                        " 14: 01ff2023 sw t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: x_reg to reg[x+offset]
+                %% NOTE(review): with index 2 and offset 1 the expected store is
+                %% at 8(a3), i.e. the same address as index 2 without offset,
+                %% whereas the register-index cases below add the offset to the
+                %% index (addi t5,t3,1). Confirm against the backend how the
+                %% offset is meant to combine with an integer index.
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2, 1),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 01f6a423 sw t6,8(a3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: x_reg to reg[reg+offset]
+                %% The state tuple is patched directly (elements 6 and 7) so
+                %% that a3 and t3 are reported as used registers, which is
+                %% asserted through ?BACKEND:used_regs/1 before the call.
+                ?_test(begin
+                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]),
+                    State2 = setelement(7, State1, [a3, t3]),
+                    [a3, t3] = ?BACKEND:used_regs(State2),
+                    State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 001e0f13 addi t5,t3,1\n"
+                        " 8: 002f1f13 slli t5,t5,0x2\n"
+                        " c: 01e68f33 add t5,a3,t5\n"
+                        " 10: 01ff2023 sw t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: imm to reg[reg+offset]
+                ?_test(begin
+                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]),
+                    State2 = setelement(7, State1, [a3, t3]),
+                    [a3, t3] = ?BACKEND:used_regs(State2),
+                    State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump = <<
+                        " 0: 02a00f93 li t6,42\n"
+                        " 4: 001e0f13 addi t5,t3,1\n"
+                        " 8: 002f1f13 slli t5,t5,0x2\n"
+                        " c: 01e68f33 add t5,a3,t5\n"
+                        " 10: 01ff2023 sw t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+%% EUnit generator for ?BACKEND:move_to_native_register/2 and /3.
+%% /2 returns {State, Reg} with a register chosen by the backend;
+%% /3 moves the value into the register given by the caller and returns only
+%% the state. The last case exercises move_to_vm_register/3 with a
+%% {ptr, Reg, Offset} source and an fp_reg destination.
+move_to_native_register_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_to_native_register/2: imm
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, 42),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: 02a00f93 li t6,42"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: negative value
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -42),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: fd600f93 li t6,-42"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: -255 (boundary case)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -255),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: f0100f93 li t6,-255"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: -256 (boundary case, fits in immediate for RISC-V)
+                %% A jump is appended after the move; it shows up as the
+                %% trailing `j 0x100` in the expected dump.
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: f0000f93 li t6,-256\n"
+                        " 4: 0fc0006f j 0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {ptr, reg}
+                %% The pointer register itself is returned (loaded in place).
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, t5}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t5, Reg),
+                    Dump = <<
+                        " 0: 000f2f03 lw t5,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {x_reg, N}
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: 02c52f83 lw t6,44(a0)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {y_reg, N}
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 3}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        " 0: 01452f03 lw t5,20(a0)\n"
+                        " 4: 00cf2f83 lw t6,12(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: imm to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, 42, t5),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 02a00f13 li t5,42"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: reg to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, t6, t4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 000f8e93 mv t4,t6"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {ptr, reg} to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {ptr, t6}, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 000fae03 lw t3,0(t6)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {x_reg, x} to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, a3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 02052683 lw a3,32(a0)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {y_reg, y} to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, a1),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        " 0: 01452f83 lw t6,20(a0)\n"
+                        " 4: 008fa583 lw a1,8(t6)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% Test: ptr with offset to fp_reg (term_to_float)
+                %% The expected code copies two consecutive 32-bit words
+                %% (offsets 4 and 8 from the source pointer) into the fp_reg
+                %% slot (offsets 24 and 28 from the pointer loaded at 96(a0)).
+                ?_test(begin
+                    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+                    State2 = ?BACKEND:move_to_vm_register(
+                        State1, {free, {ptr, RegA, 1}}, {fp_reg, 3}
+                    ),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        " 0: 01852f83 lw t6,24(a0)\n"
+                        " 4: 06052f03 lw t5,96(a0)\n"
+                        " 8: 004fae83 lw t4,4(t6)\n"
+                        " c: 01df2c23 sw t4,24(t5)\n"
+                        " 10: 008fae83 lw t4,8(t6)\n"
+                        " 14: 01df2e23 sw t4,28(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+%% Helper: emit ?BACKEND:add(State0, Reg, Imm), append a jump to offset 16#100
+%% and compare the resulting stream with the expected disassembly Dump.
+add_test0(State0, Reg, Imm, Dump) ->
+    State1 = ?BACKEND:add(State0, Reg, Imm),
+    % Force emission of literal pool
+    % (in the expected dumps this only shows up as the trailing `j 0x100`)
+    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+    Stream = ?BACKEND:stream(State2),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% EUnit generator for ?BACKEND:add/3 with immediate and register operands.
+add_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% Small immediate: single addi
+                ?_test(begin
+                    add_test0(State0, a2, 2, <<
+                        " 0: 00260613 addi a2,a2,2\n"
+                        " 4: 0fc0006f j 0x100"
+                    >>)
+                end),
+                %% Immediate 256: loaded into temp t6, then register add
+                ?_test(begin
+                    add_test0(State0, a2, 256, <<
+                        " 0: 10000f93 li t6,256\n"
+                        " 4: 01f60633 add a2,a2,t6\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                %% Register operand
+                ?_test(begin
+                    add_test0(State0, a2, a3, <<
+                        " 0: 00d60633 add a2,a2,a3\n"
+                        " 4: 0fc0006f j 0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+%% Helper: emit ?BACKEND:sub(State0, Reg, Imm), append a jump to offset 16#100
+%% and compare the resulting stream with the expected disassembly Dump.
+sub_test0(State0, Reg, Imm, Dump) ->
+    State1 = ?BACKEND:sub(State0, Reg, Imm),
+    % Force emission of literal pool
+    % (in the expected dumps this only shows up as the trailing `j 0x100`)
+    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+    Stream = ?BACKEND:stream(State2),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% EUnit generator for ?BACKEND:sub/3 with immediate and register operands.
+sub_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% Small immediate: addi with the negated value
+                ?_test(begin
+                    sub_test0(State0, a2, 2, <<
+                        " 0: ffe60613 addi a2,a2,-2\n"
+                        " 4: 0fc0006f j 0x100"
+                    >>)
+                end),
+                %% Immediate 256: loaded into temp t6, then register sub
+                ?_test(begin
+                    sub_test0(State0, a2, 256, <<
+                        " 0: 10000f93 li t6,256\n"
+                        " 4: 41f60633 sub a2,a2,t6\n"
+                        " 8: 0f80006f j 0x100"
+                    >>)
+                end),
+                %% Register operand
+                ?_test(begin
+                    sub_test0(State0, a2, a3, <<
+                        " 0: 40d60633 sub a2,a2,a3\n"
+                        " 4: 0fc0006f j 0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+%% Helper: emit ?BACKEND:mul(State0, Reg, Imm) and compare the resulting stream
+%% with the expected disassembly Dump (no trailing jump is appended here).
+mul_test0(State0, Reg, Imm, Dump) ->
+    State1 = ?BACKEND:mul(State0, Reg, Imm),
+    Stream = ?BACKEND:stream(State1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% EUnit generator for ?BACKEND:mul/3 with constant multipliers 2..11.
+%% The expected dumps show multiplication by small constants being emitted as
+%% shift/add/sub sequences, with a real `mul` only for 11.
+mul_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% x2: single shift left by 1
+                ?_test(begin
+                    mul_test0(State0, a2, 2, <<
+                        " 0: 00161613 slli a2,a2,0x1"
+                    >>)
+                end),
+                %% x3: (a2 << 1) + a2
+                ?_test(begin
+                    mul_test0(State0, a2, 3, <<
+                        " 0: 00161f93 slli t6,a2,0x1\n"
+                        " 4: 00cf8633 add a2,t6,a2"
+                    >>)
+                end),
+                %% x4: single shift left by 2
+                ?_test(begin
+                    mul_test0(State0, a2, 4, <<
+                        " 0: 00261613 slli a2,a2,0x2"
+                    >>)
+                end),
+                %% x5: (a2 << 2) + a2
+                ?_test(begin
+                    mul_test0(State0, a2, 5, <<
+                        " 0: 00261f93 slli t6,a2,0x2\n"
+                        " 4: 00cf8633 add a2,t6,a2"
+                    >>)
+                end),
+                %% x6: x3 followed by a shift left by 1
+                ?_test(begin
+                    mul_test0(State0, a2, 6, <<
+                        " 0: 00161f93 slli t6,a2,0x1\n"
+                        " 4: 00cf8633 add a2,t6,a2\n"
+                        " 8: 00161613 slli a2,a2,0x1"
+                    >>)
+                end),
+                %% x7: (a2 << 3) - a2
+                ?_test(begin
+                    mul_test0(State0, a2, 7, <<
+                        " 0: 00361f93 slli t6,a2,0x3\n"
+                        " 4: 40cf8633 sub a2,t6,a2"
+                    >>)
+                end),
+                %% x8: single shift left by 3
+                ?_test(begin
+                    mul_test0(State0, a2, 8, <<
+                        " 0: 00361613 slli a2,a2,0x3"
+                    >>)
+                end),
+                %% x9: (a2 << 3) + a2
+                ?_test(begin
+                    mul_test0(State0, a2, 9, <<
+                        " 0: 00361f93 slli t6,a2,0x3\n"
+                        " 4: 00cf8633 add a2,t6,a2"
+                    >>)
+                end),
+                %% x10: x5 followed by a shift left by 1
+                ?_test(begin
+                    mul_test0(State0, a2, 10, <<
+                        " 0: 00261f93 slli t6,a2,0x2\n"
+                        " 4: 00cf8633 add a2,t6,a2\n"
+                        " 8: 00161613 slli a2,a2,0x1"
+                    >>)
+                end),
+                %% x11: generic path, constant loaded into t6 then mul
+                ?_test(begin
+                    mul_test0(State0, a2, 11, <<
+                        " 0: 00b00f93 li t6,11\n"
+                        " 4: 03f60633 mul a2,a2,t6"
+                    >>)
+                end)
+            ]
+        end}.
+
+%% Test set_args1 with y_reg pattern
+%% Calls a primitive with a {y_reg, N} argument and checks that the argument is
+%% loaded straight into a0 (two loads: the y register base at 20(a0), then the
+%% slot at offset 5 * 4 = 20) after ra/a0/a1/a2 have been saved on the stack.
+set_args1_y_reg_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    % Call primitive with y_reg argument to trigger {y_reg, X} pattern in set_args1
+    % This mirrors: {MSt2, Value} = MMod:call_primitive(MSt1, ?PRIM_BITSTRING_GET_UTF8, [{free, Src}])
+    % but with {y_reg, 5} instead of {free, Src}
+    {State1, _ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_BITSTRING_GET_UTF8, [
+        {y_reg, 5}
+    ]),
+
+    Stream = ?BACKEND:stream(State1),
+    % Expected disassembly for loading from y_reg and calling primitive
+    % (the primitive pointer is fetched through a2 at index 67, scaled by 4)
+    Dump = <<
+        " 0: 04300f93 li t6,67\n"
+        " 4: 002f9f93 slli t6,t6,0x2\n"
+        " 8: 00cf8fb3 add t6,t6,a2\n"
+        " c: 000faf83 lw t6,0(t6)\n"
+        " 10: ff010113 addi sp,sp,-16\n"
+        " 14: 00112023 sw ra,0(sp)\n"
+        " 18: 00a12223 sw a0,4(sp)\n"
+        " 1c: 00b12423 sw a1,8(sp)\n"
+        " 20: 00c12623 sw a2,12(sp)\n"
+        " 24: 01452f03 lw t5,20(a0)\n"
+        " 28: 014f2503 lw a0,20(t5)\n"
+        " 2c: 000f80e7 jalr t6\n"
+        " 30: 00050f93 mv t6,a0\n"
+        " 34: 00012083 lw ra,0(sp)\n"
+        " 38: 00412503 lw a0,4(sp)\n"
+        " 3c: 00812583 lw a1,8(sp)\n"
+        " 40: 00c12603 lw a2,12(sp)\n"
+        " 44: 01010113 addi sp,sp,16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test large Y register read (Y=123, offset=492: not addressed with a direct
+%% load offset in the expected code)
+%% NOTE(review): a RISC-V `lw` takes a 12-bit signed immediate, so 492 would
+%% fit; the threshold pinned by this file's tests lies between 124 and 128
+%% bytes. Confirm whether that limit is intentional in the backend.
+large_y_reg_read_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Move from a large Y register (123 * 4 = 492 bytes)
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 123}),
+    Stream = ?BACKEND:stream(State1),
+    % Expected: the offset is materialized with li, added to the y register
+    % base, and the value is loaded from offset 0
+    Dump = <<
+        " 0: 01452f03 lw t5,20(a0)\n"
+        " 4: 1ec00f93 li t6,492\n"
+        " 8: 01ef8fb3 add t6,t6,t5\n"
+        " c: 000faf83 lw t6,0(t6)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream),
+    ?assertEqual(t6, Reg).
+
+%% Test large Y register write with immediate value
+%% The immediate goes to one temp (t5), the y register base to another (t6)
+%% and the computed address to a third (t4).
+large_y_reg_write_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Move immediate to a large Y register (123 * 4 = 492 bytes)
+    State1 = ?BACKEND:move_to_vm_register(State0, 42, {y_reg, 123}),
+    Stream = ?BACKEND:stream(State1),
+    % Expected: uses helper with temp registers for large offset
+    Dump = <<
+        " 0: 02a00f13 li t5,42\n"
+        " 4: 01452f83 lw t6,20(a0)\n"
+        " 8: 1ec00e93 li t4,492\n"
+        " c: 01fe8eb3 add t4,t4,t6\n"
+        " 10: 01eea023 sw t5,0(t4)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test large Y register read with limited registers (uses the t0/t1 fallback)
+%% Five registers (t6, t5, t4, t3, t2 in the expected dump) are allocated
+%% first; the read of {y_reg, 35} (35 * 4 = 140 bytes) then goes through t0 for
+%% the y register base and t1 for the address and the result.
+large_y_reg_read_register_exhaustion_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Allocate most available registers to simulate near-exhaustion (leave 1 for the y_reg helper)
+    {State1, _} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, _} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, _} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+    {State5, _} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+    % Leave one register available so the y_reg helper can work, but it will
+    % need the t0 fallback for the y register base
+    {StateFinal, ResultReg} = ?BACKEND:move_to_native_register(State5, {y_reg, 35}),
+    Stream = ?BACKEND:stream(StateFinal),
+    % Expected: uses t0+t1 fallback sequence when temps are exhausted
+    Dump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 01c52f03 lw t5,28(a0)\n"
+        " 8: 02052e83 lw t4,32(a0)\n"
+        " c: 02452e03 lw t3,36(a0)\n"
+        " 10: 02852383 lw t2,40(a0)\n"
+        " 14: 01452283 lw t0,20(a0)\n"
+        " 18: 08c00313 li t1,140\n"
+        " 1c: 00530333 add t1,t1,t0\n"
+        " 20: 00032303 lw t1,0(t1)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream),
+    ?assertEqual(t1, ResultReg).
+
+%% Test large Y register write with register exhaustion (uses t1/t0 fallback)
+%% The source value lives in the first allocated register; four more registers
+%% are then allocated (asserted to be t5, t4, t3, t2) before writing to
+%% {y_reg, 50} (50 * 4 = 200 bytes).
+large_y_reg_write_register_exhaustion_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Get a source register first
+    {State1, SrcReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    % Allocate most remaining registers to simulate exhaustion
+    {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+    {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+    % Try to write to large Y register when only one temp register is available
+    StateFinal = ?BACKEND:move_to_vm_register(State5, SrcReg, {y_reg, 50}),
+    Stream = ?BACKEND:stream(StateFinal),
+    % Expected: uses t1/t0 fallback sequence
+    % (t1 holds the y register base, t0 the computed address)
+    Dump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 01c52f03 lw t5,28(a0)\n"
+        " 8: 02052e83 lw t4,32(a0)\n"
+        " c: 02452e03 lw t3,36(a0)\n"
+        " 10: 02852383 lw t2,40(a0)\n"
+        " 14: 01452303 lw t1,20(a0)\n"
+        " 18: 0c800293 li t0,200\n"
+        " 1c: 006282b3 add t0,t0,t1\n"
+        " 20: 01f2a023 sw t6,0(t0)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test boundary case: Y=31 (124 bytes, should use direct addressing)
+%% NOTE(review): the other tests in this file expect the li+add sequence for
+%% offsets of 128, 140, 200 and 492 bytes, so the effective limit pinned here
+%% is 124 bytes rather than the 12-bit immediate range of `lw`; confirm the
+%% intended threshold in the backend.
+y_reg_boundary_direct_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 31}),
+    Stream = ?BACKEND:stream(State1),
+    % Expected: uses direct addressing, the 31 * 4 = 124 offset is encoded in
+    % the lw instruction itself
+    Dump = <<
+        " 0: 01452f03 lw t5,20(a0)\n"
+        " 4: 07cf2f83 lw t6,124(t5)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream),
+    ?assertEqual(t6, Reg).
+
+%% Test debugger function
+%% ?BACKEND:debugger/1 is expected to emit a single `ebreak` instruction.
+debugger_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:debugger(State0),
+    Stream = ?BACKEND:stream(State1),
+    Dump = <<
+        " 0: 00100073 ebreak"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test and_/3 with a negative immediate once every allocatable register is in
+%% use: the mask has to be built in t0.
+%% NOTE(review): and_/3 is called here with a bare register and its result is
+%% bound as the state, while the jit.erl hunk of the first patch in this series
+%% calls MMod:and_(MSt, {free, Reg}, Mask) and matches {MSt, Reg}. Confirm
+%% which signature the riscv32 backend implements at this point of the series.
+and_register_exhaustion_negative_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Allocate all available registers to simulate register exhaustion
+    % (the matches also pin the allocation order t6, t5, t4, t3, t2, t1)
+    {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+    {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+    {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}),
+    % Test negative immediate (-4) which should use NOT+AND with t0 as temp
+    % (3 is loaded and inverted to obtain the -4 mask)
+    StateResult = ?BACKEND:and_(StateNoRegs, t6, -4),
+    Stream = ?BACKEND:stream(StateResult),
+    ExpectedDump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 01c52f03 lw t5,28(a0)\n"
+        " 8: 02052e83 lw t4,32(a0)\n"
+        " c: 02452e03 lw t3,36(a0)\n"
+        " 10: 02852383 lw t2,40(a0)\n"
+        " 14: 02c52303 lw t1,44(a0)\n"
+        " 18: 00300293 li t0,3\n"
+        " 1c: fff2c293 not t0,t0\n"
+        " 20: 005fffb3 and t6,t6,t0"
+    >>,
+    ?assertEqual(dump_to_bin(ExpectedDump), Stream).
+
+%% Test and_/3 with a positive immediate once every allocatable register is in
+%% use: the mask is loaded into t0 and combined with a register `and`.
+%% NOTE(review): and_/3 is called here with a bare register and its result is
+%% bound as the state, while the jit.erl hunk of the first patch in this series
+%% calls MMod:and_(MSt, {free, Reg}, Mask) and matches {MSt, Reg}. Confirm
+%% which signature the riscv32 backend implements at this point of the series.
+and_register_exhaustion_positive_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Allocate all available registers to simulate register exhaustion
+    % (the matches also pin the allocation order t6, t5, t4, t3, t2, t1)
+    {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+    {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+    {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}),
+    % Test positive immediate (0x3F) which should use AND with t0 as temp
+    StateResult = ?BACKEND:and_(StateNoRegs, t6, 16#3F),
+    Stream = ?BACKEND:stream(StateResult),
+    ExpectedDump = <<
+        " 0: 01852f83 lw t6,24(a0)\n"
+        " 4: 01c52f03 lw t5,28(a0)\n"
+        " 8: 02052e83 lw t4,32(a0)\n"
+        " c: 02452e03 lw t3,36(a0)\n"
+        " 10: 02852383 lw t2,40(a0)\n"
+        " 14: 02c52303 lw t1,44(a0)\n"
+        " 18: 03f00293 li t0,63\n"
+        " 1c: 005fffb3 and t6,t6,t0"
+    >>,
+    ?assertEqual(dump_to_bin(ExpectedDump), Stream).
+
+%% Test that a jump table for 512 labels has the expected size: one entry per
+%% label plus one extra entry, 8 bytes each. Only the size is checked, not the
+%% emitted instructions.
+jump_table_large_labels_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 512),
+    Stream = ?BACKEND:stream(State1),
+    % RISC-V: Each jump table entry is 8 bytes (AUIPC + JALR)
+    ?assertEqual((512 + 1) * 8, byte_size(Stream)).
+
+%% Test passing a small {avm_int64_t, Value} argument to a primitive.
+%% In the expected code the 64-bit value occupies the a2 (low word) / a3 (high
+%% word) pair: ctx stays in a0 and a1 is not used for the value.
+alloc_boxed_integer_fragment_small_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [
+        ctx, {avm_int64_t, 42}
+    ]),
+    ?assertEqual(t6, ResultReg),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            " 0: 03c62f83 lw t6,60(a2)\n"
+            " 4: ff010113 addi sp,sp,-16\n"
+            " 8: 00112023 sw ra,0(sp)\n"
+            " c: 00a12223 sw a0,4(sp)\n"
+            " 10: 00b12423 sw a1,8(sp)\n"
+            " 14: 00c12623 sw a2,12(sp)\n"
+            " 18: 02a00613 li a2,42\n"
+            " 1c: 00000693 li a3,0\n"
+            " 20: 000f80e7 jalr t6\n"
+            " 24: 00050f93 mv t6,a0\n"
+            " 28: 00012083 lw ra,0(sp)\n"
+            " 2c: 00412503 lw a0,4(sp)\n"
+            " 30: 00812583 lw a1,8(sp)\n"
+            " 34: 00c12603 lw a2,12(sp)\n"
+            " 38: 01010113 addi sp,sp,16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test passing a large {avm_int64_t, Value} argument to a primitive.
+%% 16#123456789ABCDEF0 is split into a2 = 0x9ABCDEF0 (lui 0x9abce, addi -272)
+%% and a3 = 0x12345678 (lui 0x12345, addi 1656). The result register is then
+%% consumed by a tail call to ?PRIM_RAISE_ERROR_TUPLE.
+alloc_boxed_integer_fragment_large_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [
+        ctx, {avm_int64_t, 16#123456789ABCDEF0}
+    ]),
+    % Add a call primitive last to emit literal pool
+    % (no literal pool data appears in the expected dump below; the constants
+    % are built inline with lui/addi)
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [
+        ctx, jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg}
+    ]),
+    ?assertEqual(t6, ResultReg),
+    Stream = ?BACKEND:stream(State2),
+    Dump =
+        <<
+            " 0: 03c62f83 lw t6,60(a2)\n"
+            " 4: ff010113 addi sp,sp,-16\n"
+            " 8: 00112023 sw ra,0(sp)\n"
+            " c: 00a12223 sw a0,4(sp)\n"
+            " 10: 00b12423 sw a1,8(sp)\n"
+            " 14: 00c12623 sw a2,12(sp)\n"
+            " 18: 9abce637 lui a2,0x9abce\n"
+            " 1c: ef060613 addi a2,a2,-272\n"
+            " 20: 123456b7 lui a3,0x12345\n"
+            " 24: 67868693 addi a3,a3,1656\n"
+            " 28: 000f80e7 jalr t6\n"
+            " 2c: 00050f93 mv t6,a0\n"
+            " 30: 00012083 lw ra,0(sp)\n"
+            " 34: 00412503 lw a0,4(sp)\n"
+            " 38: 00812583 lw a1,8(sp)\n"
+            " 3c: 00c12603 lw a2,12(sp)\n"
+            " 40: 01010113 addi sp,sp,16\n"
+            " 44: 04c62f03 lw t5,76(a2)\n"
+            " 48: 04800613 li a2,72\n"
+            " 4c: 28b00693 li a3,651\n"
+            " 50: 000f8713 mv a4,t6\n"
+            " 54: 000f0067 jr t5"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test for stack alignment issue in call_func_ptr
+%% RISC-V maintains 16-byte stack alignment (RISC-V calling convention)
+%% Seven words are saved here (ra, a0, a1, a2, t4, t5, t6 = 28 bytes) and the
+%% expected frame is 32 bytes.
+%% NOTE(review): after the call the expected code stores a0 at -4(sp), i.e.
+%% below the stack pointer, and no later instruction in the dump reads it back
+%% into a register. Confirm that this is the intended backend output and not a
+%% missing save slot for the result register.
+call_func_ptr_stack_alignment_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+    {State5, _ResultReg} = ?BACKEND:call_func_ptr(State4, {free, t3}, [42]),
+    Stream = ?BACKEND:stream(State5),
+    Dump =
+        <<
+            " 0: 01852f83 lw t6,24(a0)\n"
+            " 4: 01c52f03 lw t5,28(a0)\n"
+            " 8: 02052e83 lw t4,32(a0)\n"
+            " c: 02452e03 lw t3,36(a0)\n"
+            " 10: fe010113 addi sp,sp,-32\n"
+            " 14: 00112023 sw ra,0(sp)\n"
+            " 18: 00a12223 sw a0,4(sp)\n"
+            " 1c: 00b12423 sw a1,8(sp)\n"
+            " 20: 00c12623 sw a2,12(sp)\n"
+            " 24: 01d12823 sw t4,16(sp)\n"
+            " 28: 01e12a23 sw t5,20(sp)\n"
+            " 2c: 01f12c23 sw t6,24(sp)\n"
+            " 30: 02a00513 li a0,42\n"
+            " 34: 000e00e7 jalr t3\n"
+            " 38: fea12e23 sw a0,-4(sp)\n"
+            " 3c: 00012083 lw ra,0(sp)\n"
+            " 40: 00412503 lw a0,4(sp)\n"
+            " 44: 00812583 lw a1,8(sp)\n"
+            " 48: 00c12603 lw a2,12(sp)\n"
+            " 4c: 01012e83 lw t4,16(sp)\n"
+            " 50: 01412f03 lw t5,20(sp)\n"
+            " 54: 01812f83 lw t6,24(sp)\n"
+            " 58: 02010113 addi sp,sp,32"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+ +%% Test for register exhaustion issue in call_func_ptr with 5+ arguments +%% When all registers are used and we call a function with 5+ args, +%% set_args needs temporary registers but none are available +call_func_ptr_register_exhaustion_test_() -> + {setup, + fun() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Allocate all available registers to simulate register pressure + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {State6, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + State6 + end, + fun(State6) -> + [ + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, 3, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fe010113 addi sp,sp,-32\n" + " 1c: 00112023 sw ra,0(sp)\n" + " 20: 00a12223 sw a0,4(sp)\n" + " 24: 00b12423 sw a1,8(sp)\n" + " 28: 00c12623 sw a2,12(sp)\n" + " 2c: 00612823 sw t1,16(sp)\n" + " 30: 01c12a23 sw t3,20(sp)\n" + " 34: 01d12c23 sw t4,24(sp)\n" + " 38: 01f12e23 sw t6,28(sp)\n" + " 3c: 00038613 mv a2,t2\n" + " 40: 00300693 li a3,3\n" + " 44: 00100713 li a4,1\n" + " 48: 000f00e7 jalr t5\n" + " 4c: fea12e23 sw a0,-4(sp)\n" + " 50: 00012083 lw ra,0(sp)\n" + " 54: 00412503 lw a0,4(sp)\n" + " 58: 00812583 lw a1,8(sp)\n" + " 5c: 00c12603 lw a2,12(sp)\n" + " 60: 01012303 lw t1,16(sp)\n" + " 64: 01412e03 lw t3,20(sp)\n" + " 68: 01812e83 lw t4,24(sp)\n" + " 6c: 01c12f83 lw t6,28(sp)\n" + " 70: 02010113 addi 
sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, 1, t1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fe010113 addi sp,sp,-32\n" + " 1c: 00112023 sw ra,0(sp)\n" + " 20: 00a12223 sw a0,4(sp)\n" + " 24: 00b12423 sw a1,8(sp)\n" + " 28: 00c12623 sw a2,12(sp)\n" + " 2c: 00612823 sw t1,16(sp)\n" + " 30: 01c12a23 sw t3,20(sp)\n" + " 34: 01d12c23 sw t4,24(sp)\n" + " 38: 01f12e23 sw t6,28(sp)\n" + " 3c: 00038613 mv a2,t2\n" + " 40: 00100693 li a3,1\n" + " 44: 00030713 mv a4,t1\n" + " 48: 000f00e7 jalr t5\n" + " 4c: fea12e23 sw a0,-4(sp)\n" + " 50: 00012083 lw ra,0(sp)\n" + " 54: 00412503 lw a0,4(sp)\n" + " 58: 00812583 lw a1,8(sp)\n" + " 5c: 00c12603 lw a2,12(sp)\n" + " 60: 01012303 lw t1,16(sp)\n" + " 64: 01412e03 lw t3,20(sp)\n" + " 68: 01812e83 lw t4,24(sp)\n" + " 6c: 01c12f83 lw t6,28(sp)\n" + " 70: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, t1, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fe010113 addi sp,sp,-32\n" + " 1c: 00112023 sw ra,0(sp)\n" + " 20: 00a12223 sw a0,4(sp)\n" + " 24: 00b12423 sw a1,8(sp)\n" + " 28: 00c12623 sw a2,12(sp)\n" + " 2c: 00612823 sw t1,16(sp)\n" + " 30: 01c12a23 sw t3,20(sp)\n" + " 34: 01d12c23 sw t4,24(sp)\n" + " 38: 01f12e23 sw t6,28(sp)\n" + " 3c: 00038613 mv a2,t2\n" + " 40: 00030693 mv a3,t1\n" + " 44: 00100713 li a4,1\n" + " 48: 
000f00e7 jalr t5\n" + " 4c: fea12e23 sw a0,-4(sp)\n" + " 50: 00012083 lw ra,0(sp)\n" + " 54: 00412503 lw a0,4(sp)\n" + " 58: 00812583 lw a1,8(sp)\n" + " 5c: 00c12603 lw a2,12(sp)\n" + " 60: 01012303 lw t1,16(sp)\n" + " 64: 01412e03 lw t3,20(sp)\n" + " 68: 01812e83 lw t4,24(sp)\n" + " 6c: 01c12f83 lw t6,28(sp)\n" + " 70: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t5, ResultReg) + end), + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, a1}, + [t5, a3] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: 00112023 sw ra,0(sp)\n" + " 20: 00a12223 sw a0,4(sp)\n" + " 24: 00b12423 sw a1,8(sp)\n" + " 28: 00c12623 sw a2,12(sp)\n" + " 2c: 00612823 sw t1,16(sp)\n" + " 30: 00712a23 sw t2,20(sp)\n" + " 34: 01c12c23 sw t3,24(sp)\n" + " 38: 01d12e23 sw t4,28(sp)\n" + " 3c: 03e12023 sw t5,32(sp)\n" + " 40: 03f12223 sw t6,36(sp)\n" + " 44: 00058313 mv t1,a1\n" + " 48: 000f0513 mv a0,t5\n" + " 4c: 00068593 mv a1,a3\n" + " 50: 000300e7 jalr t1\n" + " 54: 00a12423 sw a0,8(sp)\n" + " 58: 00012083 lw ra,0(sp)\n" + " 5c: 00412503 lw a0,4(sp)\n" + " 60: 00812583 lw a1,8(sp)\n" + " 64: 00c12603 lw a2,12(sp)\n" + " 68: 01012303 lw t1,16(sp)\n" + " 6c: 01412383 lw t2,20(sp)\n" + " 70: 01812e03 lw t3,24(sp)\n" + " 74: 01c12e83 lw t4,28(sp)\n" + " 78: 02012f03 lw t5,32(sp)\n" + " 7c: 02412f83 lw t6,36(sp)\n" + " 80: 03010113 addi sp,sp,48" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {primitive, 2}, + [{free, t5}, a3] + ), + ?assertEqual(ResultReg, t5), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + 
" c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: 00112023 sw ra,0(sp)\n" + " 20: 00a12223 sw a0,4(sp)\n" + " 24: 00b12423 sw a1,8(sp)\n" + " 28: 00c12623 sw a2,12(sp)\n" + " 2c: 00612823 sw t1,16(sp)\n" + " 30: 00712a23 sw t2,20(sp)\n" + " 34: 01c12c23 sw t3,24(sp)\n" + " 38: 01d12e23 sw t4,28(sp)\n" + " 3c: 03f12023 sw t6,32(sp)\n" + " 40: 00862303 lw t1,8(a2)\n" + " 44: 000f0513 mv a0,t5\n" + " 48: 00068593 mv a1,a3\n" + " 4c: 000300e7 jalr t1\n" + " 50: 00050f13 mv t5,a0\n" + " 54: 00012083 lw ra,0(sp)\n" + " 58: 00412503 lw a0,4(sp)\n" + " 5c: 00812583 lw a1,8(sp)\n" + " 60: 00c12603 lw a2,12(sp)\n" + " 64: 01012303 lw t1,16(sp)\n" + " 68: 01412383 lw t2,20(sp)\n" + " 6c: 01812e03 lw t3,24(sp)\n" + " 70: 01c12e83 lw t4,28(sp)\n" + " 74: 02012f83 lw t6,32(sp)\n" + " 78: 03010113 addi sp,sp,48" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test_() -> + [ + ?_test(begin + % Test 1: jump_to_continuation at offset 0 + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, a0}), + Stream = ?BACKEND:stream(State1), + % Expected: riscv32 PIC sequence - simpler than ARM, no prolog/epilog needed + Dump = + << + " 0: 00000f97 auipc t6,0x0\n" + " 4: 00af8fb3 add t6,t6,a0\n" + " 8: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + % Test 2: jump_to_continuation after jump table (non-zero relative address) + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Generate a jump table for 3 labels (4 entries * 8 bytes = 32 bytes) + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:jump_to_continuation(State1, {free, a0}), + Stream = ?BACKEND:stream(State2), + % Expected: jump table (32 bytes) + 
jump_to_continuation + % NetOffset = 0 - 32 = -32 (0xFFFFFFE0) + Dump = + << + " 0: 00000697 auipc a3,0x0\n" + " 4: 00068067 jr a3\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 00068067 jr a3\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 00068067 jr a3\n" + " 18: 00000697 auipc a3,0x0\n" + " 1c: 00068067 jr a3\n" + " 20: 00000f97 auipc t6,0x0\n" + " 24: fe0f8f93 addi t6,t6,-32\n" + " 28: 00af8fb3 add t6,t6,a0\n" + " 2c: 000f8067 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. + +%% Mimic part of add.beam +add_beam_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:move_to_vm_register(State2, 16#9f, {x_reg, 1}), + State4 = ?BACKEND:move_to_vm_register(State3, 16#8f, {x_reg, 0}), + State5 = ?BACKEND:call_only_or_schedule_next(State4, 2), + State6 = ?BACKEND:add_label(State5, 2), + {State7, ResultReg} = ?BACKEND:call_primitive(State6, ?PRIM_ALLOCATE, [ + ctx, jit_state, 1, 0, 1 + ]), + State8 = ?BACKEND:if_block(State7, {'(bool)', {free, ResultReg}, '==', false}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State9 = ?BACKEND:move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + State10 = ?BACKEND:call_or_schedule_next(State9, 3), + State11 = ?BACKEND:add_label(State10, 3), + State12 = ?BACKEND:call_primitive_last(State11, ?PRIM_RETURN, [ + ctx, jit_state + ]), + % OP_INT_CALL_END + State13 = ?BACKEND:add_label(State12, 0), + State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]), + State15 = ?BACKEND:update_branches(State14), + Stream = ?BACKEND:stream(State15), + riscv32_helper:disassemble(Stream), + Dump = + << + % jump table (new 8-byte format) + " 0: 00000697 auipc a3,0x0\n" + " 4: 11868067 jr 280(a3)\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01868067 jr 24(a3)\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 05468067 jr 84(a3)\n" + 
" 18: 00000697 auipc a3,0x0\n" + " 1c: 0f868067 jr 248(a3)\n" + % label 1 + % {move,{integer,9},{x,1}}. + " 20: 09f00f93 li t6,159\n" + " 24: 01f52e23 sw t6,28(a0)\n" + % {move,{integer,8},{x,0}} + " 28: 08f00f93 li t6,143\n" + " 2c: 01f52c23 sw t6,24(a0)\n" + % {call_only,2,{f,2}}. + " 30: 0085af83 lw t6,8(a1)\n" + " 34: ffff8f93 addi t6,t6,-1\n" + " 38: 01f5a423 sw t6,8(a1)\n" + " 3c: 000f8a63 beqz t6,0x50\n" + " 40: 0240006f j 0x64\n" + " 44: 00000013 nop\n" + " 48: 00000013 nop\n" + " 4c: 00000013 nop\n" + " 50: 00000f97 auipc t6,0x0\n" + " 54: 014f8f93 addi t6,t6,20\n" + " 58: 01f5a223 sw t6,4(a1)\n" + " 5c: 00862f83 lw t6,8(a2)\n" + " 60: 000f8067 jr t6\n" + % label 2 + % {allocate,1,1}. + " 64: 01462f83 lw t6,20(a2)\n" + " 68: ff010113 addi sp,sp,-16\n" + " 6c: 00112023 sw ra,0(sp)\n" + " 70: 00a12223 sw a0,4(sp)\n" + " 74: 00b12423 sw a1,8(sp)\n" + " 78: 00c12623 sw a2,12(sp)\n" + " 7c: 00100613 li a2,1\n" + " 80: 00000693 li a3,0\n" + " 84: 00100713 li a4,1\n" + " 88: 000f80e7 jalr t6\n" + " 8c: 00050f93 mv t6,a0\n" + " 90: 00012083 lw ra,0(sp)\n" + " 94: 00412503 lw a0,4(sp)\n" + " 98: 00812583 lw a1,8(sp)\n" + " 9c: 00c12603 lw a2,12(sp)\n" + " a0: 01010113 addi sp,sp,16\n" + " a4: 01ff9f13 slli t5,t6,0x1f\n" + " a8: 000f4863 bltz t5,0xb8\n" + " ac: 01862f83 lw t6,24(a2)\n" + " b0: 0b000613 li a2,176\n" + " b4: 000f8067 jr t6\n" + % {init_yregs,{list,[{y,0}]}}. 
+ %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + " b8: 03b00f13 li t5,59\n" + " bc: 01452f83 lw t6,20(a0)\n" + " c0: 01efa023 sw t5,0(t6)\n" + % {call,1,{f,3}} + %% call_or_schedule_next(State9, 3), + " c4: 0005af03 lw t5,0(a1)\n" + " c8: 000f2f03 lw t5,0(t5)\n" + " cc: 018f1f13 slli t5,t5,0x18\n" + " d0: 44000f93 li t6,1088\n" + " d4: 01ff6f33 or t5,t5,t6\n" + " d8: 05e52e23 sw t5,92(a0)\n" + " dc: 0085af83 lw t6,8(a1)\n" + " e0: ffff8f93 addi t6,t6,-1\n" + " e4: 01f5a423 sw t6,8(a1)\n" + " e8: 000f8a63 beqz t6,0xfc\n" + " ec: 0240006f j 0x110\n" + " f0: 00000013 nop\n" + " f4: 00000013 nop\n" + " f8: 00000013 nop\n" + " fc: 00000f97 auipc t6,0x0\n" + " 100: 014f8f93 addi t6,t6,20\n" + " 104: 01f5a223 sw t6,4(a1)\n" + " 108: 00862f83 lw t6,8(a2)\n" + " 10c: 000f8067 jr t6\n" + %% (continuation) + % label 3 + " 110: 00462f83 lw t6,4(a2)\n" + " 114: 000f8067 jr t6\n" + % label 0 + " 118: 00462f83 lw t6,4(a2)\n" + " 11c: 000f8067 jr t6\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +dump_to_bin(Dump) -> + dump_to_bin0(Dump, addr, []). + +-define(IS_HEX_DIGIT(C), + ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) +). 
+
+%% dump_to_bin0(Dump, Mode, Acc) -> binary()
+%%
+%% Worker for dump_to_bin/1: walks an objdump-style listing one byte at a
+%% time and collects the encoded instruction words.
+%%
+%% Mode is a small state machine:
+%%   addr  - skipping the address column; a hex digit followed by `:' ends it
+%%   hex   - skipping whitespace until the hex encoding of the instruction
+%%   instr - skipping the mnemonic/operands text up to the end of the line
+%%
+%% Acc holds the encoded words in reverse order; the final clause reverses
+%% and concatenates them.
+%%
+%% NOTE(review): the binary patterns and constructors below were lost in the
+%% extracted text (they appeared as `<>') and have been reconstructed from the
+%% variables used in the guards and bodies. The tab clauses appeared as `$ ',
+%% which duplicated the `$\s' clauses; they are restored as `$\t'. The 16-bit
+%% word order/endianness of the two Thumb clauses is the part with the least
+%% evidence here - confirm against the armv6m test helper.
+dump_to_bin0(<<N, $:, Tail/binary>>, addr, Acc) when ?IS_HEX_DIGIT(N) ->
+    % Last address digit followed by ':' - the encoding column comes next
+    dump_to_bin0(Tail, hex, Acc);
+dump_to_bin0(<<N, Tail/binary>>, addr, Acc) when ?IS_HEX_DIGIT(N) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) ->
+    dump_to_bin0(Tail, hex, Acc);
+dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) ->
+    dump_to_bin0(Tail, hex, Acc);
+%% Handle RISC-V 32-bit instructions (8 consecutive hex digits)
+dump_to_bin0(<<H1, H2, H3, H4, H5, H6, H7, H8, Sp, Rest/binary>>, hex, Acc) when
+    (Sp =:= $\t orelse Sp =:= $\s) andalso
+        ?IS_HEX_DIGIT(H1) andalso
+        ?IS_HEX_DIGIT(H2) andalso
+        ?IS_HEX_DIGIT(H3) andalso
+        ?IS_HEX_DIGIT(H4) andalso
+        ?IS_HEX_DIGIT(H5) andalso
+        ?IS_HEX_DIGIT(H6) andalso
+        ?IS_HEX_DIGIT(H7) andalso
+        ?IS_HEX_DIGIT(H8)
+->
+    %% RISC-V instructions are 32-bit little-endian
+    Instr = list_to_integer([H1, H2, H3, H4, H5, H6, H7, H8], 16),
+    dump_to_bin0(Rest, instr, [<<Instr:32/little>> | Acc]);
+%% Handle 32-bits undefined instruction (ARM format with space: "1234 5678")
+dump_to_bin0(<<H1, H2, H3, H4, $\s, H5, H6, H7, H8, Sp, Rest/binary>>, hex, Acc) when
+    (Sp =:= $\t orelse Sp =:= $\s) andalso
+        ?IS_HEX_DIGIT(H1) andalso
+        ?IS_HEX_DIGIT(H2) andalso
+        ?IS_HEX_DIGIT(H3) andalso
+        ?IS_HEX_DIGIT(H4) andalso
+        ?IS_HEX_DIGIT(H5) andalso
+        ?IS_HEX_DIGIT(H6) andalso
+        ?IS_HEX_DIGIT(H7) andalso
+        ?IS_HEX_DIGIT(H8)
+->
+    InstrA = list_to_integer([H1, H2, H3, H4], 16),
+    InstrB = list_to_integer([H5, H6, H7, H8], 16),
+    % Acc is reversed at the end, so InstrB is pushed first to emit A then B
+    dump_to_bin0(Rest, instr, [<<InstrB:16/little>>, <<InstrA:16/little>> | Acc]);
+%% Handle 16-bit ARM32 Thumb instructions (4 hex digits)
+dump_to_bin0(<<H1, H2, H3, H4, Sp, Rest/binary>>, hex, Acc) when
+    (Sp =:= $\t orelse Sp =:= $\s) andalso
+        ?IS_HEX_DIGIT(H1) andalso
+        ?IS_HEX_DIGIT(H2) andalso
+        ?IS_HEX_DIGIT(H3) andalso
+        ?IS_HEX_DIGIT(H4)
+->
+    %% Parse 4 hex digits (ARM32 Thumb 16-bit instruction)
+    Instr = list_to_integer([H1, H2, H3, H4], 16),
+    dump_to_bin0(Rest, instr, [<<Instr:16/little>> | Acc]);
+dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) ->
+    % Line without an encoding column: start over on the next line
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) ->
+    % Mnemonic and operands carry no bytes; skip to end of line
+    dump_to_bin0(Tail, instr, Acc);
+dump_to_bin0(<<>>, _, Acc) ->
+    list_to_binary(lists:reverse(Acc)).
diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index a289a01a6f..2d130cad03 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -31,6 +31,7 @@ start() -> jit_aarch64_asm_tests, jit_armv6m_tests, jit_armv6m_asm_tests, + jit_riscv32_tests, jit_riscv32_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests From cf179f214be2fb24157b8ca2b52ac9ecb1bbfc6f Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Wed, 15 Oct 2025 22:25:39 +0200 Subject: [PATCH 10/28] riscv32: add to workflow using a cross toolchain and libraries Signed-off-by: Paul Guyot --- .github/workflows/build-and-test.yaml | 106 +++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index 610db683b1..f78249bcaa 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -366,6 +366,19 @@ jobs: arch: "s390x" library-arch: s390x-linux-gnu + # riscv32-ilp32 build + - os: "ubuntu-24.04" + cc: "riscv32-unknown-linux-gnu-gcc" + cxx: "riscv32-unknown-linux-gnu-g++" + cflags: "-O2" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: "-DAVM_WARNINGS_ARE_ERRORS=ON -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake" + compiler_pkgs: "qemu-user qemu-user-binfmt binfmt-support" + arch: "riscv32" + library-arch: riscv32-linux-gnu-ilp32 + env: ImageOS: ${{ matrix.container == 'ubuntu:20.04' && 'ubuntu20' || matrix.os == 'ubuntu-20.04' && 'ubuntu20' || matrix.os == 'ubuntu-22.04' && 'ubuntu22' || matrix.os == 'ubuntu-24.04' && 'ubuntu24' || 'ubuntu24' }} CC: ${{ matrix.cc }} @@
-386,7 +399,7 @@ jobs: run: sudo dpkg --add-architecture i386 - name: "Setup cross compilation architecture" - if: matrix.library-arch != '' + if: matrix.library-arch != '' && matrix.library-arch != 'riscv32-linux-gnu-ilp32' run: | sudo dpkg --add-architecture ${{ matrix.arch }} cat > ${RUNNER_TEMP}/cross-compile-sources.list <> $GITHUB_PATH + + # Install the libs + sudo dpkg -i libc6-ilp32_2.39-0ubuntu1_riscv32.deb + sudo dpkg -i libc6-dev-ilp32_2.39-0ubuntu1_riscv32.deb + sudo dpkg -i libc6-dbg-ilp32_2.39-0ubuntu1_riscv32.deb + + sudo dpkg -i zlib1g-ilp32_1.3.1-0ubuntu1_riscv32.deb + sudo dpkg -i zlib1g-dev-ilp32_1.3.1-0ubuntu1_riscv32.deb + + # Install mbedtls runtime packages first (in dependency order) + sudo dpkg -i libmbedcrypto7-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedx509-1-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedtls14-ilp32_2.28.8-0ubuntu1_riscv32.deb + # Then install the dev package + sudo dpkg -i libmbedtls-dev-ilp32_2.28.8-0ubuntu1_riscv32.deb + + sudo sed -i '/Types: deb/a Architectures: amd64' /etc/apt/sources.list.d/ubuntu.sources + + cat > ${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake <<'EOF' + # Toolchain file for RISC-V32 ILP32 (RV32-IMAC) cross-compilation + set(CMAKE_SYSTEM_NAME Linux) + set(CMAKE_SYSTEM_PROCESSOR riscv32) + set(CMAKE_C_LIBRARY_ARCHITECTURE riscv32-linux-gnu-ilp32) + + # Specify the cross compiler + set(CMAKE_C_COMPILER riscv32-unknown-linux-gnu-gcc) + set(CMAKE_CXX_COMPILER riscv32-unknown-linux-gnu-g++) + + # Specify the target architecture + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=rv32imac -mabi=ilp32" CACHE STRING "" FORCE) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=rv32imac -mabi=ilp32" CACHE STRING "" FORCE) + + # Set up paths for cross-compiled libraries + set(ZLIB_LIBRARY /usr/lib/riscv32-linux-gnu-ilp32/libz.so CACHE FILEPATH "") + set(ZLIB_INCLUDE_DIR /usr/include/riscv32-linux-gnu CACHE PATH "") + set(ZLIB_FOUND TRUE CACHE BOOL "") + + # MbedTLS configuration + 
set(MBEDTLS_ROOT_DIR /usr) + set(MBEDTLS_LIBRARIES_DIR /usr/lib/riscv32-linux-gnu-ilp32) + + # Add cross-compilation include path to compiler flags + include_directories(SYSTEM /usr/include/riscv32-linux-gnu) + + # Search for programs in the build host directories + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) + + # Search for libraries and headers in the target directories + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + EOF + + # Set up qemu-user binfmt to find libraries + sudo ln -s /opt/riscv32-ilp32/sysroot/lib/ld-linux-riscv32-ilp32.so.1 /lib/ld-linux-riscv32-ilp32.so.1 + sudo mkdir -p /usr/gnemul + sudo ln -s /opt/riscv32-ilp32/sysroot /usr/gnemul/qemu-riscv32 + + # Copy cross-compiled libraries to sysroot for qemu-user + sudo cp /usr/lib/${{ matrix.library-arch }}/libz.so.1* /opt/riscv32-ilp32/sysroot/lib/ + sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedtls.so.14 /opt/riscv32-ilp32/sysroot/lib/ + sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedcrypto.so.7 /opt/riscv32-ilp32/sysroot/lib/ + sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedx509.so.1 /opt/riscv32-ilp32/sysroot/lib/ + + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: "APT update" run: sudo apt update -y From d8fbbe337c0d3914da4ad0edd29889d313f4581b Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 18 Oct 2025 19:09:59 +0200 Subject: [PATCH 11/28] riscv32: add to precompiled targets and test infrastructure - Add riscv32 to AVM_PRECOMPILED_TARGETS - Enable ENABLE_TRACE for debugging - Add riscv32 directory handling in test.c Signed-off-by: Paul Guyot --- CMakeLists.txt | 2 +- src/libAtomVM/jit.c | 2 +- src/libAtomVM/opcodesswitch.h | 2 +- tests/test.c | 5 +++++ 4 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 32484ee851..307917422f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -64,7 +64,7 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED 
AVM_JIT_TARGET_ARCH) endif() endif() -set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") +set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32;riscv32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index f1191ba11e..1141466791 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -39,7 +39,7 @@ #include #include -// #define ENABLE_TRACE +#define ENABLE_TRACE #include "trace.h" // Verify matching atom index in default_atoms.hrl diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index f18d892838..f8e15c831e 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -43,7 +43,7 @@ #include "stacktrace.h" #endif -//#define ENABLE_TRACE +#define ENABLE_TRACE #include "trace.h" // These constants can be used to reduce the size of the VM for a specific diff --git a/tests/test.c b/tests/test.c index 577572c967..5db01bfff0 100644 --- a/tests/test.c +++ b/tests/test.c @@ -713,6 +713,11 @@ int test_modules_execution(bool beam, bool skip, int count, char **item) perror("Error: cannot find armv6m directory"); return EXIT_FAILURE; } +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 + if (chdir("riscv32") != 0) { + perror("Error: cannot find riscv32 directory"); + return EXIT_FAILURE; + } #else #error Unknown JIT target #endif From 620832305f3f59b538f610554c7356ffa2906d3f Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 19 Oct 2025 15:40:59 +0200 Subject: [PATCH 12/28] riscv32: remove unused literal pool logic Signed-off-by: Paul Guyot --- libs/jit/src/jit_riscv32.erl | 44 ++++-------------------------------- 1 file changed, 5 insertions(+), 39 deletions(-) diff --git a/libs/jit/src/jit_riscv32.erl 
b/libs/jit/src/jit_riscv32.erl index 56887fb064..b0f3ba0b62 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -166,8 +166,7 @@ available_regs :: [riscv32_register()], used_regs :: [riscv32_register()], labels :: [{integer() | reference(), integer()}], - variant :: non_neg_integer(), - literal_pool :: [{non_neg_integer(), riscv32_register(), non_neg_integer()}] + variant :: non_neg_integer() }). -type state() :: #state{}. @@ -275,8 +274,7 @@ new(Variant, StreamModule, Stream) -> available_regs = ?AVAILABLE_REGS, used_regs = [], labels = [], - variant = Variant, - literal_pool = [] + variant = Variant }. %%----------------------------------------------------------------------------- @@ -630,8 +628,7 @@ call_primitive_last( State2 = set_registers_args(State1, ArgsForTailCall, 0), tail_call_with_jit_state_registers_only(State2, Temp) end, - State5 = State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}, - flush_literal_pool(State5). + State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. %%----------------------------------------------------------------------------- %% @doc Tail call to address in register. @@ -708,15 +705,13 @@ jump_to_label( Offset = StreamModule:offset(Stream0), {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), Stream1 = StreamModule:append(Stream0, CodeBlock), - State2 = State1#state{stream = Stream1}, - flush_literal_pool(State2). + State1#state{stream = Stream1}. jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> Offset = StreamModule:offset(Stream0), CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), Stream1 = StreamModule:append(Stream0, CodeBlock), - State2 = State#state{stream = Stream1}, - flush_literal_pool(State2). + State#state{stream = Stream1}. 
%%----------------------------------------------------------------------------- %% @doc Jump to address in continuation pointer register @@ -2531,35 +2526,6 @@ mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Re Stream1 = StreamModule:append(Stream0, I), State#state{stream = Stream1}. -flush_literal_pool(#state{literal_pool = []} = State) -> - State; -flush_literal_pool( - #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State -) -> - % Align - Offset = StreamModule:offset(Stream0), - Stream1 = - if - Offset rem 4 =:= 0 -> Stream0; - true -> StreamModule:append(Stream0, <<0:16>>) - end, - % Lay all values and update ldr instructions - Stream2 = lists:foldl( - fun({LdrInstructionAddr, Reg, Val}, AccStream) -> - LiteralPosition = StreamModule:offset(AccStream), - LdrPC = (LdrInstructionAddr band (bnot 3)) + 4, - LiteralOffset = LiteralPosition - LdrPC, - LdrInstruction = jit_riscv32_asm:lw(Reg, pc, LiteralOffset), - AccStream1 = StreamModule:append(AccStream, <>), - StreamModule:replace( - AccStream1, LdrInstructionAddr, LdrInstruction - ) - end, - Stream1, - lists:reverse(LP) - ), - State#state{stream = Stream2, literal_pool = []}. - sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when Val >= 0 andalso Val =< 255 -> From 288e87429f9468138e7d5e2c9c001d670d061e32 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 19 Oct 2025 15:41:33 +0200 Subject: [PATCH 13/28] Aarch64: add tests for better coverage Signed-off-by: Paul Guyot --- tests/libs/jit/jit_aarch64_tests.erl | 333 +++++++++++++++++++++++++++ 1 file changed, 333 insertions(+) diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 00e5de8bfe..6d2a937e53 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -88,6 +88,51 @@ call_primitive_2_args_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). 
+%% call_primitive_last/3 with five arguments: ctx, jit_state and three
+%% immediates. The expected code loads the primitive pointer from
+%% [x2, #40] first, then overwrites x2..x4 with the immediates and
+%% branches without linking (tail call).
+call_primitive_5_args_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            " 0: f9401447 ldr x7, [x2, #40]\n"
+            " 4: d2800202 mov x2, #0x10 // #16\n"
+            " 8: d2800403 mov x3, #0x20 // #32\n"
+            " c: d2800044 mov x4, #0x2 // #2\n"
+            " 10: d61f00e0 br x7"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% call_primitive/3 with six arguments, two of them native registers passed
+%% as {free, Reg}. The expected code saves x30/x0/x1/x2 around the call,
+%% moves the two native registers into x2 and x5, and copies the result
+%% from x0 into x7.
+call_primitive_6_args_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Get bin_ptr from x_reg 0 (similar to get_list_test pattern)
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    % NOTE(review): this uses and_/3 in its state-only form. The first patch
+    % of this series rewrites call sites as
+    % `{State, Reg} = and_(State, {free, Reg}, Mask)' - confirm which
+    % signature is in effect when this test is applied.
+    State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK),
+    % Get another register for the last parameter to test {free, Reg} handling
+    {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}),
+    % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments
+    {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [
+        ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg}
+    ]),
+    Stream = ?BACKEND:stream(State4),
+    Dump =
+        <<
+            " 0: f9401807 ldr x7, [x0, #48]\n"
+            " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n"
+            " 8: f9401c08 ldr x8, [x0, #56]\n"
+            " c: f940b850 ldr x16, [x2, #368]\n"
+            " 10: a9bf03fe stp x30, x0, [sp, #-16]!\n"
+            " 14: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
+            " 18: aa0703e2 mov x2, x7\n"
+            " 1c: d2800803 mov x3, #0x40 // #64\n"
+            " 20: d2800104 mov x4, #0x8 // #8\n"
+            " 24: aa0803e5 mov x5, x8\n"
+            " 28: d63f0200 blr x16\n"
+            " 2c: aa0003e7 mov x7, x0\n"
+            " 30: a8c10be1 ldp x1, x2, [sp], #16\n"
+            " 34: a8c103fe ldp x30, x0, [sp], #16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+ call_primitive_extended_regs_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), @@ -146,6 +191,44 @@ call_primitive_extended_regs_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_primitive_few_free_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg1} = ?BACKEND:move_to_native_register(State0, 1), + {State2, Reg2} = ?BACKEND:move_to_native_register(State1, 2), + {State3, Reg3} = ?BACKEND:move_to_native_register(State2, 3), + {State4, Reg4} = ?BACKEND:move_to_native_register(State3, 4), + {State5, Reg5} = ?BACKEND:move_to_native_register(State4, 5), + {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [ + Reg2, Reg1, {free, Reg4}, Reg3, {free, Reg5} + ]), + State7 = ?BACKEND:free_native_registers(State6, [ResultReg, Reg2, Reg1, Reg3]), + ?BACKEND:assert_all_native_free(State7), + Stream = ?BACKEND:stream(State7), + Dump = << + " 0: d2800027 mov x7, #0x1 // #1\n" + " 4: d2800048 mov x8, #0x2 // #2\n" + " 8: d2800069 mov x9, #0x3 // #3\n" + " c: d280008a mov x10, #0x4 // #4\n" + " 10: d28000ab mov x11, #0x5 // #5\n" + " 14: f940e450 ldr x16, [x2, #456]\n" + " 18: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 1c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 20: a9bf23e9 stp x9, x8, [sp, #-16]!\n" + " 24: f81f0fe7 str x7, [sp, #-16]!\n" + " 28: aa0803e0 mov x0, x8\n" + " 2c: aa0703e1 mov x1, x7\n" + " 30: aa0a03e2 mov x2, x10\n" + " 34: aa0903e3 mov x3, x9\n" + " 38: aa0b03e4 mov x4, x11\n" + " 3c: d63f0200 blr x16\n" + " 40: aa0003ea mov x10, x0\n" + " 44: f84107e7 ldr x7, [sp], #16\n" + " 48: a8c123e9 ldp x9, x8, [sp], #16\n" + " 4c: a8c10be1 ldp x1, x2, [sp], #16\n" + " 50: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ call_ext_only_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -168,6 +251,23 @@ call_ext_only_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_primitive_last_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA} + ]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9404c48 ldr x8, [x2, #152]\n" + " 8: d2800102 mov x2, #0x8 // #8\n" + " c: d2805963 mov x3, #0x2cb // #715\n" + " 10: aa0703e4 mov x4, x7\n" + " 14: d61f0100 br x8" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_last_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -1038,6 +1138,179 @@ is_boolean_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). 
+%% Test OP_WAIT_TIMEOUT pattern
+%%
+%% Emits, in order: a continuation set to a forward offset, a tail call to
+%% PRIM_WAIT_TIMEOUT, the continuation entry point, a call to
+%% PRIM_PROCESS_SIGNAL_MESSAGES guarded by return_if_not_equal_to_ctx, and an
+%% if_block that tail-calls the trap handler when the flags result is 0.
+wait_timeout_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    Label = 42,
+    % OffsetRef0 is bound to a stream position by the add_label/2 call below
+    {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0),
+    {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000),
+    State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [
+        ctx, jit_state, {free, TimeoutReg}, Label
+    ]),
+    State4 = ?BACKEND:add_label(State3, OffsetRef0),
+    State5 = ?BACKEND:continuation_entry_point(State4),
+    {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+        ctx, jit_state
+    ]),
+    State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}),
+    % ?WAITING_TIMEOUT_EXPIRED
+    {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
+    State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) ->
+        ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
+            ctx, jit_state, Label
+        ])
+    end),
+    % Resolves the forward reference, so the adr at offset 0 targets 0x1c
+    State10 = ?BACKEND:update_branches(State9),
+
+    Stream = ?BACKEND:stream(State10),
+    Dump = <<
+        " 0: 100000e7 adr x7, 0x1c\n"
+        " 4: f9000427 str x7, [x1, #8]\n"
+        " 8: d2827107 mov x7, #0x1388 // #5000\n"
+        " c: f9407848 ldr x8, [x2, #240]\n"
+        " 10: aa0703e2 mov x2, x7\n"
+        " 14: d2800543 mov x3, #0x2a // #42\n"
+        " 18: d61f0100 br x8\n"
+        " 1c: f9405450 ldr x16, [x2, #168]\n"
+        " 20: a9bf03fe stp x30, x0, [sp, #-16]!\n"
+        " 24: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
+        " 28: d63f0200 blr x16\n"
+        " 2c: aa0003e7 mov x7, x0\n"
+        " 30: a8c10be1 ldp x1, x2, [sp], #16\n"
+        " 34: a8c103fe ldp x30, x0, [sp], #16\n"
+        " 38: eb0000ff cmp x7, x0\n"
+        " 3c: 54000060 b.eq 0x48 // b.none\n"
+        " 40: aa0703e0 mov x0, x7\n"
+        " 44: d65f03c0 ret\n"
+        " 48: f9408450 ldr x16, [x2, #264]\n"
+        " 4c: a9bf03fe stp x30, x0, [sp, #-16]!\n"
+        " 50: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
+        " 54: d2800041 mov x1, #0x2 // #2\n"
+        " 58: d63f0200 blr x16\n"
+        " 5c: aa0003e7 mov x7, x0\n"
+        " 60: a8c10be1 ldp x1, x2, [sp], #16\n"
+        " 64: a8c103fe ldp x30, x0, [sp], #16\n"
+        " 68: b5000087 cbnz x7, 0x78\n"
+        " 6c: f9407c47 ldr x7, [x2, #248]\n"
+        " 70: d2800542 mov x2, #0x2a // #42\n"
+        " 74: d61f00e0 br x7"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT pattern that uses set_continuation_to_label
+%%
+%% update_branches/1 is not called here and label 2 is never added, so the
+%% expected dump shows the jump table entries and the adr still branching to
+%% their own addresses (presumably the unresolved placeholder encoding).
+wait_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5),
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:set_continuation_to_label(State2, Label),
+    State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+
+    Stream = ?BACKEND:stream(State4),
+    Dump = <<
+        " 0: 14000000 b 0x0\n"
+        " 4: 14000000 b 0x4\n"
+        " 8: 14000000 b 0x8\n"
+        " c: 14000000 b 0xc\n"
+        " 10: 14000000 b 0x10\n"
+        " 14: 14000000 b 0x14\n"
+        " 18: 10000007 adr x7, 0x18\n"
+        " 1c: f9000427 str x7, [x1, #8]\n"
+        " 20: f9407447 ldr x7, [x2, #232]\n"
+        " 24: d61f00e0 br x7"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Checks the code and data emitted by return_labels_and_lines/2: an
+%% `adr x0' / `ret' pair followed by the labels table and the lines table.
+return_labels_and_lines_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    % Test return_labels_and_lines with some sample labels and lines
+    % (labels are added out of order on purpose: 2 first, then 1)
+    State1 = ?BACKEND:add_label(State0, 2, 32),
+    State2 = ?BACKEND:add_label(State1, 1, 16),
+
+    % {Line, Offset} pairs
+    SortedLines = [{10, 16}, {20, 32}],
+
+    State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines),
+    Stream = ?BACKEND:stream(State3),
+
+    % Should have generated adr + ret + labels table + lines table
+    % adr = 4 bytes, ret = 4 bytes, then 28 bytes of data (0x8..0x23 in the dump below):
+    % two tables of 2 entries * 6 bytes plus 2 more bytes each (presumably an entry count)
+    % Total: 4 + 4 + 14 + 14 = 36 bytes
+    ?assert(byte_size(Stream) >= 36),
+
+    % Expected: adr x0, #8 + ret + labels table + lines table
+    % The data tables start at offset 0x8, so we load PC + 8 into x0
+    % (the `adr x0, 0xc' line at 0xc is table data that the disassembler decoded as code)
+    Dump = <<
+        " 0: 10000040 adr x0, 0x8\n"
+        " 4: d65f03c0 ret\n"
+        " 8: 01000200 .word 0x01000200\n"
+        " c: 10000000 adr x0, 0xc\n"
+        " 10: 00000200 .word 0x00000200\n"
+        " 14: 02002000 .word 0x02002000\n"
+        " 18: 00000a00 .word 0x00000a00\n"
+        " 1c: 14001000 .word 0x14001000\n"
+        " 20: 20000000 .word 0x20000000"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test call_func_ptr with a {free, {x_reg, X}} argument, using a function
+%% pointer obtained from call_primitive (PRIM_GET_IMPORTED_BIF).
+%%
+%% The pointer returned by the first call lands in x7 and is the target of
+%% the second `blr'. In the expected code the {y_reg, 0} argument appears to
+%% be fetched with two loads into x3 and the x register with a single load
+%% into x4.
+gc_bif2_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]),
+    {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [
+        ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}
+    ]),
+
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        " 0: f9402050 ldr x16, [x2, #64]\n"
+        " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n"
+        " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
+        " c: aa0103e0 mov x0, x1\n"
+        " 10: d2800541 mov x1, #0x2a // #42\n"
+        " 14: d63f0200 blr x16\n"
+        " 18: aa0003e7 mov x7, x0\n"
+        " 1c: a8c10be1 ldp x1, x2, [sp], #16\n"
+        " 20: a8c103fe ldp x30, x0, [sp], #16\n"
+        " 24: a9bf03fe stp x30, x0, [sp, #-16]!\n"
+        " 28: a9bf0be1 stp x1, x2, [sp, #-16]!\n"
+        " 2c: d2800001 mov x1, #0x0 // #0\n"
+        " 30: d2800062 mov x2, #0x3 // #3\n"
+        " 34: f9401403 ldr x3, [x0, #40]\n"
+        " 38: f9400063 ldr x3, [x3]\n"
+        " 3c: f9401804 ldr x4, [x0, #48]\n"
+        " 40: d63f00e0 blr x7\n"
+        " 44: aa0003e7 mov x7, x0\n"
+        " 48: a8c10be1 ldp x1, x2, [sp], #16\n"
+        " 4c: a8c103fe ldp x30, x0, [sp], #16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+ +%% Test case where parameter value is in r1 +memory_ensure_free_with_roots_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [ + ctx, jit_state, {free, r1}, 4, 1 + ]), + + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f940b050 ldr x16, [x2, #352]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: aa0103e2 mov x2, x1\n" + " 10: d2800083 mov x3, #0x4 // #4\n" + " 14: d2800024 mov x4, #0x1 // #1\n" + " 18: d63f0200 blr x16\n" + " 1c: aa0003e7 mov x7, x0\n" + " 20: a8c10be1 ldp x1, x2, [sp], #16\n" + " 24: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -1662,6 +1935,66 @@ move_to_native_register_test_() -> ] end}. +add_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:add(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +add_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + add_test0(State0, r2, 2, << + " 0: 91000842 add x2, x2, #0x2" + >>) + end), + ?_test(begin + add_test0(State0, r2, 256, << + " 0: 91040042 add x2, x2, #0x100" + >>) + end), + ?_test(begin + add_test0(State0, r2, r3, << + " 0: 8b030042 add x2, x2, x3" + >>) + end) + ] + end}. + +sub_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:sub(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +sub_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + sub_test0(State0, r2, 2, << + " 0: d1000842 sub x2, x2, #0x2" + >>) + end), + ?_test(begin + sub_test0(State0, r2, 256, << + " 0: d1040042 sub x2, x2, #0x100" + >>) + end), + ?_test(begin + sub_test0(State0, r2, r3, << + " 0: cb030042 sub x2, x2, x3" + >>) + end) + ] + end}. + mul_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:mul(State0, Reg, Imm), Stream = ?BACKEND:stream(State1), From 04578e12be2c56a2470a53447ee5b49037fa87af Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 19 Oct 2025 17:08:52 +0200 Subject: [PATCH 14/28] riscv32: fix several backend bugs Signed-off-by: Paul Guyot --- libs/jit/src/jit_riscv32.erl | 148 +++++---- src/libAtomVM/jit.c | 2 +- src/libAtomVM/opcodesswitch.h | 2 +- tests/libs/jit/jit_riscv32_tests.erl | 464 +++++++++++++-------------- 4 files changed, 310 insertions(+), 306 deletions(-) diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index b0f3ba0b62..83fed2585c 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -468,19 +468,13 @@ update_branches( >>, <>; true -> - % Keep far branch sequence: auipc + lw + jalr + data - % RISC-V far branch is always 16 bytes - case Size of - 16 -> - % 16-byte sequence: auipc + lw + jalr + data - I1 = jit_riscv32_asm:auipc(TempReg, 0), - I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8), - I3 = jit_riscv32_asm:jalr(zero, TempReg, 0), - % Calculate absolute target address - TargetAddress = LabelOffset, - I4 = <>, - <> - end + % Keep far branch sequence: auipc + jalr (PC-relative, 8 bytes) + % Split the relative offset into upper 20 bits and lower 12 bits + Hi20 = (Rel + 16#800) bsr 12, + Lo12 = Rel - (Hi20 bsl 12), + I1 = jit_riscv32_asm:auipc(TempReg, Hi20), + I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12), + <> end; jump_table_auipc_jalr -> % Calculate PC-relative offset from AUIPC 
instruction to target @@ -679,7 +673,8 @@ return_if_not_equal_to_ctx( end, I3 = jit_riscv32_asm:ret(), % Branch if equal (skip the return) - I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, byte_size(I2) + byte_size(I3)), + % Offset must account for the beq instruction itself (4 bytes) plus I2 and I3 + I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, 4 + byte_size(I2) + byte_size(I3)), Stream1 = StreamModule:append(Stream0, <>), {AvailableRegs1, UsedRegs1} = free_reg( AvailableRegs0, UsedRegs0, Reg @@ -754,20 +749,30 @@ branch_to_offset_code(_State, Offset, TargetOffset) when Rel = TargetOffset - Offset, jit_riscv32_asm:j(Rel); branch_to_offset_code( - #state{available_regs = [TempReg | _]}, _Offset, TargetOffset + #state{available_regs = [TempReg | _]}, Offset, TargetOffset ) -> - % Far branch: use auipc + lw + jalr sequence (RISC-V) - % This creates a PC-relative load sequence - always 16 bytes (4-byte aligned) + % Far branch: use auipc + jalr sequence for PC-relative addressing + % This computes: PC + Immediate and jumps to it - % TempReg = PC - I1 = jit_riscv32_asm:auipc(TempReg, 0), - % TempReg = *(PC+8) - I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8), - % Jump to TempReg - I3 = jit_riscv32_asm:jalr(zero, TempReg, 0), - % The literal value is the absolute target offset - I4 = <>, - <>. 
+ Rel = TargetOffset - Offset, + % Split the relative offset into upper 20 bits and lower 12 bits + % RISC-V PC-relative addressing: target = PC + (imm20 << 12) + sign_extend(imm12) + % Since jalr's imm12 is sign-extended, if bit 11 of Rel is set, + % we need to add 0x800 before splitting to compensate + Hi20 = (Rel + 16#800) bsr 12, + Lo12Unsigned = Rel band 16#FFF, + % Convert to signed 12-bit value: if bit 11 is set, subtract 4096 + Lo12 = + if + Lo12Unsigned >= 16#800 -> Lo12Unsigned - 16#1000; + true -> Lo12Unsigned + end, + + % TempReg = PC + (Hi20 << 12) + I1 = jit_riscv32_asm:auipc(TempReg, Hi20), + % Jump to TempReg + sign_extend(Lo12) + I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12), + <>. branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), @@ -775,17 +780,13 @@ branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> branch_to_label_code( #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false ) -> - % RISC-V: Far branch sequence - always 16 bytes (4-byte aligned) + % RISC-V: Far branch sequence using PC-relative auipc + jalr (8 bytes) - % Load PC into temp + % Placeholder: auipc TempReg, 0 I1 = jit_riscv32_asm:auipc(TempReg, 0), - % Load offset from PC+8 - I2 = jit_riscv32_asm:lw(TempReg, TempReg, 8), - % Jump to address - I3 = jit_riscv32_asm:jalr(zero, TempReg, 0), - % Placeholder offset - I4 = <<0:32/little>>, - CodeBlock = <>, + % Placeholder: jalr zero, TempReg, 0 + I2 = jit_riscv32_asm:jalr(zero, TempReg, 0), + CodeBlock = <>, SequenceSize = byte_size(CodeBlock), % Add relocation entry Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}}, @@ -795,17 +796,13 @@ branch_to_label_code( #state{available_regs = [], branches = Branches} = State0, Offset, Label, false ) -> % RISC-V: Use t6 as scratch (caller-saved, safe to clobber) - % Same sequence as when we have available regs - always 16 bytes (4-byte aligned) + % Far 
branch sequence using PC-relative auipc + jalr (8 bytes) - % Load PC into t6 + % Placeholder: auipc t6, 0 I1 = jit_riscv32_asm:auipc(t6, 0), - % Load offset from PC+8 - I2 = jit_riscv32_asm:lw(t6, t6, 8), - % Jump to address - I3 = jit_riscv32_asm:jalr(zero, t6, 0), - % Placeholder offset - I4 = <<0:32/little>>, - CodeBlock = <>, + % Placeholder: jalr zero, t6, 0 + I2 = jit_riscv32_asm:jalr(zero, t6, 0), + CodeBlock = <>, SequenceSize = byte_size(CodeBlock), % Add relocation entry Reloc = {Label, Offset, {far_branch, SequenceSize, t6}}, @@ -1528,9 +1525,17 @@ call_func_ptr( % Calculate stack offset: find register index in SavedRegs * 4 bytes ResultReg = element(2, FuncPtrTuple), RegIndex = index_of(ResultReg, SavedRegs), - StoreResultStackOffset = RegIndex * 4, - StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset), - {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]}; + case RegIndex >= 0 of + true -> + StoreResultStackOffset = RegIndex * 4, + StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset), + {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]}; + false -> + % FuncPtrReg was not in SavedRegs, use an available register + [ResultReg1 | _] = AvailableRegs1 -- SavedRegs, + MoveResult = jit_riscv32_asm:mv(ResultReg1, a0), + {StreamModule:append(Stream5, MoveResult), [ResultReg1 | UsedRegs1]} + end; _ -> % Use any free that is not in SavedRegs [ResultReg | _] = AvailableRegs1 -- SavedRegs, @@ -1632,8 +1637,8 @@ parameter_regs0([], _, Acc) -> lists:reverse(Acc); parameter_regs0([{avm_int64_t, _} | T], [a0, a1 | Rest], Acc) -> parameter_regs0(T, Rest, [a1, a0 | Acc]); -parameter_regs0([{avm_int64_t, _} | T], [a1, a2, a3 | Rest], Acc) -> - parameter_regs0(T, Rest, [a3, a2 | Acc]); +parameter_regs0([{avm_int64_t, _} | T], [a1, a2 | Rest], Acc) -> + parameter_regs0(T, Rest, [a2, a1 | Acc]); parameter_regs0([{avm_int64_t, _} | T], [a2, a3 | Rest], Acc) -> parameter_regs0(T, Rest, [a3, a2 | Acc]); 
parameter_regs0([_Other | T], [Reg | Rest], Acc) -> @@ -2637,7 +2642,9 @@ decrement_reductions_and_maybe_schedule_next( I4 = jit_riscv32_asm:bne(Temp, zero, 0), % Set continuation to the next instruction ADROffset = BNEOffset + byte_size(I4), - I5 = pc_relative_address(Temp, 0), + % Use 8-byte placeholder (2 words of 0xFFFFFFFF) for pc_relative_address + % This ensures we can always rewrite with either auipc alone (4 bytes) or auipc+addi (8 bytes) + I5 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, I6 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), % Append the instructions to the stream Stream2 = StreamModule:append(Stream1, <>), @@ -2647,7 +2654,17 @@ decrement_reductions_and_maybe_schedule_next( #state{stream = Stream3} = State2, NewOffset = StreamModule:offset(Stream3), NewI4 = jit_riscv32_asm:bne(Temp, zero, NewOffset - BNEOffset), - NewI5 = pc_relative_address(Temp, NewOffset - ADROffset), + NewI5Offset = NewOffset - ADROffset, + % Generate the new pc_relative_address instruction, padding with NOP if needed + NewI5 = + case pc_relative_address(Temp, NewI5Offset) of + I when byte_size(I) =:= 4 -> + % Only auipc, pad with NOP + <>; + I when byte_size(I) =:= 8 -> + % auipc + addi, no padding needed + I + end, Stream4 = StreamModule:replace( Stream3, BNEOffset, <> ), @@ -2753,17 +2770,12 @@ set_cp(#state{available_regs = [TempReg | AvailT], used_regs = UsedRegs} = State % Reserve space for offset load instruction % li can generate 1 instruction (4 bytes) for small immediates (< 2048) % or 2 instructions (8 bytes) for large immediates - % Since we use (offset bsl 2), threshold is when offset >= 512 bytes - % To be safe, use same threshold as AArch64 relative to instruction encoding limits - {I2, I3} = - if - Offset >= 512 -> - % Need 2 instructions (lui + addi) for large offsets - {jit_riscv32_asm:nop(), jit_riscv32_asm:nop()}; - true -> - % Need 1 instruction (addi) for small offsets - {jit_riscv32_asm:nop(), <<>>} - end, + % Since 
we don't know the final CP value yet (it depends on code size), + % we must always reserve 2 instructions (8 bytes) to be safe + % The final CP value is (final_offset << 2), and final_offset is unknown + % Use 0xFFFFFFFF placeholders for flash compatibility (can only flip 1->0) + I2 = <<16#FFFFFFFF:32/little>>, + I3 = <<16#FFFFFFFF:32/little>>, MOVOffset = Offset + byte_size(I1), % OR the module index with the offset (loaded in temp register) I4 = jit_riscv32_asm:or_(Reg, TempReg), @@ -2783,8 +2795,16 @@ rewrite_cp_offset( TempReg ) -> NewOffset = StreamModule:offset(Stream0) - CodeOffset, - NewMoveInstr = jit_riscv32_asm:li(TempReg, NewOffset bsl 2), - Stream1 = StreamModule:replace(Stream0, RewriteOffset, NewMoveInstr), + CPValue = NewOffset bsl 2, + NewMoveInstr = jit_riscv32_asm:li(TempReg, CPValue), + % We reserved 8 bytes (2 instructions) for the CP value + % If li generates only 4 bytes, pad with a NOP to maintain alignment + PaddedInstr = + case byte_size(NewMoveInstr) of + 4 -> <>; + 8 -> NewMoveInstr + end, + Stream1 = StreamModule:replace(Stream0, RewriteOffset, PaddedInstr), State0#state{stream = Stream1}. 
set_bs( diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 1141466791..e10abc09a5 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -39,7 +39,7 @@ #include #include -#define ENABLE_TRACE +//#define ENABLE_TRACE #include "trace.h" // Verify matching atom index in default_atoms.hrl diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index f8e15c831e..f18d892838 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -43,7 +43,7 @@ #include "stacktrace.h" #endif -#define ENABLE_TRACE +//#define ENABLE_TRACE #include "trace.h" // These constants can be used to reduce the size of the VM for a specific diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index 4a4fba5593..475e96bd5d 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -279,7 +279,7 @@ call_primitive_few_free_regs_test() -> " 4c: 000e8693 mv a3,t4\n" " 50: 00038713 mv a4,t2\n" " 54: 000300e7 jalr t1\n" - " 58: fea12e23 sw a0,-4(sp)\n" + " 58: 00050313 mv t1,a0\n" " 5c: 00012083 lw ra,0(sp)\n" " 60: 00412503 lw a0,4(sp)\n" " 64: 00812583 lw a1,8(sp)\n" @@ -300,17 +300,18 @@ call_ext_only_test() -> " 0: 0085af83 lw t6,8(a1)\n" " 4: ffff8f93 addi t6,t6,-1\n" " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9a63 bnez t6,0x20\n" + " c: 000f9c63 bnez t6,0x24\n" " 10: 00000f97 auipc t6,0x0\n" - " 14: 010f8f93 addi t6,t6,16\n" - " 18: 00862f83 lw t6,8(a2)\n" - " 1c: 000f8067 jr t6\n" - " 20: 01062f83 lw t6,16(a2)\n" - " 24: 02400613 li a2,36\n" - " 28: 00200693 li a3,2\n" - " 2c: 00200713 li a4,2\n" - " 30: fff00793 li a5,-1\n" - " 34: 000f8067 jr t6" + " 14: 014f8f93 addi t6,t6,20 # 0x24\n" + " 18: 01f5a223 sw t6,4(a1)\n" + " 1c: 00862f83 lw t6,8(a2)\n" + " 20: 000f8067 jr t6\n" + " 24: 01062f83 lw t6,16(a2)\n" + " 28: 02800613 li a2,40\n" + " 2c: 00200693 li a3,2\n" + " 30: 00200713 li a4,2\n" + " 34: fff00793 li a5,-1\n" + " 38: 000f8067 jr t6" >>, 
?assertEqual(dump_to_bin(Dump), Stream). @@ -340,17 +341,18 @@ call_ext_last_test() -> " 0: 0085af83 lw t6,8(a1)\n" " 4: ffff8f93 addi t6,t6,-1\n" " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9a63 bnez t6,0x20\n" + " c: 000f9c63 bnez t6,0x24\n" " 10: 00000f97 auipc t6,0x0\n" - " 14: 010f8f93 addi t6,t6,16\n" - " 18: 00862f83 lw t6,8(a2)\n" - " 1c: 000f8067 jr t6\n" - " 20: 01062f83 lw t6,16(a2)\n" - " 24: 02400613 li a2,36\n" - " 28: 00200693 li a3,2\n" - " 2c: 00200713 li a4,2\n" - " 30: 00a00793 li a5,10\n" - " 34: 000f8067 jr t6" + " 14: 014f8f93 addi t6,t6,20 # 0x24\n" + " 18: 01f5a223 sw t6,4(a1)\n" + " 1c: 00862f83 lw t6,8(a2)\n" + " 20: 000f8067 jr t6\n" + " 24: 01062f83 lw t6,16(a2)\n" + " 28: 02800613 li a2,40\n" + " 2c: 00200693 li a3,2\n" + " 30: 00200713 li a4,2\n" + " 34: 00a00793 li a5,10\n" + " 38: 000f8067 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -397,7 +399,7 @@ return_if_not_equal_to_ctx_test_() -> " 28: 00812583 lw a1,8(sp)\n" " 2c: 00c12603 lw a2,12(sp)\n" " 30: 01010113 addi sp,sp,16\n" - " 34: 00af8463 beq t6,a0,0x3c\n" + " 34: 00af8663 beq t6,a0,0x40\n" " 38: 000f8513 mv a0,t6\n" " 3c: 00008067 ret" >>, @@ -430,7 +432,7 @@ return_if_not_equal_to_ctx_test_() -> " 2c: 00c12603 lw a2,12(sp)\n" " 30: 01010113 addi sp,sp,16\n" " 34: 000f8f13 mv t5,t6\n" - " 38: 00af0463 beq t5,a0,0x40\n" + " 38: 00af0663 beq t5,a0,0x44\n" " 3c: 000f0513 mv a0,t5\n" " 40: 00008067 ret" >>, @@ -1159,28 +1161,26 @@ call_only_or_schedule_next_and_label_relocation_test() -> Dump = << " 0: 00000697 auipc a3,0x0\n" - " 4: 05468067 jr 84(a3)\n" + " 4: 04c68067 jr 76(a3) # 0x4c\n" " 8: 00000697 auipc a3,0x0\n" - " c: 01068067 jr 16(a3)\n" + " c: 01068067 jr 16(a3) # 0x18\n" " 10: 00000697 auipc a3,0x0\n" - " 14: 03c68067 jr 60(a3)\n" + " 14: 03468067 jr 52(a3) # 0x44\n" " 18: 0085af83 lw t6,8(a1)\n" " 1c: ffff8f93 addi t6,t6,-1\n" " 20: 01f5a423 sw t6,8(a1)\n" - " 24: 000f8a63 beqz t6,0x38\n" - " 28: 0240006f j 0x4c\n" + " 24: 000f8663 beqz t6,0x30\n" + " 28: 
01c0006f j 0x44\n" " 2c: 00000013 nop\n" - " 30: 00000013 nop\n" - " 34: 00000013 nop\n" - " 38: 00000f97 auipc t6,0x0\n" - " 3c: 014f8f93 addi t6,t6,20\n" - " 40: 01f5a223 sw t6,4(a1)\n" - " 44: 00862f83 lw t6,8(a2)\n" + " 30: 00000f97 auipc t6,0x0\n" + " 34: 014f8f93 addi t6,t6,20 # 0x44\n" + " 38: 01f5a223 sw t6,4(a1)\n" + " 3c: 00862f83 lw t6,8(a2)\n" + " 40: 000f8067 jr t6\n" + " 44: 00062f83 lw t6,0(a2)\n" " 48: 000f8067 jr t6\n" - " 4c: 00062f83 lw t6,0(a2)\n" - " 50: 000f8067 jr t6\n" - " 54: 00462f83 lw t6,4(a2)\n" - " 58: 000f8067 jr t6" + " 4c: 00462f83 lw t6,4(a2)\n" + " 50: 000f8067 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1213,20 +1213,18 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> " 218: 0085af83 lw t6,8(a1)\n" " 21c: ffff8f93 addi t6,t6,-1\n" " 220: 01f5a423 sw t6,8(a1)\n" - " 224: 000f8a63 beqz t6,0x238\n" - " 228: 0240006f j 0x24c\n" + " 224: 000f8663 beqz t6,0x230\n" + " 228: 01c0006f j 0x244\n" " 22c: 00000013 nop\n" - " 230: 00000013 nop\n" - " 234: 00000013 nop\n" - " 238: 00000f97 auipc t6,0x0\n" - " 23c: 014f8f93 addi t6,t6,20\n" - " 240: 01f5a223 sw t6,4(a1)\n" - " 244: 00862f83 lw t6,8(a2)\n" + " 230: 00000f97 auipc t6,0x0\n" + " 234: 014f8f93 addi t6,t6,20 # 0x244\n" + " 238: 01f5a223 sw t6,4(a1)\n" + " 23c: 00862f83 lw t6,8(a2)\n" + " 240: 000f8067 jr t6\n" + " 244: 00062f83 lw t6,0(a2)\n" " 248: 000f8067 jr t6\n" - " 24c: 00062f83 lw t6,0(a2)\n" - " 250: 000f8067 jr t6\n" - " 254: 00462f83 lw t6,4(a2)\n" - " 258: 000f8067 jr t6" + " 24c: 00462f83 lw t6,4(a2)\n" + " 250: 000f8067 jr t6" >>, {_, RelevantBinary} = split_binary(Stream, 16#218), ?assertEqual(dump_to_bin(Dump), RelevantBinary). 
@@ -1358,28 +1356,24 @@ is_integer_test() -> " 0: 01852f83 lw t6,24(a0)\n" " 4: ffffcf13 not t5,t6\n" " 8: 01cf1f13 slli t5,t5,0x1c\n" - " c: 040f0c63 beqz t5,0x64\n" + " c: 040f0463 beqz t5,0x54\n" " 10: 000f8f13 mv t5,t6\n" " 14: 00300e93 li t4,3\n" " 18: 01df7f33 and t5,t5,t4\n" " 1c: 00200e93 li t4,2\n" - " 20: 01df0a63 beq t5,t4,0x34\n" + " 20: 01df0663 beq t5,t4,0x2c\n" " 24: 0dc0006f j 0x100\n" " 28: 00000013 nop\n" - " 2c: 00000013 nop\n" - " 30: 00000013 nop\n" - " 34: 00300f13 li t5,3\n" - " 38: ffff4f13 not t5,t5\n" - " 3c: 01efffb3 and t6,t6,t5\n" - " 40: 000faf83 lw t6,0(t6)\n" - " 44: 03f00f13 li t5,63\n" - " 48: 01efffb3 and t6,t6,t5\n" - " 4c: 00800f13 li t5,8\n" - " 50: 01ef8a63 beq t6,t5,0x64\n" - " 54: 0ac0006f j 0x100\n" - " 58: 00000013 nop\n" - " 5c: 00000013 nop\n" - " 60: 00000013 nop" + " 2c: 00300f13 li t5,3\n" + " 30: ffff4f13 not t5,t5\n" + " 34: 01efffb3 and t6,t6,t5\n" + " 38: 000faf83 lw t6,0(t6)\n" + " 3c: 03f00f13 li t5,63\n" + " 40: 01efffb3 and t6,t6,t5\n" + " 44: 00800f13 li t5,8\n" + " 48: 01ef8663 beq t6,t5,0x54\n" + " 4c: 0b40006f j 0x100\n" + " 50: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1421,33 +1415,29 @@ is_number_test() -> " 0: 01852f83 lw t6,24(a0)\n" " 4: ffffcf13 not t5,t6\n" " 8: 01cf1f13 slli t5,t5,0x1c\n" - " c: 060f0663 beqz t5,0x78\n" + " c: 040f0e63 beqz t5,0x68\n" " 10: 000f8f13 mv t5,t6\n" " 14: 00300e93 li t4,3\n" " 18: 01df7f33 and t5,t5,t4\n" " 1c: 00200e93 li t4,2\n" - " 20: 01df0a63 beq t5,t4,0x34\n" + " 20: 01df0663 beq t5,t4,0x2c\n" " 24: 0dc0006f j 0x100\n" " 28: 00000013 nop\n" - " 2c: 00000013 nop\n" - " 30: 00000013 nop\n" - " 34: 00300f13 li t5,3\n" - " 38: ffff4f13 not t5,t5\n" - " 3c: 01efffb3 and t6,t6,t5\n" - " 40: 000faf83 lw t6,0(t6)\n" - " 44: 000f8f13 mv t5,t6\n" - " 48: 03f00e93 li t4,63\n" - " 4c: 01df7f33 and t5,t5,t4\n" - " 50: 00800e93 li t4,8\n" - " 54: 03df0263 beq t5,t4,0x78\n" - " 58: 03f00f13 li t5,63\n" - " 5c: 01efffb3 and t6,t6,t5\n" - " 60: 01800f13 li t5,24\n" - " 64: 01ef8a63 beq t6,t5,0x78\n" - " 68: 0980006f j 0x100\n" - " 6c: 00000013 nop\n" - " 70: 00000013 nop\n" - " 74: 00000013 nop" + " 2c: 00300f13 li t5,3\n" + " 30: ffff4f13 not t5,t5\n" + " 34: 01efffb3 and t6,t6,t5\n" + " 38: 000faf83 lw t6,0(t6)\n" + " 3c: 000f8f13 mv t5,t6\n" + " 40: 03f00e93 li t4,63\n" + " 44: 01df7f33 and t5,t5,t4\n" + " 48: 00800e93 li t4,8\n" + " 4c: 01df0e63 beq t5,t4,0x68\n" + " 50: 03f00f13 li t5,63\n" + " 54: 01efffb3 and t6,t6,t5\n" + " 58: 01800f13 li t5,24\n" + " 5c: 01ef8663 beq t6,t5,0x68\n" + " 60: 0a00006f j 0x100\n" + " 64: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1466,15 +1456,13 @@ is_boolean_test() -> State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8e63 beq t6,t5,0x24\n" - " c: 00b00f13 li t5,11\n" - " 10: 01ef8a63 beq t6,t5,0x24\n" - " 14: 0ec0006f j 0x100\n" - " 18: 00000013 nop\n" - " 1c: 00000013 nop\n" - " 20: 00000013 nop" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8a63 beq t6,t5,0x1c\n" + " c: 00b00f13 li t5,11\n" + " 10: 01ef8663 beq t6,t5,0x1c\n" + " 14: 0ec0006f j 0x100\n" + " 18: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1493,15 +1481,13 @@ is_boolean_far_test() -> State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8e63 beq t6,t5,0x24\n" - " c: 00b00f13 li t5,11\n" - " 10: 01ef8a63 beq t6,t5,0x24\n" - " 14: 7ed0006f j 0x1000\n" - " 18: 00000013 nop\n" - " 1c: 00000013 nop\n" - " 20: 00000013 nop" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8a63 beq t6,t5,0x1c\n" + " c: 00b00f13 li t5,11\n" + " 10: 01ef8663 beq t6,t5,0x1c\n" + " 14: 7ed0006f j 0x1000\n" + " 18: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1522,13 +1508,11 @@ is_boolean_far_known_test() -> Dump = << " 0: 01852f83 lw t6,24(a0)\n" " 4: 04b00f13 li t5,75\n" - " 8: 01ef8e63 beq t6,t5,0x24\n" + " 8: 01ef8a63 beq t6,t5,0x1c\n" " c: 00b00f13 li t5,11\n" - " 10: 01ef8a63 beq t6,t5,0x24\n" - " 14: 00000f17 auipc t5,0x0\n" - " 18: 008f2f03 lw t5,8(t5)\n" - " 1c: 000f0067 jr t5\n" - " 20: 00001000 .word 0x00001000" + " 10: 01ef8663 beq t6,t5,0x1c\n" + " 14: 00001f17 auipc t5,0x1\n" + " 18: fecf0067 jr -20(t5) # 0x1000" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1581,7 +1565,7 @@ wait_timeout_test() -> " 4c: 00812583 lw a1,8(sp)\n" " 50: 00c12603 lw a2,12(sp)\n" " 54: 01010113 addi sp,sp,16\n" - " 58: 00af8463 beq t6,a0,0x60\n" + " 58: 00af8663 beq t6,a0,0x64\n" " 5c: 000f8513 mv a0,t6\n" " 60: 00008067 ret\n" " 64: 08400f93 li t6,132\n" @@ -1767,22 +1751,24 @@ call_ext_test() -> " 0: 0085af83 lw t6,8(a1)\n" " 4: ffff8f93 addi t6,t6,-1\n" " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9a63 bnez t6,0x20\n" + " c: 000f9c63 bnez t6,0x24\n" " 10: 00000f97 auipc t6,0x0\n" - " 14: 010f8f93 addi t6,t6,16\n" - " 18: 00862f83 lw t6,8(a2)\n" - " 1c: 000f8067 jr t6\n" - " 20: 0005af03 lw t5,0(a1)\n" - " 24: 000f2f03 lw t5,0(t5)\n" - " 28: 018f1f13 slli t5,t5,0x18\n" - " 2c: 13000f93 li t6,304\n" - " 30: 01ff6f33 or t5,t5,t6\n" - " 34: 05e52e23 sw t5,92(a0)\n" - " 38: 01062f83 lw t6,16(a2)\n" - " 3c: 00200613 li a2,2\n" - " 40: 00500693 li a3,5\n" - " 44: fff00713 li a4,-1\n" - " 48: 000f8067 jr t6" + " 14: 014f8f93 addi t6,t6,20 # 0x24\n" + " 18: 01f5a223 sw t6,4(a1)\n" + " 1c: 00862f83 lw t6,8(a2)\n" + " 20: 000f8067 jr t6\n" + " 24: 0005af03 lw t5,0(a1)\n" + " 28: 000f2f03 lw t5,0(t5)\n" + " 2c: 018f1f13 slli t5,t5,0x18\n" + " 30: 15000f93 li t6,336\n" + " 34: 00000013 nop\n" + " 38: 01ff6f33 or t5,t5,t6\n" + " 3c: 05e52e23 sw t5,92(a0)\n" + " 40: 01062f83 lw t6,16(a2)\n" + " 44: 00200613 li a2,2\n" + " 48: 00500693 li a3,5\n" + " 4c: fff00713 li a4,-1\n" + " 50: 000f8067 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1819,49 +1805,51 @@ call_fun_test() -> " 0: 0085af83 lw t6,8(a1)\n" " 4: ffff8f93 addi t6,t6,-1\n" " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9a63 bnez t6,0x20\n" + " c: 000f9c63 bnez t6,0x24\n" " 10: 00000f97 auipc t6,0x0\n" - " 14: 010f8f93 addi t6,t6,16\n" - " 18: 00862f83 lw t6,8(a2)\n" - " 1c: 000f8067 jr t6\n" - " 20: 01852f83 lw t6,24(a0)\n" - " 24: 000f8f13 mv t5,t6\n" - " 28: 000f0e93 mv t4,t5\n" - " 2c: 00300e13 li t3,3\n" - " 30: 01cefeb3 and t4,t4,t3\n" - " 34: 00200e13 li t3,2\n" - " 38: 01ce8c63 beq t4,t3,0x50\n" - " 3c: 04c62f83 lw t6,76(a2)\n" - " 40: 04000613 li a2,64\n" - " 44: 18b00693 li a3,395\n" - " 48: 000f0713 mv a4,t5\n" - " 4c: 000f8067 jr t6\n" - " 50: 00300e93 li t4,3\n" - " 54: fffece93 not t4,t4\n" - " 58: 01df7f33 and t5,t5,t4\n" - " 5c: 000f2f03 lw t5,0(t5)\n" - " 60: 000f0e93 mv t4,t5\n" - " 64: 03f00e13 li t3,63\n" - " 68: 01cefeb3 and t4,t4,t3\n" - " 6c: 01400e13 li t3,20\n" - " 70: 01ce8c63 beq t4,t3,0x88\n" - " 74: 04c62f83 lw t6,76(a2)\n" - " 78: 07800613 li a2,120\n" - " 7c: 18b00693 li a3,395\n" - " 80: 000f0713 mv a4,t5\n" - " 84: 000f8067 jr t6\n" - " 88: 0005ae83 lw t4,0(a1)\n" - " 8c: 000eae83 lw t4,0(t4)\n" - " 90: 018e9e93 slli t4,t4,0x18\n" - " 94: 2e000f13 li t5,736\n" - " 98: 01eeeeb3 or t4,t4,t5\n" - " 9c: 05d52e23 sw t4,92(a0)\n" - " a0: 08000f13 li t5,128\n" - " a4: 00cf0f33 add t5,t5,a2\n" - " a8: 000f2f03 lw t5,0(t5)\n" - " ac: 000f8613 mv a2,t6\n" - " b0: 00000693 li a3,0\n" - " b4: 000f0067 jr t5" + " 14: 014f8f93 addi t6,t6,20 # 0x24\n" + " 18: 01f5a223 sw t6,4(a1)\n" + " 1c: 00862f83 lw t6,8(a2)\n" + " 20: 000f8067 jr t6\n" + " 24: 01852f83 lw t6,24(a0)\n" + " 28: 000f8f13 mv t5,t6\n" + " 2c: 000f0e93 mv t4,t5\n" + " 30: 00300e13 li t3,3\n" + " 34: 01cefeb3 and t4,t4,t3\n" + " 38: 00200e13 li t3,2\n" + " 3c: 01ce8c63 beq t4,t3,0x54\n" + " 40: 04c62f83 lw t6,76(a2)\n" + " 44: 04400613 li a2,68\n" + " 48: 18b00693 li a3,395\n" + " 4c: 000f0713 mv a4,t5\n" + " 50: 000f8067 jr t6\n" + " 54: 00300e93 li t4,3\n" 
+ " 58: fffece93 not t4,t4\n" + " 5c: 01df7f33 and t5,t5,t4\n" + " 60: 000f2f03 lw t5,0(t5)\n" + " 64: 000f0e93 mv t4,t5\n" + " 68: 03f00e13 li t3,63\n" + " 6c: 01cefeb3 and t4,t4,t3\n" + " 70: 01400e13 li t3,20\n" + " 74: 01ce8c63 beq t4,t3,0x8c\n" + " 78: 04c62f83 lw t6,76(a2)\n" + " 7c: 07c00613 li a2,124\n" + " 80: 18b00693 li a3,395\n" + " 84: 000f0713 mv a4,t5\n" + " 88: 000f8067 jr t6\n" + " 8c: 0005ae83 lw t4,0(a1)\n" + " 90: 000eae83 lw t4,0(t4)\n" + " 94: 018e9e93 slli t4,t4,0x18\n" + " 98: 30000f13 li t5,768\n" + " 9c: 00000013 nop\n" + " a0: 01eeeeb3 or t4,t4,t5\n" + " a4: 05d52e23 sw t4,92(a0)\n" + " a8: 08000f13 li t5,128\n" + " ac: 00cf0f33 add t5,t5,a2\n" + " b0: 000f2f03 lw t5,0(t5)\n" + " b4: 000f8613 mv a2,t6\n" + " b8: 00000693 li a3,0\n" + " bc: 000f0067 jr t5" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2861,8 +2849,8 @@ alloc_boxed_integer_fragment_small_test() -> " c: 00a12223 sw a0,4(sp)\n" " 10: 00b12423 sw a1,8(sp)\n" " 14: 00c12623 sw a2,12(sp)\n" - " 18: 02a00613 li a2,42\n" - " 1c: 00000693 li a3,0\n" + " 18: 02a00593 li a1,42\n" + " 1c: 00000613 li a2,0\n" " 20: 000f80e7 jalr t6\n" " 24: 00050f93 mv t6,a0\n" " 28: 00012083 lw ra,0(sp)\n" @@ -2892,10 +2880,10 @@ alloc_boxed_integer_fragment_large_test() -> " c: 00a12223 sw a0,4(sp)\n" " 10: 00b12423 sw a1,8(sp)\n" " 14: 00c12623 sw a2,12(sp)\n" - " 18: 9abce637 lui a2,0x9abce\n" - " 1c: ef060613 addi a2,a2,-272\n" - " 20: 123456b7 lui a3,0x12345\n" - " 24: 67868693 addi a3,a3,1656\n" + " 18: 9abce5b7 lui a1,0x9abce\n" + " 1c: ef058593 addi a1,a1,-272 # 0x9abcdef0\n" + " 20: 12345637 lui a2,0x12345\n" + " 24: 67860613 addi a2,a2,1656 # 0x12345678\n" " 28: 000f80e7 jalr t6\n" " 2c: 00050f93 mv t6,a0\n" " 30: 00012083 lw ra,0(sp)\n" @@ -2937,7 +2925,7 @@ call_func_ptr_stack_alignment_test() -> " 2c: 01f12c23 sw t6,24(sp)\n" " 30: 02a00513 li a0,42\n" " 34: 000e00e7 jalr t3\n" - " 38: fea12e23 sw a0,-4(sp)\n" + " 38: 00050e13 mv t3,a0\n" " 3c: 00012083 lw ra,0(sp)\n" " 40: 00412503 lw 
a0,4(sp)\n" " 44: 00812583 lw a1,8(sp)\n" @@ -2996,7 +2984,7 @@ call_func_ptr_register_exhaustion_test_() -> " 40: 00300693 li a3,3\n" " 44: 00100713 li a4,1\n" " 48: 000f00e7 jalr t5\n" - " 4c: fea12e23 sw a0,-4(sp)\n" + " 4c: 00050f13 mv t5,a0\n" " 50: 00012083 lw ra,0(sp)\n" " 54: 00412503 lw a0,4(sp)\n" " 58: 00812583 lw a1,8(sp)\n" @@ -3037,7 +3025,7 @@ call_func_ptr_register_exhaustion_test_() -> " 40: 00100693 li a3,1\n" " 44: 00030713 mv a4,t1\n" " 48: 000f00e7 jalr t5\n" - " 4c: fea12e23 sw a0,-4(sp)\n" + " 4c: 00050f13 mv t5,a0\n" " 50: 00012083 lw ra,0(sp)\n" " 54: 00412503 lw a0,4(sp)\n" " 58: 00812583 lw a1,8(sp)\n" @@ -3078,7 +3066,7 @@ call_func_ptr_register_exhaustion_test_() -> " 40: 00030693 mv a3,t1\n" " 44: 00100713 li a4,1\n" " 48: 000f00e7 jalr t5\n" - " 4c: fea12e23 sw a0,-4(sp)\n" + " 4c: 00050f13 mv t5,a0\n" " 50: 00012083 lw ra,0(sp)\n" " 54: 00412503 lw a0,4(sp)\n" " 58: 00812583 lw a1,8(sp)\n" @@ -3255,18 +3243,17 @@ add_beam_test() -> State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]), State15 = ?BACKEND:update_branches(State14), Stream = ?BACKEND:stream(State15), - riscv32_helper:disassemble(Stream), Dump = << % jump table (new 8-byte format) " 0: 00000697 auipc a3,0x0\n" - " 4: 11868067 jr 280(a3)\n" + " 4: 10c68067 jr 268(a3) # 0x10c\n" " 8: 00000697 auipc a3,0x0\n" - " c: 01868067 jr 24(a3)\n" + " c: 01868067 jr 24(a3) # 0x20\n" " 10: 00000697 auipc a3,0x0\n" - " 14: 05468067 jr 84(a3)\n" + " 14: 04c68067 jr 76(a3) # 0x5c\n" " 18: 00000697 auipc a3,0x0\n" - " 1c: 0f868067 jr 248(a3)\n" + " 1c: 0ec68067 jr 236(a3) # 0x104\n" % label 1 % {move,{integer,9},{x,1}}. 
" 20: 09f00f93 li t6,159\n" @@ -3278,72 +3265,69 @@ add_beam_test() -> " 30: 0085af83 lw t6,8(a1)\n" " 34: ffff8f93 addi t6,t6,-1\n" " 38: 01f5a423 sw t6,8(a1)\n" - " 3c: 000f8a63 beqz t6,0x50\n" - " 40: 0240006f j 0x64\n" + " 3c: 000f8663 beqz t6,0x48\n" + " 40: 01c0006f j 0x5c\n" " 44: 00000013 nop\n" - " 48: 00000013 nop\n" - " 4c: 00000013 nop\n" - " 50: 00000f97 auipc t6,0x0\n" - " 54: 014f8f93 addi t6,t6,20\n" - " 58: 01f5a223 sw t6,4(a1)\n" - " 5c: 00862f83 lw t6,8(a2)\n" - " 60: 000f8067 jr t6\n" + " 48: 00000f97 auipc t6,0x0\n" + " 4c: 014f8f93 addi t6,t6,20 # 0x5c\n" + " 50: 01f5a223 sw t6,4(a1)\n" + " 54: 00862f83 lw t6,8(a2)\n" + " 58: 000f8067 jr t6\n" % label 2 % {allocate,1,1}. - " 64: 01462f83 lw t6,20(a2)\n" - " 68: ff010113 addi sp,sp,-16\n" - " 6c: 00112023 sw ra,0(sp)\n" - " 70: 00a12223 sw a0,4(sp)\n" - " 74: 00b12423 sw a1,8(sp)\n" - " 78: 00c12623 sw a2,12(sp)\n" - " 7c: 00100613 li a2,1\n" - " 80: 00000693 li a3,0\n" - " 84: 00100713 li a4,1\n" - " 88: 000f80e7 jalr t6\n" - " 8c: 00050f93 mv t6,a0\n" - " 90: 00012083 lw ra,0(sp)\n" - " 94: 00412503 lw a0,4(sp)\n" - " 98: 00812583 lw a1,8(sp)\n" - " 9c: 00c12603 lw a2,12(sp)\n" - " a0: 01010113 addi sp,sp,16\n" - " a4: 01ff9f13 slli t5,t6,0x1f\n" - " a8: 000f4863 bltz t5,0xb8\n" - " ac: 01862f83 lw t6,24(a2)\n" - " b0: 0b000613 li a2,176\n" - " b4: 000f8067 jr t6\n" + " 5c: 01462f83 lw t6,20(a2)\n" + " 60: ff010113 addi sp,sp,-16\n" + " 64: 00112023 sw ra,0(sp)\n" + " 68: 00a12223 sw a0,4(sp)\n" + " 6c: 00b12423 sw a1,8(sp)\n" + " 70: 00c12623 sw a2,12(sp)\n" + " 74: 00100613 li a2,1\n" + " 78: 00000693 li a3,0\n" + " 7c: 00100713 li a4,1\n" + " 80: 000f80e7 jalr t6\n" + " 84: 00050f93 mv t6,a0\n" + " 88: 00012083 lw ra,0(sp)\n" + " 8c: 00412503 lw a0,4(sp)\n" + " 90: 00812583 lw a1,8(sp)\n" + " 94: 00c12603 lw a2,12(sp)\n" + " 98: 01010113 addi sp,sp,16\n" + " 9c: 01ff9f13 slli t5,t6,0x1f\n" + " a0: 000f4863 bltz t5,0xb0\n" + " a4: 01862f83 lw t6,24(a2)\n" + " a8: 0a800613 li a2,168\n" + " 
ac: 000f8067 jr t6\n" % {init_yregs,{list,[{y,0}]}}. %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), - " b8: 03b00f13 li t5,59\n" - " bc: 01452f83 lw t6,20(a0)\n" - " c0: 01efa023 sw t5,0(t6)\n" + " b0: 03b00f13 li t5,59\n" + " b4: 01452f83 lw t6,20(a0)\n" + " b8: 01efa023 sw t5,0(t6)\n" % {call,1,{f,3}} %% call_or_schedule_next(State9, 3), - " c4: 0005af03 lw t5,0(a1)\n" - " c8: 000f2f03 lw t5,0(t5)\n" - " cc: 018f1f13 slli t5,t5,0x18\n" - " d0: 44000f93 li t6,1088\n" - " d4: 01ff6f33 or t5,t5,t6\n" - " d8: 05e52e23 sw t5,92(a0)\n" - " dc: 0085af83 lw t6,8(a1)\n" - " e0: ffff8f93 addi t6,t6,-1\n" - " e4: 01f5a423 sw t6,8(a1)\n" - " e8: 000f8a63 beqz t6,0xfc\n" - " ec: 0240006f j 0x110\n" - " f0: 00000013 nop\n" - " f4: 00000013 nop\n" - " f8: 00000013 nop\n" - " fc: 00000f97 auipc t6,0x0\n" - " 100: 014f8f93 addi t6,t6,20\n" - " 104: 01f5a223 sw t6,4(a1)\n" - " 108: 00862f83 lw t6,8(a2)\n" - " 10c: 000f8067 jr t6\n" + " bc: 0005af03 lw t5,0(a1)\n" + " c0: 000f2f03 lw t5,0(t5)\n" + " c4: 018f1f13 slli t5,t5,0x18\n" + " c8: 41000f93 li t6,1040\n" + " cc: 00000013 nop\n" + " d0: 01ff6f33 or t5,t5,t6\n" + " d4: 05e52e23 sw t5,92(a0)\n" + " d8: 0085af83 lw t6,8(a1)\n" + " dc: ffff8f93 addi t6,t6,-1\n" + " e0: 01f5a423 sw t6,8(a1)\n" + " e4: 000f8663 beqz t6,0xf0\n" + " e8: 01c0006f j 0x104\n" + " ec: 00000013 nop\n" + " f0: 00000f97 auipc t6,0x0\n" + " f4: 014f8f93 addi t6,t6,20 # 0x104\n" + " f8: 01f5a223 sw t6,4(a1)\n" + " fc: 00862f83 lw t6,8(a2)\n" + " 100: 000f8067 jr t6\n" %% (continuation) % label 3 - " 110: 00462f83 lw t6,4(a2)\n" - " 114: 000f8067 jr t6\n" + " 104: 00462f83 lw t6,4(a2)\n" + " 108: 000f8067 jr t6\n" % label 0 - " 118: 00462f83 lw t6,4(a2)\n" - " 11c: 000f8067 jr t6\n" + " 10c: 00462f83 lw t6,4(a2)\n" + " 110: 000f8067 jr t6\n" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
From ea692d06662242cc3e94833f321a52ea37db81b3 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sun, 19 Oct 2025 22:07:29 +0200 Subject: [PATCH 15/28] riscv32: fix compilation with JIT disabled on esp32 Signed-off-by: Paul Guyot --- src/platforms/esp32/CMakeLists.txt | 5 +++-- src/platforms/esp32/components/libatomvm/CMakeLists.txt | 9 +++++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index 1212a15fe9..4ddc362924 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -51,9 +51,8 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -project(atomvm-esp32) - # JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +# Configuration comes from idf.py menuconfig (KConfig), not CMake options if(CONFIG_JIT_ENABLED) if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") set(AVM_DISABLE_JIT OFF) @@ -68,6 +67,8 @@ else() message(STATUS "JIT compilation disabled") endif() +project(atomvm-esp32) + # esp-idf does not use compile_feature but instead sets version in # c_compile_options # Ensure project is compiled with at least C11 diff --git a/src/platforms/esp32/components/libatomvm/CMakeLists.txt b/src/platforms/esp32/components/libatomvm/CMakeLists.txt index c8e3ede411..00595afeef 100644 --- a/src/platforms/esp32/components/libatomvm/CMakeLists.txt +++ b/src/platforms/esp32/components/libatomvm/CMakeLists.txt @@ -31,7 +31,12 @@ if (HAVE_PLATFORM_ATOMIC_H) target_include_directories(libAtomVM PUBLIC ../avm_sys/) endif() -target_link_libraries(${COMPONENT_LIB} - INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init" "-u jit_stream_entry_point" "-u sys_map_native_code") +if (AVM_DISABLE_JIT) + target_link_libraries(${COMPONENT_LIB} + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init") +else() + 
target_link_libraries(${COMPONENT_LIB} + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init" "-u jit_stream_entry_point" "-u sys_map_native_code") +endif() target_compile_features(${COMPONENT_LIB} INTERFACE c_std_11) From 96d4e98f0114483d09cd0f28a78dcfa4f3a02b8b Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 21 Oct 2025 08:00:11 +0200 Subject: [PATCH 16/28] riscv32: enable JIT for pico2, add it to CI Signed-off-by: Paul Guyot --- .github/workflows/pico-build.yaml | 34 ++++++++++++++++++++++++------- src/platforms/rp2/CMakeLists.txt | 7 ++++++- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/.github/workflows/pico-build.yaml b/.github/workflows/pico-build.yaml index 9cf01d045a..c5ce30f371 100644 --- a/.github/workflows/pico-build.yaml +++ b/.github/workflows/pico-build.yaml @@ -41,7 +41,17 @@ jobs: strategy: matrix: board: ["pico", "pico_w", "pico2"] + platform: [""] language: ["cpp"] + jit: ["", "-DAVM_DISABLE_JIT=OFF"] + include: + - board: "pico2" + platform: "-DPICO_PLATFORM=rp2350-riscv" + jit: "" + + - board: "pico2" + platform: "-DPICO_PLATFORM=rp2350-riscv" + jit: "-DAVM_DISABLE_JIT=OFF" steps: - name: Checkout repo @@ -57,6 +67,16 @@ jobs: libnewlib-arm-none-eabi libstdc++-arm-none-eabi-newlib \ erlang-base erlang-dev erlang-dialyzer erlang-eunit rebar3 + - name: Install riscv32 toolchain + if: matrix.platform == '-DPICO_PLATFORM=rp2350-riscv' + run: | + sudo mkdir -p /opt + cd /opt + sudo wget https://github.com/raspberrypi/pico-sdk-tools/releases/download/v2.2.0-3/riscv-toolchain-15-x86_64-lin.tar.gz + sudo tar xzf riscv-toolchain-15-x86_64-lin.tar.gz + ls /opt + echo "/opt/riscv-toolchain-15-x86_64-lin/bin" >> $GITHUB_PATH + - name: "Git config safe.directory for codeql" run: git config --global --add safe.directory /__w/AtomVM/AtomVM @@ -74,7 +94,7 @@ jobs: set -euo pipefail mkdir build cd build - cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} + cmake .. 
-G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.platform }} ${{ matrix.jit }} ninja - name: "Perform CodeQL Analysis" @@ -97,7 +117,7 @@ jobs: mkdir build.nosmp cd build.nosmp # TODO: fix all warnings and enable -DAVM_WARNINGS_ARE_ERRORS=ON - cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} -DAVM_DISABLE_SMP=1 + cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.jit }} -DAVM_DISABLE_SMP=1 cmake --build . --target=rp2_tests - name: Run tests with rp2040js @@ -112,7 +132,7 @@ jobs: npx tsx run-tests.ts ../build.nosmp/tests/rp2_tests.uf2 ../build.nosmp/tests/test_erl_sources/rp2_test_modules.uf2 - name: Build atomvmlib.uf2 - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' shell: bash run: | set -euo pipefail @@ -122,7 +142,7 @@ jobs: make atomvmlib-${{ matrix.board }}.uf2 - name: Rename AtomVM and write sha256sum - if: startsWith(github.ref, 'refs/tags/') + if: startsWith(github.ref, 'refs/tags/') && matrix.platform == '' && matrix.jit == '' shell: bash run: | pushd src/platforms/rp2/build @@ -137,7 +157,7 @@ jobs: popd - name: Rename atomvmlib and write sha256sum - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' shell: bash run: | pushd build/libs @@ -148,7 +168,7 @@ jobs: - name: Release (Pico & Pico2) uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' with: draft: true fail_on_unmatched_files: true @@ -160,7 +180,7 @@ jobs: - name: Release (PicoW) uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && 
matrix.board == 'pico_w' && matrix.platform == '' && matrix.jit == '' with: draft: true fail_on_unmatched_files: true diff --git a/src/platforms/rp2/CMakeLists.txt b/src/platforms/rp2/CMakeLists.txt index 6dbcdf7bb8..0732f6e490 100644 --- a/src/platforms/rp2/CMakeLists.txt +++ b/src/platforms/rp2/CMakeLists.txt @@ -69,8 +69,13 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$") if (NOT AVM_DISABLE_JIT) set(AVM_JIT_TARGET_ARCH "armv6m") endif() +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^hazard3$") + # Pico2 RISC-V processor (Hazard3) + if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "riscv32") + endif() else() - # Typically riscv is not supported yet + # Other processors not supported yet if (NOT AVM_DISABLE_JIT) message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") set(AVM_DISABLE_JIT ON CACHE BOOL "Disable just in time compilation." FORCE) From be614f730cdaaaee9cfd6b72f6d345a208bace29 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Tue, 21 Oct 2025 08:00:39 +0200 Subject: [PATCH 17/28] Add missing define for HAVE_GETCWD for rp2 Signed-off-by: Paul Guyot --- src/platforms/rp2/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/platforms/rp2/CMakeLists.txt b/src/platforms/rp2/CMakeLists.txt index 0732f6e490..86e5e6683a 100644 --- a/src/platforms/rp2/CMakeLists.txt +++ b/src/platforms/rp2/CMakeLists.txt @@ -54,6 +54,8 @@ set(HAVE_MKFIFO "" CACHE INTERNAL "Have symbol mkfifo" FORCE) set(HAVE_UNLINK "" CACHE INTERNAL "Have symbol unlink" FORCE) # Likewise with EXECVE set(HAVE_EXECVE "" CACHE INTERNAL "Have symbol execve" FORCE) +# getcwd is defined in newlib header but not implemented +set(HAVE_GETCWD "" CACHE INTERNAL "Have symbol getcwd" FORCE) # Options that make sense for this platform option(AVM_DISABLE_SMP "Disable SMP support." 
OFF) From 3fca23f9d7a72492fda9ed4126bcbdc6725cacd1 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Mon, 20 Oct 2025 09:06:59 +0200 Subject: [PATCH 18/28] riscv32: Implement and use C extension Signed-off-by: Paul Guyot --- libs/jit/src/jit_riscv32.erl | 110 +- libs/jit/src/jit_riscv32_asm.erl | 840 ++++++- tests/libs/jit/jit_riscv32_asm_tests.erl | 443 +++- tests/libs/jit/jit_riscv32_tests.erl | 2842 +++++++++++----------- tests/libs/jit/jit_tests_common.erl | 3 +- 5 files changed, 2719 insertions(+), 1519 deletions(-) diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 83fed2585c..191fc3cdc8 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -317,7 +317,7 @@ flush(#state{stream_module = StreamModule, stream = Stream0} = State) -> %%----------------------------------------------------------------------------- -spec debugger(state()) -> state(). debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> - Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:bkpt(0)), + Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:c_ebreak()), State#state{stream = Stream1}. 
%%----------------------------------------------------------------------------- @@ -416,12 +416,7 @@ jump_table0( % Create jump table entry: AUIPC + JALR (8 bytes total) % This will be patched later in update_branches/2 Offset = StreamModule:offset(Stream0), - % Placeholder: Load PC + upper20 bits - I1 = jit_riscv32_asm:auipc(a3, 0), - % Placeholder: Jump to a3 + lower12 bits - I2 = jit_riscv32_asm:jalr(zero, a3, 0), - - JumpEntry = <>, + JumpEntry = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, Stream1 = StreamModule:append(Stream0, JumpEntry), % Record both AUIPC and JALR offsets for patching @@ -451,22 +446,37 @@ update_branches( Rel = LabelOffset - Offset, NewInstr = case Type of - {adr, Reg} when Rel rem 4 =:= 0 -> pc_relative_address(Reg, Rel); - {adr, Reg} when Rel rem 4 =:= 2 -> pc_relative_address(Reg, Rel + 2); - {far_branch, Size, TempReg} -> + {adr, Reg} when Rel rem 4 =:= 0 -> + % Generate pc_relative_address and pad to 8 bytes with NOP + I = pc_relative_address(Reg, Rel), + case byte_size(I) of + 4 -> <>; + 6 -> <>; + 8 -> I + end; + {adr, Reg} when Rel rem 4 =:= 2; Rel rem 4 =:= -2 -> + % Handle 2-byte aligned offsets and pad to 8 bytes + % Handle both positive and negative offsets (Erlang rem can be negative) + I = pc_relative_address(Reg, Rel), + case byte_size(I) of + 4 -> <>; + 6 -> <>; + 8 -> I + end; + {far_branch, TempReg} -> % Check if branch can now be optimized to near branch if Rel >= -1048576 andalso Rel =< 1048574 andalso (Rel rem 2) =:= 0 -> % RISC-V jal has ±1MB range % Optimize to near branch: jal + nops to fill original size DirectBranch = jit_riscv32_asm:jal(zero, Rel), - % Fill remaining bytes with NOPs (RISC-V instructions are 4 bytes) - NopCount = (Size - 4) div 4, - Nops = << - <<(jit_riscv32_asm:nop())/binary>> - || _ <- lists:seq(1, NopCount) - >>, - <>; + case byte_size(DirectBranch) of + 2 -> + <>; + 4 -> + <> + end; true -> % Keep far branch sequence: auipc + jalr (PC-relative, 8 bytes) % Split the relative offset into upper 20 bits 
and lower 12 bits @@ -474,7 +484,11 @@ update_branches( Lo12 = Rel - (Hi20 bsl 12), I1 = jit_riscv32_asm:auipc(TempReg, Hi20), I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12), - <> + Entry = <>, + case byte_size(Entry) of + 6 -> <>; + 8 -> Entry + end end; jump_table_auipc_jalr -> % Calculate PC-relative offset from AUIPC instruction to target @@ -498,7 +512,12 @@ update_branches( % Encode AUIPC and JALR with computed offsets I1 = jit_riscv32_asm:auipc(a3, Upper20), I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed), - <> + % Map to 8 bytes + JumpTableEntry = <>, + case byte_size(JumpTableEntry) of + 6 -> <>; + 8 -> JumpTableEntry + end end, Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), update_branches(State#state{stream = Stream1, branches = BranchesT}). @@ -783,13 +802,10 @@ branch_to_label_code( % RISC-V: Far branch sequence using PC-relative auipc + jalr (8 bytes) % Placeholder: auipc TempReg, 0 - I1 = jit_riscv32_asm:auipc(TempReg, 0), % Placeholder: jalr zero, TempReg, 0 - I2 = jit_riscv32_asm:jalr(zero, TempReg, 0), - CodeBlock = <>, - SequenceSize = byte_size(CodeBlock), + CodeBlock = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, % Add relocation entry - Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}}, + Reloc = {Label, Offset, {far_branch, TempReg}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; branch_to_label_code( @@ -799,13 +815,10 @@ branch_to_label_code( % Far branch sequence using PC-relative auipc + jalr (8 bytes) % Placeholder: auipc t6, 0 - I1 = jit_riscv32_asm:auipc(t6, 0), % Placeholder: jalr zero, t6, 0 - I2 = jit_riscv32_asm:jalr(zero, t6, 0), - CodeBlock = <>, - SequenceSize = byte_size(CodeBlock), + CodeBlock = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, % Add relocation entry - Reloc = {Label, Offset, {far_branch, SequenceSize, t6}}, + Reloc = {Label, Offset, {far_branch, t6}}, State1 = State0#state{branches = [Reloc | Branches]}, {State1, CodeBlock}; branch_to_label_code(#state{available_regs = []}, 
_Offset, _Label, _LabelLookup) -> @@ -2356,8 +2369,9 @@ set_continuation_to_label( % resolved to point directly to the label's actual address (not the jump table entry) Offset = StreamModule:offset(Stream0), % Emit placeholder for pc_relative_address (auipc + addi) + % Reserve 8 bytes (2 x 32-bit instructions) with all-1s placeholder for flash programming % The relocation will replace these with the correct offset - I1 = pc_relative_address(Temp, 4), + I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, Reloc = {Label, Offset, {adr, Temp}}, % Store continuation (jit_state is in a1) I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), @@ -2379,7 +2393,8 @@ set_continuation_to_offset( ) -> OffsetRef = make_ref(), Offset = StreamModule:offset(Stream0), - I1 = pc_relative_address(Temp, 4), + % Reserve 8 bytes with all-1s placeholder for flash programming + I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, Reloc = {OffsetRef, Offset, {adr, Temp}}, % Store continuation (jit_state is in a1) I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), @@ -2659,8 +2674,11 @@ decrement_reductions_and_maybe_schedule_next( NewI5 = case pc_relative_address(Temp, NewI5Offset) of I when byte_size(I) =:= 4 -> - % Only auipc, pad with NOP + % Only auipc, pad with NOP (4 bytes) <>; + I when byte_size(I) =:= 6 -> + % auipc + c.addi, pad with c.nop (2 bytes) + <>; I when byte_size(I) =:= 8 -> % auipc + addi, no padding needed I @@ -2798,10 +2816,11 @@ rewrite_cp_offset( CPValue = NewOffset bsl 2, NewMoveInstr = jit_riscv32_asm:li(TempReg, CPValue), % We reserved 8 bytes (2 instructions) for the CP value - % If li generates only 4 bytes, pad with a NOP to maintain alignment + % Pad with NOP if needed to maintain alignment PaddedInstr = case byte_size(NewMoveInstr) of 4 -> <>; + 6 -> <>; 8 -> NewMoveInstr end, Stream1 = StreamModule:replace(Stream0, RewriteOffset, PaddedInstr), @@ -2841,13 +2860,22 @@ return_labels_and_lines( || {Label, 
LabelOffset} <- Labels, is_integer(Label) ]), - I1 = pc_relative_address(a0, 12), I2 = jit_riscv32_asm:ret(), + % Assume total size is 10 bytes (8-byte I1 + 2-byte c.ret) + % If actual is 8 bytes (6-byte I1 + 2-byte c.ret), we'll pad with 2 bytes + I1 = pc_relative_address(a0, 10), + Prologue = <>, + ProloguePadded = + case byte_size(Prologue) of + 10 -> Prologue; + % 2-byte padding + 8 -> <> + end, LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, LinesTable = <<<> || {Line, Offset} <- SortedLines>>, Stream1 = StreamModule:append( Stream0, - <> ), State#state{stream = Stream1}. @@ -3005,7 +3033,7 @@ args_regs(Args) -> ). %%----------------------------------------------------------------------------- -%% @doc Add a label at the current offset. Eventually align it with a nop. +%% @doc Add a label at the current offset. %% @end %% @param State current backend state %% @param Label the label number or reference @@ -3014,15 +3042,7 @@ args_regs(Args) -> -spec add_label(state(), integer() | reference()) -> state(). add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label) -> Offset0 = StreamModule:offset(Stream0), - {State1, Offset1} = - if - Offset0 rem 4 =:= 0 -> - {State0, Offset0}; - true -> - Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:nop()), - {State0#state{stream = Stream1}, Offset0 + 2} - end, - add_label(State1, Label, Offset1). + add_label(State0, Label, Offset0). 
%%----------------------------------------------------------------------------- %% @doc Add a label at a specific offset diff --git a/libs/jit/src/jit_riscv32_asm.erl b/libs/jit/src/jit_riscv32_asm.erl index 64d42c10ad..25bf1ff574 100644 --- a/libs/jit/src/jit_riscv32_asm.erl +++ b/libs/jit/src/jit_riscv32_asm.erl @@ -87,9 +87,40 @@ call/2, % M extension (multiply/divide) mul/3, - % System instructions - bkpt/1, - ebreak/0 + % C extension (compressed) - arithmetic/logical + c_add/2, + c_sub/2, + c_and/2, + c_or/2, + c_xor/2, + c_mv/2, + % C extension - immediate instructions + c_addi/2, + c_andi/2, + c_li/2, + c_lui/2, + c_addi16sp/1, + c_addi4spn/2, + % C extension - shift instructions + c_slli/2, + c_srli/2, + c_srai/2, + % C extension - load/store + c_lw/2, + c_sw/2, + c_lwsp/2, + c_swsp/2, + % C extension - branches and jumps + c_beqz/2, + c_bnez/2, + c_j/1, + c_jal/1, + c_jr/1, + c_jalr/1, + % C extension - system instructions + c_ebreak/0, + % C extension - pseudo-instructions + c_nop/0 ]). -export_type([ @@ -264,6 +295,9 @@ encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7) -> %% ADD - Add %% rd = rs1 + rs2 -spec add(riscv_register(), riscv_register(), riscv_register()) -> binary(). +add(Rd, Rs1, Rs2) when Rd =:= Rs1, Rd =/= zero, Rs2 =/= zero -> + % Use c.add when rd == rs1 and neither register is zero + c_add(Rd, Rs2); add(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000000 encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#00). @@ -271,6 +305,11 @@ add(Rd, Rs1, Rs2) -> %% SUB - Subtract %% rd = rs1 - rs2 -spec sub(riscv_register(), riscv_register(), riscv_register()) -> binary(). +sub(Rd, Rs1, Rs2) when Rd =:= Rs1 -> + case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of + true -> c_sub(Rd, Rs2); + false -> encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20) + end; sub(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0100000 encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20). 
@@ -278,6 +317,11 @@ sub(Rd, Rs1, Rs2) -> %% AND - Bitwise AND %% rd = rs1 & rs2 -spec and_(riscv_register(), riscv_register(), riscv_register()) -> binary(). +and_(Rd, Rs1, Rs2) when Rd =:= Rs1 -> + case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of + true -> c_and(Rd, Rs2); + false -> encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00) + end; and_(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 111, Funct7: 0000000 encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00). @@ -285,6 +329,11 @@ and_(Rd, Rs1, Rs2) -> %% OR - Bitwise OR %% rd = rs1 | rs2 -spec or_(riscv_register(), riscv_register(), riscv_register()) -> binary(). +or_(Rd, Rs1, Rs2) when Rd =:= Rs1 -> + case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of + true -> c_or(Rd, Rs2); + false -> encode_r_type(16#33, Rd, 16#6, Rs1, Rs2, 16#00) + end; or_(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 110, Funct7: 0000000 encode_r_type(16#33, Rd, 16#6, Rs1, Rs2, 16#00). @@ -298,6 +347,11 @@ or_(Rd, Rs) -> %% XOR - Bitwise XOR %% rd = rs1 ^ rs2 -spec xor_(riscv_register(), riscv_register(), riscv_register()) -> binary(). +xor_(Rd, Rs1, Rs2) when Rd =:= Rs1 -> + case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of + true -> c_xor(Rd, Rs2); + false -> encode_r_type(16#33, Rd, 16#4, Rs1, Rs2, 16#00) + end; xor_(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 100, Funct7: 0000000 encode_r_type(16#33, Rd, 16#4, Rs1, Rs2, 16#00). @@ -372,6 +426,9 @@ encode_i_type(Opcode, Rd, Funct3, Rs1, Imm) -> %% ADDI - Add Immediate %% rd = rs1 + imm -spec addi(riscv_register(), riscv_register(), integer()) -> binary(). 
+addi(Rd, Rs1, Imm) when Rd =:= Rs1, Rd =/= zero, Imm >= -32, Imm =< 31 -> + % Use c.addi when rd == rs1, rd != zero, and imm fits in 6 bits (signed) + c_addi(Rd, Imm); addi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 -> % Opcode: 0010011 (0x13), Funct3: 000 encode_i_type(16#13, Rd, 16#0, Rs1, Imm); @@ -381,6 +438,11 @@ addi(_Rd, _Rs1, Imm) -> %% ANDI - AND Immediate %% rd = rs1 & imm -spec andi(riscv_register(), riscv_register(), integer()) -> binary(). +andi(Rd, Rs1, Imm) when Rd =:= Rs1, Imm >= -32, Imm =< 31 -> + case is_compressed_reg(Rd) of + true -> c_andi(Rd, Imm); + false -> encode_i_type(16#13, Rd, 16#7, Rs1, Imm) + end; andi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 -> % Opcode: 0010011 (0x13), Funct3: 111 encode_i_type(16#13, Rd, 16#7, Rs1, Imm); @@ -430,6 +492,9 @@ sltiu(_Rd, _Rs1, Imm) -> %% SLLI - Shift Left Logical Immediate %% rd = rs1 << shamt -spec slli(riscv_register(), riscv_register(), 0..31) -> binary(). +slli(Rd, Rs1, Shamt) when Rd =:= Rs1, Rd =/= zero, Shamt >= 1, Shamt =< 31 -> + % Use c.slli when rd == rs1, rd != zero, and shamt != 0 (c.slli with shamt=0 is reserved) + c_slli(Rd, Shamt); slli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 -> % Opcode: 0010011 (0x13), Funct3: 001, Imm[11:5] = 0000000 encode_i_type(16#13, Rd, 16#1, Rs1, Shamt); @@ -439,6 +504,11 @@ slli(_Rd, _Rs1, Shamt) -> %% SRLI - Shift Right Logical Immediate %% rd = rs1 >> shamt (zero-extend) -spec srli(riscv_register(), riscv_register(), 0..31) -> binary(). 
+srli(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 0, Shamt =< 31 -> + case is_compressed_reg(Rd) of + true -> c_srli(Rd, Shamt); + false -> encode_i_type(16#13, Rd, 16#5, Rs1, Shamt) + end; srli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 -> % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0000000 encode_i_type(16#13, Rd, 16#5, Rs1, Shamt); @@ -448,6 +518,14 @@ srli(_Rd, _Rs1, Shamt) -> %% SRAI - Shift Right Arithmetic Immediate %% rd = rs1 >> shamt (sign-extend) -spec srai(riscv_register(), riscv_register(), 0..31) -> binary(). +srai(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 0, Shamt =< 31 -> + case is_compressed_reg(Rd) of + true -> + c_srai(Rd, Shamt); + false -> + ImmWithBit30 = Shamt bor (1 bsl 10), + encode_i_type(16#13, Rd, 16#5, Rs1, ImmWithBit30) + end; srai(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 -> % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0100000 % The encoding uses bit 30 (Imm[10]) to distinguish SRAI from SRLI @@ -470,6 +548,15 @@ lw(Rd, Rs1) when is_atom(Rs1) -> lw(Rd, Rs1, 0). -spec lw(riscv_register(), riscv_register(), integer()) -> binary(). +lw(Rd, sp, Offset) when Rd =/= zero, Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 -> + % Use c.lwsp for loads from sp with aligned offset in range + c_lwsp(Rd, Offset); +lw(Rd, Rs1, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 -> + % Use c.lw when both registers are in compressed set and offset is aligned + case is_compressed_reg(Rd) andalso is_compressed_reg(Rs1) of + true -> c_lw(Rd, {Rs1, Offset}); + false -> encode_i_type(16#03, Rd, 16#2, Rs1, Offset) + end; lw(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 -> % Opcode: 0000011 (0x03), Funct3: 010 encode_i_type(16#03, Rd, 16#2, Rs1, Offset); @@ -585,6 +672,15 @@ sw(Rs2, Rs1) when is_atom(Rs1) -> sw(Rs1, Rs2, 0). -spec sw(riscv_register(), riscv_register(), integer()) -> binary(). 
+sw(sp, Rs2, Offset) when Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 -> + % Use c.swsp for stores to sp with aligned offset in range + c_swsp(Rs2, Offset); +sw(Rs1, Rs2, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 -> + % Use c.sw when both registers are in compressed set and offset is aligned + case is_compressed_reg(Rs1) andalso is_compressed_reg(Rs2) of + true -> c_sw(Rs2, {Rs1, Offset}); + false -> encode_s_type(16#23, 16#2, Rs1, Rs2, Offset) + end; sw(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 -> % Opcode: 0100011 (0x23), Funct3: 010 encode_s_type(16#23, 16#2, Rs1, Rs2, Offset); @@ -674,6 +770,12 @@ encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset) -> %% BEQ - Branch if Equal %% if (rs1 == rs2) pc += offset -spec beq(riscv_register(), riscv_register(), integer()) -> binary(). +beq(Rs1, zero, Offset) when Offset >= -256, Offset =< 254, (Offset rem 2) =:= 0 -> + % Use c.beqz when comparing with zero and offset fits + case is_compressed_reg(Rs1) of + true -> c_beqz(Rs1, Offset); + false -> encode_b_type(16#63, 16#0, Rs1, zero, Offset) + end; beq(Rs1, Rs2, Offset) when Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 -> @@ -687,6 +789,12 @@ beq(_Rs1, _Rs2, Offset) -> %% BNE - Branch if Not Equal %% if (rs1 != rs2) pc += offset -spec bne(riscv_register(), riscv_register(), integer()) -> binary(). +bne(Rs1, zero, Offset) when Offset >= -256, Offset =< 254, (Offset rem 2) =:= 0 -> + % Use c.bnez when comparing with zero and offset fits + case is_compressed_reg(Rs1) of + true -> c_bnez(Rs1, Offset); + false -> encode_b_type(16#63, 16#1, Rs1, zero, Offset) + end; bne(Rs1, Rs2, Offset) when Offset >= -4096, Offset =< 4094, (Offset rem 2) =:= 0 -> @@ -809,6 +917,12 @@ encode_u_type(Opcode, Rd, Imm) -> %% JAL - Jump and Link %% rd = pc + 4; pc += offset -spec jal(riscv_register(), integer()) -> binary(). 
+jal(zero, Offset) when Offset >= -2048, Offset =< 2046, (Offset rem 2) =:= 0 -> + % Use c.j when rd is zero (no link) and offset fits + c_j(Offset); +jal(ra, Offset) when Offset >= -2048, Offset =< 2046, (Offset rem 2) =:= 0 -> + % Use c.jal when rd is ra and offset fits (RV32C only) + c_jal(Offset); jal(Rd, Offset) when Offset >= -1048576, Offset =< 1048574, (Offset rem 2) =:= 0 -> @@ -822,6 +936,12 @@ jal(_Rd, Offset) -> %% JALR - Jump and Link Register %% rd = pc + 4; pc = (rs1 + offset) & ~1 -spec jalr(riscv_register(), riscv_register(), integer()) -> binary(). +jalr(zero, Rs1, 0) when Rs1 =/= zero -> + % Use c.jr for jump to register without link (rd=zero, offset=0) + c_jr(Rs1); +jalr(ra, Rs1, 0) when Rs1 =/= zero -> + % Use c.jalr for jump to register with link (rd=ra, offset=0) + c_jalr(Rs1); jalr(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 -> % Opcode: 1100111 (0x67), Funct3: 000 encode_i_type(16#67, Rd, 16#0, Rs1, Offset); @@ -841,6 +961,9 @@ jalr(Rd, Rs1) -> %% LUI - Load Upper Immediate %% rd = imm << 12 -spec lui(riscv_register(), integer()) -> binary(). +lui(Rd, Imm) when Rd =/= zero, Rd =/= sp, Imm >= -32, Imm =< 31, Imm =/= 0 -> + % Use c.lui when rd is neither zero nor sp (that encoding is c.addi16sp), imm fits in 6 bits (signed) and imm != 0 + c_lui(Rd, Imm); lui(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF -> % Opcode: 0110111 (0x37) encode_u_type(16#37, Rd, Imm bsl 12); @@ -872,6 +995,9 @@ nop() -> %% For small immediates (-2048 to 2047): addi rd, x0, imm %% For larger immediates: lui + addi sequence -spec li(riscv_register(), integer()) -> binary(). +li(Rd, Imm) when Rd =/= zero, Imm >= -32, Imm =< 31 -> + % Use c.li when rd != zero and imm fits in 6 bits (signed) + c_li(Rd, Imm); li(Rd, Imm) when Imm >= -2048, Imm =< 2047 -> % Small immediate: addi rd, x0, imm addi(Rd, zero, Imm); @@ -913,8 +1039,11 @@ li(_Rd, Imm) -> error({immediate_out_of_range, Imm, -16#80000000, 16#7FFFFFFF}).
%% MV - Move (copy register) -%% Expands to: addi rd, rs, 0 +%% Expands to: addi rd, rs, 0 or c.mv rd, rs -spec mv(riscv_register(), riscv_register()) -> binary(). +mv(Rd, Rs) when Rd =/= zero, Rs =/= zero -> + % Use c.mv when both rd and rs are not zero + c_mv(Rd, Rs); mv(Rd, Rs) -> addi(Rd, Rs, 0). @@ -975,21 +1104,6 @@ call(Rd, Offset) when Offset >= -16#80000000, Offset =< 16#7FFFFFFF -> call(_Rd, Offset) -> error({offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}). -%% EBREAK - Environment Breakpoint -%% Causes a breakpoint exception to be raised. -%% This is the RISC-V equivalent of ARM's BKPT instruction. -%% Encoding: 0x00100073 --spec ebreak() -> binary(). -ebreak() -> - <<16#73, 16#00, 16#10, 16#00>>. - -%% BKPT - Breakpoint (for ARM compatibility) -%% In RISC-V, this is implemented as EBREAK. -%% The immediate parameter is ignored for compatibility with ARM. --spec bkpt(integer()) -> binary(). -bkpt(_Imm) -> - ebreak(). - %% MUL - Multiply (RV32M extension) %% Multiplies rs1 by rs2 and places the lower 32 bits in rd %% Format: mul rd, rs1, rs2 @@ -998,3 +1112,691 @@ bkpt(_Imm) -> mul(Rd, Rs1, Rs2) -> % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000001 encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#01). + +%%----------------------------------------------------------------------------- +%% C Extension (RV32C) - Compressed Instructions +%%----------------------------------------------------------------------------- +%% The C extension adds 16-bit compressed instructions to reduce code size. +%% All compressed instructions are 16 bits (2 bytes) and use a different +%% encoding format from the base 32-bit instructions. 
+%% +%% Register encoding for compressed instructions: +%% - Some instructions use the full 5-bit register encoding (x0-x31) +%% - Others use 3-bit encoding for registers x8-x15 (s0, s1, a0-a5) +%% This is called the "compressed register set" or "C register set" +%% +%% Instruction formats: +%% - CR (Register): funct4 | rd/rs1 | rs2 | op +%% - CI (Immediate): funct3 | imm | rd/rs1 | imm | op +%% - CSS (Stack Store): funct3 | imm | rs2 | op +%% - CIW (Wide Immediate): funct3 | imm | rd' | op +%% - CL (Load): funct3 | imm | rs1' | imm | rd' | op +%% - CS (Store): funct3 | imm | rs1' | imm | rs2' | op +%% - CA (Arithmetic): funct6 | rd'/rs1' | funct2 | rs2' | op +%% - CB (Branch): funct3 | offset | rs1' | offset | op +%% - CJ (Jump): funct3 | jump target | op +%% +%% See: RISC-V Instruction Set Manual, Volume I, Chapter 16 +%%----------------------------------------------------------------------------- + +%% Convert register to compressed register encoding (3 bits for x8-x15) +%% Returns the 3-bit encoding (0-7 maps to x8-x15) +-spec reg_to_c_num(riscv_register()) -> 0..7. +reg_to_c_num(s0) -> 0; +reg_to_c_num(fp) -> 0; +reg_to_c_num(s1) -> 1; +reg_to_c_num(a0) -> 2; +reg_to_c_num(a1) -> 3; +reg_to_c_num(a2) -> 4; +reg_to_c_num(a3) -> 5; +reg_to_c_num(a4) -> 6; +reg_to_c_num(a5) -> 7; +reg_to_c_num(Reg) -> error({register_not_in_compressed_set, Reg, 's0/fp, s1, a0-a5'}). + +%% Check if a register is in the compressed register set (s0/fp, s1, a0-a5) +-spec is_compressed_reg(riscv_register()) -> boolean(). +is_compressed_reg(s0) -> true; +is_compressed_reg(fp) -> true; +is_compressed_reg(s1) -> true; +is_compressed_reg(a0) -> true; +is_compressed_reg(a1) -> true; +is_compressed_reg(a2) -> true; +is_compressed_reg(a3) -> true; +is_compressed_reg(a4) -> true; +is_compressed_reg(a5) -> true; +is_compressed_reg(_) -> false. 
+ +%%----------------------------------------------------------------------------- +%% CR-type instruction encoding (Compressed Register format) +%%----------------------------------------------------------------------------- +%% CR format: funct4 (4) | rd/rs1 (5) | rs2 (5) | op (2) +%% Bits: 15-12 11-7 6-2 1-0 +%% Returns the 16-bit instruction as a little-endian binary. + +-spec encode_cr_type( + Opcode :: 0..3, + Rd :: riscv_register(), + Rs2 :: riscv_register(), + Funct4 :: 0..15 +) -> binary(). +encode_cr_type(Opcode, Rd, Rs2, Funct4) -> + RdNum = reg_to_num(Rd), + Rs2Num = reg_to_num(Rs2), + Instr = + (Funct4 bsl 12) bor + (RdNum bsl 7) bor + (Rs2Num bsl 2) bor + Opcode, + <<Instr:16/little>>. + +%%----------------------------------------------------------------------------- +%% CI-type instruction encoding (Compressed Immediate format) +%%----------------------------------------------------------------------------- +%% CI format: funct3 (3) | imm[5] (1) | rd/rs1 (5) | imm[4:0] (5) | op (2) +%% Bits: 15-13 12 11-7 6-2 1-0 +%% Returns the 16-bit instruction as a little-endian binary. + +-spec encode_ci_type( + Opcode :: 0..3, + Rd :: riscv_register(), + Imm :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_ci_type(Opcode, Rd, Imm, Funct3) -> + RdNum = reg_to_num(Rd), + % Extract immediate bits + ImmMasked = Imm band 16#3F, + Imm5 = (ImmMasked bsr 5) band 1, + Imm4_0 = ImmMasked band 16#1F, + Instr = + (Funct3 bsl 13) bor + (Imm5 bsl 12) bor + (RdNum bsl 7) bor + (Imm4_0 bsl 2) bor + Opcode, + <<Instr:16/little>>. + +%%----------------------------------------------------------------------------- +%% CSS-type instruction encoding (Compressed Stack Store format) +%%----------------------------------------------------------------------------- +%% CSS format: funct3 (3) | imm[5:0] (6) | rs2 (5) | op (2) +%% Bits: 15-13 12-7 6-2 1-0 +%% Returns the 16-bit instruction as a little-endian binary. + +-spec encode_css_type( + Opcode :: 0..3, + Rs2 :: riscv_register(), + Imm :: integer(), + Funct3 :: 0..7 +) -> binary().
+encode_css_type(Opcode, Rs2, Imm, Funct3) -> + Rs2Num = reg_to_num(Rs2), + % Extract immediate bits (typically scaled for word access) + ImmMasked = Imm band 16#3F, + Instr = + (Funct3 bsl 13) bor + (ImmMasked bsl 7) bor + (Rs2Num bsl 2) bor + Opcode, + <<Instr:16/little>>. + +%%----------------------------------------------------------------------------- +%% CIW-type instruction encoding (Compressed Wide Immediate format) +%%----------------------------------------------------------------------------- +%% CIW format: funct3 (3) | imm[7:0] (8) | rd' (3) | op (2) +%% Bits: 15-13 12-5 4-2 1-0 +%% Returns the 16-bit instruction as a little-endian binary. + +-spec encode_ciw_type( + Opcode :: 0..3, + Rd :: riscv_register(), + Imm :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_ciw_type(Opcode, Rd, Imm, Funct3) -> + RdNum = reg_to_c_num(Rd), + ImmMasked = Imm band 16#FF, + Instr = + (Funct3 bsl 13) bor + (ImmMasked bsl 5) bor + (RdNum bsl 2) bor + Opcode, + <<Instr:16/little>>. + +%%----------------------------------------------------------------------------- +%% CL-type instruction encoding (Compressed Load format) +%%----------------------------------------------------------------------------- +%% CL format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rd' (3) | op (2) +%% Bits: 15-13 12-10 9-7 6-5 4-2 1-0 +%% Returns the 16-bit instruction as a little-endian binary. + +-spec encode_cl_type( + Opcode :: 0..3, + Rd :: riscv_register(), + Rs1 :: riscv_register(), + Imm :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_cl_type(Opcode, Rd, Rs1, Imm, Funct3) -> + RdNum = reg_to_c_num(Rd), + Rs1Num = reg_to_c_num(Rs1), + % For LW: imm[5:3] goes to bits 12-10, imm[2] goes to bit 6, imm[6] goes to bit 5 + ImmMasked = Imm band 16#7F, + Imm5_3 = (ImmMasked bsr 3) band 7, + Imm2 = (ImmMasked bsr 2) band 1, + Imm6 = (ImmMasked bsr 6) band 1, + Instr = + (Funct3 bsl 13) bor + (Imm5_3 bsl 10) bor + (Rs1Num bsl 7) bor + (Imm2 bsl 6) bor + (Imm6 bsl 5) bor + (RdNum bsl 2) bor + Opcode, + <<Instr:16/little>>.
+ +%%----------------------------------------------------------------------------- +%% CS-type instruction encoding (Compressed Store format) +%%----------------------------------------------------------------------------- +%% CS format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rs2' (3) | op (2) +%% Bits: 15-13 12-10 9-7 6-5 4-2 1-0 +%% Returns the 16-bit instruction as a little-endian binary. + +-spec encode_cs_type( + Opcode :: 0..3, + Rs1 :: riscv_register(), + Rs2 :: riscv_register(), + Imm :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_cs_type(Opcode, Rs1, Rs2, Imm, Funct3) -> + Rs1Num = reg_to_c_num(Rs1), + Rs2Num = reg_to_c_num(Rs2), + % For SW: imm[5:3] goes to bits 12-10, imm[2] goes to bit 6, imm[6] goes to bit 5 + ImmMasked = Imm band 16#7F, + Imm5_3 = (ImmMasked bsr 3) band 7, + Imm2 = (ImmMasked bsr 2) band 1, + Imm6 = (ImmMasked bsr 6) band 1, + Instr = + (Funct3 bsl 13) bor + (Imm5_3 bsl 10) bor + (Rs1Num bsl 7) bor + (Imm2 bsl 6) bor + (Imm6 bsl 5) bor + (Rs2Num bsl 2) bor + Opcode, + <<Instr:16/little>>. + +%%----------------------------------------------------------------------------- +%% CA-type instruction encoding (Compressed Arithmetic format) +%%----------------------------------------------------------------------------- +%% CA format: funct6 (6) | rd'/rs1' (3) | funct2 (2) | rs2' (3) | op (2) +%% Bits: 15-10 9-7 6-5 4-2 1-0 +%% Returns the 16-bit instruction as a little-endian binary. + +-spec encode_ca_type( + Opcode :: 0..3, + Rd :: riscv_register(), + Rs2 :: riscv_register(), + Funct2 :: 0..3, + Funct6 :: 0..63 +) -> binary(). +encode_ca_type(Opcode, Rd, Rs2, Funct2, Funct6) -> + RdNum = reg_to_c_num(Rd), + Rs2Num = reg_to_c_num(Rs2), + Instr = + (Funct6 bsl 10) bor + (RdNum bsl 7) bor + (Funct2 bsl 5) bor + (Rs2Num bsl 2) bor + Opcode, + <<Instr:16/little>>.
+ +%%----------------------------------------------------------------------------- +%% CB-type instruction encoding (Compressed Branch format) +%%----------------------------------------------------------------------------- +%% CB format: funct3 (3) | offset (8) | rs1' (3) | op (2) +%% Bits: 15-13 12-5 4-2 1-0 +%% Offset encoding: offset[8|4:3|7:6|2:1|5] -> bits [12|11:10|6:5|4:3|2] + +-spec encode_cb_type( + Opcode :: 0..3, + Rs1 :: riscv_register(), + Offset :: integer(), + Funct3 :: 0..7 +) -> binary(). +encode_cb_type(Opcode, Rs1, Offset, Funct3) -> + Rs1Num = reg_to_c_num(Rs1), + % Extract offset bits: offset[8|4:3|7:6|2:1|5] -> bits [12|11:10|6:5|4:3|2] + OffsetMasked = Offset band 16#1FF, + Offset8 = (OffsetMasked bsr 8) band 1, + Offset4_3 = (OffsetMasked bsr 3) band 3, + Offset7_6 = (OffsetMasked bsr 6) band 3, + Offset2_1 = (OffsetMasked bsr 1) band 3, + Offset5 = (OffsetMasked bsr 5) band 1, + Instr = + (Funct3 bsl 13) bor + (Offset8 bsl 12) bor + (Offset4_3 bsl 10) bor + (Rs1Num bsl 7) bor + (Offset7_6 bsl 5) bor + (Offset2_1 bsl 3) bor + (Offset5 bsl 2) bor + Opcode, + <>. + +%%----------------------------------------------------------------------------- +%% CJ-type instruction encoding (Compressed Jump format) +%%----------------------------------------------------------------------------- +%% CJ format: funct3 (3) | jump target (11) | op (2) +%% Bits: 15-13 12-2 1-0 +%% Target encoding: target[11|4|9:8|10|6|7|3:1|5] -> bits [12|11|10:9|8|7|6|5:3|2] + +-spec encode_cj_type(Opcode :: 0..3, Offset :: integer(), Funct3 :: 0..7) -> binary(). 
+encode_cj_type(Opcode, Offset, Funct3) ->
+    % Offset is masked to 12 bits (two's complement for negative values);
+    % bit 0 is not encoded.
+    % Extract offset bits: offset[11|4|9:8|10|6|7|3:1|5]
+    OffsetMasked = Offset band 16#FFF,
+    Offset11 = (OffsetMasked bsr 11) band 1,
+    Offset4 = (OffsetMasked bsr 4) band 1,
+    Offset9_8 = (OffsetMasked bsr 8) band 3,
+    Offset10 = (OffsetMasked bsr 10) band 1,
+    Offset6 = (OffsetMasked bsr 6) band 1,
+    Offset7 = (OffsetMasked bsr 7) band 1,
+    Offset3_1 = (OffsetMasked bsr 1) band 7,
+    Offset5 = (OffsetMasked bsr 5) band 1,
+    % Assemble the 11-bit target field as it appears in instruction bits 12-2.
+    OffsetBits =
+        (Offset11 bsl 10) bor
+            (Offset4 bsl 9) bor
+            (Offset9_8 bsl 7) bor
+            (Offset10 bsl 6) bor
+            (Offset6 bsl 5) bor
+            (Offset7 bsl 4) bor
+            (Offset3_1 bsl 1) bor
+            Offset5,
+    Instr =
+        (Funct3 bsl 13) bor
+            (OffsetBits bsl 2) bor
+            Opcode,
+    % Compressed instructions are emitted as one little-endian halfword.
+    <<Instr:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Arithmetic and Logical Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.ADD - Compressed Add
+%% rd = rd + rs2 (both rd and rs2 are full 5-bit registers)
+%% Format: CR-type
+%% Encoding: funct4=1001 (0x9), op=10 (0x2)
+%% rs2 must not be zero: with rs2=x0 the same bits encode C.JALR / C.EBREAK
+%% (see c_jalr/1 and c_ebreak/0, which pass zero as the second register).
+-spec c_add(riscv_register(), riscv_register()) -> binary().
+c_add(Rd, Rs2) when Rs2 =/= zero ->
+    encode_cr_type(16#2, Rd, Rs2, 16#9);
+c_add(_Rd, zero) ->
+    error({invalid_compressed_instruction, c_add, 'rs2 cannot be zero'}).
+
+%% C.MV - Compressed Move (copy register)
+%% rd = rs2 (both are full 5-bit registers)
+%% Format: CR-type
+%% Encoding: funct4=1000 (0x8), op=10 (0x2)
+%% rs2 must not be zero: with rs2=x0 the same bits encode C.JR
+%% (see c_jr/1, which passes zero as the second register).
+-spec c_mv(riscv_register(), riscv_register()) -> binary().
+c_mv(Rd, Rs2) when Rs2 =/= zero ->
+    encode_cr_type(16#2, Rd, Rs2, 16#8);
+c_mv(_Rd, zero) ->
+    error({invalid_compressed_instruction, c_mv, 'rs2 cannot be zero'}).
+
+%% C.SUB - Compressed Subtract
+%% rd' = rd' - rs2' (both use 3-bit compressed register encoding)
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=00, op=01 (0x1)
+-spec c_sub(riscv_register(), riscv_register()) -> binary().
+c_sub(Rd, Rs2) ->
+    encode_ca_type(16#1, Rd, Rs2, 16#0, 16#23).
+
+%% C.AND - bitwise AND on compressed registers
+%% rd' = rd' & rs2'
+%% CA-type: funct6=100011, funct2=11, op=01
+-spec c_and(riscv_register(), riscv_register()) -> binary().
+c_and(Rd, Rs2) ->
+    encode_ca_type(2#01, Rd, Rs2, 2#11, 2#100011).
+
+%% C.OR - bitwise OR on compressed registers
+%% rd' = rd' | rs2'
+%% CA-type: funct6=100011, funct2=10, op=01
+-spec c_or(riscv_register(), riscv_register()) -> binary().
+c_or(Rd, Rs2) ->
+    encode_ca_type(2#01, Rd, Rs2, 2#10, 2#100011).
+
+%% C.XOR - bitwise XOR on compressed registers
+%% rd' = rd' ^ rs2'
+%% CA-type: funct6=100011, funct2=01, op=01
+-spec c_xor(riscv_register(), riscv_register()) -> binary().
+c_xor(Rd, Rs2) ->
+    encode_ca_type(2#01, Rd, Rs2, 2#01, 2#100011).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Immediate Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.ADDI - add a 6-bit signed immediate in place
+%% rd = rd + imm (rd is a full 5-bit register and may not be zero)
+%% CI-type: funct3=000, op=01
+-spec c_addi(riscv_register(), integer()) -> binary().
+c_addi(zero, _Imm) ->
+    % Checked first: rd=zero is rejected whatever the immediate is.
+    error({invalid_compressed_instruction, c_addi, 'rd cannot be zero'});
+c_addi(Rd, Imm) when -32 =< Imm, Imm =< 31 ->
+    encode_ci_type(2#01, Rd, Imm, 2#000);
+c_addi(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.ANDI - AND with a 6-bit signed immediate in place
+%% rd' = rd' & imm (rd' uses the 3-bit compressed register encoding)
+%% CB-type variant: funct3=100, imm[5]=bit 12, funct2=10, imm[4:0]=bits 6:2, op=01
+-spec c_andi(riscv_register(), integer()) -> binary().
+c_andi(Rd, Imm) when Imm >= -32, Imm =< 31 ->
+    RdNum = reg_to_c_num(Rd),
+    % Two's-complement low 6 bits of the signed immediate.
+    ImmMasked = Imm band 16#3F,
+    Imm5 = (ImmMasked bsr 5) band 1,
+    Imm4_0 = ImmMasked band 16#1F,
+    Instr =
+        % funct3=100
+        (16#4 bsl 13) bor
+            (Imm5 bsl 12) bor
+            % funct2=10 selects ANDI among the shift/ANDI group
+            (16#2 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Imm4_0 bsl 2) bor
+            16#1,
+    % Compressed instructions are emitted as one little-endian halfword.
+    <<Instr:16/little>>;
+c_andi(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.LI - Compressed Load Immediate
+%% rd = imm (rd is full 5-bit register, imm is 6-bit signed)
+%% Format: CI-type
+%% Encoding: funct3=010, op=01 (0x1)
+-spec c_li(riscv_register(), integer()) -> binary().
+c_li(Rd, Imm) when Imm >= -32, Imm =< 31 ->
+    encode_ci_type(16#1, Rd, Imm, 16#2);
+c_li(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.LUI - Compressed Load Upper Immediate
+%% rd = imm << 12 (rd is full 5-bit register, imm is 6-bit signed non-zero)
+%% Format: CI-type
+%% Encoding: funct3=011, op=01 (0x1)
+%% rd=sp is rejected because c_addi16sp/1 uses the same funct3/op with rd=sp.
+-spec c_lui(riscv_register(), integer()) -> binary().
+c_lui(Rd, Imm) when Imm >= -32, Imm =< 31, Imm =/= 0, Rd =/= zero, Rd =/= sp ->
+    encode_ci_type(16#1, Rd, Imm, 16#3);
+c_lui(Rd, _Imm) when Rd =:= zero; Rd =:= sp ->
+    error({invalid_compressed_instruction, c_lui, 'rd cannot be zero or sp'});
+c_lui(_Rd, 0) ->
+    error({invalid_compressed_instruction, c_lui, 'immediate cannot be zero'});
+c_lui(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -32, 31}).
+
+%% C.ADDI16SP - Compressed Add Immediate to SP (scaled by 16)
+%% sp = sp + imm (imm is 10-bit signed, must be multiple of 16, non-zero)
+%% Format: CI-type (special encoding)
+%% Encoding: funct3=011, rd/rs1=sp (x2), op=01
+-spec c_addi16sp(integer()) -> binary().
+c_addi16sp(0) ->
+    error({invalid_compressed_instruction, c_addi16sp, 'immediate cannot be zero'});
+c_addi16sp(Imm) when -512 =< Imm, Imm =< 496, (Imm rem 16) =:= 0 ->
+    % Field layout: nzimm[9|4|6|8:7|5] -> instruction bits [12|6|5|4:3|2].
+    % Each source bit is isolated in place, then shifted to its slot in the
+    % 6-bit value handed to the CI encoder.
+    Low10 = Imm band 16#3FF,
+    Packed =
+        ((Low10 band 16#200) bsr 4) bor
+            (Low10 band 16#010) bor
+            ((Low10 band 16#040) bsr 3) bor
+            ((Low10 band 16#180) bsr 6) bor
+            ((Low10 band 16#020) bsr 5),
+    encode_ci_type(2#01, sp, Packed, 2#011);
+c_addi16sp(Imm) when (Imm rem 16) =/= 0 ->
+    error({immediate_not_aligned, Imm, 16});
+c_addi16sp(Imm) ->
+    error({immediate_out_of_range, Imm, -512, 496}).
+
+%% C.ADDI4SPN - form an address relative to sp in a compressed register
+%% rd' = sp + imm (imm is unsigned, non-zero, a multiple of 4, at most 1020)
+%% CIW-type: funct3=000, op=00
+-spec c_addi4spn(riscv_register(), integer()) -> binary().
+c_addi4spn(Rd, Imm) when 4 =< Imm, Imm =< 1020, (Imm rem 4) =:= 0 ->
+    % Field layout: nzuimm[5:4|9:6|2|3] -> instruction bits [12:11|10:7|6|5].
+    Low10 = Imm band 16#3FF,
+    Packed =
+        ((Low10 band 16#030) bsl 2) bor
+            ((Low10 band 16#3C0) bsr 4) bor
+            ((Low10 band 16#004) bsr 1) bor
+            ((Low10 band 16#008) bsr 3),
+    encode_ciw_type(2#00, Rd, Packed, 2#000);
+c_addi4spn(_Rd, 0) ->
+    error({invalid_compressed_instruction, c_addi4spn, 'immediate cannot be zero'});
+c_addi4spn(_Rd, Imm) when (Imm rem 4) =/= 0 ->
+    error({immediate_not_aligned, Imm, 4});
+c_addi4spn(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, 4, 1020}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Shift Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.SLLI - Compressed Shift Left Logical Immediate
+%% rd = rd << shamt (rd is full 5-bit register, shamt is 6-bit unsigned)
+%% Format: CI-type
+%% Encoding: funct3=000, op=10 (0x2)
+%% NOTE(review): the RISC-V C extension reserves shamt[5]=1 on RV32 - confirm
+%% callers of this RV32 assembler never pass Shamt > 31.
+-spec c_slli(riscv_register(), 0..63) -> binary().
+c_slli(Rd, Shamt) when Shamt >= 0, Shamt =< 63, Rd =/= zero ->
+    encode_ci_type(16#2, Rd, Shamt, 16#0);
+c_slli(zero, _Shamt) ->
+    error({invalid_compressed_instruction, c_slli, 'rd cannot be zero'});
+c_slli(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%% C.SRLI - Compressed Shift Right Logical Immediate
+%% rd' = rd' >> shamt (rd' uses 3-bit encoding, shamt is 6-bit unsigned)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, shamt[5]=bit12, funct2=00, shamt[4:0]=bits 6:2, op=01
+%% NOTE(review): the RISC-V C extension reserves shamt[5]=1 on RV32 - confirm
+%% callers of this RV32 assembler never pass Shamt > 31.
+-spec c_srli(riscv_register(), 0..63) -> binary().
+c_srli(Rd, Shamt) when Shamt >= 0, Shamt =< 63 ->
+    RdNum = reg_to_c_num(Rd),
+    Shamt5 = (Shamt bsr 5) band 1,
+    Shamt4_0 = Shamt band 16#1F,
+    Instr =
+        % funct3=100
+        (16#4 bsl 13) bor
+            (Shamt5 bsl 12) bor
+            % funct2=00 selects SRLI among the shift/ANDI group
+            (16#0 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Shamt4_0 bsl 2) bor
+            16#1,
+    % Compressed instructions are emitted as one little-endian halfword.
+    <<Instr:16/little>>;
+c_srli(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%% C.SRAI - Compressed Shift Right Arithmetic Immediate
+%% rd' = rd' >> shamt (sign-extend, rd' uses 3-bit encoding, shamt is 6-bit unsigned)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, shamt[5]=bit12, funct2=01, shamt[4:0]=bits 6:2, op=01
+-spec c_srai(riscv_register(), 0..63) -> binary().
+c_srai(Rd, Shamt) when Shamt >= 0, Shamt =< 63 ->
+    % NOTE(review): the RISC-V C extension reserves shamt[5]=1 on RV32 -
+    % confirm callers of this RV32 assembler never pass Shamt > 31.
+    RdNum = reg_to_c_num(Rd),
+    Shamt5 = (Shamt bsr 5) band 1,
+    Shamt4_0 = Shamt band 16#1F,
+    Instr =
+        % funct3=100
+        (16#4 bsl 13) bor
+            (Shamt5 bsl 12) bor
+            % funct2=01 selects SRAI among the shift/ANDI group
+            (16#1 bsl 10) bor
+            (RdNum bsl 7) bor
+            (Shamt4_0 bsl 2) bor
+            16#1,
+    % Compressed instructions are emitted as one little-endian halfword.
+    <<Instr:16/little>>;
+c_srai(_Rd, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 63}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Load/Store Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.LW - Compressed Load Word
+%% rd' = mem[rs1' + offset] (both use 3-bit encoding, offset is 7-bit unsigned, multiple of 4)
+%% Format: CL-type
+%% Encoding: funct3=010, op=00 (0x0)
+%% Raises offset_not_aligned or offset_out_of_range for an unencodable offset.
+-spec c_lw(riscv_register(), {riscv_register(), integer()}) -> binary().
+c_lw(Rd, {Rs1, Offset}) when
+    Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0
+->
+    encode_cl_type(16#0, Rd, Rs1, Offset, 16#2);
+c_lw(_Rd, {_Rs1, Offset}) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_lw(_Rd, {_Rs1, Offset}) ->
+    error({offset_out_of_range, Offset, 0, 124}).
+
+%% C.SW - Compressed Store Word
+%% mem[rs1' + offset] = rs2' (both use 3-bit encoding, offset is 7-bit unsigned, multiple of 4)
+%% Format: CS-type
+%% Encoding: funct3=110, op=00 (0x0)
+%% Raises offset_not_aligned or offset_out_of_range for an unencodable offset.
+-spec c_sw(riscv_register(), {riscv_register(), integer()}) -> binary().
+c_sw(Rs2, {Rs1, Offset}) when
+    Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0
+->
+    encode_cs_type(16#0, Rs1, Rs2, Offset, 16#6);
+c_sw(_Rs2, {_Rs1, Offset}) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_sw(_Rs2, {_Rs1, Offset}) ->
+    error({offset_out_of_range, Offset, 0, 124}).
+
+%% C.LWSP - Compressed Load Word from Stack Pointer
+%% rd = mem[sp + offset] (rd is full 5-bit register, offset is 8-bit unsigned, multiple of 4)
+%% Format: CI-type (special encoding)
+%% Encoding: funct3=010, op=10 (0x2)
+-spec c_lwsp(riscv_register(), integer()) -> binary().
+c_lwsp(zero, _Offset) ->
+    % Checked first: rd=zero is rejected whatever the offset is.
+    error({invalid_compressed_instruction, c_lwsp, 'rd cannot be zero'});
+c_lwsp(Rd, Offset) when 0 =< Offset, Offset =< 252, (Offset rem 4) =:= 0 ->
+    % Field layout: offset[5|4:2|7:6] -> instruction bits [12|6:4|3:2].
+    % Bits 5:2 already sit where the 6-bit CI immediate wants them; bits 7:6
+    % drop into the two low positions.
+    Low8 = Offset band 16#FF,
+    Packed = (Low8 band 16#3C) bor (Low8 bsr 6),
+    encode_ci_type(2#10, Rd, Packed, 2#010);
+c_lwsp(_Rd, Offset) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_lwsp(_Rd, Offset) ->
+    error({offset_out_of_range, Offset, 0, 252}).
+
+%% C.SWSP - store a word relative to the stack pointer
+%% mem[sp + offset] = rs2 (rs2 is a full 5-bit register; offset is unsigned,
+%% a multiple of 4, at most 252)
+%% CSS-type: funct3=110, op=10
+-spec c_swsp(riscv_register(), integer()) -> binary().
+c_swsp(Rs2, Offset) when 0 =< Offset, Offset =< 252, (Offset rem 4) =:= 0 ->
+    % Field layout: offset[5:2|7:6] -> instruction bits [12:9|8:7].
+    Low8 = Offset band 16#FF,
+    Packed = (Low8 band 16#3C) bor (Low8 bsr 6),
+    encode_css_type(2#10, Rs2, Packed, 2#110);
+c_swsp(_Rs2, Offset) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_swsp(_Rs2, Offset) ->
+    error({offset_out_of_range, Offset, 0, 252}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Branch and Jump Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.BEQZ - branch when a compressed register is zero
+%% if (rs1' == 0) pc += offset (offset is 9-bit signed and even)
+%% CB-type: funct3=110, op=01
+-spec c_beqz(riscv_register(), integer()) -> binary().
+c_beqz(Rs1, Offset) when -256 =< Offset, Offset =< 254, (Offset rem 2) =:= 0 ->
+    encode_cb_type(2#01, Rs1, Offset, 2#110);
+c_beqz(_Rs1, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_beqz(_Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -256, 254}).
+
+%% C.BNEZ - branch when a compressed register is non-zero
+%% if (rs1' != 0) pc += offset (offset is 9-bit signed and even)
+%% CB-type: funct3=111, op=01
+-spec c_bnez(riscv_register(), integer()) -> binary().
+c_bnez(Rs1, Offset) when -256 =< Offset, Offset =< 254, (Offset rem 2) =:= 0 ->
+    encode_cb_type(2#01, Rs1, Offset, 2#111);
+c_bnez(_Rs1, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_bnez(_Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -256, 254}).
+
+%% C.J - unconditional pc-relative jump
+%% pc += offset (offset is 12-bit signed and even)
+%% CJ-type: funct3=101, op=01
+-spec c_j(integer()) -> binary().
+c_j(Offset) when -2048 =< Offset, Offset =< 2046, (Offset rem 2) =:= 0 ->
+    encode_cj_type(2#01, Offset, 2#101);
+c_j(Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_j(Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2046}).
+
+%% C.JAL - pc-relative jump and link (RV32C only; the link register is ra)
+%% ra = pc + 2; pc += offset (offset is 12-bit signed and even)
+%% CJ-type: funct3=001, op=01
+-spec c_jal(integer()) -> binary().
+c_jal(Offset) when -2048 =< Offset, Offset =< 2046, (Offset rem 2) =:= 0 ->
+    encode_cj_type(2#01, Offset, 2#001);
+c_jal(Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+c_jal(Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2046}).
+
+%% C.JR - jump to the address held in a register
+%% pc = rs1 (rs1 is a full 5-bit register and may not be zero)
+%% CR-type: funct4=1000, rs2 field=x0, op=10
+-spec c_jr(riscv_register()) -> binary().
+c_jr(zero) ->
+    error({invalid_compressed_instruction, c_jr, 'rs1 cannot be zero'});
+c_jr(Rs1) ->
+    encode_cr_type(2#10, Rs1, zero, 2#1000).
+
+%% C.JALR - jump to the address held in a register and link
+%% ra = pc + 2; pc = rs1 (rs1 is a full 5-bit register and may not be zero)
+%% CR-type: funct4=1001, rs2 field=x0, op=10
+-spec c_jalr(riscv_register()) -> binary().
+c_jalr(zero) ->
+    error({invalid_compressed_instruction, c_jalr, 'rs1 cannot be zero'});
+c_jalr(Rs1) ->
+    encode_cr_type(2#10, Rs1, zero, 2#1001).
+
+%% C.EBREAK - environment breakpoint
+%% Raises a breakpoint exception.
+%% CR-type: funct4=1001, both register fields x0, op=10
+-spec c_ebreak() -> binary().
+c_ebreak() ->
+    encode_cr_type(2#10, zero, zero, 2#1001).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Pseudo-instructions
+%%-----------------------------------------------------------------------------
+
+%% C.NOP - no operation
+%% Equivalent to: c.addi x0, 0
+%% CI-type: funct3=000, rd/rs1=x0, imm=0, op=01
+%% Encoded directly because c_addi/2 rejects rd=zero.
+-spec c_nop() -> binary().
+c_nop() ->
+    encode_ci_type(2#01, zero, 0, 2#000).
diff --git a/tests/libs/jit/jit_riscv32_asm_tests.erl b/tests/libs/jit/jit_riscv32_asm_tests.erl index 94e4942db5..28a0f4fa58 100644 --- a/tests/libs/jit/jit_riscv32_asm_tests.erl +++ b/tests/libs/jit/jit_riscv32_asm_tests.erl @@ -36,10 +36,10 @@ add_test_() -> <<16#00628533:32/little>>, "add a0, t0, t1", jit_riscv32_asm:add(a0, t0, t1) ), ?_assertAsmEqual( - <<16#00a585b3:32/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0) + <<16#95aa:16/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0) ), ?_assertAsmEqual( - <<16#01e787b3:32/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5) + <<16#97fa:16/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5) ) ]. @@ -49,7 +49,7 @@ sub_test_() -> <<16#40628533:32/little>>, "sub a0, t0, t1", jit_riscv32_asm:sub(a0, t0, t1) ), ?_assertAsmEqual( - <<16#40a585b3:32/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0) + <<16#8d89:16/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0) ), ?_assertAsmEqual( <<16#41e787b3:32/little>>, "sub a5, a5, t5", jit_riscv32_asm:sub(a5, a5, t5) @@ -62,7 +62,7 @@ and_test_() -> <<16#0062f533:32/little>>, "and a0, t0, t1", jit_riscv32_asm:and_(a0, t0, t1) ), ?_assertAsmEqual( - <<16#00c5f5b3:32/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2) + <<16#8df1:16/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2) ) ]. @@ -72,7 +72,7 @@ or_test_() -> <<16#0062e533:32/little>>, "or a0, t0, t1", jit_riscv32_asm:or_(a0, t0, t1) ), ?_assertAsmEqual( - <<16#00c5e5b3:32/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2) + <<16#8dd1:16/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2) ) ]. @@ -82,7 +82,7 @@ xor_test_() -> <<16#0062c533:32/little>>, "xor a0, t0, t1", jit_riscv32_asm:xor_(a0, t0, t1) ), ?_assertAsmEqual( - <<16#00c5c5b3:32/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2) + <<16#8db1:16/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2) ) ]. 
@@ -146,7 +146,7 @@ addi_test_() -> <<16#01428513:32/little>>, "addi a0, t0, 20", jit_riscv32_asm:addi(a0, t0, 20) ), ?_assertAsmEqual( - <<16#fff58593:32/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1) + <<16#15fd:16/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1) ), ?_assertAsmEqual( <<16#7ff00513:32/little>>, "addi a0, zero, 2047", jit_riscv32_asm:addi(a0, zero, 2047) @@ -162,7 +162,7 @@ andi_test_() -> <<16#0ff2f513:32/little>>, "andi a0, t0, 255", jit_riscv32_asm:andi(a0, t0, 255) ), ?_assertAsmEqual( - <<16#00f5f593:32/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15) + <<16#89bd:16/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15) ) ]. @@ -192,7 +192,7 @@ slli_test_() -> <<16#00329513:32/little>>, "slli a0, t0, 3", jit_riscv32_asm:slli(a0, t0, 3) ), ?_assertAsmEqual( - <<16#01f59593:32/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31) + <<16#05fe:16/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31) ), ?_assertAsmEqual( <<16#00051513:32/little>>, "slli a0, a0, 0", jit_riscv32_asm:slli(a0, a0, 0) @@ -205,7 +205,7 @@ srli_test_() -> <<16#0032d513:32/little>>, "srli a0, t0, 3", jit_riscv32_asm:srli(a0, t0, 3) ), ?_assertAsmEqual( - <<16#01f5d593:32/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31) + <<16#81fd:16/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31) ) ]. @@ -215,7 +215,7 @@ srai_test_() -> <<16#4032d513:32/little>>, "srai a0, t0, 3", jit_riscv32_asm:srai(a0, t0, 3) ), ?_assertAsmEqual( - <<16#41f5d593:32/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31) + <<16#85fd:16/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31) ) ]. 
@@ -245,9 +245,9 @@ sltiu_test_() -> lw_test_() -> [ - ?_assertAsmEqual(<<16#00052503:32/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)), - ?_assertAsmEqual(<<16#00052503:32/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)), - ?_assertAsmEqual(<<16#00452583:32/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)), + ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)), + ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)), + ?_assertAsmEqual(<<16#414c:16/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)), ?_assertAsmEqual( <<16#ffc52503:32/little>>, "lw a0, -4(a0)", jit_riscv32_asm:lw(a0, a0, -4) ), @@ -294,9 +294,9 @@ lbu_test_() -> sw_test_() -> [ - ?_assertAsmEqual(<<16#00b52023:32/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)), - ?_assertAsmEqual(<<16#00b52023:32/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)), - ?_assertAsmEqual(<<16#00b52223:32/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)), + ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)), + ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)), + ?_assertAsmEqual(<<16#c14c:16/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)), ?_assertAsmEqual(<<16#feb52e23:32/little>>, "sw a1, -4(a0)", jit_riscv32_asm:sw(a0, a1, -4)) ]. @@ -327,7 +327,7 @@ beq_test_() -> <<16#feb50ee3:32/little>>, "beq a0, a1, .-4", jit_riscv32_asm:beq(a0, a1, -4) ), ?_assertAsmEqual( - <<16#00050063:32/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0) + <<16#c101:16/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0) ) ]. 
@@ -388,13 +388,13 @@ bgeu_test_() -> jal_test_() -> [ ?_assertAsmEqual( - <<16#008000ef:32/little>>, "jal .+8", jit_riscv32_asm:jal(ra, 8) + <<16#2021:16/little>>, "jal .+8", jit_riscv32_asm:jal(ra, 8) ), ?_assertAsmEqual( - <<16#ffdff0ef:32/little>>, "jal .-4", jit_riscv32_asm:jal(ra, -4) + <<16#3ff5:16/little>>, "jal .-4", jit_riscv32_asm:jal(ra, -4) ), ?_assertAsmEqual( - <<16#00000517:32/little, 16#000500e7:32/little>>, + <<16#00000517:32/little, 16#9502:16/little>>, "auipc a0, 0\njalr a0", jit_riscv32_asm:call(a0, 0) ), @@ -407,8 +407,8 @@ jal_test_() -> jalr_test_() -> [ - ?_assertAsmEqual(<<16#000500e7:32/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0, 0)), - ?_assertAsmEqual(<<16#000500e7:32/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0)), + ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0, 0)), + ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0)), ?_assertAsmEqual(<<16#004500e7:32/little>>, "jalr 4(a0)", jit_riscv32_asm:jalr(ra, a0, 4)) ]. @@ -418,9 +418,9 @@ jalr_test_() -> lui_test_() -> [ - ?_assertAsmEqual(<<16#000125b7:32/little>>, "lui a1, 18", jit_riscv32_asm:lui(a1, 18)), - ?_assertAsmEqual(<<16#00001537:32/little>>, "lui a0, 1", jit_riscv32_asm:lui(a0, 1)), - ?_assertAsmEqual(<<16#fffff5b7:32/little>>, "lui a1, 0xfffff", jit_riscv32_asm:lui(a1, -1)) + ?_assertAsmEqual(<<16#65c9:16/little>>, "lui a1, 18", jit_riscv32_asm:lui(a1, 18)), + ?_assertAsmEqual(<<16#6505:16/little>>, "lui a0, 1", jit_riscv32_asm:lui(a0, 1)), + ?_assertAsmEqual(<<16#75fd:16/little>>, "lui a1, 0xfffff", jit_riscv32_asm:lui(a1, -1)) ]. auipc_test_() -> @@ -435,13 +435,14 @@ auipc_test_() -> nop_test_() -> [ - ?_assertAsmEqual(<<16#00000013:32/little>>, "nop", jit_riscv32_asm:nop()) + % We want a 4-byte NOP for padding, so use .option norvc to force non-compressed + ?_assertAsmEqual(<<16#00000013:32/little>>, ".option norvc\nnop", jit_riscv32_asm:nop()) ]. 
li_small_test_() -> [ - ?_assertAsmEqual(<<16#00a00513:32/little>>, "li a0, 10", jit_riscv32_asm:li(a0, 10)), - ?_assertAsmEqual(<<16#fff00513:32/little>>, "li a0, -1", jit_riscv32_asm:li(a0, -1)), + ?_assertAsmEqual(<<16#4529:16/little>>, "li a0, 10", jit_riscv32_asm:li(a0, 10)), + ?_assertAsmEqual(<<16#557d:16/little>>, "li a0, -1", jit_riscv32_asm:li(a0, -1)), ?_assertAsmEqual(<<16#7ff00513:32/little>>, "li a0, 2047", jit_riscv32_asm:li(a0, 2047)) ]. @@ -449,19 +450,19 @@ li_large_test_() -> [ % 0x12345 = 74565 - requires lui + addi ?_assertAsmEqual( - <<16#00012537:32/little, 16#34550513:32/little>>, + <<16#6549:16/little, 16#34550513:32/little>>, "lui a0, 0x12\naddi a0, a0, 0x345", jit_riscv32_asm:li(a0, 16#12345) ), % 0x80000000 = -2147483648 (minimum 32-bit signed) ?_assertAsmEqual( - <<16#800005b7:32/little, 16#00058593:32/little>>, - "lui a1, 0x80000\naddi a1, a1, 0", + <<16#800005b7:32/little, 16#0581:16/little>>, + "lui a1, 0x80000\nc.addi a1, 0", jit_riscv32_asm:li(a1, -16#80000000) ), % 0x7FFFFFFF = 2147483647 (maximum 32-bit signed) ?_assertAsmEqual( - <<16#80000537:32/little, 16#fff50513:32/little>>, + <<16#80000537:32/little, 16#157d:16/little>>, "lui a0, 0x80000\naddi a0, a0, -1", jit_riscv32_asm:li(a0, 16#7FFFFFFF) ) @@ -469,8 +470,8 @@ li_large_test_() -> mv_test_() -> [ - ?_assertAsmEqual(<<16#00050513:32/little>>, "mv a0, a0", jit_riscv32_asm:mv(a0, a0)), - ?_assertAsmEqual(<<16#00058593:32/little>>, "mv a1, a1", jit_riscv32_asm:mv(a1, a1)) + ?_assertAsmEqual(<<16#852a:16/little>>, "mv a0, a0", jit_riscv32_asm:mv(a0, a0)), + ?_assertAsmEqual(<<16#85ae:16/little>>, "mv a1, a1", jit_riscv32_asm:mv(a1, a1)) ]. not_test_() -> @@ -488,22 +489,22 @@ neg_test_() -> j_test_() -> [ ?_assertAsmEqual( - <<16#0080006f:32/little>>, "j .+8", jit_riscv32_asm:j(8) + <<16#a021:16/little>>, "j .+8", jit_riscv32_asm:j(8) ), ?_assertAsmEqual( - <<16#ffdff06f:32/little>>, "j .-4", jit_riscv32_asm:j(-4) + <<16#bff5:16/little>>, "j .-4", jit_riscv32_asm:j(-4) ) ]. 
jr_test_() -> [ - ?_assertAsmEqual(<<16#00050067:32/little>>, "jr a0", jit_riscv32_asm:jr(a0)), - ?_assertAsmEqual(<<16#00028067:32/little>>, "jr t0", jit_riscv32_asm:jr(t0)) + ?_assertAsmEqual(<<16#8502:16/little>>, "jr a0", jit_riscv32_asm:jr(a0)), + ?_assertAsmEqual(<<16#8282:16/little>>, "jr t0", jit_riscv32_asm:jr(t0)) ]. ret_test_() -> [ - ?_assertAsmEqual(<<16#00008067:32/little>>, "ret", jit_riscv32_asm:ret()) + ?_assertAsmEqual(<<16#8082:16/little>>, "ret", jit_riscv32_asm:ret()) ]. %%----------------------------------------------------------------------------- @@ -530,24 +531,370 @@ mul_test_() -> %% System instruction tests %%----------------------------------------------------------------------------- -ebreak_test_() -> +c_ebreak_test_() -> [ ?_assertAsmEqual( - <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:ebreak() + <<16#9002:16/little>>, "c.ebreak", jit_riscv32_asm:c_ebreak() ) ]. -bkpt_test_() -> +%%----------------------------------------------------------------------------- +%% C Extension - Arithmetic and Logical instruction tests +%%----------------------------------------------------------------------------- + +c_add_test_() -> + [ + ?_assertAsmEqual( + <<16#9532:16/little>>, "c.add a0, a2", jit_riscv32_asm:c_add(a0, a2) + ), + ?_assertAsmEqual( + <<16#95be:16/little>>, "c.add a1, a5", jit_riscv32_asm:c_add(a1, a5) + ), + ?_assertAsmEqual( + <<16#9522:16/little>>, "c.add a0, s0", jit_riscv32_asm:c_add(a0, s0) + ) + ]. + +c_mv_test_() -> + [ + ?_assertAsmEqual( + <<16#8532:16/little>>, "c.mv a0, a2", jit_riscv32_asm:c_mv(a0, a2) + ), + ?_assertAsmEqual( + <<16#85be:16/little>>, "c.mv a1, a5", jit_riscv32_asm:c_mv(a1, a5) + ), + ?_assertAsmEqual( + <<16#842a:16/little>>, "c.mv s0, a0", jit_riscv32_asm:c_mv(s0, a0) + ) + ]. 
+
+% c_sub/2: expected 16-bit little-endian encodings, including rd' == rs2'.
+c_sub_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d09:16/little>>, "c.sub a0, a0", jit_riscv32_asm:c_sub(a0, a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8d8d:16/little>>, "c.sub a1, a1", jit_riscv32_asm:c_sub(a1, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c0d:16/little>>, "c.sub s0, a1", jit_riscv32_asm:c_sub(s0, a1)
+        )
+    ].
+
+% c_and/2: expected encodings for three destination registers with rs2' = a1.
+c_and_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d6d:16/little>>, "c.and a0, a1", jit_riscv32_asm:c_and(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fed:16/little>>, "c.and a5, a1", jit_riscv32_asm:c_and(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c6d:16/little>>, "c.and s0, a1", jit_riscv32_asm:c_and(s0, a1)
+        )
+    ].
+
+% c_or/2: same register combinations as c_and_test_/0.
+c_or_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d4d:16/little>>, "c.or a0, a1", jit_riscv32_asm:c_or(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fcd:16/little>>, "c.or a5, a1", jit_riscv32_asm:c_or(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c4d:16/little>>, "c.or s0, a1", jit_riscv32_asm:c_or(s0, a1)
+        )
+    ].
+
+% c_xor/2: same register combinations as c_and_test_/0.
+c_xor_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8d2d:16/little>>, "c.xor a0, a1", jit_riscv32_asm:c_xor(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fad:16/little>>, "c.xor a5, a1", jit_riscv32_asm:c_xor(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c2d:16/little>>, "c.xor s0, a1", jit_riscv32_asm:c_xor(s0, a1)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Immediate instruction tests
+%%-----------------------------------------------------------------------------
+
+% c_addi/2: positive and negative immediates.
+c_addi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0511:16/little>>, "c.addi a0, 4", jit_riscv32_asm:c_addi(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#15fd:16/little>>, "c.addi a1, -1", jit_riscv32_asm:c_addi(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#0541:16/little>>, "c.addi a0, 16", jit_riscv32_asm:c_addi(a0, 16)
+        ),
+        ?_assertAsmEqual(
+            <<16#1561:16/little>>, "c.addi a0, -8", jit_riscv32_asm:c_addi(a0, -8)
+        )
+    ].
+
+% c_andi/2: positive and negative immediates.
+c_andi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#8929:16/little>>, "c.andi a0, 10", jit_riscv32_asm:c_andi(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#99fd:16/little>>, "c.andi a1, -1", jit_riscv32_asm:c_andi(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8941:16/little>>, "c.andi a0, 16", jit_riscv32_asm:c_andi(a0, 16)
+        )
+    ].
+
+% c_li/2: includes -32, the lowest immediate c_li/2 accepts.
+c_li_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#4529:16/little>>, "c.li a0, 10", jit_riscv32_asm:c_li(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#55fd:16/little>>, "c.li a1, -1", jit_riscv32_asm:c_li(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#4505:16/little>>, "c.li a0, 1", jit_riscv32_asm:c_li(a0, 1)
+        ),
+        ?_assertAsmEqual(
+            <<16#5501:16/little>>, "c.li a0, -32", jit_riscv32_asm:c_li(a0, -32)
+        )
+    ].
+
+% c_lui/2: the -1 case is written 0xfffff in the assembler text.
+c_lui_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#6529:16/little>>, "c.lui a0, 10", jit_riscv32_asm:c_lui(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#75fd:16/little>>, "c.lui a1, 0xfffff", jit_riscv32_asm:c_lui(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#6505:16/little>>, "c.lui a0, 1", jit_riscv32_asm:c_lui(a0, 1)
+        )
+    ].
+
+% c_addi16sp/1: includes -512, the lowest immediate c_addi16sp/1 accepts.
+c_addi16sp_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#6141:16/little>>, "c.addi16sp sp, 16", jit_riscv32_asm:c_addi16sp(16)
+        ),
+        ?_assertAsmEqual(
+            <<16#7101:16/little>>, "c.addi16sp sp, -512", jit_riscv32_asm:c_addi16sp(-512)
+        ),
+        ?_assertAsmEqual(
+            <<16#6161:16/little>>, "c.addi16sp sp, 80", jit_riscv32_asm:c_addi16sp(80)
+        )
+    ].
+
+% c_addi4spn/2: covers both ends of the accepted range (4 and 1020).
+c_addi4spn_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0048:16/little>>, "c.addi4spn a0, sp, 4", jit_riscv32_asm:c_addi4spn(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#1010:16/little>>, "c.addi4spn a2, sp, 32", jit_riscv32_asm:c_addi4spn(a2, 32)
+        ),
+        ?_assertAsmEqual(
+            <<16#1ffc:16/little>>,
+            "c.addi4spn a5, sp, 1020",
+            jit_riscv32_asm:c_addi4spn(a5, 1020)
+        )
+    ].
+ +%%----------------------------------------------------------------------------- +%% C Extension - Shift instruction tests +%%----------------------------------------------------------------------------- + +c_slli_test_() -> [ - % bkpt is an ARM compatibility wrapper that generates ebreak - % The immediate parameter is ignored ?_assertAsmEqual( - <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(0) + <<16#050e:16/little>>, "c.slli a0, 3", jit_riscv32_asm:c_slli(a0, 3) ), ?_assertAsmEqual( - <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(42) + <<16#05fe:16/little>>, "c.slli a1, 31", jit_riscv32_asm:c_slli(a1, 31) ), ?_assertAsmEqual( - <<16#00100073:32/little>>, "ebreak", jit_riscv32_asm:bkpt(255) + <<16#0542:16/little>>, "c.slli a0, 16", jit_riscv32_asm:c_slli(a0, 16) + ) + ]. + +c_srli_test_() -> + [ + ?_assertAsmEqual( + <<16#810d:16/little>>, "c.srli a0, 3", jit_riscv32_asm:c_srli(a0, 3) + ), + ?_assertAsmEqual( + <<16#81fd:16/little>>, "c.srli a1, 31", jit_riscv32_asm:c_srli(a1, 31) + ), + ?_assertAsmEqual( + <<16#8141:16/little>>, "c.srli a0, 16", jit_riscv32_asm:c_srli(a0, 16) + ) + ]. + +c_srai_test_() -> + [ + ?_assertAsmEqual( + <<16#850d:16/little>>, "c.srai a0, 3", jit_riscv32_asm:c_srai(a0, 3) + ), + ?_assertAsmEqual( + <<16#85fd:16/little>>, "c.srai a1, 31", jit_riscv32_asm:c_srai(a1, 31) + ), + ?_assertAsmEqual( + <<16#8541:16/little>>, "c.srai a0, 16", jit_riscv32_asm:c_srai(a0, 16) + ) + ]. + +%%----------------------------------------------------------------------------- +%% C Extension - Load/Store instruction tests +%%----------------------------------------------------------------------------- + +c_lw_test_() -> + [ + ?_assertAsmEqual( + <<16#4188:16/little>>, "c.lw a0, 0(a1)", jit_riscv32_asm:c_lw(a0, {a1, 0}) + ), + ?_assertAsmEqual( + <<16#41d8:16/little>>, "c.lw a4, 4(a1)", jit_riscv32_asm:c_lw(a4, {a1, 4}) + ), + ?_assertAsmEqual( + <<16#5ffc:16/little>>, "c.lw a5, 124(a5)", jit_riscv32_asm:c_lw(a5, {a5, 124}) + ) + ]. 
+ +c_sw_test_() -> + [ + ?_assertAsmEqual( + <<16#c188:16/little>>, "c.sw a0, 0(a1)", jit_riscv32_asm:c_sw(a0, {a1, 0}) + ), + ?_assertAsmEqual( + <<16#c1d8:16/little>>, "c.sw a4, 4(a1)", jit_riscv32_asm:c_sw(a4, {a1, 4}) + ), + ?_assertAsmEqual( + <<16#dffc:16/little>>, "c.sw a5, 124(a5)", jit_riscv32_asm:c_sw(a5, {a5, 124}) + ) + ]. + +c_lwsp_test_() -> + [ + ?_assertAsmEqual( + <<16#4502:16/little>>, "c.lwsp a0, 0(sp)", jit_riscv32_asm:c_lwsp(a0, 0) + ), + ?_assertAsmEqual( + <<16#4512:16/little>>, "c.lwsp a0, 4(sp)", jit_riscv32_asm:c_lwsp(a0, 4) + ), + ?_assertAsmEqual( + <<16#50fe:16/little>>, "c.lwsp ra, 252(sp)", jit_riscv32_asm:c_lwsp(ra, 252) + ) + ]. + +c_swsp_test_() -> + [ + ?_assertAsmEqual( + <<16#c02a:16/little>>, "c.swsp a0, 0(sp)", jit_riscv32_asm:c_swsp(a0, 0) + ), + ?_assertAsmEqual( + <<16#c22a:16/little>>, "c.swsp a0, 4(sp)", jit_riscv32_asm:c_swsp(a0, 4) + ), + ?_assertAsmEqual( + <<16#dfe6:16/little>>, "c.swsp s9, 252(sp)", jit_riscv32_asm:c_swsp(s9, 252) + ) + ]. + +%%----------------------------------------------------------------------------- +%% C Extension - Branch and Jump instruction tests +%%----------------------------------------------------------------------------- + +c_beqz_test_() -> + [ + ?_assertAsmEqual( + <<16#c111:16/little>>, "c.beqz a0, .+4", jit_riscv32_asm:c_beqz(a0, 4) + ), + ?_assertAsmEqual( + <<16#dced:16/little>>, "c.beqz s1, .-6", jit_riscv32_asm:c_beqz(s1, -6) + ), + ?_assertAsmEqual( + <<16#c101:16/little>>, "c.beqz a0, .", jit_riscv32_asm:c_beqz(a0, 0) + ) + ]. + +c_bnez_test_() -> + [ + ?_assertAsmEqual( + <<16#e111:16/little>>, "c.bnez a0, .+4", jit_riscv32_asm:c_bnez(a0, 4) + ), + ?_assertAsmEqual( + <<16#fced:16/little>>, "c.bnez s1, .-6", jit_riscv32_asm:c_bnez(s1, -6) + ), + ?_assertAsmEqual( + <<16#e101:16/little>>, "c.bnez a0, .", jit_riscv32_asm:c_bnez(a0, 0) + ) + ]. 
+ +c_j_test_() -> + [ + ?_assertAsmEqual( + <<16#a011:16/little>>, "c.j .+4", jit_riscv32_asm:c_j(4) + ), + ?_assertAsmEqual( + <<16#bfed:16/little>>, "c.j .-6", jit_riscv32_asm:c_j(-6) + ), + ?_assertAsmEqual( + <<16#a001:16/little>>, "c.j .", jit_riscv32_asm:c_j(0) + ) + ]. + +c_jal_test_() -> + [ + ?_assertAsmEqual( + <<16#2021:16/little>>, "c.jal .+8", jit_riscv32_asm:c_jal(8) + ), + ?_assertAsmEqual( + <<16#3ff5:16/little>>, "c.jal .-4", jit_riscv32_asm:c_jal(-4) + ), + ?_assertAsmEqual( + <<16#2001:16/little>>, "c.jal .", jit_riscv32_asm:c_jal(0) + ) + ]. + +c_jr_test_() -> + [ + ?_assertAsmEqual( + <<16#8502:16/little>>, "c.jr a0", jit_riscv32_asm:c_jr(a0) + ), + ?_assertAsmEqual( + <<16#8402:16/little>>, "c.jr s0", jit_riscv32_asm:c_jr(s0) + ), + ?_assertAsmEqual( + <<16#8082:16/little>>, "c.jr ra", jit_riscv32_asm:c_jr(ra) + ) + ]. + +c_jalr_test_() -> + [ + ?_assertAsmEqual( + <<16#9502:16/little>>, "c.jalr a0", jit_riscv32_asm:c_jalr(a0) + ), + ?_assertAsmEqual( + <<16#9402:16/little>>, "c.jalr s0", jit_riscv32_asm:c_jalr(s0) + ) + ]. + +%%----------------------------------------------------------------------------- +%% C Extension - Pseudo-instruction tests +%%----------------------------------------------------------------------------- + +c_nop_test_() -> + [ + ?_assertAsmEqual( + <<16#0001:16/little>>, "c.nop", jit_riscv32_asm:c_nop() ) ]. 
diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index 475e96bd5d..bc31df75b8 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -41,19 +41,19 @@ call_primitive_0_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 00062f83 lw t6,0(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 000f80e7 jalr t6\n" - " 1c: 00050f93 mv t6,a0\n" - " 20: 00012083 lw ra,0(sp)\n" - " 24: 00412503 lw a0,4(sp)\n" - " 28: 00812583 lw a1,8(sp)\n" - " 2c: 00c12603 lw a2,12(sp)\n" - " 30: 01010113 addi sp,sp,16" + " 0: 00062f83 lw t6,0(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9f82 jalr t6\n" + " 10: 8faa mv t6,a0\n" + " 12: 4082 lw ra,0(sp)\n" + " 14: 4512 lw a0,4(sp)\n" + " 16: 45a2 lw a1,8(sp)\n" + " 18: 4632 lw a2,12(sp)\n" + " 1a: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -64,19 +64,19 @@ call_primitive_1_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 00462f83 lw t6,4(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 000f80e7 jalr t6\n" - " 1c: 00050f93 mv t6,a0\n" - " 20: 00012083 lw ra,0(sp)\n" - " 24: 00412503 lw a0,4(sp)\n" - " 28: 00812583 lw a1,8(sp)\n" - " 2c: 00c12603 lw a2,12(sp)\n" - " 30: 01010113 addi sp,sp,16" + " 0: 00462f83 lw t6,4(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9f82 jalr t6\n" + " 10: 8faa mv t6,a0\n" + " 12: 4082 lw ra,0(sp)\n" + " 14: 4512 lw a0,4(sp)\n" + " 16: 45a2 lw a1,8(sp)\n" + " 18: 4632 lw a2,12(sp)\n" + " 1a: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -87,22 +87,22 @@ call_primitive_2_args_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 00862f83 lw t6,8(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 02a00593 li a1,42\n" - " 1c: 02b00613 li a2,43\n" - " 20: 02c00693 li a3,44\n" - " 24: 000f80e7 jalr t6\n" - " 28: 00050f93 mv t6,a0\n" - " 2c: 00012083 lw ra,0(sp)\n" - " 30: 00412503 lw a0,4(sp)\n" - " 34: 00812583 lw a1,8(sp)\n" - " 38: 00c12603 lw a2,12(sp)\n" - " 3c: 01010113 addi sp,sp,16" + " 0: 00862f83 lw t6,8(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 02a00593 li a1,42\n" + " 12: 02b00613 li a2,43\n" + " 16: 02c00693 li a3,44\n" + " 1a: 9f82 jalr t6\n" + " 1c: 8faa mv t6,a0\n" + " 1e: 4082 lw ra,0(sp)\n" + " 20: 4512 lw a0,4(sp)\n" + " 22: 45a2 lw a1,8(sp)\n" + " 24: 4632 lw a2,12(sp)\n" + " 26: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -112,11 +112,11 @@ call_primitive_5_args_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01462f83 lw t6,20(a2)\n" - " 4: 01000613 li a2,16\n" - " 8: 02000693 li a3,32\n" - " c: 00200713 li a4,2\n" - " 10: 000f8067 jr t6" + " 0: 01462f83 lw t6,20(a2)\n" + " 4: 4641 li a2,16\n" + " 6: 02000693 li a3,32\n" + " a: 4709 li a4,2\n" + " c: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -134,30 +134,30 @@ call_primitive_6_args_test() -> Stream = ?BACKEND:stream(State4), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 00300f13 li t5,3\n" - " 8: ffff4f13 not t5,t5\n" - " c: 01efffb3 and t6,t6,t5\n" - " 10: 01c52f03 lw t5,28(a0)\n" - " 14: 0b800e93 li t4,184\n" - " 18: 00ce8eb3 add t4,t4,a2\n" - " 1c: 000eae83 lw t4,0(t4)\n" - " 20: ff010113 addi sp,sp,-16\n" - " 24: 00112023 sw ra,0(sp)\n" - " 28: 00a12223 sw a0,4(sp)\n" - " 2c: 00b12423 sw a1,8(sp)\n" - " 30: 00c12623 sw a2,12(sp)\n" - " 34: 000f8613 mv a2,t6\n" - " 38: 04000693 li a3,64\n" - " 3c: 00800713 li a4,8\n" - " 40: 000f0793 mv a5,t5\n" - " 44: 000e80e7 jalr t4\n" - " 48: 00050e93 mv t4,a0\n" - " 4c: 00012083 lw ra,0(sp)\n" - " 50: 00412503 lw a0,4(sp)\n" - " 54: 00812583 lw a1,8(sp)\n" - " 58: 00c12603 lw a2,12(sp)\n" - " 5c: 01010113 addi sp,sp,16" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 4f0d li t5,3\n" + " 6: ffff4f13 not t5,t5\n" + " a: 01efffb3 and t6,t6,t5\n" + " e: 01c52f03 lw t5,28(a0)\n" + " 12: 0b800e93 li t4,184\n" + " 16: 9eb2 add t4,t4,a2\n" + " 18: 000eae83 lw t4,0(t4)\n" + " 1c: 1141 addi sp,sp,-16\n" + " 1e: c006 sw ra,0(sp)\n" + " 20: c22a sw a0,4(sp)\n" + " 22: c42e sw a1,8(sp)\n" + " 24: c632 sw a2,12(sp)\n" + " 26: 867e mv a2,t6\n" + " 28: 04000693 li a3,64\n" + " 2c: 4721 li a4,8\n" + " 2e: 87fa mv a5,t5\n" + " 30: 9e82 jalr t4\n" + " 32: 8eaa mv t4,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -174,72 +174,72 @@ call_primitive_extended_regs_test() -> ?BACKEND:assert_all_native_free(State6), Stream = ?BACKEND:stream(State6), Dump = << - " 0: 04862f83 lw t6,72(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 01300593 li a1,19\n" - " 1c: 000f80e7 jalr t6\n" - " 20: 00050f93 mv t6,a0\n" - " 24: 00012083 lw ra,0(sp)\n" - " 28: 00412503 lw a0,4(sp)\n" - " 2c: 00812583 lw a1,8(sp)\n" - " 30: 00c12603 lw a2,12(sp)\n" - " 34: 01010113 addi sp,sp,16\n" - " 38: 04862f03 lw t5,72(a2)\n" - " 3c: fe010113 addi sp,sp,-32\n" - " 40: 00112023 sw ra,0(sp)\n" - " 44: 00a12223 sw a0,4(sp)\n" - " 48: 00b12423 sw a1,8(sp)\n" - " 4c: 00c12623 sw a2,12(sp)\n" - " 50: 01f12823 sw t6,16(sp)\n" - " 54: 01400593 li a1,20\n" - " 58: 000f00e7 jalr t5\n" - " 5c: 00050f13 mv t5,a0\n" - " 60: 00012083 lw ra,0(sp)\n" - " 64: 00412503 lw a0,4(sp)\n" - " 68: 00812583 lw a1,8(sp)\n" - " 6c: 00c12603 lw a2,12(sp)\n" - " 70: 01012f83 lw t6,16(sp)\n" - " 74: 02010113 addi sp,sp,32\n" - " 78: 04862e83 lw t4,72(a2)\n" - " 7c: fe010113 addi sp,sp,-32\n" - " 80: 00112023 sw ra,0(sp)\n" - " 84: 00a12223 sw a0,4(sp)\n" - " 88: 00b12423 sw a1,8(sp)\n" - " 8c: 00c12623 sw a2,12(sp)\n" - " 90: 01e12823 sw t5,16(sp)\n" - " 94: 01f12a23 sw t6,20(sp)\n" - " 98: 01300593 li a1,19\n" - " 9c: 000e80e7 jalr t4\n" - " a0: 00050e93 mv t4,a0\n" - " a4: 00012083 lw ra,0(sp)\n" - " a8: 00412503 lw a0,4(sp)\n" - " ac: 00812583 lw a1,8(sp)\n" - " b0: 00c12603 lw a2,12(sp)\n" - " b4: 01012f03 lw t5,16(sp)\n" - " b8: 01412f83 lw t6,20(sp)\n" - " bc: 02010113 addi sp,sp,32\n" - " c0: 03462e03 lw t3,52(a2)\n" - " c4: fe010113 addi sp,sp,-32\n" - " c8: 00112023 sw ra,0(sp)\n" - " cc: 00a12223 sw a0,4(sp)\n" - " d0: 00b12423 sw a1,8(sp)\n" - " d4: 00c12623 sw a2,12(sp)\n" - " d8: 01d12823 sw t4,16(sp)\n" - " dc: 000fa583 lw a1,0(t6)\n" - " e0: 000f2603 lw a2,0(t5)\n" - " e4: 000e00e7 jalr t3\n" 
- " e8: 00050e13 mv t3,a0\n" - " ec: 00012083 lw ra,0(sp)\n" - " f0: 00412503 lw a0,4(sp)\n" - " f4: 00812583 lw a1,8(sp)\n" - " f8: 00c12603 lw a2,12(sp)\n" - " fc: 01012e83 lw t4,16(sp)\n" - " 100: 02010113 addi sp,sp,32\n" - " 104: 01cea023 sw t3,0(t4)" + " 0: 04862f83 lw t6,72(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 45cd li a1,19\n" + " 10: 9f82 jalr t6\n" + " 12: 8faa mv t6,a0\n" + " 14: 4082 lw ra,0(sp)\n" + " 16: 4512 lw a0,4(sp)\n" + " 18: 45a2 lw a1,8(sp)\n" + " 1a: 4632 lw a2,12(sp)\n" + " 1c: 0141 addi sp,sp,16\n" + " 1e: 04862f03 lw t5,72(a2)\n" + " 22: 1101 addi sp,sp,-32\n" + " 24: c006 sw ra,0(sp)\n" + " 26: c22a sw a0,4(sp)\n" + " 28: c42e sw a1,8(sp)\n" + " 2a: c632 sw a2,12(sp)\n" + " 2c: c87e sw t6,16(sp)\n" + " 2e: 45d1 li a1,20\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4fc2 lw t6,16(sp)\n" + " 3e: 02010113 addi sp,sp,32\n" + " 42: 04862e83 lw t4,72(a2)\n" + " 46: 1101 addi sp,sp,-32\n" + " 48: c006 sw ra,0(sp)\n" + " 4a: c22a sw a0,4(sp)\n" + " 4c: c42e sw a1,8(sp)\n" + " 4e: c632 sw a2,12(sp)\n" + " 50: c87a sw t5,16(sp)\n" + " 52: ca7e sw t6,20(sp)\n" + " 54: 45cd li a1,19\n" + " 56: 9e82 jalr t4\n" + " 58: 8eaa mv t4,a0\n" + " 5a: 4082 lw ra,0(sp)\n" + " 5c: 4512 lw a0,4(sp)\n" + " 5e: 45a2 lw a1,8(sp)\n" + " 60: 4632 lw a2,12(sp)\n" + " 62: 4f42 lw t5,16(sp)\n" + " 64: 4fd2 lw t6,20(sp)\n" + " 66: 02010113 addi sp,sp,32\n" + " 6a: 03462e03 lw t3,52(a2)\n" + " 6e: 1101 addi sp,sp,-32\n" + " 70: c006 sw ra,0(sp)\n" + " 72: c22a sw a0,4(sp)\n" + " 74: c42e sw a1,8(sp)\n" + " 76: c632 sw a2,12(sp)\n" + " 78: c876 sw t4,16(sp)\n" + " 7a: 000fa583 lw a1,0(t6)\n" + " 7e: 000f2603 lw a2,0(t5)\n" + " 82: 9e02 jalr t3\n" + " 84: 8e2a mv t3,a0\n" + " 86: 4082 lw ra,0(sp)\n" + " 88: 4512 lw a0,4(sp)\n" + " 8a: 
45a2 lw a1,8(sp)\n" + " 8c: 4632 lw a2,12(sp)\n" + " 8e: 4ec2 lw t4,16(sp)\n" + " 90: 02010113 addi sp,sp,32\n" + " 94: 01cea023 sw t3,0(t4)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -257,37 +257,37 @@ call_primitive_few_free_regs_test() -> ?BACKEND:assert_all_native_free(State7), Stream = ?BACKEND:stream(State7), Dump = << - " 0: 00100f93 li t6,1\n" - " 4: 00200f13 li t5,2\n" - " 8: 00300e93 li t4,3\n" - " c: 00400e13 li t3,4\n" - " 10: 00500393 li t2,5\n" - " 14: 0e400313 li t1,228\n" - " 18: 00c30333 add t1,t1,a2\n" - " 1c: 00032303 lw t1,0(t1)\n" - " 20: fe010113 addi sp,sp,-32\n" - " 24: 00112023 sw ra,0(sp)\n" - " 28: 00a12223 sw a0,4(sp)\n" - " 2c: 00b12423 sw a1,8(sp)\n" - " 30: 00c12623 sw a2,12(sp)\n" - " 34: 01d12823 sw t4,16(sp)\n" - " 38: 01e12a23 sw t5,20(sp)\n" - " 3c: 01f12c23 sw t6,24(sp)\n" - " 40: 000f0513 mv a0,t5\n" - " 44: 000f8593 mv a1,t6\n" - " 48: 000e0613 mv a2,t3\n" - " 4c: 000e8693 mv a3,t4\n" - " 50: 00038713 mv a4,t2\n" - " 54: 000300e7 jalr t1\n" - " 58: 00050313 mv t1,a0\n" - " 5c: 00012083 lw ra,0(sp)\n" - " 60: 00412503 lw a0,4(sp)\n" - " 64: 00812583 lw a1,8(sp)\n" - " 68: 00c12603 lw a2,12(sp)\n" - " 6c: 01012e83 lw t4,16(sp)\n" - " 70: 01412f03 lw t5,20(sp)\n" - " 74: 01812f83 lw t6,24(sp)\n" - " 78: 02010113 addi sp,sp,32" + " 0: 4f85 li t6,1\n" + " 2: 4f09 li t5,2\n" + " 4: 4e8d li t4,3\n" + " 6: 4e11 li t3,4\n" + " 8: 4395 li t2,5\n" + " a: 0e400313 li t1,228\n" + " e: 9332 add t1,t1,a2\n" + " 10: 00032303 lw t1,0(t1)\n" + " 14: 1101 addi sp,sp,-32\n" + " 16: c006 sw ra,0(sp)\n" + " 18: c22a sw a0,4(sp)\n" + " 1a: c42e sw a1,8(sp)\n" + " 1c: c632 sw a2,12(sp)\n" + " 1e: c876 sw t4,16(sp)\n" + " 20: ca7a sw t5,20(sp)\n" + " 22: cc7e sw t6,24(sp)\n" + " 24: 857a mv a0,t5\n" + " 26: 85fe mv a1,t6\n" + " 28: 8672 mv a2,t3\n" + " 2a: 86f6 mv a3,t4\n" + " 2c: 871e mv a4,t2\n" + " 2e: 9302 jalr t1\n" + " 30: 832a mv t1,a0\n" + " 32: 4082 lw ra,0(sp)\n" + " 34: 4512 lw a0,4(sp)\n" + " 36: 45a2 lw a1,8(sp)\n" + " 38: 4632 lw 
a2,12(sp)\n" + " 3a: 4ec2 lw t4,16(sp)\n" + " 3c: 4f52 lw t5,20(sp)\n" + " 3e: 4fe2 lw t6,24(sp)\n" + " 40: 02010113 addi sp,sp,32" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -297,21 +297,22 @@ call_ext_only_test() -> State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0085af83 lw t6,8(a1)\n" - " 4: ffff8f93 addi t6,t6,-1\n" - " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9c63 bnez t6,0x24\n" - " 10: 00000f97 auipc t6,0x0\n" - " 14: 014f8f93 addi t6,t6,20 # 0x24\n" - " 18: 01f5a223 sw t6,4(a1)\n" - " 1c: 00862f83 lw t6,8(a2)\n" - " 20: 000f8067 jr t6\n" - " 24: 01062f83 lw t6,16(a2)\n" - " 28: 02800613 li a2,40\n" - " 2c: 00200693 li a3,2\n" - " 30: 00200713 li a4,2\n" - " 34: fff00793 li a5,-1\n" - " 38: 000f8067 jr t6" + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f9b63 bnez t6,0x20\n" + " e: 00000f97 auipc t6,0x0\n" + " 12: 0fc9 addi t6,t6,18 # 0x20\n" + " 14: 0001 nop\n" + " 16: 01f5a223 sw t6,4(a1)\n" + " 1a: 00862f83 lw t6,8(a2)\n" + " 1e: 8f82 jr t6\n" + " 20: 01062f83 lw t6,16(a2)\n" + " 24: 02400613 li a2,36\n" + " 28: 4689 li a3,2\n" + " 2a: 4709 li a4,2\n" + " 2c: 57fd li a5,-1\n" + " 2e: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -323,12 +324,12 @@ call_primitive_last_5_args_test() -> ]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04c62f03 lw t5,76(a2)\n" - " 8: 00800613 li a2,8\n" - " c: 2cb00693 li a3,715\n" - " 10: 000f8713 mv a4,t6\n" - " 14: 000f0067 jr t5" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04c62f03 lw t5,76(a2)\n" + " 8: 4621 li a2,8\n" + " a: 2cb00693 li a3,715\n" + " e: 877e mv a4,t6\n" + " 10: 8f02 jr t5" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -338,21 +339,22 @@ call_ext_last_test() -> State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 0085af83 lw t6,8(a1)\n" - " 4: ffff8f93 addi t6,t6,-1\n" - " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9c63 bnez t6,0x24\n" - " 10: 00000f97 auipc t6,0x0\n" - " 14: 014f8f93 addi t6,t6,20 # 0x24\n" - " 18: 01f5a223 sw t6,4(a1)\n" - " 1c: 00862f83 lw t6,8(a2)\n" - " 20: 000f8067 jr t6\n" - " 24: 01062f83 lw t6,16(a2)\n" - " 28: 02800613 li a2,40\n" - " 2c: 00200693 li a3,2\n" - " 30: 00200713 li a4,2\n" - " 34: 00a00793 li a5,10\n" - " 38: 000f8067 jr t6" + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f9b63 bnez t6,0x20\n" + " e: 00000f97 auipc t6,0x0\n" + " 12: 0fc9 addi t6,t6,18 # 0x20\n" + " 14: 0001 nop\n" + " 16: 01f5a223 sw t6,4(a1)\n" + " 1a: 00862f83 lw t6,8(a2)\n" + " 1e: 8f82 jr t6\n" + " 20: 01062f83 lw t6,16(a2)\n" + " 24: 02400613 li a2,36\n" + " 28: 4689 li a3,2\n" + " 2a: 4709 li a4,2\n" + " 2c: 47a9 li a5,10\n" + " 2e: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -362,9 +364,9 @@ call_primitive_last_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 00062f83 lw t6,0(a2)\n" - " 4: 02a00613 li a2,42\n" - " 8: 000f8067 jr t6" + " 0: 00062f83 lw t6,0(a2)\n" + " 4: 02a00613 li a2,42\n" + " 8: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -386,22 +388,22 @@ return_if_not_equal_to_ctx_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 05462f83 lw t6,84(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 000f80e7 jalr t6\n" - " 1c: 00050f93 mv t6,a0\n" - " 20: 00012083 lw ra,0(sp)\n" - " 24: 00412503 lw a0,4(sp)\n" - " 28: 00812583 lw a1,8(sp)\n" - " 2c: 00c12603 lw a2,12(sp)\n" - " 30: 01010113 addi sp,sp,16\n" - " 34: 00af8663 beq t6,a0,0x40\n" - " 38: 000f8513 mv a0,t6\n" - " 3c: 00008067 ret" + " 0: 05462f83 lw t6,84(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9f82 jalr t6\n" + " 10: 8faa mv t6,a0\n" + " 12: 4082 lw ra,0(sp)\n" + " 14: 4512 lw a0,4(sp)\n" + " 16: 45a2 lw a1,8(sp)\n" + " 18: 4632 lw a2,12(sp)\n" + " 1a: 0141 addi sp,sp,16\n" + " 1c: 00af8463 beq t6,a0,0x24\n" + " 20: 857e mv a0,t6\n" + " 22: 8082 ret" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -418,23 +420,23 @@ return_if_not_equal_to_ctx_test_() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: 05462f83 lw t6,84(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 000f80e7 jalr t6\n" - " 1c: 00050f93 mv t6,a0\n" - " 20: 00012083 lw ra,0(sp)\n" - " 24: 00412503 lw a0,4(sp)\n" - " 28: 00812583 lw a1,8(sp)\n" - " 2c: 00c12603 lw a2,12(sp)\n" - " 30: 01010113 addi sp,sp,16\n" - " 34: 000f8f13 mv t5,t6\n" - " 38: 00af0663 beq t5,a0,0x44\n" - " 3c: 000f0513 mv a0,t5\n" - " 40: 00008067 ret" + " 0: 05462f83 lw t6,84(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9f82 jalr t6\n" + " 10: 8faa mv t6,a0\n" + " 12: 4082 lw ra,0(sp)\n" + " 14: 4512 lw a0,4(sp)\n" 
+ " 16: 45a2 lw a1,8(sp)\n" + " 18: 4632 lw a2,12(sp)\n" + " 1a: 0141 addi sp,sp,16\n" + " 1c: 8f7e mv t5,t6\n" + " 1e: 00af0463 beq t5,a0,0x26\n" + " 22: 857a mv a0,t5\n" + " 24: 8082 ret" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -447,9 +449,9 @@ move_to_cp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 000f2f83 lw t6,0(t5)\n" - " 8: 05f52e23 sw t6,92(a0)" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 000f2f83 lw t6,0(t5)\n" + " 8: 05f52e23 sw t6,92(a0)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -459,9 +461,9 @@ increment_sp_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f83 lw t6,20(a0)\n" - " 4: 01cf8f93 addi t6,t6,28\n" - " 8: 01f52a23 sw t6,20(a0)" + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 0ff1 addi t6,t6,28\n" + " 6: 01f52a23 sw t6,20(a0)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -485,10 +487,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000fd463 bgez t6,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000fd363 bgez t6,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -503,10 +505,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01efd463 bge t6,t5,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01efd363 bge t6,t5,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -521,11 +523,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02a00e93 li t4,42\n" - " c: 01dfd463 bge t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 
0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01dfd363 bge t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -541,12 +543,12 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 40000e93 li t4,1024\n" - " c: 01dfd463 bge t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2\n" - " 14: 0ec0006f j 0x100" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 40000e93 li t4,1024\n" + " c: 01dfd363 bge t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2\n" + " 12: a0fd j 0x100" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -561,10 +563,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000f9463 bnez t6,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -579,10 +581,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000f9463 bnez t6,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -597,11 +599,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: fff00e93 li t4,-1\n" - " c: 01df9463 bne t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 
4: 01c52f03 lw t5,28(a0)\n" + " 8: 5efd li t4,-1\n" + " a: 01df9363 bne t6,t4,0x10\n" + " e: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -616,10 +618,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000f9463 bnez t6,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -634,10 +636,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000f9463 bnez t6,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 000f9363 bnez t6,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -652,11 +654,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03b00e93 li t4,59\n" - " c: 01df8463 beq t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -671,11 +673,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03b00e93 li t4,59\n" - " c: 01df8463 beq t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, 
?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -690,11 +692,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02a00e93 li t4,42\n" - " c: 01df8463 beq t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -711,12 +713,12 @@ if_block_test_() -> State2 = ?BACKEND:jump_to_offset(State1, 16#100), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 7cb00e93 li t4,1995\n" - " c: 01df8463 beq t6,t4,0x14\n" - " 10: 001f0f13 addi t5,t5,1\n" - " 14: 0ec0006f j 0x100" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 7cb00e93 li t4,1995\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f05 addi t5,t5,1\n" + " 12: a0fd j 0x100" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -730,11 +732,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02a00e93 li t4,42\n" - " c: 01df8463 beq t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df8363 beq t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -749,11 +751,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03b00e93 li t4,59\n" - " c: 01df9463 bne t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df9363 bne 
t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -768,11 +770,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03b00e93 li t4,59\n" - " c: 01df9463 bne t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03b00e93 li t4,59\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -787,11 +789,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02a00e93 li t4,42\n" - " c: 01df9463 bne t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -806,11 +808,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02a00e93 li t4,42\n" - " c: 01df9463 bne t6,t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02a00e93 li t4,42\n" + " c: 01df9363 bne t6,t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -825,11 +827,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01ff9e93 slli t4,t6,0x1f\n" - " c: 000ec463 bltz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ec363 bltz 
t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -844,11 +846,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01ff9e93 slli t4,t6,0x1f\n" - " c: 000ec463 bltz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ec363 bltz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -863,11 +865,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01ff9e93 slli t4,t6,0x1f\n" - " c: 000ed463 bgez t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ed363 bgez t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -882,11 +884,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01ff9e93 slli t4,t6,0x1f\n" - " c: 000ed463 bgez t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01ff9e93 slli t4,t6,0x1f\n" + " c: 000ed363 bgez t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -901,11 +903,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 007ffe93 andi t4,t6,7\n" - " c: 000e8463 beqz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 007ffe93 andi t4,t6,7\n" + 
" c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -920,11 +922,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 005ffe93 andi t4,t6,5\n" - " c: 000e8463 beqz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 005ffe93 andi t4,t6,5\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -939,11 +941,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 007ffe93 andi t4,t6,7\n" - " c: 000e8463 beqz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 007ffe93 andi t4,t6,7\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -958,12 +960,12 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: ffffce93 not t4,t6\n" - " c: 01ce9e93 slli t4,t4,0x1c\n" - " 10: 000e8463 beqz t4,0x18\n" - " 14: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: ffffce93 not t4,t6\n" + " c: 0ef2 slli t4,t4,0x1c\n" + " e: 000e8363 beqz t4,0x14\n" + " 12: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -978,12 +980,12 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: ffffcf93 not t6,t6\n" - " c: 01cf9f93 slli t6,t6,0x1c\n" - " 10: 000f8463 beqz t6,0x18\n" - " 14: 002f0f13 addi 
t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: ffffcf93 not t6,t6\n" + " c: 0ff2 slli t6,t6,0x1c\n" + " e: 000f8363 beqz t6,0x14\n" + " 12: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -998,14 +1000,14 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 000f8e93 mv t4,t6\n" - " c: 03f00e13 li t3,63\n" - " 10: 01cefeb3 and t4,t4,t3\n" - " 14: 00800e13 li t3,8\n" - " 18: 01ce8463 beq t4,t3,0x20\n" - " 1c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 8efe mv t4,t6\n" + " a: 03f00e13 li t3,63\n" + " e: 01cefeb3 and t4,t4,t3\n" + " 12: 4e21 li t3,8\n" + " 14: 01ce8363 beq t4,t3,0x1a\n" + " 18: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -1020,10 +1022,10 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 01efd463 bge t6,t5,0x10\n" - " c: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 01efd363 bge t6,t5,0xe\n" + " c: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], ?BACKEND:used_regs(State1)) @@ -1044,13 +1046,13 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03f00e93 li t4,63\n" - " c: 01dfffb3 and t6,t6,t4\n" - " 10: 00800e93 li t4,8\n" - " 14: 01df8463 beq t6,t4,0x1c\n" - " 18: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 03f00e93 li t4,63\n" + " c: 01dfffb3 and t6,t6,t4\n" + " 10: 4ea1 li t4,8\n" + " 12: 01df8363 beq t6,t4,0x18\n" + " 16: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB], 
?BACKEND:used_regs(State1)) @@ -1066,11 +1068,11 @@ if_block_test_() -> ), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 003ffe93 andi t4,t6,3\n" - " c: 000e8463 beqz t4,0x14\n" - " 10: 002f0f13 addi t5,t5,2" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 003ffe93 andi t4,t6,3\n" + " c: 000e8363 beqz t4,0x12\n" + " 10: 0f09 addi t5,t5,2" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1)) @@ -1095,13 +1097,13 @@ if_else_block_test() -> Stream = ?BACKEND:stream(State3), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 03b00e93 li t4,59\n" - " c: 01df9663 bne t6,t4,0x18\n" - " 10: 002f0f13 addi t5,t5,2\n" - " 14: 0080006f j 0x1c\n" - " 18: 004f0f13 addi t5,t5,4" + "0: 01852f83 lw t6,24(a0)\n" + "4: 01c52f03 lw t5,28(a0)\n" + "8: 03b00e93 li t4,59\n" + "c: 01df9463 bne t6,t4,0x14\n" + "10: 0f09 addi t5,t5,2\n" + "12: a011 j 0x16\n" + "14: 0f11 addi t5,t5,4" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1114,8 +1116,8 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 003fdf93 srli t6,t6,0x3" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 003fdf93 srli t6,t6,0x3" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -1127,8 +1129,8 @@ shift_right_test_() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 003fdf13 srli t5,t6,0x3" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 003fdf13 srli t5,t6,0x3" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -1141,8 +1143,8 @@ shift_left_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 003f9f93 slli t6,t6,0x3" + "0: 01852f83 lw t6,24(a0)\n" + "4: 0f8e slli t6,t6,0x3" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1160,27 +1162,29 @@ call_only_or_schedule_next_and_label_relocation_test() -> Stream = ?BACKEND:stream(State8), Dump = << - " 0: 00000697 auipc a3,0x0\n" - " 4: 04c68067 jr 76(a3) # 0x4c\n" - " 8: 00000697 auipc a3,0x0\n" - " c: 01068067 jr 16(a3) # 0x18\n" - " 10: 00000697 auipc a3,0x0\n" - " 14: 03468067 jr 52(a3) # 0x44\n" - " 18: 0085af83 lw t6,8(a1)\n" - " 1c: ffff8f93 addi t6,t6,-1\n" - " 20: 01f5a423 sw t6,8(a1)\n" - " 24: 000f8663 beqz t6,0x30\n" - " 28: 01c0006f j 0x44\n" - " 2c: 00000013 nop\n" - " 30: 00000f97 auipc t6,0x0\n" - " 34: 014f8f93 addi t6,t6,20 # 0x44\n" - " 38: 01f5a223 sw t6,4(a1)\n" - " 3c: 00862f83 lw t6,8(a2)\n" - " 40: 000f8067 jr t6\n" - " 44: 00062f83 lw t6,0(a2)\n" - " 48: 000f8067 jr t6\n" - " 4c: 00462f83 lw t6,4(a2)\n" - " 50: 000f8067 jr t6" + " 0: 00000697 auipc a3,0x0\n" + " 4: 04668067 jr 70(a3) # 0x46\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01068067 jr 16(a3) # 0x18\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 03068067 jr 48(a3) # 0x40\n" + " 18: 0085af83 lw t6,8(a1)\n" + " 1c: 1ffd addi t6,t6,-1\n" + " 1e: 01f5a423 sw t6,8(a1)\n" + " 22: 000f8663 beqz t6,0x2e\n" + " 26: a829 j 0x40\n" + " 28: 0001 nop\n" + " 2a: 00000013 nop\n" + " 2e: 00000f97 auipc t6,0x0\n" + " 32: 0fc9 addi t6,t6,18 # 0x40\n" + " 34: 0001 nop\n" + " 36: 01f5a223 sw t6,4(a1)\n" + " 3a: 00862f83 lw t6,8(a2)\n" + " 3e: 8f82 jr t6\n" + " 40: 00062f83 lw t6,0(a2)\n" + " 44: 8f82 jr t6\n" + " 46: 00462f83 lw t6,4(a2)\n" + " 4a: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1207,26 +1211,28 @@ call_only_or_schedule_next_and_label_relocation_large_gap_test() -> State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), State8 = ?BACKEND:update_branches(State7), Stream = ?BACKEND:stream(State8), - % Extract the final section starting at 0x218 (after jump table 24 bytes + 128 loads 512 bytes) - % RISC-V: Jump table is 3×8=24 bytes, loads are 4 bytes each + % Extract the final section starting at 0x118 (after jump table 24 bytes + 128 loads 256 bytes) + % RISC-V: Jump table is 3×8=24 bytes, loads are 2 bytes each (compressed) Dump = << - " 218: 0085af83 lw t6,8(a1)\n" - " 21c: ffff8f93 addi t6,t6,-1\n" - " 220: 01f5a423 sw t6,8(a1)\n" - " 224: 000f8663 beqz t6,0x230\n" - " 228: 01c0006f j 0x244\n" - " 22c: 00000013 nop\n" - " 230: 00000f97 auipc t6,0x0\n" - " 234: 014f8f93 addi t6,t6,20 # 0x244\n" - " 238: 01f5a223 sw t6,4(a1)\n" - " 23c: 00862f83 lw t6,8(a2)\n" - " 240: 000f8067 jr t6\n" - " 244: 00062f83 lw t6,0(a2)\n" - " 248: 000f8067 jr t6\n" - " 24c: 00462f83 lw t6,4(a2)\n" - " 250: 000f8067 jr t6" + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f8663 beqz t6,0x16\n" + " e: a829 j 0x28\n" + " 10: 0001 nop\n" + " 12: 00000013 nop\n" + " 16: 00000f97 auipc t6,0x0\n" + " 1a: 0fc9 addi t6,t6,18 # 0x28\n" + " 1c: 0001 nop\n" + " 1e: 01f5a223 sw t6,4(a1)\n" + " 22: 00862f83 lw t6,8(a2)\n" + " 26: 8f82 jr t6\n" + " 28: 00062f83 lw t6,0(a2)\n" + " 2c: 8f82 jr t6\n" + " 2e: 00462f83 lw t6,4(a2)\n" + " 32: 8f82 jr t6" >>, - {_, RelevantBinary} = split_binary(Stream, 16#218), + {_, RelevantBinary} = split_binary(Stream, 16#118), ?assertEqual(dump_to_bin(Dump), RelevantBinary). 
call_bif_with_large_literal_integer_test() -> @@ -1245,59 +1251,59 @@ call_bif_with_large_literal_integer_test() -> Stream = ?BACKEND:stream(State6), Dump = << - " 0: 02062f83 lw t6,32(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 00058513 mv a0,a1\n" - " 1c: 00200593 li a1,2\n" - " 20: 000f80e7 jalr t6\n" - " 24: 00050f93 mv t6,a0\n" - " 28: 00012083 lw ra,0(sp)\n" - " 2c: 00412503 lw a0,4(sp)\n" - " 30: 00812583 lw a1,8(sp)\n" - " 34: 00c12603 lw a2,12(sp)\n" - " 38: 01010113 addi sp,sp,16\n" - " 3c: 03c62f03 lw t5,60(a2)\n" - " 40: fe010113 addi sp,sp,-32\n" - " 44: 00112023 sw ra,0(sp)\n" - " 48: 00a12223 sw a0,4(sp)\n" - " 4c: 00b12423 sw a1,8(sp)\n" - " 50: 00c12623 sw a2,12(sp)\n" - " 54: 01f12823 sw t6,16(sp)\n" - " 58: 3b7ff5b7 lui a1,0x3b7ff\n" - " 5c: 89558593 addi a1,a1,-1899 # 0x3b7fe895\n" - " 60: 000f00e7 jalr t5\n" - " 64: 00050f13 mv t5,a0\n" - " 68: 00012083 lw ra,0(sp)\n" - " 6c: 00412503 lw a0,4(sp)\n" - " 70: 00812583 lw a1,8(sp)\n" - " 74: 00c12603 lw a2,12(sp)\n" - " 78: 01012f83 lw t6,16(sp)\n" - " 7c: 02010113 addi sp,sp,32\n" - " 80: ff010113 addi sp,sp,-16\n" - " 84: 00112023 sw ra,0(sp)\n" - " 88: 00a12223 sw a0,4(sp)\n" - " 8c: 00b12423 sw a1,8(sp)\n" - " 90: 00c12623 sw a2,12(sp)\n" - " 94: 00000593 li a1,0\n" - " 98: 00100613 li a2,1\n" - " 9c: 01852683 lw a3,24(a0)\n" - " a0: 000f0713 mv a4,t5\n" - " a4: 000f80e7 jalr t6\n" - " a8: 00050f93 mv t6,a0\n" - " ac: 00012083 lw ra,0(sp)\n" - " b0: 00412503 lw a0,4(sp)\n" - " b4: 00812583 lw a1,8(sp)\n" - " b8: 00c12603 lw a2,12(sp)\n" - " bc: 01010113 addi sp,sp,16\n" - " c0: 000f9863 bnez t6,0xd0\n" - " c4: 01862f83 lw t6,24(a2)\n" - " c8: 0c800613 li a2,200\n" - " cc: 000f8067 jr t6\n" - " d0: 01f52c23 sw t6,24(a0)" + " 0: 02062f83 lw t6,32(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " 
c: c632 sw a2,12(sp)\n" + " e: 852e mv a0,a1\n" + " 10: 4589 li a1,2\n" + " 12: 9f82 jalr t6\n" + " 14: 8faa mv t6,a0\n" + " 16: 4082 lw ra,0(sp)\n" + " 18: 4512 lw a0,4(sp)\n" + " 1a: 45a2 lw a1,8(sp)\n" + " 1c: 4632 lw a2,12(sp)\n" + " 1e: 0141 addi sp,sp,16\n" + " 20: 03c62f03 lw t5,60(a2)\n" + " 24: 1101 addi sp,sp,-32\n" + " 26: c006 sw ra,0(sp)\n" + " 28: c22a sw a0,4(sp)\n" + " 2a: c42e sw a1,8(sp)\n" + " 2c: c632 sw a2,12(sp)\n" + " 2e: c87e sw t6,16(sp)\n" + " 30: 3b7ff5b7 lui a1,0x3b7ff\n" + " 34: 89558593 addi a1,a1,-1899 # 0x3b7fe895\n" + " 38: 9f02 jalr t5\n" + " 3a: 8f2a mv t5,a0\n" + " 3c: 4082 lw ra,0(sp)\n" + " 3e: 4512 lw a0,4(sp)\n" + " 40: 45a2 lw a1,8(sp)\n" + " 42: 4632 lw a2,12(sp)\n" + " 44: 4fc2 lw t6,16(sp)\n" + " 46: 02010113 addi sp,sp,32\n" + " 4a: 1141 addi sp,sp,-16\n" + " 4c: c006 sw ra,0(sp)\n" + " 4e: c22a sw a0,4(sp)\n" + " 50: c42e sw a1,8(sp)\n" + " 52: c632 sw a2,12(sp)\n" + " 54: 4581 li a1,0\n" + " 56: 4605 li a2,1\n" + " 58: 4d14 lw a3,24(a0)\n" + " 5a: 877a mv a4,t5\n" + " 5c: 9f82 jalr t6\n" + " 5e: 8faa mv t6,a0\n" + " 60: 4082 lw ra,0(sp)\n" + " 62: 4512 lw a0,4(sp)\n" + " 64: 45a2 lw a1,8(sp)\n" + " 66: 4632 lw a2,12(sp)\n" + " 68: 0141 addi sp,sp,16\n" + " 6a: 000f9763 bnez t6,0x78\n" + " 6e: 01862f83 lw t6,24(a2)\n" + " 72: 07200613 li a2,114\n" + " 76: 8f82 jr t6\n" + " 78: 01f52c23 sw t6,24(a0)" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1310,18 +1316,19 @@ get_list_test() -> State5 = ?BACKEND:free_native_registers(State4, [Reg]), ?BACKEND:assert_all_native_free(State5), Stream = ?BACKEND:stream(State5), - Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 00300f13 li t5,3\n" - " 8: ffff4f13 not t5,t5\n" - " c: 01efffb3 and t6,t6,t5\n" - " 10: 004fae83 lw t4,4(t6)\n" - " 14: 01452f03 lw t5,20(a0)\n" - " 18: 01df2223 sw t4,4(t5)\n" - " 1c: 000fae83 lw t4,0(t6)\n" - " 20: 01452f03 lw t5,20(a0)\n" - " 24: 01df2023 sw t4,0(t5)" - >>, + Dump = + << + "0: 01852f83 lw t6,24(a0)\n" + "4: 4f0d li t5,3\n" + "6: ffff4f13 not t5,t5\n" + "a: 01efffb3 and t6,t6,t5\n" + "e: 004fae83 lw t4,4(t6)\n" + "12: 01452f03 lw t5,20(a0)\n" + "16: 01df2223 sw t4,4(t5)\n" + "1a: 000fae83 lw t4,0(t6)\n" + "1e: 01452f03 lw t5,20(a0)\n" + "22: 01df2023 sw t4,0(t5)" + >>, ?assertEqual(dump_to_bin(Dump), Stream). is_integer_test() -> @@ -1352,29 +1359,32 @@ is_integer_test() -> State4 = ?BACKEND:add_label(State3, Label, 16#100), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), - Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: ffffcf13 not t5,t6\n" - " 8: 01cf1f13 slli t5,t5,0x1c\n" - " c: 040f0463 beqz t5,0x54\n" - " 10: 000f8f13 mv t5,t6\n" - " 14: 00300e93 li t4,3\n" - " 18: 01df7f33 and t5,t5,t4\n" - " 1c: 00200e93 li t4,2\n" - " 20: 01df0663 beq t5,t4,0x2c\n" - " 24: 0dc0006f j 0x100\n" - " 28: 00000013 nop\n" - " 2c: 00300f13 li t5,3\n" - " 30: ffff4f13 not t5,t5\n" - " 34: 01efffb3 and t6,t6,t5\n" - " 38: 000faf83 lw t6,0(t6)\n" - " 3c: 03f00f13 li t5,63\n" - " 40: 01efffb3 and t6,t6,t5\n" - " 44: 00800f13 li t5,8\n" - " 48: 01ef8663 beq t6,t5,0x54\n" - " 4c: 0b40006f j 0x100\n" - " 50: 00000013 nop" - >>, + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: ffffcf13 not t5,t6\n" + " 8: 0f72 slli t5,t5,0x1c\n" + " a: 020f0f63 beqz t5,0x48\n" + " e: 8f7e mv t5,t6\n" + " 10: 4e8d li t4,3\n" + " 12: 01df7f33 and t5,t5,t4\n" + " 16: 4e89 li t4,2\n" + " 18: 01df0663 beq t5,t4,0x24\n" + " 1c: a0d5 j 
0x100\n" + " 1e: 0001 nop\n" + " 20: 00000013 nop\n" + " 24: 4f0d li t5,3\n" + " 26: ffff4f13 not t5,t5\n" + " 2a: 01efffb3 and t6,t6,t5\n" + " 2e: 000faf83 lw t6,0(t6)\n" + " 32: 03f00f13 li t5,63\n" + " 36: 01efffb3 and t6,t6,t5\n" + " 3a: 4f21 li t5,8\n" + " 3c: 01ef8663 beq t6,t5,0x48\n" + " 40: a0c1 j 0x100\n" + " 42: 0001 nop\n" + " 44: 00000013 nop" + >>, ?assertEqual(dump_to_bin(Dump), Stream). cond_jump_to_label(Cond, Label, MMod, MSt0) -> @@ -1411,34 +1421,37 @@ is_number_test() -> State4 = ?BACKEND:add_label(State3, Label, 16#100), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), - Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: ffffcf13 not t5,t6\n" - " 8: 01cf1f13 slli t5,t5,0x1c\n" - " c: 040f0e63 beqz t5,0x68\n" - " 10: 000f8f13 mv t5,t6\n" - " 14: 00300e93 li t4,3\n" - " 18: 01df7f33 and t5,t5,t4\n" - " 1c: 00200e93 li t4,2\n" - " 20: 01df0663 beq t5,t4,0x2c\n" - " 24: 0dc0006f j 0x100\n" - " 28: 00000013 nop\n" - " 2c: 00300f13 li t5,3\n" - " 30: ffff4f13 not t5,t5\n" - " 34: 01efffb3 and t6,t6,t5\n" - " 38: 000faf83 lw t6,0(t6)\n" - " 3c: 000f8f13 mv t5,t6\n" - " 40: 03f00e93 li t4,63\n" - " 44: 01df7f33 and t5,t5,t4\n" - " 48: 00800e93 li t4,8\n" - " 4c: 01df0e63 beq t5,t4,0x68\n" - " 50: 03f00f13 li t5,63\n" - " 54: 01efffb3 and t6,t6,t5\n" - " 58: 01800f13 li t5,24\n" - " 5c: 01ef8663 beq t6,t5,0x68\n" - " 60: 0a00006f j 0x100\n" - " 64: 00000013 nop" - >>, + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: ffffcf13 not t5,t6\n" + " 8: 0f72 slli t5,t5,0x1c\n" + " a: 040f0763 beqz t5,0x58\n" + " e: 8f7e mv t5,t6\n" + " 10: 4e8d li t4,3\n" + " 12: 01df7f33 and t5,t5,t4\n" + " 16: 4e89 li t4,2\n" + " 18: 01df0663 beq t5,t4,0x24\n" + " 1c: a0d5 j 0x100\n" + " 1e: 0001 nop\n" + " 20: 00000013 nop\n" + " 24: 4f0d li t5,3\n" + " 26: ffff4f13 not t5,t5\n" + " 2a: 01efffb3 and t6,t6,t5\n" + " 2e: 000faf83 lw t6,0(t6)\n" + " 32: 8f7e mv t5,t6\n" + " 34: 03f00e93 li t4,63\n" + " 38: 01df7f33 and t5,t5,t4\n" + " 3c: 4ea1 li 
t4,8\n" + " 3e: 01df0d63 beq t5,t4,0x58\n" + " 42: 03f00f13 li t5,63\n" + " 46: 01efffb3 and t6,t6,t5\n" + " 4a: 4f61 li t5,24\n" + " 4c: 01ef8663 beq t6,t5,0x58\n" + " 50: a845 j 0x100\n" + " 52: 0001 nop\n" + " 54: 00000013 nop" + >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_test() -> @@ -1456,13 +1469,14 @@ is_boolean_test() -> State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8a63 beq t6,t5,0x1c\n" - " c: 00b00f13 li t5,11\n" - " 10: 01ef8663 beq t6,t5,0x1c\n" - " 14: 0ec0006f j 0x100\n" - " 18: 00000013 nop" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8963 beq t6,t5,0x1a\n" + " c: 4f2d li t5,11\n" + " e: 01ef8663 beq t6,t5,0x1a\n" + " 12: a0fd j 0x100\n" + " 14: 0001 nop\n" + " 16: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1480,15 +1494,16 @@ is_boolean_far_test() -> State4 = ?BACKEND:add_label(State3, Label, 16#1000), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), - Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8a63 beq t6,t5,0x1c\n" - " c: 00b00f13 li t5,11\n" - " 10: 01ef8663 beq t6,t5,0x1c\n" - " 14: 7ed0006f j 0x1000\n" - " 18: 00000013 nop" - >>, + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8963 beq t6,t5,0x1a\n" + " c: 4f2d li t5,11\n" + " e: 01ef8663 beq t6,t5,0x1a\n" + " 12: 7ef0006f j 0x1000\n" + " 16: 00000013 nop" + >>, ?assertEqual(dump_to_bin(Dump), Stream). 
is_boolean_far_known_test() -> @@ -1505,15 +1520,16 @@ is_boolean_far_known_test() -> ?BACKEND:assert_all_native_free(State4), State5 = ?BACKEND:update_branches(State4), Stream = ?BACKEND:stream(State5), - Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8a63 beq t6,t5,0x1c\n" - " c: 00b00f13 li t5,11\n" - " 10: 01ef8663 beq t6,t5,0x1c\n" - " 14: 00001f17 auipc t5,0x1\n" - " 18: fecf0067 jr -20(t5) # 0x1000" - >>, + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 04b00f13 li t5,75\n" + " 8: 01ef8963 beq t6,t5,0x1a\n" + " c: 4f2d li t5,11\n" + " e: 01ef8663 beq t6,t5,0x1a\n" + " 12: 00001f17 auipc t5,0x1\n" + " 16: feef0067 jr -18(t5) # 0x1000" + >>, ?assertEqual(dump_to_bin(Dump), Stream). %% Test OP_WAIT_TIMEOUT pattern that uses set_continuation_to_offset and continuation_entry_point @@ -1542,53 +1558,55 @@ wait_timeout_test() -> State10 = ?BACKEND:update_branches(State9), Stream = ?BACKEND:stream(State10), - Dump = << - " 0: 00000f97 auipc t6,0x0\n" - " 4: 024f8f93 addi t6,t6,36\n" - " 8: 01f5a223 sw t6,4(a1)\n" - " c: 00001fb7 lui t6,0x1\n" - " 10: 388f8f93 addi t6,t6,904\n" - " 14: 07862f03 lw t5,120(a2)\n" - " 18: 000f8613 mv a2,t6\n" - " 1c: 02a00693 li a3,42\n" - " 20: 000f0067 jr t5\n" - " 24: 05462f83 lw t6,84(a2)\n" - " 28: ff010113 addi sp,sp,-16\n" - " 2c: 00112023 sw ra,0(sp)\n" - " 30: 00a12223 sw a0,4(sp)\n" - " 34: 00b12423 sw a1,8(sp)\n" - " 38: 00c12623 sw a2,12(sp)\n" - " 3c: 000f80e7 jalr t6\n" - " 40: 00050f93 mv t6,a0\n" - " 44: 00012083 lw ra,0(sp)\n" - " 48: 00412503 lw a0,4(sp)\n" - " 4c: 00812583 lw a1,8(sp)\n" - " 50: 00c12603 lw a2,12(sp)\n" - " 54: 01010113 addi sp,sp,16\n" - " 58: 00af8663 beq t6,a0,0x60\n" - " 5c: 000f8513 mv a0,t6\n" - " 60: 00008067 ret\n" - " 64: 08400f93 li t6,132\n" - " 68: 00cf8fb3 add t6,t6,a2\n" - " 6c: 000faf83 lw t6,0(t6)\n" - " 70: ff010113 addi sp,sp,-16\n" - " 74: 00112023 sw ra,0(sp)\n" - " 78: 00a12223 sw a0,4(sp)\n" - " 7c: 00b12423 sw a1,8(sp)\n" - " 80: 00c12623 sw 
a2,12(sp)\n" - " 84: 00200593 li a1,2\n" - " 88: 000f80e7 jalr t6\n" - " 8c: 00050f93 mv t6,a0\n" - " 90: 00012083 lw ra,0(sp)\n" - " 94: 00412503 lw a0,4(sp)\n" - " 98: 00812583 lw a1,8(sp)\n" - " 9c: 00c12603 lw a2,12(sp)\n" - " a0: 01010113 addi sp,sp,16\n" - " a4: 000f9863 bnez t6,0xb4\n" - " a8: 07c62f83 lw t6,124(a2)\n" - " ac: 02a00613 li a2,42\n" - " b0: 000f8067 jr t6" - >>, + Dump = + << + " 0: 00000f97 auipc t6,0x0\n" + " 4: 0ff9 addi t6,t6,30 # 0x1e\n" + " 6: 0001 nop\n" + " 8: 01f5a223 sw t6,4(a1)\n" + " c: 6f85 lui t6,0x1\n" + " e: 388f8f93 addi t6,t6,904 # 0x1388\n" + " 12: 07862f03 lw t5,120(a2)\n" + " 16: 867e mv a2,t6\n" + " 18: 02a00693 li a3,42\n" + " 1c: 8f02 jr t5\n" + " 1e: 05462f83 lw t6,84(a2)\n" + " 22: 1141 addi sp,sp,-16\n" + " 24: c006 sw ra,0(sp)\n" + " 26: c22a sw a0,4(sp)\n" + " 28: c42e sw a1,8(sp)\n" + " 2a: c632 sw a2,12(sp)\n" + " 2c: 9f82 jalr t6\n" + " 2e: 8faa mv t6,a0\n" + " 30: 4082 lw ra,0(sp)\n" + " 32: 4512 lw a0,4(sp)\n" + " 34: 45a2 lw a1,8(sp)\n" + " 36: 4632 lw a2,12(sp)\n" + " 38: 0141 addi sp,sp,16\n" + " 3a: 00af8463 beq t6,a0,0x42\n" + " 3e: 857e mv a0,t6\n" + " 40: 8082 ret\n" + " 42: 08400f93 li t6,132\n" + " 46: 9fb2 add t6,t6,a2\n" + " 48: 000faf83 lw t6,0(t6)\n" + " 4c: 1141 addi sp,sp,-16\n" + " 4e: c006 sw ra,0(sp)\n" + " 50: c22a sw a0,4(sp)\n" + " 52: c42e sw a1,8(sp)\n" + " 54: c632 sw a2,12(sp)\n" + " 56: 4589 li a1,2\n" + " 58: 9f82 jalr t6\n" + " 5a: 8faa mv t6,a0\n" + " 5c: 4082 lw ra,0(sp)\n" + " 5e: 4512 lw a0,4(sp)\n" + " 60: 45a2 lw a1,8(sp)\n" + " 62: 4632 lw a2,12(sp)\n" + " 64: 0141 addi sp,sp,16\n" + " 66: 000f9763 bnez t6,0x74\n" + " 6a: 07c62f83 lw t6,124(a2)\n" + " 6e: 02a00613 li a2,42\n" + " 72: 8f82 jr t6" + >>, ?assertEqual(dump_to_bin(Dump), Stream). 
%% Test OP_WAIT pattern that uses set_continuation_to_label @@ -1602,25 +1620,26 @@ wait_test() -> State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), Stream = ?BACKEND:stream(State4), - Dump = << - " 0: 00000697 auipc a3,0x0\n" - " 4: 00068067 jr a3\n" - " 8: 00000697 auipc a3,0x0\n" - " c: 00068067 jr a3\n" - " 10: 00000697 auipc a3,0x0\n" - " 14: 00068067 jr a3\n" - " 18: 00000697 auipc a3,0x0\n" - " 1c: 00068067 jr a3\n" - " 20: 00000697 auipc a3,0x0\n" - " 24: 00068067 jr a3\n" - " 28: 00000697 auipc a3,0x0\n" - " 2c: 00068067 jr a3\n" - " 30: 00000f97 auipc t6,0x0\n" - " 34: 004f8f93 addi t6,t6,4\n" - " 38: 01f5a223 sw t6,4(a1)\n" - " 3c: 07462f83 lw t6,116(a2)\n" - " 40: 000f8067 jr t6" - >>, + Dump = + << + " 0: ffffffff .insn 4, 0xffffffff\n" + " 4: ffffffff .insn 4, 0xffffffff\n" + " 6: ffffffff .insn 4, 0xffffffff\n" + " a: ffffffff .insn 4, 0xffffffff\n" + " c: ffffffff .insn 4, 0xffffffff\n" + " 10: ffffffff .insn 4, 0xffffffff\n" + " 12: ffffffff .insn 4, 0xffffffff\n" + " 16: ffffffff .insn 4, 0xffffffff\n" + " 18: ffffffff .insn 4, 0xffffffff\n" + " 1c: ffffffff .insn 4, 0xffffffff\n" + " 1e: ffffffff .insn 4, 0xffffffff\n" + " 22: ffffffff .insn 4, 0xffffffff\n" + " 24: ffffffff .insn 4, 0xffffffff\n" + " 28: ffffffff .insn 4, 0xffffffff\n" + " 2c: 01f5a223 sw t6,4(a1)\n" + " 30: 07462f83 lw t6,116(a2)\n" + " 34: 8f82 jr t6" + >>, ?assertEqual(dump_to_bin(Dump), Stream). 
%% Test return_labels_and_lines/2 function @@ -1638,31 +1657,32 @@ return_labels_and_lines_test() -> Stream = ?BACKEND:stream(State3), % Should have generated auipc + addi + ret + labels table + lines table - % auipc = 4 bytes, addi = 4 bytes, ret = 4 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes - % Total minimum: 36 bytes - ?assert(byte_size(Stream) >= 36), + % auipc = 4 bytes, addi = 2 bytes (compressed), ret = 2 bytes, labels table = 3*2*2 = 12 bytes, lines table = 3*2*2 = 12 bytes + % Total: 4 + 2 + 2 + 12 + 12 = 32 bytes + ?assert(byte_size(Stream) >= 32), - % Expected: auipc a0, 0 + addi a0, a0, 12 + ret + labels table + lines table - % The data tables start at offset 0xC (12), so we load PC + 12 into a0 - Dump = << - " 0: 00000517 auipc a0,0x0\n" - " 4: 00c50513 addi a0,a0,12\n" - " 8: 00008067 ret\n" - " c: 0200 addi s0,sp,256\n" - " e: 0100 addi s0,sp,128\n" - " 10: 0000 unimp\n" - " 12: 1000 addi s0,sp,32\n" - " 14: 0200 addi s0,sp,256\n" - " 16: 0000 unimp\n" - " 18: 2000 fld fs0,0(s0)\n" - " 1a: 0200 addi s0,sp,256\n" - " 1c: 0a00 addi s0,sp,272\n" - " 1e: 0000 unimp\n" - " 20: 1000 addi s0,sp,32\n" - " 22: 1400 addi s0,sp,544\n" - " 24: 0000 unimp\n" - " 26: 2000 fld fs0,0(s0)" - >>, + % Expected: auipc a0, 0 + addi a0, a0, 10 + ret + padding + labels table + lines table + % The data tables start at offset 0xa (10) because of alignment padding + Dump = + << + " 0: 00000517 auipc a0,0x0\n" + " 4: 0529 addi a0,a0,10 # 0xa\n" + " 6: 8082 ret\n" + " 8: 0200ffff .insn 4, 0x0200ffff\n" + " c: 0100 addi s0,sp,128\n" + " e: 0000 unimp\n" + " 10: 1000 addi s0,sp,32\n" + " 12: 0200 addi s0,sp,256\n" + " 14: 0000 unimp\n" + " 16: 2000 fld fs0,0(s0)\n" + " 18: 0200 addi s0,sp,256\n" + " 1a: 0a00 addi s0,sp,272\n" + " 1c: 0000 unimp\n" + " 1e: 1000 addi s0,sp,32\n" + " 20: 1400 addi s0,sp,544\n" + " 22: 0000 unimp\n" + " 24: 2000 fld fs0,0(s0)" + >>, ?assertEqual(dump_to_bin(Dump), Stream). 
%% Test call_primitive with {free, {x_reg, X}} @@ -1674,40 +1694,41 @@ gc_bif2_test() -> ]), Stream = ?BACKEND:stream(State2), - Dump = << - " 0: 02062f83 lw t6,32(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 00058513 mv a0,a1\n" - " 1c: 02a00593 li a1,42\n" - " 20: 000f80e7 jalr t6\n" - " 24: 00050f93 mv t6,a0\n" - " 28: 00012083 lw ra,0(sp)\n" - " 2c: 00412503 lw a0,4(sp)\n" - " 30: 00812583 lw a1,8(sp)\n" - " 34: 00c12603 lw a2,12(sp)\n" - " 38: 01010113 addi sp,sp,16\n" - " 3c: ff010113 addi sp,sp,-16\n" - " 40: 00112023 sw ra,0(sp)\n" - " 44: 00a12223 sw a0,4(sp)\n" - " 48: 00b12423 sw a1,8(sp)\n" - " 4c: 00c12623 sw a2,12(sp)\n" - " 50: 00000593 li a1,0\n" - " 54: 00300613 li a2,3\n" - " 58: 01452f03 lw t5,20(a0)\n" - " 5c: 000f2683 lw a3,0(t5)\n" - " 60: 01852703 lw a4,24(a0)\n" - " 64: 000f80e7 jalr t6\n" - " 68: 00050f93 mv t6,a0\n" - " 6c: 00012083 lw ra,0(sp)\n" - " 70: 00412503 lw a0,4(sp)\n" - " 74: 00812583 lw a1,8(sp)\n" - " 78: 00c12603 lw a2,12(sp)\n" - " 7c: 01010113 addi sp,sp,16" - >>, + Dump = + << + " 0: 02062f83 lw t6,32(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 852e mv a0,a1\n" + " 10: 02a00593 li a1,42\n" + " 14: 9f82 jalr t6\n" + " 16: 8faa mv t6,a0\n" + " 18: 4082 lw ra,0(sp)\n" + " 1a: 4512 lw a0,4(sp)\n" + " 1c: 45a2 lw a1,8(sp)\n" + " 1e: 4632 lw a2,12(sp)\n" + " 20: 0141 addi sp,sp,16\n" + " 22: 1141 addi sp,sp,-16\n" + " 24: c006 sw ra,0(sp)\n" + " 26: c22a sw a0,4(sp)\n" + " 28: c42e sw a1,8(sp)\n" + " 2a: c632 sw a2,12(sp)\n" + " 2c: 4581 li a1,0\n" + " 2e: 460d li a2,3\n" + " 30: 01452f03 lw t5,20(a0)\n" + " 34: 000f2683 lw a3,0(t5)\n" + " 38: 4d18 lw a4,24(a0)\n" + " 3a: 9f82 jalr t6\n" + " 3c: 8faa mv t6,a0\n" + " 3e: 4082 lw ra,0(sp)\n" + " 40: 4512 lw a0,4(sp)\n" + " 42: 45a2 lw 
a1,8(sp)\n" + " 44: 4632 lw a2,12(sp)\n" + " 46: 0141 addi sp,sp,16" + >>, ?assertEqual(dump_to_bin(Dump), Stream). %% Test case where parameter value is in a1 @@ -1718,27 +1739,28 @@ memory_ensure_free_with_roots_test() -> ]), Stream = ?BACKEND:stream(State1), - Dump = << - " 0: 0b000f93 li t6,176\n" - " 4: 00cf8fb3 add t6,t6,a2\n" - " 8: 000faf83 lw t6,0(t6)\n" - " c: ff010113 addi sp,sp,-16\n" - " 10: 00112023 sw ra,0(sp)\n" - " 14: 00a12223 sw a0,4(sp)\n" - " 18: 00b12423 sw a1,8(sp)\n" - " 1c: 00c12623 sw a2,12(sp)\n" - " 20: 00058f13 mv t5,a1\n" - " 24: 000f0613 mv a2,t5\n" - " 28: 00400693 li a3,4\n" - " 2c: 00100713 li a4,1\n" - " 30: 000f80e7 jalr t6\n" - " 34: 00050f93 mv t6,a0\n" - " 38: 00012083 lw ra,0(sp)\n" - " 3c: 00412503 lw a0,4(sp)\n" - " 40: 00812583 lw a1,8(sp)\n" - " 44: 00c12603 lw a2,12(sp)\n" - " 48: 01010113 addi sp,sp,16" - >>, + Dump = + << + " 0: 0b000f93 li t6,176\n" + " 4: 9fb2 add t6,t6,a2\n" + " 6: 000faf83 lw t6,0(t6)\n" + " a: 1141 addi sp,sp,-16\n" + " c: c006 sw ra,0(sp)\n" + " e: c22a sw a0,4(sp)\n" + " 10: c42e sw a1,8(sp)\n" + " 12: c632 sw a2,12(sp)\n" + " 14: 8f2e mv t5,a1\n" + " 16: 867a mv a2,t5\n" + " 18: 4691 li a3,4\n" + " 1a: 4705 li a4,1\n" + " 1c: 9f82 jalr t6\n" + " 1e: 8faa mv t6,a0\n" + " 20: 4082 lw ra,0(sp)\n" + " 22: 4512 lw a0,4(sp)\n" + " 24: 45a2 lw a1,8(sp)\n" + " 26: 4632 lw a2,12(sp)\n" + " 28: 0141 addi sp,sp,16" + >>, ?assertEqual(dump_to_bin(Dump), Stream). 
call_ext_test() -> @@ -1747,29 +1769,31 @@ call_ext_test() -> State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]), ?BACKEND:assert_all_native_free(State2), Stream = ?BACKEND:stream(State2), - Dump = << - " 0: 0085af83 lw t6,8(a1)\n" - " 4: ffff8f93 addi t6,t6,-1\n" - " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9c63 bnez t6,0x24\n" - " 10: 00000f97 auipc t6,0x0\n" - " 14: 014f8f93 addi t6,t6,20 # 0x24\n" - " 18: 01f5a223 sw t6,4(a1)\n" - " 1c: 00862f83 lw t6,8(a2)\n" - " 20: 000f8067 jr t6\n" - " 24: 0005af03 lw t5,0(a1)\n" - " 28: 000f2f03 lw t5,0(t5)\n" - " 2c: 018f1f13 slli t5,t5,0x18\n" - " 30: 15000f93 li t6,336\n" - " 34: 00000013 nop\n" - " 38: 01ff6f33 or t5,t5,t6\n" - " 3c: 05e52e23 sw t5,92(a0)\n" - " 40: 01062f83 lw t6,16(a2)\n" - " 44: 00200613 li a2,2\n" - " 48: 00500693 li a3,5\n" - " 4c: fff00713 li a4,-1\n" - " 50: 000f8067 jr t6" - >>, + Dump = + << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f9b63 bnez t6,0x20\n" + " e: 00000f97 auipc t6,0x0\n" + " 12: 0fc9 addi t6,t6,18 # 0x20\n" + " 14: 0001 nop\n" + " 16: 01f5a223 sw t6,4(a1)\n" + " 1a: 00862f83 lw t6,8(a2)\n" + " 1e: 8f82 jr t6\n" + " 20: 0005af03 lw t5,0(a1)\n" + " 24: 000f2f03 lw t5,0(t5)\n" + " 28: 0f62 slli t5,t5,0x18\n" + " 2a: 11800f93 li t6,280\n" + " 2e: 00000013 nop\n" + " 32: 01ff6f33 or t5,t5,t6\n" + " 36: 05e52e23 sw t5,92(a0)\n" + " 3a: 01062f83 lw t6,16(a2)\n" + " 3e: 4609 li a2,2\n" + " 40: 4695 li a3,5\n" + " 42: 577d li a4,-1\n" + " 44: 8f82 jr t6" + >>, ?assertEqual(dump_to_bin(Dump), Stream). 
call_fun_test() -> @@ -1801,56 +1825,58 @@ call_fun_test() -> ]), ?BACKEND:assert_all_native_free(State9), Stream = ?BACKEND:stream(State9), - Dump = << - " 0: 0085af83 lw t6,8(a1)\n" - " 4: ffff8f93 addi t6,t6,-1\n" - " 8: 01f5a423 sw t6,8(a1)\n" - " c: 000f9c63 bnez t6,0x24\n" - " 10: 00000f97 auipc t6,0x0\n" - " 14: 014f8f93 addi t6,t6,20 # 0x24\n" - " 18: 01f5a223 sw t6,4(a1)\n" - " 1c: 00862f83 lw t6,8(a2)\n" - " 20: 000f8067 jr t6\n" - " 24: 01852f83 lw t6,24(a0)\n" - " 28: 000f8f13 mv t5,t6\n" - " 2c: 000f0e93 mv t4,t5\n" - " 30: 00300e13 li t3,3\n" - " 34: 01cefeb3 and t4,t4,t3\n" - " 38: 00200e13 li t3,2\n" - " 3c: 01ce8c63 beq t4,t3,0x54\n" - " 40: 04c62f83 lw t6,76(a2)\n" - " 44: 04400613 li a2,68\n" - " 48: 18b00693 li a3,395\n" - " 4c: 000f0713 mv a4,t5\n" - " 50: 000f8067 jr t6\n" - " 54: 00300e93 li t4,3\n" - " 58: fffece93 not t4,t4\n" - " 5c: 01df7f33 and t5,t5,t4\n" - " 60: 000f2f03 lw t5,0(t5)\n" - " 64: 000f0e93 mv t4,t5\n" - " 68: 03f00e13 li t3,63\n" - " 6c: 01cefeb3 and t4,t4,t3\n" - " 70: 01400e13 li t3,20\n" - " 74: 01ce8c63 beq t4,t3,0x8c\n" - " 78: 04c62f83 lw t6,76(a2)\n" - " 7c: 07c00613 li a2,124\n" - " 80: 18b00693 li a3,395\n" - " 84: 000f0713 mv a4,t5\n" - " 88: 000f8067 jr t6\n" - " 8c: 0005ae83 lw t4,0(a1)\n" - " 90: 000eae83 lw t4,0(t4)\n" - " 94: 018e9e93 slli t4,t4,0x18\n" - " 98: 30000f13 li t5,768\n" - " 9c: 00000013 nop\n" - " a0: 01eeeeb3 or t4,t4,t5\n" - " a4: 05d52e23 sw t4,92(a0)\n" - " a8: 08000f13 li t5,128\n" - " ac: 00cf0f33 add t5,t5,a2\n" - " b0: 000f2f03 lw t5,0(t5)\n" - " b4: 000f8613 mv a2,t6\n" - " b8: 00000693 li a3,0\n" - " bc: 000f0067 jr t5" - >>, + Dump = + << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f9b63 bnez t6,0x20\n" + " e: 00000f97 auipc t6,0x0\n" + " 12: 0fc9 addi t6,t6,18 # 0x20\n" + " 14: 0001 nop\n" + " 16: 01f5a223 sw t6,4(a1)\n" + " 1a: 00862f83 lw t6,8(a2)\n" + " 1e: 8f82 jr t6\n" + " 20: 01852f83 lw t6,24(a0)\n" + " 24: 8f7e mv 
t5,t6\n" + " 26: 8efa mv t4,t5\n" + " 28: 4e0d li t3,3\n" + " 2a: 01cefeb3 and t4,t4,t3\n" + " 2e: 4e09 li t3,2\n" + " 30: 01ce8a63 beq t4,t3,0x44\n" + " 34: 04c62f83 lw t6,76(a2)\n" + " 38: 03800613 li a2,56\n" + " 3c: 18b00693 li a3,395\n" + " 40: 877a mv a4,t5\n" + " 42: 8f82 jr t6\n" + " 44: 4e8d li t4,3\n" + " 46: fffece93 not t4,t4\n" + " 4a: 01df7f33 and t5,t5,t4\n" + " 4e: 000f2f03 lw t5,0(t5)\n" + " 52: 8efa mv t4,t5\n" + " 54: 03f00e13 li t3,63\n" + " 58: 01cefeb3 and t4,t4,t3\n" + " 5c: 4e51 li t3,20\n" + " 5e: 01ce8a63 beq t4,t3,0x72\n" + " 62: 04c62f83 lw t6,76(a2)\n" + " 66: 06600613 li a2,102\n" + " 6a: 18b00693 li a3,395\n" + " 6e: 877a mv a4,t5\n" + " 70: 8f82 jr t6\n" + " 72: 0005ae83 lw t4,0(a1)\n" + " 76: 000eae83 lw t4,0(t4)\n" + " 7a: 0ee2 slli t4,t4,0x18\n" + " 7c: 27000f13 li t5,624\n" + " 80: 00000013 nop\n" + " 84: 01eeeeb3 or t4,t4,t5\n" + " 88: 05d52e23 sw t4,92(a0)\n" + " 8c: 08000f13 li t5,128\n" + " 90: 9f32 add t5,t5,a2\n" + " 92: 000f2f03 lw t5,0(t5)\n" + " 96: 867e mv a2,t6\n" + " 98: 4681 li a3,0\n" + " 9a: 8f02 jr t5" + >>, ?assertEqual(dump_to_bin(Dump), Stream). 
move_to_vm_register_test0(State, Source, Dest, Dump) -> @@ -1868,252 +1894,252 @@ move_to_vm_register_test_() -> [ ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, 0}, << - " 0: 00000f93 li t6,0\n" - " 4: 01f52c23 sw t6,24(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 4f81 li t6,0\n" + " 2: 01f52c23 sw t6,24(a0)\n" + " 6: a8ed j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {x_reg, extra}, << - " 0: 00000f93 li t6,0\n" - " 4: 05f52c23 sw t6,88(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 4f81 li t6,0\n" + " 2: 05f52c23 sw t6,88(a0)\n" + " 6: a8ed j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {ptr, t5}, << - " 0: 00000f93 li t6,0\n" - " 4: 01ff2023 sw t6,0(t5)\n" - " 8: 0f80006f j 0x100" + " 0: 4f81 li t6,0\n" + " 2: 01ff2023 sw t6,0(t5)\n" + " 6: a8ed j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 2}, << - " 0: 00000f13 li t5,0\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 01efa423 sw t5,8(t6)\n" - " c: 0f40006f j 0x100" + " 0: 4f01 li t5,0\n" + " 2: 01452f83 lw t6,20(a0)\n" + " 6: 01efa423 sw t5,8(t6)\n" + " a: a8dd j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 0, {y_reg, 20}, << - " 0: 00000f13 li t5,0\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 05efa823 sw t5,80(t6)\n" - " c: 0f40006f j 0x100" + " 0: 4f01 li t5,0\n" + " 2: 01452f83 lw t6,20(a0)\n" + " 6: 05efa823 sw t5,80(t6)\n" + " a: a8dd j 0x100" >>) end), %% Test: Immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, 0}, << - " 0: 02a00f93 li t6,42\n" - " 4: 01f52c23 sw t6,24(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 02a00f93 li t6,42\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: a8e5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {x_reg, extra}, << - " 0: 02a00f93 li t6,42\n" - " 4: 05f52c23 sw t6,88(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 02a00f93 li t6,42\n" + " 4: 05f52c23 sw t6,88(a0)\n" + " 8: a8e5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 2}, << 
- " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 01efa423 sw t5,8(t6)\n" - " c: 0f40006f j 0x100" + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 01efa423 sw t5,8(t6)\n" + " c: a8d5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 20}, << - " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 05efa823 sw t5,80(t6)\n" - " c: 0f40006f j 0x100" + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 05efa823 sw t5,80(t6)\n" + " c: a8d5 j 0x100" >>) end), %% Test: Immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 99, {ptr, a3}, << - " 0: 06300f93 li t6,99\n" - " 4: 01f6a023 sw t6,0(a3)\n" - " 8: 0f80006f j 0x100" + " 0: 06300f93 li t6,99\n" + " 4: 01f6a023 sw t6,0(a3)\n" + " 8: a8e5 j 0x100" >>) end), %% Test: x_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << - " 0: 01c52f83 lw t6,28(a0)\n" - " 4: 03f52023 sw t6,32(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 01c52f83 lw t6,28(a0)\n" + " 4: 03f52023 sw t6,32(a0)\n" + " 8: a8e5 j 0x100" >>) end), %% Test: x_reg to ptr ?_test(begin move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, a1}, << - " 0: 01c52f83 lw t6,28(a0)\n" - " 4: 01f5a023 sw t6,0(a1)\n" - " 8: 0f80006f j 0x100" + " 0: 01c52f83 lw t6,28(a0)\n" + " 4: 01f5a023 sw t6,0(a1)\n" + " 8: a8e5 j 0x100" >>) end), %% Test: ptr to x_reg ?_test(begin move_to_vm_register_test0(State0, {ptr, t3}, {x_reg, 3}, << - " 0: 000e2f83 lw t6,0(t3)\n" - " 4: 03f52223 sw t6,36(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 000e2f83 lw t6,0(t3)\n" + " 4: 03f52223 sw t6,36(a0)\n" + " 8: a8e5 j 0x100" >>) end), %% Test: x_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01452f03 lw t5,20(a0)\n" - " 8: 01ff2223 sw t6,4(t5)\n" - " c: 0f40006f j 0x100" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01452f03 lw t5,20(a0)\n" + " 8: 01ff2223 sw t6,4(t5)\n" + " c: a8d5 j 0x100" >>) end), %% Test: 
y_reg to x_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 000f2f83 lw t6,0(t5)\n" - " 8: 03f52223 sw t6,36(a0)\n" - " c: 0f40006f j 0x100" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 000f2f83 lw t6,0(t5)\n" + " 8: 03f52223 sw t6,36(a0)\n" + " c: a8d5 j 0x100" >>) end), %% Test: y_reg to y_reg ?_test(begin move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 004f2f83 lw t6,4(t5)\n" - " 8: 03f52223 sw t6,36(a0)\n" - " c: 0f40006f j 0x100" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 004f2f83 lw t6,4(t5)\n" + " 8: 03f52223 sw t6,36(a0)\n" + " c: a8d5 j 0x100" >>) end), %% Test: Native register to x_reg ?_test(begin move_to_vm_register_test0(State0, t4, {x_reg, 0}, << - " 0: 01d52c23 sw t4,24(a0)\n" - " 4: 0fc0006f j 0x100" + " 0: 01d52c23 sw t4,24(a0)\n" + " 4: a8f5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, t5, {x_reg, extra}, << - " 0: 05e52c23 sw t5,88(a0)\n" - " 4: 0fc0006f j 0x100" + " 0: 05e52c23 sw t5,88(a0)\n" + " 4: a8f5 j 0x100" >>) end), %% Test: Native register to ptr ?_test(begin move_to_vm_register_test0(State0, t3, {ptr, a3}, << - " 0: 01c6a023 sw t3,0(a3)\n" - " 4: 0fc0006f j 0x100" + " 0: 01c6a023 sw t3,0(a3)\n" + " 4: a8f5 j 0x100" >>) end), %% Test: Native register to y_reg ?_test(begin move_to_vm_register_test0(State0, a1, {y_reg, 0}, << - " 0: 01452f83 lw t6,20(a0)\n" - " 4: 00bfa023 sw a1,0(t6)\n" - " 8: 0f80006f j 0x100" + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 00bfa023 sw a1,0(t6)\n" + " 8: a8e5 j 0x100" >>) end), %% Test: Large immediate to x_reg (uses lui + addi in RISC-V) ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << - " 0: 12345fb7 lui t6,0x12345\n" - " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01f52c23 sw t6,24(a0)\n" - " c: 0f40006f j 0x100" + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01f52c23 sw t6,24(a0)\n" + " c: a8d5 j 0x100" 
>>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, << - " 0: 12345fb7 lui t6,0x12345\n" - " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 05f52c23 sw t6,88(a0)\n" - " c: 0f40006f j 0x100" + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 05f52c23 sw t6,88(a0)\n" + " c: a8d5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << - " 0: 12345fb7 lui t6,0x12345\n" - " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01452f03 lw t5,20(a0)\n" - " c: 01ff2423 sw t6,8(t5)\n" - " 10: 0f00006f j 0x100" + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01452f03 lw t5,20(a0)\n" + " c: 01ff2423 sw t6,8(t5)\n" + " 10: a8c5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << - " 0: 12345fb7 lui t6,0x12345\n" - " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01452f03 lw t5,20(a0)\n" - " c: 05ff2823 sw t6,80(t5)\n" - " 10: 0f00006f j 0x100" + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01452f03 lw t5,20(a0)\n" + " c: 05ff2823 sw t6,80(t5)\n" + " 10: a8c5 j 0x100" >>) end), %% Test: Large immediate to ptr ?_test(begin move_to_vm_register_test0(State0, 16#12345678, {ptr, a3}, << - " 0: 12345fb7 lui t6,0x12345\n" - " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" - " 8: 01f6a023 sw t6,0(a3)\n" - " c: 0f40006f j 0x100" + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01f6a023 sw t6,0(a3)\n" + " c: a8d5 j 0x100" >>) end), %% Test: x_reg to y_reg (high index) ?_test(begin move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << - " 0: 05452f83 lw t6,84(a0)\n" - " 4: 01452f03 lw t5,20(a0)\n" - " 8: 07ff2e23 sw t6,124(t5)\n" - " c: 0f40006f j 0x100" + " 0: 05452f83 lw t6,84(a0)\n" + " 4: 01452f03 lw t5,20(a0)\n" + " 8: 07ff2e23 sw t6,124(t5)\n" + " c: a8d5 j 0x100" >>) end), %% Test: y_reg to x_reg 
(high index) ?_test(begin move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 07cf2f83 lw t6,124(t5)\n" - " 8: 05f52a23 sw t6,84(a0)\n" - " c: 0f40006f j 0x100" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 07cf2f83 lw t6,124(t5)\n" + " 8: 05f52a23 sw t6,84(a0)\n" + " c: a8d5 j 0x100" >>) end), %% Test: Large y_reg index (32) that exceeds str immediate offset limit ?_test(begin move_to_vm_register_test0(State0, 42, {y_reg, 32}, << - " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 08000e93 li t4,128\n" - " c: 01fe8eb3 add t4,t4,t6\n" - " 10: 01eea023 sw t5,0(t4)\n" - " 14: 0ec0006f j 0x100" + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 08000e93 li t4,128\n" + " c: 9efe add t4,t4,t6\n" + " e: 01eea023 sw t5,0(t4)\n" + " 12: a0fd j 0x100" >>) end), %% Test: Negative immediate to x_reg ?_test(begin move_to_vm_register_test0(State0, -1, {x_reg, 0}, << - " 0: fff00f93 li t6,-1\n" - " 4: 01f52c23 sw t6,24(a0)\n" - " 8: 0f80006f j 0x100" + " 0: 5ffd li t6,-1\n" + " 2: 01f52c23 sw t6,24(a0)\n" + " 6: a8ed j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, -100, {x_reg, 0}, << - " 0: f9c00f93 li t6,-100\n" - " 4: 01f52c23 sw t6,24(a0)\n" - " 8: 0f80006f j 0x100" + " 0: f9c00f93 li t6,-100\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: a8e5 j 0x100" >>) end), ?_test(begin move_to_vm_register_test0(State0, -1000, {x_reg, 0}, << - " 0: c1800f93 li t6,-1000\n" - " 4: 01f52c23 sw t6,24(a0)\n" - " 8: 0f80006f j 0x100" + " 0: c1800f93 li t6,-1000\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: a8e5 j 0x100" >>) end) ] @@ -2134,87 +2160,87 @@ move_array_element_test_() -> %% move_array_element: reg[x] to x_reg ?_test(begin move_array_element_test0(State0, a3, 2, {x_reg, 0}, << - " 0: 0086af83 lw t6,8(a3)\n" - " 4: 01f52c23 sw t6,24(a0)" + " 0: 0086af83 lw t6,8(a3)\n" + " 4: 01f52c23 sw t6,24(a0)" >>) end), %% move_array_element: reg[x] to ptr ?_test(begin move_array_element_test0(State0, a3, 3, 
{ptr, t4}, << - " 0: 00c6af83 lw t6,12(a3)\n" - " 4: 01fea023 sw t6,0(t4)" + " 0: 00c6af83 lw t6,12(a3)\n" + " 4: 01fea023 sw t6,0(t4)" >>) end), %% move_array_element: reg[x] to y_reg ?_test(begin move_array_element_test0(State0, a3, 1, {y_reg, 2}, << - " 0: 0046af03 lw t5,4(a3)\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 01efa423 sw t5,8(t6)" + " 0: 0046af03 lw t5,4(a3)\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 01efa423 sw t5,8(t6)" >>) end), %% move_array_element: reg[x] to native reg (t4) ?_test(begin move_array_element_test0(State0, a3, 1, t4, << - " 0: 0046ae83 lw t4,4(a3)" + " 0: 0046ae83 lw t4,4(a3)" >>) end), %% move_array_element: reg[x] to y_reg ?_test(begin move_array_element_test0(State0, a3, 7, {y_reg, 31}, << - " 0: 01c6af03 lw t5,28(a3)\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 07efae23 sw t5,124(t6)" + " 0: 01c6af03 lw t5,28(a3)\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 07efae23 sw t5,124(t6)" >>) end), %% move_array_element: reg[x] to x_reg ?_test(begin move_array_element_test0(State0, a3, 7, {x_reg, 15}, << - " 0: 01c6af83 lw t6,28(a3)\n" - " 4: 05f52a23 sw t6,84(a0)" + " 0: 01c6af83 lw t6,28(a3)\n" + " 4: 05f52a23 sw t6,84(a0)" >>) end), %% move_array_element: reg_x[reg_y] to x_reg ?_test(begin {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), move_array_element_test0(State1, a3, {free, Reg}, {x_reg, 2}, << - " 0: 0106af83 lw t6,16(a3)\n" - " 4: 002f9f93 slli t6,t6,0x2\n" - " 8: 01f68fb3 add t6,a3,t6\n" - " c: 000faf83 lw t6,0(t6)\n" - " 10: 03f52023 sw t6,32(a0)" + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 01f68fb3 add t6,a3,t6\n" + " a: 000faf83 lw t6,0(t6)\n" + " e: 03f52023 sw t6,32(a0)" >>) end), %% move_array_element: reg_x[reg_y] to pointer (large x reg) ?_test(begin {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), move_array_element_test0(State1, a3, {free, Reg}, {ptr, t4}, << - " 0: 0106af83 lw t6,16(a3)\n" - " 4: 002f9f93 slli t6,t6,0x2\n" - " 8: 01f68fb3 add t6,a3,t6\n" - " c: 000faf83 lw 
t6,0(t6)\n" - " 10: 01fea023 sw t6,0(t4)" + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 01f68fb3 add t6,a3,t6\n" + " a: 000faf83 lw t6,0(t6)\n" + " e: 01fea023 sw t6,0(t4)" >>) end), %% move_array_element: reg_x[reg_y] to y_reg ?_test(begin {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), move_array_element_test0(State1, a3, {free, Reg}, {y_reg, 31}, << - " 0: 0106af83 lw t6,16(a3)\n" - " 4: 002f9f93 slli t6,t6,0x2\n" - " 8: 01f68fb3 add t6,a3,t6\n" - " c: 000faf83 lw t6,0(t6)\n" - " 10: 01452f03 lw t5,20(a0)\n" - " 14: 07ff2e23 sw t6,124(t5)" + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 01f68fb3 add t6,a3,t6\n" + " a: 000faf83 lw t6,0(t6)\n" + " e: 01452f03 lw t5,20(a0)\n" + " 12: 07ff2e23 sw t6,124(t5)" >>) end), %% move_array_element with integer index and x_reg destination ?_test(begin {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 008faf03 lw t5,8(t6)\n" - " 8: 03e52623 sw t5,44(a0)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 008faf03 lw t5,8(t6)\n" + " 8: 03e52623 sw t5,44(a0)" >>) end) ] @@ -2232,7 +2258,7 @@ get_array_element_test_() -> {State1, Reg} = ?BACKEND:get_array_element(State0, t3, 4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 010e2f83 lw t6,16(t3)" + " 0: 010e2f83 lw t6,16(t3)" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(t6, Reg) @@ -2252,8 +2278,8 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01f6a423 sw t6,8(a3)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01f6a423 sw t6,8(a3)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2262,11 +2288,11 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, t3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw 
t6,24(a0)\n" - " 4: 000e0f13 mv t5,t3\n" - " 8: 002f1f13 slli t5,t5,0x2\n" - " c: 01e68f33 add t5,a3,t5\n" - " 10: 01ff2023 sw t6,0(t5)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 8f72 mv t5,t3\n" + " 6: 0f0a slli t5,t5,0x2\n" + " 8: 01e68f33 add t5,a3,t5\n" + " c: 01ff2023 sw t6,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2275,11 +2301,11 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {ptr, t6}, a3, t3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 000faf83 lw t6,0(t6)\n" - " 4: 000e0f13 mv t5,t3\n" - " 8: 002f1f13 slli t5,t5,0x2\n" - " c: 01e68f33 add t5,a3,t5\n" - " 10: 01ff2023 sw t6,0(t5)" + " 0: 000faf83 lw t6,0(t6)\n" + " 4: 8f72 mv t5,t3\n" + " 6: 0f0a slli t5,t5,0x2\n" + " 8: 01e68f33 add t5,a3,t5\n" + " c: 01ff2023 sw t6,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2288,12 +2314,12 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, a3, t3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 008f2f83 lw t6,8(t5)\n" - " 8: 000e0f13 mv t5,t3\n" - " c: 002f1f13 slli t5,t5,0x2\n" - " 10: 01e68f33 add t5,a3,t5\n" - " 14: 01ff2023 sw t6,0(t5)" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 008f2f83 lw t6,8(t5)\n" + " 8: 8f72 mv t5,t3\n" + " a: 0f0a slli t5,t5,0x2\n" + " c: 01e68f33 add t5,a3,t5\n" + " 10: 01ff2023 sw t6,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2302,8 +2328,8 @@ move_to_array_element_test_() -> State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2, 1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01f6a423 sw t6,8(a3)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01f6a423 sw t6,8(a3)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2315,11 +2341,11 @@ move_to_array_element_test_() -> State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 
001e0f13 addi t5,t3,1\n" - " 8: 002f1f13 slli t5,t5,0x2\n" - " c: 01e68f33 add t5,a3,t5\n" - " 10: 01ff2023 sw t6,0(t5)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 001e0f13 addi t5,t3,1\n" + " 8: 0f0a slli t5,t5,0x2\n" + " a: 01e68f33 add t5,a3,t5\n" + " e: 01ff2023 sw t6,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2331,11 +2357,11 @@ move_to_array_element_test_() -> State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1), Stream = ?BACKEND:stream(State3), Dump = << - " 0: 02a00f93 li t6,42\n" - " 4: 001e0f13 addi t5,t3,1\n" - " 8: 002f1f13 slli t5,t5,0x2\n" - " c: 01e68f33 add t5,a3,t5\n" - " 10: 01ff2023 sw t6,0(t5)" + " 0: 02a00f93 li t6,42\n" + " 4: 001e0f13 addi t5,t3,1\n" + " 8: 0f0a slli t5,t5,0x2\n" + " a: 01e68f33 add t5,a3,t5\n" + " e: 01ff2023 sw t6,0(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -2355,7 +2381,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: 02a00f93 li t6,42" + " 0: 02a00f93 li t6,42" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2365,7 +2391,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: fd600f93 li t6,-42" + " 0: fd600f93 li t6,-42" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2375,7 +2401,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: f0100f93 li t6,-255" + " 0: f0100f93 li t6,-255" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2386,8 +2412,8 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State2), ?assertEqual(t6, Reg), Dump = << - " 0: f0000f93 li t6,-256\n" - " 4: 0fc0006f j 0x100" + " 0: f0000f93 li t6,-256\n" + " 4: a8f5 j 0x100" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2397,7 +2423,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t5, Reg), Dump = << - " 0: 000f2f03 lw t5,0(t5)" + " 0: 000f2f03 lw t5,0(t5)" >>, 
?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2407,7 +2433,7 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: 02c52f83 lw t6,44(a0)" + " 0: 02c52f83 lw t6,44(a0)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2417,8 +2443,8 @@ move_to_native_register_test_() -> Stream = ?BACKEND:stream(State1), ?assertEqual(t6, Reg), Dump = << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 00cf2f83 lw t6,12(t5)" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 00cf2f83 lw t6,12(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2427,7 +2453,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, 42, t5), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 02a00f13 li t5,42" + " 0: 02a00f13 li t5,42" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2436,7 +2462,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, t6, t4), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 000f8e93 mv t4,t6" + " 0: 8efe mv t4,t6" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2445,7 +2471,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {ptr, t6}, t3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 000fae03 lw t3,0(t6)" + " 0: 000fae03 lw t3,0(t6)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2454,7 +2480,7 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, a3), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 02052683 lw a3,32(a0)" + " 0: 5114 lw a3,32(a0)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2463,8 +2489,8 @@ move_to_native_register_test_() -> State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, a1), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 01452f83 lw t6,20(a0)\n" - " 4: 008fa583 lw a1,8(t6)" + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 008fa583 lw a1,8(t6)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -2476,12 
+2502,12 @@ move_to_native_register_test_() -> ), Stream = ?BACKEND:stream(State2), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 06052f03 lw t5,96(a0)\n" - " 8: 004fae83 lw t4,4(t6)\n" - " c: 01df2c23 sw t4,24(t5)\n" - " 10: 008fae83 lw t4,8(t6)\n" - " 14: 01df2e23 sw t4,28(t5)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 06052f03 lw t5,96(a0)\n" + " 8: 004fae83 lw t4,4(t6)\n" + " c: 01df2c23 sw t4,24(t5)\n" + " 10: 008fae83 lw t4,8(t6)\n" + " 14: 01df2e23 sw t4,28(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -2504,21 +2530,21 @@ add_test_() -> [ ?_test(begin add_test0(State0, a2, 2, << - " 0: 00260613 addi a2,a2,2\n" - " 4: 0fc0006f j 0x100" + " 0: 0609 addi a2,a2,2\n" + " 2: a8fd j 0x100" >>) end), ?_test(begin add_test0(State0, a2, 256, << - " 0: 10000f93 li t6,256\n" - " 4: 01f60633 add a2,a2,t6\n" - " 8: 0f80006f j 0x100" + " 0: 10000f93 li t6,256\n" + " 4: 967e add a2,a2,t6\n" + " 6: a8ed j 0x100" >>) end), ?_test(begin add_test0(State0, a2, a3, << - " 0: 00d60633 add a2,a2,a3\n" - " 4: 0fc0006f j 0x100" + " 0: 9636 add a2,a2,a3\n" + " 2: a8fd j 0x100" >>) end) ] @@ -2540,21 +2566,21 @@ sub_test_() -> [ ?_test(begin sub_test0(State0, a2, 2, << - " 0: ffe60613 addi a2,a2,-2\n" - " 4: 0fc0006f j 0x100" + " 0: 1679 addi a2,a2,-2\n" + " 2: a8fd j 0x100" >>) end), ?_test(begin sub_test0(State0, a2, 256, << - " 0: 10000f93 li t6,256\n" - " 4: 41f60633 sub a2,a2,t6\n" - " 8: 0f80006f j 0x100" + " 0: 10000f93 li t6,256\n" + " 4: 41f60633 sub a2,a2,t6\n" + " 8: a8e5 j 0x100" >>) end), ?_test(begin sub_test0(State0, a2, a3, << - " 0: 40d60633 sub a2,a2,a3\n" - " 4: 0fc0006f j 0x100" + " 0: 8e15 sub a2,a2,a3\n" + " 2: a8fd j 0x100" >>) end) ] @@ -2574,61 +2600,61 @@ mul_test_() -> [ ?_test(begin mul_test0(State0, a2, 2, << - " 0: 00161613 slli a2,a2,0x1" + " 0: 0606 slli a2,a2,0x1" >>) end), ?_test(begin mul_test0(State0, a2, 3, << - " 0: 00161f93 slli t6,a2,0x1\n" - " 4: 00cf8633 add a2,t6,a2" + " 0: 00161f93 slli t6,a2,0x1\n" + " 4: 00cf8633 add a2,t6,a2" 
>>) end), ?_test(begin mul_test0(State0, a2, 4, << - " 0: 00261613 slli a2,a2,0x2" + " 0: 060a slli a2,a2,0x2" >>) end), ?_test(begin mul_test0(State0, a2, 5, << - " 0: 00261f93 slli t6,a2,0x2\n" - " 4: 00cf8633 add a2,t6,a2" + " 0: 00261f93 slli t6,a2,0x2\n" + " 4: 00cf8633 add a2,t6,a2" >>) end), ?_test(begin mul_test0(State0, a2, 6, << - " 0: 00161f93 slli t6,a2,0x1\n" - " 4: 00cf8633 add a2,t6,a2\n" - " 8: 00161613 slli a2,a2,0x1" + " 0: 00161f93 slli t6,a2,0x1\n" + " 4: 00cf8633 add a2,t6,a2\n" + " 8: 0606 slli a2,a2,0x1" >>) end), ?_test(begin mul_test0(State0, a2, 7, << - " 0: 00361f93 slli t6,a2,0x3\n" - " 4: 40cf8633 sub a2,t6,a2" + " 0: 00361f93 slli t6,a2,0x3\n" + " 4: 40cf8633 sub a2,t6,a2" >>) end), ?_test(begin mul_test0(State0, a2, 8, << - " 0: 00361613 slli a2,a2,0x3" + " 0: 060e slli a2,a2,0x3" >>) end), ?_test(begin mul_test0(State0, a2, 9, << - " 0: 00361f93 slli t6,a2,0x3\n" - " 4: 00cf8633 add a2,t6,a2" + " 0: 00361f93 slli t6,a2,0x3\n" + " 4: 00cf8633 add a2,t6,a2" >>) end), ?_test(begin mul_test0(State0, a2, 10, << - " 0: 00261f93 slli t6,a2,0x2\n" - " 4: 00cf8633 add a2,t6,a2\n" - " 8: 00161613 slli a2,a2,0x1" + " 0: 00261f93 slli t6,a2,0x2\n" + " 4: 00cf8633 add a2,t6,a2\n" + " 8: 0606 slli a2,a2,0x1" >>) end), ?_test(begin mul_test0(State0, a2, 11, << - " 0: 00b00f93 li t6,11\n" - " 4: 03f60633 mul a2,a2,t6" + " 0: 4fad li t6,11\n" + " 2: 03f60633 mul a2,a2,t6" >>) end) ] @@ -2648,24 +2674,24 @@ set_args1_y_reg_test() -> Stream = ?BACKEND:stream(State1), % Expected disassembly for loading from y_reg and calling primitive Dump = << - " 0: 04300f93 li t6,67\n" - " 4: 002f9f93 slli t6,t6,0x2\n" - " 8: 00cf8fb3 add t6,t6,a2\n" - " c: 000faf83 lw t6,0(t6)\n" - " 10: ff010113 addi sp,sp,-16\n" - " 14: 00112023 sw ra,0(sp)\n" - " 18: 00a12223 sw a0,4(sp)\n" - " 1c: 00b12423 sw a1,8(sp)\n" - " 20: 00c12623 sw a2,12(sp)\n" - " 24: 01452f03 lw t5,20(a0)\n" - " 28: 014f2503 lw a0,20(t5)\n" - " 2c: 000f80e7 jalr t6\n" - " 30: 00050f93 mv t6,a0\n" - " 
34: 00012083 lw ra,0(sp)\n" - " 38: 00412503 lw a0,4(sp)\n" - " 3c: 00812583 lw a1,8(sp)\n" - " 40: 00c12603 lw a2,12(sp)\n" - " 44: 01010113 addi sp,sp,16" + " 0: 04300f93 li t6,67\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 9fb2 add t6,t6,a2\n" + " 8: 000faf83 lw t6,0(t6)\n" + " c: 1141 addi sp,sp,-16\n" + " e: c006 sw ra,0(sp)\n" + " 10: c22a sw a0,4(sp)\n" + " 12: c42e sw a1,8(sp)\n" + " 14: c632 sw a2,12(sp)\n" + " 16: 01452f03 lw t5,20(a0)\n" + " 1a: 014f2503 lw a0,20(t5)\n" + " 1e: 9f82 jalr t6\n" + " 20: 8faa mv t6,a0\n" + " 22: 4082 lw ra,0(sp)\n" + " 24: 4512 lw a0,4(sp)\n" + " 26: 45a2 lw a1,8(sp)\n" + " 28: 4632 lw a2,12(sp)\n" + " 2a: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2677,10 +2703,10 @@ large_y_reg_read_test() -> Stream = ?BACKEND:stream(State1), % Expected: uses helper with temp register for large offset Dump = << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 1ec00f93 li t6,492\n" - " 8: 01ef8fb3 add t6,t6,t5\n" - " c: 000faf83 lw t6,0(t6)" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 1ec00f93 li t6,492\n" + " 8: 9ffa add t6,t6,t5\n" + " a: 000faf83 lw t6,0(t6)" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(t6, Reg). @@ -2693,11 +2719,11 @@ large_y_reg_write_test() -> Stream = ?BACKEND:stream(State1), % Expected: uses helper with temp registers for large offset Dump = << - " 0: 02a00f13 li t5,42\n" - " 4: 01452f83 lw t6,20(a0)\n" - " 8: 1ec00e93 li t4,492\n" - " c: 01fe8eb3 add t4,t4,t6\n" - " 10: 01eea023 sw t5,0(t4)" + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 1ec00e93 li t4,492\n" + " c: 9efe add t4,t4,t6\n" + " e: 01eea023 sw t5,0(t4)" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -2715,15 +2741,15 @@ large_y_reg_read_register_exhaustion_test() -> Stream = ?BACKEND:stream(StateFinal), % Expected: uses t0+t1 fallback sequence when temps are exhausted Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 01452283 lw t0,20(a0)\n" - " 18: 08c00313 li t1,140\n" - " 1c: 00530333 add t1,t1,t0\n" - " 20: 00032303 lw t1,0(t1)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 01452283 lw t0,20(a0)\n" + " 18: 08c00313 li t1,140\n" + " 1c: 9316 add t1,t1,t0\n" + " 1e: 00032303 lw t1,0(t1)" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(t1, ResultReg). @@ -2743,15 +2769,15 @@ large_y_reg_write_register_exhaustion_test() -> Stream = ?BACKEND:stream(StateFinal), % Expected: uses t1/t0 fallback sequence Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 01452303 lw t1,20(a0)\n" - " 18: 0c800293 li t0,200\n" - " 1c: 006282b3 add t0,t0,t1\n" - " 20: 01f2a023 sw t6,0(t0)" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 01452303 lw t1,20(a0)\n" + " 18: 0c800293 li t0,200\n" + " 1c: 929a add t0,t0,t1\n" + " 1e: 01f2a023 sw t6,0(t0)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2762,8 +2788,8 @@ y_reg_boundary_direct_test() -> Stream = ?BACKEND:stream(State1), % Expected: uses direct addressing since 31 * 4 = 124 < 2048 Dump = << - " 0: 01452f03 lw t5,20(a0)\n" - " 4: 07cf2f83 lw t6,124(t5)" + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 07cf2f83 lw t6,124(t5)" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(t6, Reg). 
@@ -2774,7 +2800,7 @@ debugger_test() -> State1 = ?BACKEND:debugger(State0), Stream = ?BACKEND:stream(State1), Dump = << - " 0: 00100073 ebreak" + " 0: 9002 ebreak" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2791,15 +2817,15 @@ and_register_exhaustion_negative_test() -> StateResult = ?BACKEND:and_(StateNoRegs, t6, -4), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: 00300293 li t0,3\n" - " 1c: fff2c293 not t0,t0\n" - " 20: 005fffb3 and t6,t6,t0" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 428d li t0,3\n" + " 1a: fff2c293 not t0,t0\n" + " 1e: 005fffb3 and t6,t6,t0" >>, ?assertEqual(dump_to_bin(ExpectedDump), Stream). @@ -2816,14 +2842,14 @@ and_register_exhaustion_positive_test() -> StateResult = ?BACKEND:and_(StateNoRegs, t6, 16#3F), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: 03f00293 li t0,63\n" - " 1c: 005fffb3 and t6,t6,t0" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 03f00293 li t0,63\n" + " 1c: 005fffb3 and t6,t6,t0" >>, ?assertEqual(dump_to_bin(ExpectedDump), Stream). 
@@ -2843,21 +2869,21 @@ alloc_boxed_integer_fragment_small_test() -> Stream = ?BACKEND:stream(State1), Dump = << - " 0: 03c62f83 lw t6,60(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 02a00593 li a1,42\n" - " 1c: 00000613 li a2,0\n" - " 20: 000f80e7 jalr t6\n" - " 24: 00050f93 mv t6,a0\n" - " 28: 00012083 lw ra,0(sp)\n" - " 2c: 00412503 lw a0,4(sp)\n" - " 30: 00812583 lw a1,8(sp)\n" - " 34: 00c12603 lw a2,12(sp)\n" - " 38: 01010113 addi sp,sp,16" + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 02a00593 li a1,42\n" + " 12: 4601 li a2,0\n" + " 14: 9f82 jalr t6\n" + " 16: 8faa mv t6,a0\n" + " 18: 4082 lw ra,0(sp)\n" + " 1a: 4512 lw a0,4(sp)\n" + " 1c: 45a2 lw a1,8(sp)\n" + " 1e: 4632 lw a2,12(sp)\n" + " 20: 0141 addi sp,sp,16" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -2874,28 +2900,28 @@ alloc_boxed_integer_fragment_large_test() -> Stream = ?BACKEND:stream(State2), Dump = << - " 0: 03c62f83 lw t6,60(a2)\n" - " 4: ff010113 addi sp,sp,-16\n" - " 8: 00112023 sw ra,0(sp)\n" - " c: 00a12223 sw a0,4(sp)\n" - " 10: 00b12423 sw a1,8(sp)\n" - " 14: 00c12623 sw a2,12(sp)\n" - " 18: 9abce5b7 lui a1,0x9abce\n" - " 1c: ef058593 addi a1,a1,-272 # 0x9abcdef0\n" - " 20: 12345637 lui a2,0x12345\n" - " 24: 67860613 addi a2,a2,1656 # 0x12345678\n" - " 28: 000f80e7 jalr t6\n" - " 2c: 00050f93 mv t6,a0\n" - " 30: 00012083 lw ra,0(sp)\n" - " 34: 00412503 lw a0,4(sp)\n" - " 38: 00812583 lw a1,8(sp)\n" - " 3c: 00c12603 lw a2,12(sp)\n" - " 40: 01010113 addi sp,sp,16\n" - " 44: 04c62f03 lw t5,76(a2)\n" - " 48: 04800613 li a2,72\n" - " 4c: 28b00693 li a3,651\n" - " 50: 000f8713 mv a4,t6\n" - " 54: 000f0067 jr t5" + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9abce5b7 lui a1,0x9abce\n" + " 12: ef058593 addi a1,a1,-272 # 0x9abcdef0\n" + " 16: 12345637 lui a2,0x12345\n" + " 1a: 67860613 addi a2,a2,1656 # 0x12345678\n" + " 1e: 9f82 jalr t6\n" + " 20: 8faa mv t6,a0\n" + " 22: 4082 lw ra,0(sp)\n" + " 24: 4512 lw a0,4(sp)\n" + " 26: 45a2 lw a1,8(sp)\n" + " 28: 4632 lw a2,12(sp)\n" + " 2a: 0141 addi sp,sp,16\n" + " 2c: 04c62f03 lw t5,76(a2)\n" + " 30: 03000613 li a2,48\n" + " 34: 28b00693 li a3,651\n" + " 38: 877e mv a4,t6\n" + " 3a: 8f02 jr t5" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -2911,29 +2937,29 @@ call_func_ptr_stack_alignment_test() -> Stream = ?BACKEND:stream(State5), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: fe010113 addi sp,sp,-32\n" - " 14: 00112023 sw ra,0(sp)\n" - " 18: 00a12223 sw a0,4(sp)\n" - " 1c: 00b12423 sw a1,8(sp)\n" - " 20: 00c12623 sw a2,12(sp)\n" - " 24: 01d12823 sw t4,16(sp)\n" - " 28: 01e12a23 sw t5,20(sp)\n" - " 2c: 01f12c23 sw t6,24(sp)\n" - " 30: 02a00513 li a0,42\n" - " 34: 000e00e7 jalr t3\n" - " 38: 00050e13 mv t3,a0\n" - " 3c: 00012083 lw ra,0(sp)\n" - " 40: 00412503 lw a0,4(sp)\n" - " 44: 00812583 lw a1,8(sp)\n" - " 48: 00c12603 lw a2,12(sp)\n" - " 4c: 01012e83 lw t4,16(sp)\n" - " 50: 01412f03 lw t5,20(sp)\n" - " 54: 01812f83 lw t6,24(sp)\n" - " 58: 02010113 addi sp,sp,32" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 1101 addi sp,sp,-32\n" + " 12: c006 sw ra,0(sp)\n" + " 14: c22a sw a0,4(sp)\n" + " 16: c42e sw a1,8(sp)\n" + " 18: c632 sw a2,12(sp)\n" + " 1a: c876 sw t4,16(sp)\n" + " 1c: ca7a sw t5,20(sp)\n" + " 1e: cc7e sw t6,24(sp)\n" + " 20: 02a00513 li a0,42\n" + " 24: 9e02 jalr t3\n" + " 26: 8e2a mv t3,a0\n" + " 28: 4082 lw ra,0(sp)\n" + " 2a: 4512 lw a0,4(sp)\n" + " 2c: 45a2 lw a1,8(sp)\n" + " 2e: 4632 lw a2,12(sp)\n" + " 30: 4ec2 lw t4,16(sp)\n" + " 32: 4f52 lw t5,20(sp)\n" + " 34: 4fe2 lw t6,24(sp)\n" + " 36: 02010113 addi sp,sp,32" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -2965,35 +2991,35 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: fe010113 addi sp,sp,-32\n" - " 1c: 00112023 sw ra,0(sp)\n" - " 20: 00a12223 sw a0,4(sp)\n" - " 24: 00b12423 sw a1,8(sp)\n" - " 28: 00c12623 sw a2,12(sp)\n" - " 2c: 00612823 sw t1,16(sp)\n" - " 30: 01c12a23 sw t3,20(sp)\n" - " 34: 01d12c23 sw t4,24(sp)\n" - " 38: 01f12e23 sw t6,28(sp)\n" - " 3c: 00038613 mv a2,t2\n" - " 40: 00300693 li a3,3\n" - " 44: 00100713 li a4,1\n" - " 48: 000f00e7 jalr t5\n" - " 4c: 00050f13 mv t5,a0\n" - " 50: 00012083 lw ra,0(sp)\n" - " 54: 00412503 lw a0,4(sp)\n" - " 58: 00812583 lw a1,8(sp)\n" - " 5c: 00c12603 lw a2,12(sp)\n" - " 60: 01012303 lw t1,16(sp)\n" - " 64: 01412e03 lw t3,20(sp)\n" - " 68: 01812e83 lw t4,24(sp)\n" - " 6c: 01c12f83 lw t6,28(sp)\n" - " 70: 02010113 addi sp,sp,32" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 468d li a3,3\n" + " 2e: 4705 li a4,1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -3006,35 +3032,35 @@ call_func_ptr_register_exhaustion_test_() -> 
Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: fe010113 addi sp,sp,-32\n" - " 1c: 00112023 sw ra,0(sp)\n" - " 20: 00a12223 sw a0,4(sp)\n" - " 24: 00b12423 sw a1,8(sp)\n" - " 28: 00c12623 sw a2,12(sp)\n" - " 2c: 00612823 sw t1,16(sp)\n" - " 30: 01c12a23 sw t3,20(sp)\n" - " 34: 01d12c23 sw t4,24(sp)\n" - " 38: 01f12e23 sw t6,28(sp)\n" - " 3c: 00038613 mv a2,t2\n" - " 40: 00100693 li a3,1\n" - " 44: 00030713 mv a4,t1\n" - " 48: 000f00e7 jalr t5\n" - " 4c: 00050f13 mv t5,a0\n" - " 50: 00012083 lw ra,0(sp)\n" - " 54: 00412503 lw a0,4(sp)\n" - " 58: 00812583 lw a1,8(sp)\n" - " 5c: 00c12603 lw a2,12(sp)\n" - " 60: 01012303 lw t1,16(sp)\n" - " 64: 01412e03 lw t3,20(sp)\n" - " 68: 01812e83 lw t4,24(sp)\n" - " 6c: 01c12f83 lw t6,28(sp)\n" - " 70: 02010113 addi sp,sp,32" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 4685 li a3,1\n" + " 2e: 871a mv a4,t1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -3047,35 +3073,35 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw 
t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: fe010113 addi sp,sp,-32\n" - " 1c: 00112023 sw ra,0(sp)\n" - " 20: 00a12223 sw a0,4(sp)\n" - " 24: 00b12423 sw a1,8(sp)\n" - " 28: 00c12623 sw a2,12(sp)\n" - " 2c: 00612823 sw t1,16(sp)\n" - " 30: 01c12a23 sw t3,20(sp)\n" - " 34: 01d12c23 sw t4,24(sp)\n" - " 38: 01f12e23 sw t6,28(sp)\n" - " 3c: 00038613 mv a2,t2\n" - " 40: 00030693 mv a3,t1\n" - " 44: 00100713 li a4,1\n" - " 48: 000f00e7 jalr t5\n" - " 4c: 00050f13 mv t5,a0\n" - " 50: 00012083 lw ra,0(sp)\n" - " 54: 00412503 lw a0,4(sp)\n" - " 58: 00812583 lw a1,8(sp)\n" - " 5c: 00c12603 lw a2,12(sp)\n" - " 60: 01012303 lw t1,16(sp)\n" - " 64: 01412e03 lw t3,20(sp)\n" - " 68: 01812e83 lw t4,24(sp)\n" - " 6c: 01c12f83 lw t6,28(sp)\n" - " 70: 02010113 addi sp,sp,32" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 869a mv a3,t1\n" + " 2e: 4705 li a4,1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" >>, ?assertEqual(dump_to_bin(Dump), Stream), ?assertEqual(t5, ResultReg) @@ -3089,39 +3115,39 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 
8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: fd010113 addi sp,sp,-48\n" - " 1c: 00112023 sw ra,0(sp)\n" - " 20: 00a12223 sw a0,4(sp)\n" - " 24: 00b12423 sw a1,8(sp)\n" - " 28: 00c12623 sw a2,12(sp)\n" - " 2c: 00612823 sw t1,16(sp)\n" - " 30: 00712a23 sw t2,20(sp)\n" - " 34: 01c12c23 sw t3,24(sp)\n" - " 38: 01d12e23 sw t4,28(sp)\n" - " 3c: 03e12023 sw t5,32(sp)\n" - " 40: 03f12223 sw t6,36(sp)\n" - " 44: 00058313 mv t1,a1\n" - " 48: 000f0513 mv a0,t5\n" - " 4c: 00068593 mv a1,a3\n" - " 50: 000300e7 jalr t1\n" - " 54: 00a12423 sw a0,8(sp)\n" - " 58: 00012083 lw ra,0(sp)\n" - " 5c: 00412503 lw a0,4(sp)\n" - " 60: 00812583 lw a1,8(sp)\n" - " 64: 00c12603 lw a2,12(sp)\n" - " 68: 01012303 lw t1,16(sp)\n" - " 6c: 01412383 lw t2,20(sp)\n" - " 70: 01812e03 lw t3,24(sp)\n" - " 74: 01c12e83 lw t4,28(sp)\n" - " 78: 02012f03 lw t5,32(sp)\n" - " 7c: 02412f83 lw t6,36(sp)\n" - " 80: 03010113 addi sp,sp,48" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: c006 sw ra,0(sp)\n" + " 1e: c22a sw a0,4(sp)\n" + " 20: c42e sw a1,8(sp)\n" + " 22: c632 sw a2,12(sp)\n" + " 24: c81a sw t1,16(sp)\n" + " 26: ca1e sw t2,20(sp)\n" + " 28: cc72 sw t3,24(sp)\n" + " 2a: ce76 sw t4,28(sp)\n" + " 2c: d07a sw t5,32(sp)\n" + " 2e: d27e sw t6,36(sp)\n" + " 30: 832e mv t1,a1\n" + " 32: 857a mv a0,t5\n" + " 34: 85b6 mv a1,a3\n" + " 36: 9302 jalr t1\n" + " 38: c42a sw a0,8(sp)\n" + " 3a: 4082 lw ra,0(sp)\n" + " 3c: 4512 lw a0,4(sp)\n" + " 3e: 45a2 lw a1,8(sp)\n" + " 40: 4632 lw a2,12(sp)\n" + " 42: 4342 lw t1,16(sp)\n" + " 44: 43d2 lw t2,20(sp)\n" + " 46: 4e62 lw t3,24(sp)\n" + " 48: 4ef2 lw t4,28(sp)\n" + " 4a: 5f02 lw t5,32(sp)\n" + " 4c: 5f92 lw t6,36(sp)\n" + " 4e: 03010113 addi sp,sp,48" >>, ?assertEqual(dump_to_bin(Dump), Stream) 
end), @@ -3135,37 +3161,37 @@ call_func_ptr_register_exhaustion_test_() -> Stream = ?BACKEND:stream(State7), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 01c52f03 lw t5,28(a0)\n" - " 8: 02052e83 lw t4,32(a0)\n" - " c: 02452e03 lw t3,36(a0)\n" - " 10: 02852383 lw t2,40(a0)\n" - " 14: 02c52303 lw t1,44(a0)\n" - " 18: fd010113 addi sp,sp,-48\n" - " 1c: 00112023 sw ra,0(sp)\n" - " 20: 00a12223 sw a0,4(sp)\n" - " 24: 00b12423 sw a1,8(sp)\n" - " 28: 00c12623 sw a2,12(sp)\n" - " 2c: 00612823 sw t1,16(sp)\n" - " 30: 00712a23 sw t2,20(sp)\n" - " 34: 01c12c23 sw t3,24(sp)\n" - " 38: 01d12e23 sw t4,28(sp)\n" - " 3c: 03f12023 sw t6,32(sp)\n" - " 40: 00862303 lw t1,8(a2)\n" - " 44: 000f0513 mv a0,t5\n" - " 48: 00068593 mv a1,a3\n" - " 4c: 000300e7 jalr t1\n" - " 50: 00050f13 mv t5,a0\n" - " 54: 00012083 lw ra,0(sp)\n" - " 58: 00412503 lw a0,4(sp)\n" - " 5c: 00812583 lw a1,8(sp)\n" - " 60: 00c12603 lw a2,12(sp)\n" - " 64: 01012303 lw t1,16(sp)\n" - " 68: 01412383 lw t2,20(sp)\n" - " 6c: 01812e03 lw t3,24(sp)\n" - " 70: 01c12e83 lw t4,28(sp)\n" - " 74: 02012f83 lw t6,32(sp)\n" - " 78: 03010113 addi sp,sp,48" + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: c006 sw ra,0(sp)\n" + " 1e: c22a sw a0,4(sp)\n" + " 20: c42e sw a1,8(sp)\n" + " 22: c632 sw a2,12(sp)\n" + " 24: c81a sw t1,16(sp)\n" + " 26: ca1e sw t2,20(sp)\n" + " 28: cc72 sw t3,24(sp)\n" + " 2a: ce76 sw t4,28(sp)\n" + " 2c: d07e sw t6,32(sp)\n" + " 2e: 00862303 lw t1,8(a2)\n" + " 32: 857a mv a0,t5\n" + " 34: 85b6 mv a1,a3\n" + " 36: 9302 jalr t1\n" + " 38: 8f2a mv t5,a0\n" + " 3a: 4082 lw ra,0(sp)\n" + " 3c: 4512 lw a0,4(sp)\n" + " 3e: 45a2 lw a1,8(sp)\n" + " 40: 4632 lw a2,12(sp)\n" + " 42: 4342 lw t1,16(sp)\n" + " 44: 43d2 lw t2,20(sp)\n" + " 46: 4e62 lw t3,24(sp)\n" + " 48: 4ef2 lw t4,28(sp)\n" + " 4a: 5f82 lw t6,32(sp)\n" + " 4c: 
03010113 addi sp,sp,48" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -3180,12 +3206,12 @@ jump_to_continuation_test_() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:jump_to_continuation(State0, {free, a0}), Stream = ?BACKEND:stream(State1), - % Expected: riscv32 PIC sequence - simpler than ARM, no prolog/epilog needed + % Expected: riscv32 PIC sequence Dump = << - " 0: 00000f97 auipc t6,0x0\n" - " 4: 00af8fb3 add t6,t6,a0\n" - " 8: 000f8067 jr t6" + " 0: 00000f97 auipc t6,0x0\n" + " 4: 9faa add t6,t6,a0\n" + " 6: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream) end), @@ -3200,18 +3226,18 @@ jump_to_continuation_test_() -> % NetOffset = 0 - 32 = -32 (0xFFFFFFE0) Dump = << - " 0: 00000697 auipc a3,0x0\n" - " 4: 00068067 jr a3\n" - " 8: 00000697 auipc a3,0x0\n" - " c: 00068067 jr a3\n" - " 10: 00000697 auipc a3,0x0\n" - " 14: 00068067 jr a3\n" - " 18: 00000697 auipc a3,0x0\n" - " 1c: 00068067 jr a3\n" - " 20: 00000f97 auipc t6,0x0\n" - " 24: fe0f8f93 addi t6,t6,-32\n" - " 28: 00af8fb3 add t6,t6,a0\n" - " 2c: 000f8067 jr t6" + " 0: ffffffff .insn 4, 0xffffffff\n" + " 4: ffffffff .insn 4, 0xffffffff\n" + " 8: ffffffff .insn 4, 0xffffffff\n" + " c: ffffffff .insn 4, 0xffffffff\n" + " 10: ffffffff .insn 4, 0xffffffff\n" + " 14: ffffffff .insn 4, 0xffffffff\n" + " 18: ffffffff .insn 4, 0xffffffff\n" + " 1c: ffffffff .insn 4, 0xffffffff\n" + " 20: 00000f97 auipc t6,0x0\n" + " 24: 1f81 addi t6,t6,-32 # 0x0\n" + " 26: 9faa add t6,t6,a0\n" + " 28: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream) end) @@ -3246,88 +3272,92 @@ add_beam_test() -> Dump = << % jump table (new 8-byte format) - " 0: 00000697 auipc a3,0x0\n" - " 4: 10c68067 jr 268(a3) # 0x10c\n" - " 8: 00000697 auipc a3,0x0\n" - " c: 01868067 jr 24(a3) # 0x20\n" - " 10: 00000697 auipc a3,0x0\n" - " 14: 04c68067 jr 76(a3) # 0x5c\n" - " 18: 00000697 auipc a3,0x0\n" - " 1c: 0ec68067 jr 236(a3) # 0x104\n" + " 0: 00000697 auipc a3,0x0\n" + " 
4: 0e068067 jr 224(a3) # 0xe0\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01868067 jr 24(a3) # 0x20\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 04868067 jr 72(a3) # 0x58\n" + " 18: 00000697 auipc a3,0x0\n" + " 1c: 0c268067 jr 194(a3) # 0xda\n" % label 1 % {move,{integer,9},{x,1}}. - " 20: 09f00f93 li t6,159\n" - " 24: 01f52e23 sw t6,28(a0)\n" + " 20: 09f00f93 li t6,159\n" + " 24: 01f52e23 sw t6,28(a0)\n" % {move,{integer,8},{x,0}} - " 28: 08f00f93 li t6,143\n" - " 2c: 01f52c23 sw t6,24(a0)\n" + " 28: 08f00f93 li t6,143\n" + " 2c: 01f52c23 sw t6,24(a0)\n" % {call_only,2,{f,2}}. - " 30: 0085af83 lw t6,8(a1)\n" - " 34: ffff8f93 addi t6,t6,-1\n" - " 38: 01f5a423 sw t6,8(a1)\n" - " 3c: 000f8663 beqz t6,0x48\n" - " 40: 01c0006f j 0x5c\n" - " 44: 00000013 nop\n" - " 48: 00000f97 auipc t6,0x0\n" - " 4c: 014f8f93 addi t6,t6,20 # 0x5c\n" - " 50: 01f5a223 sw t6,4(a1)\n" - " 54: 00862f83 lw t6,8(a2)\n" - " 58: 000f8067 jr t6\n" + " 30: 0085af83 lw t6,8(a1)\n" + " 34: 1ffd addi t6,t6,-1\n" + " 36: 01f5a423 sw t6,8(a1)\n" + " 3a: 000f8663 beqz t6,0x46\n" + " 3e: a829 j 0x58\n" + " 40: 0001 nop\n" + " 42: 00000013 nop\n" + " 46: 00000f97 auipc t6,0x0\n" + " 4a: 0fc9 addi t6,t6,18 # 0x58\n" + " 4c: 0001 nop\n" + " 4e: 01f5a223 sw t6,4(a1)\n" + " 52: 00862f83 lw t6,8(a2)\n" + " 56: 8f82 jr t6\n" % label 2 % {allocate,1,1}. 
- " 5c: 01462f83 lw t6,20(a2)\n" - " 60: ff010113 addi sp,sp,-16\n" - " 64: 00112023 sw ra,0(sp)\n" - " 68: 00a12223 sw a0,4(sp)\n" - " 6c: 00b12423 sw a1,8(sp)\n" - " 70: 00c12623 sw a2,12(sp)\n" - " 74: 00100613 li a2,1\n" - " 78: 00000693 li a3,0\n" - " 7c: 00100713 li a4,1\n" - " 80: 000f80e7 jalr t6\n" - " 84: 00050f93 mv t6,a0\n" - " 88: 00012083 lw ra,0(sp)\n" - " 8c: 00412503 lw a0,4(sp)\n" - " 90: 00812583 lw a1,8(sp)\n" - " 94: 00c12603 lw a2,12(sp)\n" - " 98: 01010113 addi sp,sp,16\n" - " 9c: 01ff9f13 slli t5,t6,0x1f\n" - " a0: 000f4863 bltz t5,0xb0\n" - " a4: 01862f83 lw t6,24(a2)\n" - " a8: 0a800613 li a2,168\n" - " ac: 000f8067 jr t6\n" + " 58: 01462f83 lw t6,20(a2)\n" + " 5c: 1141 addi sp,sp,-16\n" + " 5e: c006 sw ra,0(sp)\n" + " 60: c22a sw a0,4(sp)\n" + " 62: c42e sw a1,8(sp)\n" + " 64: c632 sw a2,12(sp)\n" + " 66: 4605 li a2,1\n" + " 68: 4681 li a3,0\n" + " 6a: 4705 li a4,1\n" + " 6c: 9f82 jalr t6\n" + " 6e: 8faa mv t6,a0\n" + " 70: 4082 lw ra,0(sp)\n" + " 72: 4512 lw a0,4(sp)\n" + " 74: 45a2 lw a1,8(sp)\n" + " 76: 4632 lw a2,12(sp)\n" + " 78: 0141 addi sp,sp,16\n" + " 7a: 01ff9f13 slli t5,t6,0x1f\n" + " 7e: 000f4763 bltz t5,0x8c\n" + " 82: 01862f83 lw t6,24(a2)\n" + " 86: 08600613 li a2,134\n" + " 8a: 8f82 jr t6\n" % {init_yregs,{list,[{y,0}]}}. 
%% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), - " b0: 03b00f13 li t5,59\n" - " b4: 01452f83 lw t6,20(a0)\n" - " b8: 01efa023 sw t5,0(t6)\n" + " 8c: 03b00f13 li t5,59\n" + " 90: 01452f83 lw t6,20(a0)\n" + " 94: 01efa023 sw t5,0(t6)\n" % {call,1,{f,3}} %% call_or_schedule_next(State9, 3), - " bc: 0005af03 lw t5,0(a1)\n" - " c0: 000f2f03 lw t5,0(t5)\n" - " c4: 018f1f13 slli t5,t5,0x18\n" - " c8: 41000f93 li t6,1040\n" - " cc: 00000013 nop\n" - " d0: 01ff6f33 or t5,t5,t6\n" - " d4: 05e52e23 sw t5,92(a0)\n" - " d8: 0085af83 lw t6,8(a1)\n" - " dc: ffff8f93 addi t6,t6,-1\n" - " e0: 01f5a423 sw t6,8(a1)\n" - " e4: 000f8663 beqz t6,0xf0\n" - " e8: 01c0006f j 0x104\n" - " ec: 00000013 nop\n" - " f0: 00000f97 auipc t6,0x0\n" - " f4: 014f8f93 addi t6,t6,20 # 0x104\n" - " f8: 01f5a223 sw t6,4(a1)\n" - " fc: 00862f83 lw t6,8(a2)\n" - " 100: 000f8067 jr t6\n" + " 98: 0005af03 lw t5,0(a1)\n" + " 9c: 000f2f03 lw t5,0(t5)\n" + " a0: 0f62 slli t5,t5,0x18\n" + " a2: 36800f93 li t6,872\n" + " a6: 00000013 nop\n" + " aa: 01ff6f33 or t5,t5,t6\n" + " ae: 05e52e23 sw t5,92(a0)\n" + " b2: 0085af83 lw t6,8(a1)\n" + " b6: 1ffd addi t6,t6,-1\n" + " b8: 01f5a423 sw t6,8(a1)\n" + " bc: 000f8663 beqz t6,0xc8\n" + " c0: a829 j 0xda\n" + " c2: 0001 nop\n" + " c4: 00000013 nop\n" + " c8: 00000f97 auipc t6,0x0\n" + " cc: 0fc9 addi t6,t6,18 # 0xda\n" + " ce: 0001 nop\n" + " d0: 01f5a223 sw t6,4(a1)\n" + " d4: 00862f83 lw t6,8(a2)\n" + " d8: 8f82 jr t6\n" %% (continuation) % label 3 - " 104: 00462f83 lw t6,4(a2)\n" - " 108: 000f8067 jr t6\n" + " da: 00462f83 lw t6,4(a2)\n" + " de: 8f82 jr t6\n" % label 0 - " 10c: 00462f83 lw t6,4(a2)\n" - " 110: 000f8067 jr t6\n" + " e0: 00462f83 lw t6,4(a2)\n" + " e4: 8f82 jr t6" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -3346,15 +3376,15 @@ dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) -> dump_to_bin0(Tail, addr, Acc); dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) -> dump_to_bin0(Tail, addr, Acc); -dump_to_bin0(<<$ , Tail/binary>>, addr, Acc) -> +dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) -> dump_to_bin0(Tail, addr, Acc); dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) -> dump_to_bin0(Tail, hex, Acc); -dump_to_bin0(<<$ , Tail/binary>>, hex, Acc) -> +dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) -> dump_to_bin0(Tail, hex, Acc); %% Handle RISC-V 32-bit instructions (8 consecutive hex digits) dump_to_bin0(<>, hex, Acc) when - (Sp =:= $ orelse Sp =:= $\s) andalso + (Sp =:= $\t orelse Sp =:= $\s) andalso ?IS_HEX_DIGIT(H1) andalso ?IS_HEX_DIGIT(H2) andalso ?IS_HEX_DIGIT(H3) andalso @@ -3369,7 +3399,7 @@ dump_to_bin0(<>, hex, Acc) when dump_to_bin0(Rest, instr, [<> | Acc]); %% Handle 32-bits undefined instruction (ARM format with space: "1234 5678") dump_to_bin0(<>, hex, Acc) when - (Sp =:= $ orelse Sp =:= $\s) andalso + (Sp =:= $\t orelse Sp =:= $\s) andalso ?IS_HEX_DIGIT(H1) andalso ?IS_HEX_DIGIT(H2) andalso ?IS_HEX_DIGIT(H3) andalso @@ -3384,7 +3414,7 @@ dump_to_bin0(<>, hex, Acc) dump_to_bin0(Rest, instr, [<>, <> | Acc]); %% Handle 16-bit ARM32 Thumb instructions (4 hex digits) dump_to_bin0(<>, hex, Acc) when - (Sp =:= $ orelse Sp =:= $\s) andalso + (Sp =:= $\t orelse Sp =:= $\s) andalso ?IS_HEX_DIGIT(H1) andalso ?IS_HEX_DIGIT(H2) andalso ?IS_HEX_DIGIT(H3) andalso diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl index 7117ee5f69..cf989e746d 100644 --- a/tests/libs/jit/jit_tests_common.erl +++ b/tests/libs/jit/jit_tests_common.erl @@ -78,6 +78,7 @@ find_binutils(Arch) -> ArchStr = atom_to_list(Arch), BinutilsList = [ {ArchStr ++ "-esp-elf-as", ArchStr ++ "-esp-elf-objdump"}, + {ArchStr ++ "-unknown-elf-as", ArchStr ++ "-unknown-elf-objdump"}, {ArchStr ++ "-elf-as", ArchStr ++ "-elf-objdump"}, {ArchStr ++ "-none-eabi-as", ArchStr ++ 
"-none-eabi-objdump"}, {ArchStr ++ "-linux-gnu-as", ArchStr ++ "-linux-gnu-objdump"} @@ -118,7 +119,7 @@ get_as_flags(aarch64) -> get_as_flags(x86_64) -> "--64"; get_as_flags(riscv32) -> - "-march=rv32ima". + "-march=rv32imac". %% Parse objdump output lines and extract binary data -spec asm_lines([binary()], binary(), atom()) -> binary(). From f75c87a281ec84b92e965a83e0ef79bf157f1ae0 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 23 Oct 2025 22:08:14 +0200 Subject: [PATCH 19/28] riscv32: add it to the documentation Signed-off-by: Paul Guyot --- doc/src/atomvm-internals.md | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/src/atomvm-internals.md b/doc/src/atomvm-internals.md index 62ad888a71..4d30e6767d 100644 --- a/doc/src/atomvm-internals.md +++ b/doc/src/atomvm-internals.md @@ -137,7 +137,7 @@ Following BEAM, there are two flavors of the emulator: jit and emu, but eventual - Native: the VM only runs native code and all code must be precompiled on the desktop using the JIT compiler (which effectively is a AOT or Ahead-of-Time compiler). In this mode, it is not necessary to bundle the jit compiler on the embedded target. - Hybrid: the VM can run native code as well as emulated BEAM code and some code is precompiled on the desktop. -JIT is available on some platforms (currently only x86_64, aarch64 and armv6m) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted. +JIT is available on some platforms (currently x86_64, aarch64, armv6m and riscv32) and compiles Erlang bytecode at runtime. Erlang bytecode is never interpreted. EMU is available on all platforms and Erlang bytecode is interpreted. Modules can include precompiled code in a dedicated beam chunk with name 'avmN'. The chunk can contain native code for several architectures, however it may only contain native code for a given version of the native interface. 
Current version is 1. This native code is executed by the jit-flavor of the emulator as well as the emu flavor if execution of precompiled is enabled. @@ -154,9 +154,16 @@ The JIT compiler is written in Erlang and is therefore precompiled. When a proce JIT compiler is composed of two main interfaces : backend and stream. -A backend implementation is required for each architecture. The backend is called by jit module as it translates bytecodes to machine code. The current implementations are `jit_x86_64` and `jit_aarch64` which are suitable for systems with System V X86 64 ABI or AArch64 ABI. +A backend implementation is required for each architecture. The backend is called by jit module as it translates bytecodes to machine code. The current implementations are : +- `jit_x86_64` for System V X86 64 ABI +- `jit_aarch64` for AArch64 ABI +- `jit_armv6m` for AArch32 ABI +- `jit_riscv32` for rv32imc ilp32 ABI. -A stream implementation is responsible for streaming the machine code, especially in the context of low memory. Two implementations currently exist: `jit_stream_binary` that streams assembly code to an Erlang binary, suitable for tests and precompilation on the desktop, and `jit_stream_mmap` that streams assembly code in an `mmap(2)` allocated page, suitable for JIT compilation on Unix. +A stream implementation is responsible for streaming the machine code, especially in the context of low memory. Three implementations currently exist: +- `jit_stream_binary` that streams assembly code to an Erlang binary, suitable for tests and precompilation on the desktop +- `jit_stream_mmap` that streams assembly code in an `mmap(2)` allocated page, suitable for JIT compilation on Unix +- `jit_stream_flash` available on Pico that allows for embedded JIT. 
### Embedded JIT and Native From 8ad79a2b635515740c517138c945b6a1cc7df28b Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 23 Oct 2025 22:38:40 +0200 Subject: [PATCH 20/28] flush fixup Signed-off-by: Paul Guyot --- libs/jit/src/jit_stream_binary.erl | 14 +++++++++++++- libs/jit/src/jit_stream_mmap.erl | 14 +++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/libs/jit/src/jit_stream_binary.erl b/libs/jit/src/jit_stream_binary.erl index db433c12ea..26e32bafa0 100644 --- a/libs/jit/src/jit_stream_binary.erl +++ b/libs/jit/src/jit_stream_binary.erl @@ -27,7 +27,8 @@ offset/1, append/2, replace/3, - map/4 + map/4, + flush/1 ]). -export_type([stream/0]). @@ -93,3 +94,14 @@ map(Stream, Offset, Length, MapFunction) -> {Prefix, <>} = split_binary(Stream, Offset), Replacement = MapFunction(Previous), <>. + +%%----------------------------------------------------------------------------- +%% @param Stream stream to flush +%% @returns The stream flushed +%% @doc Flush the stream. NOP with binaries. +%% +%% @end +%%----------------------------------------------------------------------------- +-spec flush(stream()) -> stream(). +flush(Stream) -> + Stream. diff --git a/libs/jit/src/jit_stream_mmap.erl b/libs/jit/src/jit_stream_mmap.erl index d8129f9a41..4429146c4e 100644 --- a/libs/jit/src/jit_stream_mmap.erl +++ b/libs/jit/src/jit_stream_mmap.erl @@ -27,7 +27,8 @@ offset/1, append/2, replace/3, - map/4 + map/4, + flush/1 ]). %% Additional nif @@ -109,3 +110,14 @@ map(Stream, Offset, Length, MapFunction) -> -spec read(stream(), non_neg_integer(), pos_integer()) -> binary(). read(_Stream, _Offset, _Length) -> erlang:nif_error(undefined). + +%%----------------------------------------------------------------------------- +%% @param Stream stream to flush +%% @returns The stream flushed +%% @doc Flush the stream. Typically invalidates instruction cache. 
+%% +%% @end +%%----------------------------------------------------------------------------- +-spec flush(stream()) -> stream(). +flush(_Stream) -> + erlang:nif_error(undefined). From 510d47370dee5ecd770fb23e3ade78db74adc549 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 23 Oct 2025 23:32:48 +0200 Subject: [PATCH 21/28] riscv32: Convert placeholders to FFFFFFFF for embedded JIT Signed-off-by: Paul Guyot --- libs/jit/src/jit_riscv32.erl | 48 ++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 191fc3cdc8..0e0217b5fd 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -934,7 +934,7 @@ if_else_block( }. if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) -> %% RISC-V: bge Reg, zero, offset (branch if Reg >= 0, i.e., NOT negative/NOT less than 0) - BranchInstr = jit_riscv32_asm:bge(Reg, zero, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream1 = StreamModule:append(Stream0, BranchInstr), State1 = State0#state{stream = Stream1}, {State1, {bge, Reg, zero}, 0}; @@ -949,7 +949,7 @@ if_block_cond( State1 = mov_immediate(State0, Temp, Val), Stream1 = State1#state.stream, BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, - BranchInstr = jit_riscv32_asm:bge(Reg, Temp, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = State1#state{stream = Stream2}, {State2, {bge, Reg, Temp}, BranchDelta}; @@ -962,7 +962,7 @@ if_block_cond( State1 = mov_immediate(State0, Temp, Val), Stream1 = State1#state.stream, BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, - BranchInstr = jit_riscv32_asm:bge(Reg, Temp, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = State1#state{stream = Stream2}, {State2, {bge, Reg, Temp}, BranchDelta}; @@ -976,7 +976,7 @@ if_block_cond( 
RegOrTuple -> RegOrTuple end, % RISC-V: bge Reg, RegB, offset (branch if Reg >= RegB, i.e., NOT less than) - BranchInstr = jit_riscv32_asm:bge(Reg, RegB, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream1 = StreamModule:append(Stream0, BranchInstr), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, @@ -990,7 +990,7 @@ if_block_cond( RegOrTuple -> RegOrTuple end, %% RISC-V: bne Reg, zero, offset (branch if Reg != 0, i.e., NOT equal to 0) - BranchInstr = jit_riscv32_asm:bne(Reg, zero, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream1 = StreamModule:append(Stream0, BranchInstr), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, @@ -1005,7 +1005,7 @@ if_block_cond( RegOrTuple -> RegOrTuple end, %% RISC-V: bne Reg, RegB, offset (branch if Reg != RegB, i.e., NOT equal) - BranchInstr = jit_riscv32_asm:bne(Reg, RegB, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream1 = StreamModule:append(Stream0, BranchInstr), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, @@ -1029,7 +1029,7 @@ if_block_cond( State1 = mov_immediate(State0, Temp, Val), Stream1 = State1#state.stream, BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, - BranchInstr = jit_riscv32_asm:beq(Reg, Temp, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), State3 = State2#state{stream = Stream2}, @@ -1044,7 +1044,7 @@ if_block_cond( RegOrTuple -> RegOrTuple end, %% RISC-V: beq Reg, Val, offset (branch if Reg == Val, i.e., NOT not-equal) - BranchInstr = jit_riscv32_asm:beq(Reg, Val, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream1 = StreamModule:append(Stream0, BranchInstr), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, @@ -1065,7 +1065,7 @@ if_block_cond( State1 = mov_immediate(State0, Temp, Val), Stream1 = State1#state.stream, 
BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, - BranchInstr = jit_riscv32_asm:bne(Reg, Temp, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), State3 = State2#state{stream = Stream2}, @@ -1075,7 +1075,7 @@ if_block_cond( {{free, RegA}, '==', {free, RegB}} ) -> %% RISC-V: bne RegA, RegB, offset (branch if RegA != RegB, i.e., NOT equal) - BranchInstr = jit_riscv32_asm:bne(RegA, RegB, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream1 = StreamModule:append(Stream0, BranchInstr), State1 = State0#state{stream = Stream1}, State2 = if_block_free_reg({free, RegA}, State1), @@ -1095,7 +1095,7 @@ if_block_cond( Stream1 = State1#state.stream, BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, %% RISC-V: bne Reg, Temp, offset (branch if Reg != Temp, i.e., NOT equal) - BranchInstr = jit_riscv32_asm:bne(Reg, Temp, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), State3 = State2#state{stream = Stream2}, @@ -1114,7 +1114,7 @@ if_block_cond( Stream1 = State1#state.stream, BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, %% RISC-V: beq Reg, Temp, offset (branch if Reg == Temp, i.e., NOT not-equal) - BranchInstr = jit_riscv32_asm:beq(Reg, Temp, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = if_block_free_reg(RegOrTuple, State1), State3 = State2#state{stream = Stream2}, @@ -1135,7 +1135,7 @@ if_block_cond( %% RISC-V: Test bit 0 by shifting to MSB, then branch if negative (bit was 1, NOT false) I1 = jit_riscv32_asm:slli(Temp, Reg, 31), Stream1 = StreamModule:append(Stream0, I1), - BranchInstr = jit_riscv32_asm:blt(Temp, zero, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State1 = if_block_free_reg(RegOrTuple, State0), State2 = 
State1#state{stream = Stream2}, @@ -1156,7 +1156,7 @@ if_block_cond( %% RISC-V: Test bit 0 by shifting to MSB, then branch if non-negative (bit was 0, NOT true) I1 = jit_riscv32_asm:slli(Temp, Reg, 31), Stream1 = StreamModule:append(Stream0, I1), - BranchInstr = jit_riscv32_asm:bge(Temp, zero, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream2}, @@ -1190,7 +1190,7 @@ if_block_cond( Stream1 = StreamModule:append(Stream0, TestCode), BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, %% Branch if result is zero (no bits set, NOT != 0) - BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream2}, @@ -1207,7 +1207,7 @@ if_block_cond( I1 = jit_riscv32_asm:not_(Temp, Reg), I2 = jit_riscv32_asm:slli(Temp, Temp, 28), Stream1 = StreamModule:append(Stream0, <>), - BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State1 = State0#state{stream = Stream2}, {State1, {beq, Temp, zero}, byte_size(I1) + byte_size(I2)}; @@ -1222,7 +1222,7 @@ if_block_cond( I1 = jit_riscv32_asm:not_(Reg, Reg), I2 = jit_riscv32_asm:slli(Reg, Reg, 28), Stream1 = StreamModule:append(Stream0, <>), - BranchInstr = jit_riscv32_asm:beq(Reg, zero, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State1 = State0#state{stream = Stream2}, State2 = if_block_free_reg(RegTuple, State1), @@ -1247,7 +1247,7 @@ if_block_cond( 0 -> %% Optimize comparison with zero BranchDelta = StreamModule:offset(Stream2) - OffsetBefore, - BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream3 = StreamModule:append(Stream2, 
BranchInstr), State3 = State2#state{ stream = Stream3, available_regs = [Temp | State2#state.available_regs] @@ -1256,7 +1256,7 @@ if_block_cond( _ when ?IS_GPR(Val) -> %% Val is a register BranchDelta = StreamModule:offset(Stream2) - OffsetBefore, - BranchInstr = jit_riscv32_asm:beq(Temp, Val, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream3 = StreamModule:append(Stream2, BranchInstr), State3 = State2#state{ stream = Stream3, available_regs = [Temp | State2#state.available_regs] @@ -1269,7 +1269,7 @@ if_block_cond( State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val), Stream3 = State3#state.stream, BranchDelta = StreamModule:offset(Stream3) - OffsetBefore, - BranchInstr = jit_riscv32_asm:beq(Temp, MaskReg, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream4 = StreamModule:append(Stream3, BranchInstr), State4 = State3#state{ stream = Stream4, available_regs = [Temp, MaskReg | State3#state.available_regs] @@ -1293,7 +1293,7 @@ if_block_cond( 0 -> %% Optimize comparison with zero BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, - BranchInstr = jit_riscv32_asm:beq(Reg, zero, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = State1#state{stream = Stream2}, State3 = if_block_free_reg(RegTuple, State2), @@ -1301,7 +1301,7 @@ if_block_cond( _ when ?IS_GPR(Val) -> %% Val is a register BranchDelta = StreamModule:offset(Stream1) - OffsetBefore, - BranchInstr = jit_riscv32_asm:beq(Reg, Val, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream2 = StreamModule:append(Stream1, BranchInstr), State2 = State1#state{stream = Stream2}, State3 = if_block_free_reg(RegTuple, State2), @@ -1313,7 +1313,7 @@ if_block_cond( State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val), Stream2 = State2#state.stream, BranchDelta = StreamModule:offset(Stream2) - OffsetBefore, - BranchInstr = jit_riscv32_asm:beq(Reg, MaskReg, 0), + BranchInstr = <<16#FFFFFFFF:32/little>>, Stream3 = 
StreamModule:append(Stream2, BranchInstr), State3 = State2#state{stream = Stream3, available_regs = AvailRegs}, State4 = if_block_free_reg(RegTuple, State3), @@ -2654,7 +2654,7 @@ decrement_reductions_and_maybe_schedule_next( Stream1 = StreamModule:append(Stream0, <>), BNEOffset = StreamModule:offset(Stream1), % Branch if reduction count is not zero - I4 = jit_riscv32_asm:bne(Temp, zero, 0), + I4 = <<16#FFFFFFFF:32/little>>, % Set continuation to the next instruction ADROffset = BNEOffset + byte_size(I4), % Use 8-byte placeholder (2 words of 0xFFFFFFFF) for pc_relative_address From a9ab15ce8a19fe896844fcb36ca768ab44ad9a13 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 23 Oct 2025 23:33:07 +0200 Subject: [PATCH 22/28] JIT on esp32: fix CMakeLists Signed-off-by: Paul Guyot --- src/platforms/esp32/CMakeLists.txt | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index 4ddc362924..4dc81e0aad 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -51,6 +51,11 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) +# By default, JIT is disabled +set(AVM_DISABLE_JIT OFF) + +project(atomvm-esp32) + # JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) # Configuration comes from idf.py menuconfig (KConfig), not CMake options if(CONFIG_JIT_ENABLED) @@ -67,8 +72,6 @@ else() message(STATUS "JIT compilation disabled") endif() -project(atomvm-esp32) - # esp-idf does not use compile_feature but instead sets version in # c_compile_options # Ensure project is compiled with at least C11 From f0bfb32acd4897ad53a45cba132743043eae18a3 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 24 Oct 2025 08:02:27 +0200 Subject: [PATCH 23/28] JIT: Optimize memory usage by patching jump table asap Signed-off-by: Paul Guyot --- libs/estdlib/src/code_server.erl | 2 +- 
libs/jit/src/jit.erl | 49 +++- libs/jit/src/jit_aarch64.erl | 30 ++- libs/jit/src/jit_armv6m.erl | 56 +++-- libs/jit/src/jit_x86_64.erl | 33 ++- tests/libs/jit/jit_aarch64_tests.erl | 184 +++++++-------- tests/libs/jit/jit_armv6m_tests.erl | 321 +++++++++++++++++---------- tests/libs/jit/jit_x86_64_tests.erl | 145 ++++++------ 8 files changed, 514 insertions(+), 306 deletions(-) diff --git a/libs/estdlib/src/code_server.erl b/libs/estdlib/src/code_server.erl index eee061efba..cdd7e8b372 100644 --- a/libs/estdlib/src/code_server.erl +++ b/libs/estdlib/src/code_server.erl @@ -152,7 +152,7 @@ set_native_code(_Module, _LabelsCount, _Stream) -> load(Module) -> case erlang:system_info(emu_flavor) of jit -> - % atomvm_heap_growth, fibonacci divides compilation time by two + % atomvm_heap_growth, fibonacci reduces compilation time {Pid, Ref} = spawn_opt( fun() -> try diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index 823a6b9f97..2980f8dd05 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -114,6 +114,14 @@ -define(ASSERT_ALL_NATIVE_FREE(St), ok). -define(ASSERT(Expr), ok). +%-define(JIT_INSTRUMENT, true). + +-ifdef(JIT_INSTRUMENT). +-define(INSTRUMENT(Tag, State, MSt), instrument(Tag, State, MSt)). +-else. +-define(INSTRUMENT(Tag, State, MSt), ok). +-endif. 
+ %%----------------------------------------------------------------------------- %% @param LabelsCount number of labels %% @param Arch code for the architecture @@ -137,7 +145,6 @@ compile( MMod, MSt0 ) when OpcodeMax =< ?OPCODE_MAX -> - MSt1 = MMod:jump_table(MSt0, LabelsCount), State0 = #state{ line_offsets = [], labels_count = LabelsCount, @@ -146,9 +153,15 @@ compile( type_resolver = TypeResolver, tail_cache = [] }, + ?INSTRUMENT("compile_start", State0, MSt0), + MSt1 = MMod:jump_table(MSt0, LabelsCount), + ?INSTRUMENT("after_jump_table", State0, MSt1), {State1, MSt2} = first_pass(Opcodes, MMod, MSt1, State0), + ?INSTRUMENT("after_first_pass", State1, MSt2), MSt3 = second_pass(MMod, MSt2, State1), + ?INSTRUMENT("after_second_pass", State1, MSt3), MSt4 = MMod:flush(MSt3), + ?INSTRUMENT("after_flush", State1, MSt4), {LabelsCount, MSt4}; compile( <<16:32, 0:32, OpcodeMax:32, _LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>>, @@ -3858,3 +3871,37 @@ backend(StreamModule, Stream) -> Variant = ?MODULE:variant(), BackendState = BackendModule:new(Variant, StreamModule, Stream), {BackendModule, BackendState}. + +-ifdef(JIT_INSTRUMENT). +instrument(Tag, #state{line_offsets = Lines, tail_cache = TC}, MSt) -> + StateSize = erts_debug:flat_size({Lines, TC}), + MStSize = erts_debug:flat_size(MSt), + LinesCount = length(Lines), + TCCount = length(TC), + + % Extract branches count from backend state + % state record: {state, stream_module, stream, offset, branches, jump_table_start, ...} + BranchesCount = + case element(1, MSt) of + state -> length(element(5, MSt)); + _ -> unknown + end, + + {heap_size, HeapSize} = process_info(self(), heap_size), + {total_heap_size, TotalHeapSize} = process_info(self(), total_heap_size), + + io:format( + "~s: mst=~p words, state=~p words (lines=~p, tc=~p, br=~p), " + "heap=~p, total_heap=~p~n", + [ + Tag, + MStSize, + StateSize, + LinesCount, + TCCount, + BranchesCount, + HeapSize, + TotalHeapSize + ] + ). +-endif. 
diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index fefa004d6d..cb6504c485 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -134,6 +134,7 @@ stream :: stream(), offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + jump_table_start :: non_neg_integer(), available_regs :: [aarch64_register()], used_regs :: [aarch64_register()], labels :: [{integer() | reference(), integer()}], @@ -233,6 +234,7 @@ new(Variant, StreamModule, Stream) -> stream_module = StreamModule, stream = Stream, branches = [], + jump_table_start = 0, offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], @@ -355,22 +357,21 @@ assert_all_native_free(#state{ %% @return Updated backend state %%----------------------------------------------------------------------------- -spec jump_table(state(), pos_integer()) -> state(). -jump_table(State, LabelsCount) -> - jump_table0(State, 0, LabelsCount). +jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) -> + JumpTableStart = StreamModule:offset(Stream0), + jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount). -spec jump_table0(state(), non_neg_integer(), pos_integer()) -> state(). jump_table0(State, N, LabelsCount) when N > LabelsCount -> State; jump_table0( - #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + #state{stream_module = StreamModule, stream = Stream0} = State, N, LabelsCount ) -> - Offset = StreamModule:offset(Stream0), BranchInstr = jit_aarch64_asm:b(0), - Reloc = {N, Offset, b}, Stream1 = StreamModule:append(Stream0, BranchInstr), - jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). + jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). 
%%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. @@ -2343,5 +2344,22 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) %% @return Updated backend state %%----------------------------------------------------------------------------- -spec add_label(state(), integer() | reference(), integer()) -> state(). +add_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + jump_table_start = JumpTableStart, + labels = Labels + } = State, + Label, + LabelOffset +) when is_integer(Label) -> + % Patch the jump table entry immediately + % Each b instruction is 4 bytes + JumpTableEntryOffset = JumpTableStart + Label * 4, + RelativeOffset = LabelOffset - JumpTableEntryOffset, + BranchInstr = jit_aarch64_asm:b(RelativeOffset), + Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, BranchInstr), + State#state{stream = Stream1, labels = [{Label, LabelOffset} | Labels]}; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. 
diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 040b0a668b..04df1709fa 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -134,6 +134,7 @@ stream :: stream(), offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + jump_table_start :: non_neg_integer(), available_regs :: [armv6m_register()], used_regs :: [armv6m_register()], labels :: [{integer() | reference(), integer()}], @@ -247,6 +248,7 @@ new(Variant, StreamModule, Stream) -> stream_module = StreamModule, stream = Stream, branches = [], + jump_table_start = 0, offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], @@ -380,13 +382,14 @@ assert_all_native_free(#state{ %% @return Updated backend state %%----------------------------------------------------------------------------- -spec jump_table(state(), pos_integer()) -> state(). -jump_table(State, LabelsCount) -> - jump_table0(State, 0, LabelsCount). +jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) -> + JumpTableStart = StreamModule:offset(Stream0), + jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount). 
jump_table0(State, N, LabelsCount) when N > LabelsCount -> State; jump_table0( - #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + #state{stream_module = StreamModule, stream = Stream0} = State, N, LabelsCount ) -> @@ -399,15 +402,7 @@ jump_table0( JumpEntry = <>, Stream1 = StreamModule:append(Stream0, JumpEntry), - % Add relocation for the data entry so update_branches/2 can patch the jump target - DataOffset = StreamModule:offset(Stream1) - 4, - % Calculate the offset of the add instruction (3rd instruction, at offset 4 from entry start) - EntryStartOffset = StreamModule:offset(Stream1) - 12, - AddInstrOffset = EntryStartOffset + 4, - DataReloc = {N, DataOffset, {jump_table_data, AddInstrOffset}}, - UpdatedState = State#state{stream = Stream1, branches = [DataReloc | Branches]}, - - jump_table0(UpdatedState, N + 1, LabelsCount). + jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). %%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. @@ -500,13 +495,7 @@ update_branches( I4 = <>, <> end - end; - {jump_table_data, AddInstrOffset} -> - % Calculate offset from 'add pc, pc, r3' instruction + 4 to target label - % PC when add instruction executes - AddPC = AddInstrOffset + 4, - RelativeOffset = LabelOffset - AddPC, - <> + end end, Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), update_branches(State#state{stream = Stream1, branches = BranchesT}). @@ -3154,5 +3143,34 @@ add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label %% @return Updated backend state %%----------------------------------------------------------------------------- -spec add_label(state(), integer() | reference(), integer()) -> state(). 
+add_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + jump_table_start = JumpTableStart, + labels = Labels + } = State, + Label, + LabelOffset +) when is_integer(Label) -> + % Patch the jump table entry immediately + % Each jump table entry is 12 bytes: + % - ldr r3, [pc, 4] (2 bytes) at offset 0 + % - push {...} (2 bytes) at offset 2 + % - add pc, r3 (2 bytes) at offset 4 + % - nop (2 bytes) at offset 6 + % - data (4 bytes) at offset 8 + JumpTableEntryStart = JumpTableStart + Label * 12, + DataOffset = JumpTableEntryStart + 8, + AddInstrOffset = JumpTableEntryStart + 4, + + % Calculate offset from 'add pc, pc, r3' instruction + 4 to target label + % PC when add instruction executes + AddPC = AddInstrOffset + 4, + RelativeOffset = LabelOffset - AddPC, + DataBytes = <>, + + Stream1 = StreamModule:replace(Stream0, DataOffset, DataBytes), + State#state{stream = Stream1, labels = [{Label, LabelOffset} | Labels]}; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 0c722952b3..815dc40d95 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -115,6 +115,7 @@ stream :: stream(), offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + jump_table_start :: non_neg_integer(), available_regs :: [x86_64_register()], used_regs :: [x86_64_register()], labels :: [{integer() | reference(), integer()}], @@ -218,6 +219,7 @@ new(Variant, StreamModule, Stream) -> stream_module = StreamModule, stream = Stream, branches = [], + jump_table_start = 0, offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], @@ -340,21 +342,21 @@ assert_all_native_free(State) -> %% @return Updated backend state %%----------------------------------------------------------------------------- -spec jump_table(state(), pos_integer()) -> state(). 
-jump_table(State, LabelsCount) -> - jump_table0(State, 0, LabelsCount). +jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) -> + JumpTableStart = StreamModule:offset(Stream0), + jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount). jump_table0(State, N, LabelsCount) when N > LabelsCount -> State; jump_table0( - #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + #state{stream_module = StreamModule, stream = Stream0} = State, N, LabelsCount ) -> - Offset = StreamModule:offset(Stream0), - {RelocOffset, I1} = jit_x86_64_asm:jmp_rel32(1), - Reloc = {N, Offset + RelocOffset, 32}, + % Placeholder, encodes with 0xffffffff + {_RelocOffset, I1} = jit_x86_64_asm:jmp_rel32(4), Stream1 = StreamModule:append(Stream0, I1), - jump_table0(State#state{stream = Stream1, branches = [Reloc | Branches]}, N + 1, LabelsCount). + jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). %%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. @@ -2086,5 +2088,22 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) add_label(State, Label, Offset). -spec add_label(state(), integer() | reference(), integer()) -> state(). 
+add_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + jump_table_start = JumpTableStart, + labels = Labels + } = State, + Label, + LabelOffset +) when is_integer(Label) -> + % Patch the jump table entry immediately + % Each jmp_rel32 instruction is 5 bytes + JumpTableEntryOffset = JumpTableStart + Label * 5, + RelativeOffset = LabelOffset - JumpTableEntryOffset, + {_RelocOffset, JmpInstruction} = jit_x86_64_asm:jmp_rel32(RelativeOffset), + Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, JmpInstruction), + State#state{stream = Stream1, labels = [{Label, LabelOffset} | Labels]}; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 6d2a937e53..106aa43790 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -1012,11 +1012,12 @@ get_list_test() -> is_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> MSt1 = ?BACKEND:if_block( MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> ?BACKEND:jump_to_label(BSt0, Label) @@ -1033,27 +1034,29 @@ is_integer_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + 
State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: 92400ce8 and x8, x7, #0xf\n" - " 8: f1003d1f cmp x8, #0xf\n" - " c: 54000160 b.eq 0x38 // b.none\n" - " 10: 924004e8 and x8, x7, #0x3\n" - " 14: f100091f cmp x8, #0x2\n" - " 18: 54000040 b.eq 0x20 // b.none\n" - " 1c: 14000047 b 0x138\n" - " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" - " 24: f94000e7 ldr x7, [x7]\n" - " 28: 924014e7 and x7, x7, #0x3f\n" - " 2c: f10020ff cmp x7, #0x8\n" - " 30: 54000040 b.eq 0x38 // b.none\n" - " 34: 14000041 b 0x138" + " 0: 14000000 b 0x0\n" + " 4: 1400004f b 0x140\n" + " 8: f9401807 ldr x7, [x0, #48]\n" + " c: 92400ce8 and x8, x7, #0xf\n" + " 10: f1003d1f cmp x8, #0xf\n" + " 14: 54000160 b.eq 0x40\n" + " 18: 924004e8 and x8, x7, #0x3\n" + " 1c: f100091f cmp x8, #0x2\n" + " 20: 54000040 b.eq 0x28\n" + " 24: 14000047 b 0x140\n" + " 28: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 2c: f94000e7 ldr x7, [x7]\n" + " 30: 924014e7 and x7, x7, #0x3f\n" + " 34: f10020ff cmp x7, #0x8\n" + " 38: 54000040 b.eq 0x40\n" + " 3c: 14000041 b 0x140" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -1064,11 +1067,12 @@ cond_jump_to_label(Cond, Label, MMod, MSt0) -> is_number_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), @@ -1085,56 +1089,60 @@ is_number_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: 92400ce8 and x8, x7, #0xf\n" - " 8: f1003d1f cmp x8, #0xf\n" - " c: 540001c0 b.eq 0x44 // b.none\n" - " 10: 924004e8 and x8, x7, #0x3\n" - " 14: f100091f cmp x8, #0x2\n" - " 18: 54000040 b.eq 0x20 // b.none\n" - " 1c: 1400004a b 0x144\n" - " 20: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" - " 24: f94000e7 ldr x7, [x7]\n" - " 28: 924014e8 and x8, x7, #0x3f\n" - " 2c: f100211f cmp x8, #0x8\n" - " 30: 540000a0 b.eq 0x44 // b.none\n" - " 34: 924014e7 and x7, x7, #0x3f\n" - " 38: f10060ff cmp x7, #0x18\n" - " 3c: 54000040 b.eq 0x44 // b.none\n" - " 40: 14000041 b 0x144" + " 0: 14000000 b 0x0\n" + " 4: 14000052 
b 0x14c\n" + " 8: f9401807 ldr x7, [x0, #48]\n" + " c: 92400ce8 and x8, x7, #0xf\n" + " 10: f1003d1f cmp x8, #0xf\n" + " 14: 540001c0 b.eq 0x4c\n" + " 18: 924004e8 and x8, x7, #0x3\n" + " 1c: f100091f cmp x8, #0x2\n" + " 20: 54000040 b.eq 0x28\n" + " 24: 1400004a b 0x14c\n" + " 28: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 2c: f94000e7 ldr x7, [x7]\n" + " 30: 924014e8 and x8, x7, #0x3f\n" + " 34: f100211f cmp x8, #0x8\n" + " 38: 540000a0 b.eq 0x4c\n" + " 3c: 924014e7 and x7, x7, #0x3f\n" + " 40: f10060ff cmp x7, #0x18\n" + " 44: 54000040 b.eq 0x4c\n" + " 48: 14000041 b 0x14c" >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), - Offset = ?BACKEND:offset(State3), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: f1012cff cmp x7, #0x4b\n" - " 8: 54000080 b.eq 0x18 // b.none\n" - " c: f1002cff cmp x7, #0xb\n" - " 10: 54000040 b.eq 0x18 // b.none\n" - " 14: 
14000041 b 0x118" + " 0: 14000000 b 0x0\n" + " 4: 14000047 b 0x120\n" + " 8: f9401807 ldr x7, [x0, #48]\n" + " c: f1012cff cmp x7, #0x4b\n" + " 10: 54000080 b.eq 0x20\n" + " 14: f1002cff cmp x7, #0xb\n" + " 18: 54000040 b.eq 0x20\n" + " 1c: 14000041 b 0x120" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1211,7 +1219,7 @@ wait_test() -> Stream = ?BACKEND:stream(State4), Dump = << " 0: 14000000 b 0x0\n" - " 4: 14000000 b 0x4\n" + " 4: 14000005 b 0x18\n" " 8: 14000000 b 0x8\n" " c: 14000000 b 0xc\n" " 10: 14000000 b 0x10\n" @@ -1225,34 +1233,34 @@ wait_test() -> return_labels_and_lines_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), % Test return_labels_and_lines with some sample labels and lines - State1 = ?BACKEND:add_label(State0, 2, 32), State2 = ?BACKEND:add_label(State1, 1, 16), - % {Line, Offset} pairs SortedLines = [{10, 16}, {20, 32}], - State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), - Stream = ?BACKEND:stream(State3), + State3 = ?BACKEND:add_label(State2, 0), - % Should have generated adr + ret + labels table + lines table - % adr = 4 bytes, ret = 4 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes - % Total minimum: 36 bytes - ?assert(byte_size(Stream) >= 36), + State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines), + State5 = ?BACKEND:update_branches(State4), + Stream = ?BACKEND:stream(State5), + + ?assert(byte_size(Stream) >= 44), - % Expected: adr x0, #8 + ret + labels table + lines table - % The data tables start at offset 0x8, so we load PC + 8 into x0 Dump = << - " 0: 10000040 adr x0, 0x8\n" - " 4: d65f03c0 ret\n" - " 8: 01000200 .word 0x01000200\n" - " c: 10000000 adr x0, 0xc\n" - " 10: 00000200 .word 0x00000200\n" - " 14: 02002000 .word 0x02002000\n" - " 18: 00000a00 .word 0x00000a00\n" - " 1c: 14001000 .word 0x14001000\n" - " 20: 20000000 .word 0x20000000" + " 0: 14000003 b 0xc\n" + " 4: 14000003 b 0x10\n" + " 8: 
14000000 b 0x8\n" + " c: 10000040 adr x0, 0x14\n" + " 10: d65f03c0 ret\n" + " 14: 00000200 .inst 0x00000200\n" + " 18: 0c000000 st4 {v0.8b-v3.8b}, [x0]\n" + " 1c: 00000100 .inst 0x00000100\n" + " 20: 02001000 .inst 0x02001000\n" + " 24: 00000a00 .inst 0x00000a00\n" + " 28: 14001000 b 0x4028\n" + " 2c: 20000000 .inst 0x20000000" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1809,8 +1817,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), - State2 = setelement(7, State1, [r8, r9]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), + State2 = setelement(8, State1, [r8, r9]), [r8, r9] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r8, r9, 1), Stream = ?BACKEND:stream(State3), @@ -1823,8 +1831,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), - State2 = setelement(7, State1, [r8, r9]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r8, r9]), + State2 = setelement(8, State1, [r8, r9]), [r8, r9] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, r8, r9, 1), Stream = ?BACKEND:stream(State3), diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl index 10b577cc74..e8a9f76222 100644 --- a/tests/libs/jit/jit_armv6m_tests.erl +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -1697,57 +1697,83 @@ is_number_test() -> is_boolean_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + {State2, Reg} = 
?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 2f4b cmp r7, #75 ; 0x4b\n" - " 4: d006 beq.n 0x14\n" - " 6: 2f0b cmp r7, #11\n" - " 8: d004 beq.n 0x14\n" - " a: e079 b.n 0x100\n" - " c: 46c0 nop ; (mov r8, r8)\n" - " e: 46c0 nop ; (mov r8, r8)\n" - " 10: 46c0 nop ; (mov r8, r8)\n" - " 12: 46c0 nop ; (mov r8, r8)" + " 0: 4b01 ldr r3, [pc, #4]\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop\n" + " 8: ffff .short 0xffff\n" + " a: ffff .short 0xffff\n" + " c: 4b01 ldr r3, [pc, #4]\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop\n" + " 14: 00ec lsls r4, r5, #3\n" + " 16: 0000 movs r0, r0\n" + " 18: 6987 ldr r7, [r0, #24]\n" + " 1a: 2f4b cmp r7, #75\n" + " 1c: d006 beq.n 0x2c\n" + " 1e: 2f0b cmp r7, #11\n" + " 20: d004 beq.n 0x2c\n" + " 22: e06d b.n 0x100\n" + " 24: 46c0 nop\n" + " 26: 46c0 nop\n" + " 28: 46c0 nop\n" + " 2a: 46c0 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
is_boolean_far_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#1000), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#1000), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 2f4b cmp r7, #75 ; 0x4b\n" - " 4: d006 beq.n 0x14\n" - " 6: 2f0b cmp r7, #11\n" - " 8: d004 beq.n 0x14\n" - " a: 4e01 ldr r6, [pc, #4] ; (0x10)\n" - " c: 447e add r6, pc\n" - " e: 4730 bx r6\n" - " 10: 0ff1 lsrs r0, r6, #31\n" - " 12: 0000 movs r0, r0" + " 0: 4b01 ldr r3, [pc, #4]\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop\n" + " 8: ffff .short 0xffff\n" + " a: ffff .short 0xffff\n" + " c: 4b01 ldr r3, [pc, #4]\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop\n" + " 14: 0fec lsrs r4, r5, #31\n" + " 16: 0000 movs r0, r0\n" + " 18: 6987 ldr r7, [r0, #24]\n" + " 1a: 2f4b cmp r7, #75\n" + " 1c: d006 beq.n 0x2c\n" + " 1e: 2f0b cmp r7, #11\n" + " 20: d004 beq.n 0x2c\n" + " 22: 4e01 ldr r6, [pc, #4]\n" + " 24: 447e add r6, pc\n" + " 26: 4730 bx r6\n" + " 28: 0fd9 lsrs r1, r3, #31\n" + " 2a: 0000 movs r0, r0" >>, 
?assertEqual(dump_to_bin(Dump), Stream). @@ -1789,29 +1815,42 @@ is_boolean_far_unaligned_test() -> is_boolean_far_known_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - State1 = ?BACKEND:add_label(State0, Label, 16#1000), - {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), - State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + State2 = ?BACKEND:add_label(State1, Label, 16#1000), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State4 = ?BACKEND:if_block(State3, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State4 = ?BACKEND:free_native_registers(State3, [Reg]), - ?BACKEND:assert_all_native_free(State4), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 6987 ldr r7, [r0, #24]\n" - " 2: 2f4b cmp r7, #75 ; 0x4b\n" - " 4: d006 beq.n 0x14\n" - " 6: 2f0b cmp r7, #11\n" - " 8: d004 beq.n 0x14\n" - " a: 4e01 ldr r6, [pc, #4] ; (0x10)\n" - " c: 447e add r6, pc\n" - " e: 4730 bx r6\n" - " 10: 0ff1 lsrs r1, r6, #31\n" - " 12: 0000 movs r0, r0" + " 0: 4b01 ldr r3, [pc, #4]\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop\n" + " 8: ffff .short 0xffff\n" + " a: ffff .short 0xffff\n" + " c: 4b01 ldr r3, [pc, #4]\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop\n" + " 14: 0fec lsrs r4, r5, #31\n" + " 16: 0000 movs r0, r0\n" + " 18: 6987 ldr r7, [r0, #24]\n" + " 1a: 2f4b cmp r7, #75\n" + " 1c: d006 beq.n 0x2c\n" + " 1e: 2f0b cmp r7, #11\n" + " 20: d004 beq.n 0x2c\n" + " 22: 4e01 ldr r6, [pc, #4]\n" + " 
24: 447e add r6, pc\n" + " 26: 4730 bx r6\n" + " 28: 0fd9 lsrs r1, r3, #31\n" + " 2a: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1822,32 +1861,45 @@ is_boolean_far_known_unaligned_test() -> TempState = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), TempStream = jit_stream_binary:append(?BACKEND:stream(TempState), PaddingInstruction), State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, TempStream), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - State1 = ?BACKEND:add_label(State0, Label, 16#1000), - {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), - State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + State2 = ?BACKEND:add_label(State1, Label, 16#1000), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State4 = ?BACKEND:if_block(State3, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State4 = ?BACKEND:free_native_registers(State3, [Reg]), - ?BACKEND:assert_all_native_free(State4), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << " 0: 4770 bx lr\n" - " 2: 6987 ldr r7, [r0, #24]\n" - " 4: 2f4b cmp r7, #75 ; 0x4b\n" - " 6: d007 beq.n 0x18\n" - " 8: 2f0b cmp r7, #11\n" - " a: d005 beq.n 0x18\n" - " c: 4e01 ldr r6, [pc, #4] ; (0x14)\n" - " e: 447e add r6, pc\n" - " 10: 4730 bx r6\n" - " 12: 46c0 nop ; (mov r8, r8)\n" - " 14: 0fef lsrs r7, r5, #31\n" - " 16: 0000 movs r0, r0" + " 2: 4b01 ldr r3, [pc, #4]\n" + " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 6: 449f add pc, r3\n" + " 8: 46c0 nop\n" + " a: ffff .short 0xffff\n" + " c: ffff .short 0xffff\n" + " e: 4b01 ldr r3, [pc, #4]\n" + " 10: b5f2 push {r1, r4, r5, r6, 
r7, lr}\n" + " 12: 449f add pc, r3\n" + " 14: 46c0 nop\n" + " 16: 0fea lsrs r2, r5, #31\n" + " 18: 0000 movs r0, r0\n" + " 1a: 6987 ldr r7, [r0, #24]\n" + " 1c: 2f4b cmp r7, #75\n" + " 1e: d007 beq.n 0x30\n" + " 20: 2f0b cmp r7, #11\n" + " 22: d005 beq.n 0x30\n" + " 24: 4e01 ldr r6, [pc, #4]\n" + " 26: 447e add r6, pc\n" + " 28: 4730 bx r6\n" + " 2a: 46c0 nop\n" + " 2c: 0fd7 lsrs r7, r2, #31\n" + " 2e: 0000 movs r0, r0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1972,41 +2024,59 @@ wait_test() -> %% Test return_labels_and_lines/2 function return_labels_and_lines_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), % Test return_labels_and_lines with some sample labels and lines - State1 = ?BACKEND:add_label(State0, 2, 32), - State2 = ?BACKEND:add_label(State1, 1, 16), + State2 = ?BACKEND:add_label(State1, 2, 32), + State3 = ?BACKEND:add_label(State2, 1, 16), % {Line, Offset} pairs SortedLines = [{10, 16}, {20, 32}], - State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), - Stream = ?BACKEND:stream(State3), + State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines), + Stream = ?BACKEND:stream(State4), % Should have generated adr + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table % adr = 4 bytes, pop = 2 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes % Total minimum: 30 bytes ?assert(byte_size(Stream) >= 30), - % Expected: adr r0, + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table - % The data tables start at offset 4, so adr should be adr r0, 4 not adr r0, 8 + % Expected: jump table (3 entries) + adr r0, + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table Dump = << - " 0: a000 add r0, pc, #0 ; (adr r0, 0x4)\n" - " 2: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - " 4: 0200 lsls r0, r0, #8\n" - " 6: 0100 lsls r0, r0, #4\n" - " 8: 0000 movs r0, r0\n" - " a: 1000 asrs r0, r0, #32\n" - " c: 0200 lsls r0, r0, #8\n" - " e: 0000 movs r0, r0\n" 
- " 10: 2000 movs r0, #0\n" - " 12: 0200 lsls r0, r0, #8\n" - " 14: 0a00 lsrs r0, r0, #8\n" - " 16: 0000 movs r0, r0\n" - " 18: 1000 asrs r0, r0, #32\n" - " 1a: 1400 asrs r0, r0, #16\n" - " 1c: 0000 movs r0, r0\n" - " 1e: 2000 movs r0, #0" + " 0: 4b01 ldr r3, [pc, #4]\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop\n" + " 8: ffff .short 0xffff\n" + " a: ffff .short 0xffff\n" + " c: 4b01 ldr r3, [pc, #4]\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop\n" + " 14: fffc .short 0xfffc\n" + " 16: ffff .short 0xffff\n" + " 18: 4b01 ldr r3, [pc, #4]\n" + " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1c: 449f add pc, r3\n" + " 1e: 46c0 nop\n" + " 20: 0000 movs r0, r0\n" + " 22: 0000 movs r0, r0\n" + " 24: a000 add r0, pc, #0\n" + " 26: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 28: 0200 lsls r0, r0, #8\n" + " 2a: 0100 lsls r0, r0, #4\n" + " 2c: 0000 movs r0, r0\n" + " 2e: 1000 asrs r0, r0, #32\n" + " 30: 0200 lsls r0, r0, #8\n" + " 32: 0000 movs r0, r0\n" + " 34: 2000 movs r0, #0\n" + " 36: 0200 lsls r0, r0, #8\n" + " 38: 0a00 lsrs r0, r0, #8\n" + " 3a: 0000 movs r0, r0\n" + " 3c: 1000 asrs r0, r0, #32\n" + " 3e: 1400 asrs r0, r0, #16\n" + " 40: 0000 movs r0, r0\n" + " 42: 2000 movs r0, #0" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
@@ -2018,36 +2088,55 @@ return_labels_and_lines_unaligned_test() -> TempState = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), TempStream = jit_stream_binary:append(?BACKEND:stream(TempState), PaddingInstruction), State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, TempStream), + State1 = ?BACKEND:jump_table(State0, 2), % Test return_labels_and_lines with some sample labels and lines - State1 = ?BACKEND:add_label(State0, 2, 32), - State2 = ?BACKEND:add_label(State1, 1, 16), + State2 = ?BACKEND:add_label(State1, 2, 32), + State3 = ?BACKEND:add_label(State2, 1, 16), % {Line, Offset} pairs SortedLines = [{10, 16}, {20, 32}], - State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), - Stream = ?BACKEND:stream(State3), + State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines), + Stream = ?BACKEND:stream(State4), Dump = << " 0: 4770 bx lr\n" - "2: a001 add r0, pc, #4 ; (adr r0, 0x8)\n" - "4: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" - "6: 0000 movs r0, r0\n" - "8: 0200 lsls r0, r0, #8\n" - "a: 0100 lsls r0, r0, #4\n" - "c: 0000 movs r0, r0\n" - "e: 1000 asrs r0, r0, #32\n" - "10: 0200 lsls r0, r0, #8\n" - "12: 0000 movs r0, r0\n" - "14: 2000 movs r0, #0\n" - "16: 0200 lsls r0, r0, #8\n" - "18: 0a00 lsrs r0, r0, #8\n" - "1a: 0000 movs r0, r0\n" - "1c: 1000 asrs r0, r0, #32\n" - "1e: 1400 asrs r0, r0, #16\n" - "20: 0000 movs r0, r0\n" - "22: 2000 movs r0, #0" + " 2: 4b01 ldr r3, [pc, #4]\n" + " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 6: 449f add pc, r3\n" + " 8: 46c0 nop\n" + " a: ffff .short 0xffff\n" + " c: ffff .short 0xffff\n" + " e: 4b01 ldr r3, [pc, #4]\n" + " 10: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 12: 449f add pc, r3\n" + " 14: 46c0 nop\n" + " 16: fffa .short 0xfffa\n" + " 18: ffff .short 0xffff\n" + " 1a: 4b01 ldr r3, [pc, #4]\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 449f add pc, r3\n" + " 20: 46c0 nop\n" + " 22: fffe .short 0xfffe\n" + " 24: ffff .short 0xffff\n" + " 26: a001 add r0, pc, 
#4\n" + " 28: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 2a: 0000 movs r0, r0\n" + " 2c: 0200 lsls r0, r0, #8\n" + " 2e: 0100 lsls r0, r0, #4\n" + " 30: 0000 movs r0, r0\n" + " 32: 1000 asrs r0, r0, #32\n" + " 34: 0200 lsls r0, r0, #8\n" + " 36: 0000 movs r0, r0\n" + " 38: 2000 movs r0, #0\n" + " 3a: 0200 lsls r0, r0, #8\n" + " 3c: 0a00 lsrs r0, r0, #8\n" + " 3e: 0000 movs r0, r0\n" + " 40: 1000 asrs r0, r0, #32\n" + " 42: 1400 asrs r0, r0, #16\n" + " 44: 0000 movs r0, r0\n" + " 46: 2000 movs r0, #0" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2725,8 +2814,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), - State2 = setelement(7, State1, [r3, r4]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), + State2 = setelement(8, State1, [r3, r4]), [r3, r4] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r3, r4, 1), Stream = ?BACKEND:stream(State3), @@ -2740,8 +2829,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), - State2 = setelement(7, State1, [r3, r4]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [r3, r4]), + State2 = setelement(8, State1, [r3, r4]), [r3, r4] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, r3, r4, 1), Stream = ?BACKEND:stream(State3), diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index 45fc71df51..b6a92365ad 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -977,11 +977,12 @@ get_list_test() -> is_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, 
- {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> MSt1 = ?BACKEND:if_block( MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> ?BACKEND:jump_to_label(BSt0, Label) @@ -998,29 +999,31 @@ is_integer_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 49 89 c3 mov %rax,%r11\n" - " 7: 41 80 e3 0f and $0xf,%r11b\n" - " b: 41 80 fb 0f cmp $0xf,%r11b\n" - " f: 74 25 je 0x36\n" - " 11: 49 89 c3 mov %rax,%r11\n" - " 14: 41 80 e3 03 and $0x3,%r11b\n" - " 18: 41 80 fb 02 cmp $0x2,%r11b\n" - " 1c: 74 05 je 0x23\n" - " 1e: e9 13 01 00 00 jmpq 0x136\n" - " 23: 48 83 e0 fc and $0xfffffffffffffffc,%rax\n" - " 27: 48 8b 00 mov (%rax),%rax\n" - " 2a: 24 3f and $0x3f,%al\n" - " 2c: 80 f8 08 cmp $0x8,%al\n" - " 2f: 74 05 je 0x36\n" - " 31: e9 00 01 00 00 jmpq 0x136" + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 36 01 00 00 jmpq 0x140\n" + " a: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " e: 49 89 c3 mov %rax,%r11\n" + " 11: 41 80 e3 0f and $0xf,%r11b\n" + " 15: 41 80 fb 0f cmp $0xf,%r11b\n" + " 19: 74 25 je 0x40\n" + " 1b: 49 89 c3 mov %rax,%r11\n" + " 1e: 41 80 e3 03 and $0x3,%r11b\n" + " 
22: 41 80 fb 02 cmp $0x2,%r11b\n" + " 26: 74 05 je 0x2d\n" + " 28: e9 13 01 00 00 jmpq 0x140\n" + " 2d: 48 83 e0 fc and $0xfffffffffffffffc,%rax\n" + " 31: 48 8b 00 mov (%rax),%rax\n" + " 34: 24 3f and $0x3f,%al\n" + " 36: 80 f8 08 cmp $0x8,%al\n" + " 39: 74 05 je 0x40\n" + " 3b: e9 00 01 00 00 jmpq 0x140" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1031,11 +1034,12 @@ cond_jump_to_label(Cond, Label, MMod, MSt0) -> is_number_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), @@ -1052,58 +1056,63 @@ is_number_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 49 89 c3 mov %rax,%r11\n" - " 7: 41 80 e3 0f and $0xf,%r11b\n" - " b: 41 80 fb 0f cmp $0xf,%r11b\n" - " f: 74 32 je 0x43\n" - " 11: 49 89 c3 mov %rax,%r11\n" - " 14: 41 80 e3 03 and $0x3,%r11b\n" - " 18: 41 80 fb 02 cmp $0x2,%r11b\n" - " 1c: 74 05 je 
0x23\n" - " 1e: e9 20 01 00 00 jmpq 0x143\n" - " 23: 48 83 e0 fc and $0xfffffffffffffffc,%rax\n" - " 27: 48 8b 00 mov (%rax),%rax\n" - " 2a: 49 89 c3 mov %rax,%r11\n" - " 2d: 41 80 e3 3f and $0x3f,%r11b\n" - " 31: 41 80 fb 08 cmp $0x8,%r11b\n" - " 35: 74 0c je 0x43\n" - " 37: 24 3f and $0x3f,%al\n" - " 39: 80 f8 18 cmp $0x18,%al\n" - " 3c: 74 05 je 0x43\n" - " 3e: e9 00 01 00 00 jmpq 0x143" + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 43 01 00 00 jmpq 0x14d\n" + " a: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " e: 49 89 c3 mov %rax,%r11\n" + " 11: 41 80 e3 0f and $0xf,%r11b\n" + " 15: 41 80 fb 0f cmp $0xf,%r11b\n" + " 19: 74 32 je 0x4d\n" + " 1b: 49 89 c3 mov %rax,%r11\n" + " 1e: 41 80 e3 03 and $0x3,%r11b\n" + " 22: 41 80 fb 02 cmp $0x2,%r11b\n" + " 26: 74 05 je 0x2d\n" + " 28: e9 20 01 00 00 jmpq 0x14d\n" + " 2d: 48 83 e0 fc and $0xfffffffffffffffc,%rax\n" + " 31: 48 8b 00 mov (%rax),%rax\n" + " 34: 49 89 c3 mov %rax,%r11\n" + " 37: 41 80 e3 3f and $0x3f,%r11b\n" + " 3b: 41 80 fb 08 cmp $0x8,%r11b\n" + " 3f: 74 0c je 0x4d\n" + " 41: 24 3f and $0x3f,%al\n" + " 43: 80 f8 18 cmp $0x18,%al\n" + " 46: 74 05 je 0x4d\n" + " 48: e9 00 01 00 00 jmpq 0x14d" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
is_boolean_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - Offset = ?BACKEND:offset(State3), - State4 = ?BACKEND:add_label(State3, Label, Offset + 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + Offset = ?BACKEND:offset(State4), + State5 = ?BACKEND:add_label(State4, Label, Offset + 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 48 83 f8 4b cmp $0x4b,%rax\n" - " 8: 74 0b je 0x15\n" - " a: 48 83 f8 0b cmp $0xb,%rax\n" - " e: 74 05 je 0x15\n" - " 10: e9 00 01 00 00 jmpq 0x115\n" + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 15 01 00 00 jmpq 0x11f\n" + " a: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " e: 48 83 f8 4b cmp $0x4b,%rax\n" + " 12: 74 0b je 0x1f\n" + " 14: 48 83 f8 0b cmp $0xb,%rax\n" + " 18: 74 05 je 0x1f\n" + " 1a: e9 00 01 00 00 jmpq 0x11f\n" >>, ?assertEqual(dump_to_bin(Dump), Stream). 
From 53c60839bdce63c52da3f839e139fd42da7aa94b Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 24 Oct 2025 08:02:42 +0200 Subject: [PATCH 24/28] riscv32: Optimize memory usage by patching jump table asap Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 83 +++++- libs/jit/src/jit_armv6m.erl | 96 +++++-- libs/jit/src/jit_riscv32.erl | 183 +++++++++---- libs/jit/src/jit_x86_64.erl | 71 +++++- tests/libs/jit/jit_riscv32_tests.erl | 367 +++++++++++++++------------ 5 files changed, 556 insertions(+), 244 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index cb6504c485..0814497a5b 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -369,10 +369,67 @@ jump_table0( N, LabelsCount ) -> - BranchInstr = jit_aarch64_asm:b(0), + % Placeholder jumps to next entry (1 instruction forward = 4 bytes) + BranchInstr = jit_aarch64_asm:b(1), Stream1 = StreamModule:append(Stream0, BranchInstr), jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). +%%----------------------------------------------------------------------------- +%% @doc Patch a single branch in the stream +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Type type of the branch +%% @param LabelOffset target label offset +%% @return Updated stream +%%----------------------------------------------------------------------------- +-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream(). +patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) -> + Rel = LabelOffset - Offset, + NewInstr = + case Type of + {bcc, CC} -> jit_aarch64_asm:bcc(CC, Rel); + {adr, Reg} -> jit_aarch64_asm:adr(Reg, Rel); + b -> jit_aarch64_asm:b(Rel) + end, + StreamModule:replace(Stream, Offset, NewInstr). 
+ +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), any()}] +) -> {stream(), [{integer(), non_neg_integer(), any()}]}. +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). + +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Type} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). + %%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. 
%% @end @@ -391,14 +448,7 @@ update_branches( } = State ) -> {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), - Rel = LabelOffset - Offset, - NewInstr = - case Type of - {bcc, CC} -> jit_aarch64_asm:bcc(CC, Rel); - {adr, Reg} -> jit_aarch64_asm:adr(Reg, Rel); - b -> jit_aarch64_asm:b(Rel) - end, - Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- @@ -2349,6 +2399,7 @@ add_label( stream_module = StreamModule, stream = Stream0, jump_table_start = JumpTableStart, + branches = Branches, labels = Labels } = State, Label, @@ -2360,6 +2411,18 @@ add_label( RelativeOffset = LabelOffset - JumpTableEntryOffset, BranchInstr = jit_aarch64_asm:b(RelativeOffset), Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, BranchInstr), - State#state{stream = Stream1, labels = [{Label, LabelOffset} | Labels]}; + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl index 04df1709fa..33b29f2c3d 100644 --- a/libs/jit/src/jit_armv6m.erl +++ b/libs/jit/src/jit_armv6m.erl @@ -405,23 +405,17 @@ jump_table0( jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). %%----------------------------------------------------------------------------- -%% @doc Rewrite stream to update all branches for labels. 
+%% @doc Patch a single branch in the stream %% @end -%% @param State current backend state -%% @return Updated backend state +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Type type of the branch +%% @param LabelOffset target label offset +%% @return Updated stream %%----------------------------------------------------------------------------- --spec update_branches(state()) -> state(). -update_branches(#state{branches = []} = State) -> - State; -update_branches( - #state{ - stream_module = StreamModule, - stream = Stream0, - branches = [{Label, Offset, Type} | BranchesT], - labels = Labels - } = State -) -> - {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), +-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream(). +patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) -> Rel = LabelOffset - Offset, NewInstr = case Type of @@ -497,7 +491,62 @@ update_branches( end end end, - Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + StreamModule:replace(Stream, Offset, NewInstr). + +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), any()}] +) -> {stream(), [{integer(), non_neg_integer(), any()}]}. 
+patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). + +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Type} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state()) -> state(). +update_branches(#state{branches = []} = State) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). 
%%----------------------------------------------------------------------------- @@ -3148,6 +3197,7 @@ add_label( stream_module = StreamModule, stream = Stream0, jump_table_start = JumpTableStart, + branches = Branches, labels = Labels } = State, Label, @@ -3171,6 +3221,18 @@ add_label( DataBytes = <>, Stream1 = StreamModule:replace(Stream0, DataOffset, DataBytes), - State#state{stream = Stream1, labels = [{Label, LabelOffset} | Labels]}; + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 0e0217b5fd..afbb74eaaa 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -163,6 +163,7 @@ stream :: stream(), offset :: non_neg_integer(), branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], + jump_table_start :: non_neg_integer(), available_regs :: [riscv32_register()], used_regs :: [riscv32_register()], labels :: [{integer() | reference(), integer()}], @@ -270,6 +271,7 @@ new(Variant, StreamModule, Stream) -> stream_module = StreamModule, stream = Stream, branches = [], + jump_table_start = 0, offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], @@ -403,46 +405,35 @@ assert_all_native_free(#state{ %% @return Updated backend state %%----------------------------------------------------------------------------- -spec jump_table(state(), pos_integer()) -> state(). -jump_table(State, LabelsCount) -> - jump_table0(State, 0, LabelsCount). 
+jump_table(#state{stream_module = StreamModule, stream = Stream0} = State, LabelsCount) -> + JumpTableStart = StreamModule:offset(Stream0), + jump_table0(State#state{jump_table_start = JumpTableStart}, 0, LabelsCount). jump_table0(State, N, LabelsCount) when N > LabelsCount -> State; jump_table0( - #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + #state{stream_module = StreamModule, stream = Stream0} = State, N, LabelsCount ) -> % Create jump table entry: AUIPC + JALR (8 bytes total) - % This will be patched later in update_branches/2 - Offset = StreamModule:offset(Stream0), + % This will be patched in add_label when the label offset is known JumpEntry = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, Stream1 = StreamModule:append(Stream0, JumpEntry), - - % Record both AUIPC and JALR offsets for patching - Reloc = {N, Offset, jump_table_auipc_jalr}, - UpdatedState = State#state{stream = Stream1, branches = [Reloc | Branches]}, - - jump_table0(UpdatedState, N + 1, LabelsCount). + jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). %%----------------------------------------------------------------------------- -%% @doc Rewrite stream to update all branches for labels. +%% @doc Patch a single branch in the stream %% @end -%% @param State current backend state -%% @return Updated backend state +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Type type of the branch +%% @param LabelOffset target label offset +%% @return Updated stream %%----------------------------------------------------------------------------- --spec update_branches(state()) -> state(). 
-update_branches(#state{branches = []} = State) -> - State; -update_branches( - #state{ - stream_module = StreamModule, - stream = Stream0, - branches = [{Label, Offset, Type} | BranchesT], - labels = Labels - } = State -) -> - {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), +-spec patch_branch(module(), stream(), non_neg_integer(), any(), non_neg_integer()) -> stream(). +patch_branch(StreamModule, Stream, Offset, Type, LabelOffset) -> Rel = LabelOffset - Offset, NewInstr = case Type of @@ -489,37 +480,64 @@ update_branches( 6 -> <>; 8 -> Entry end - end; - jump_table_auipc_jalr -> - % Calculate PC-relative offset from AUIPC instruction to target - % AUIPC is at Offset, JALR is at Offset+4 - % Target is at LabelOffset - % Offset from AUIPC PC to target - PCRelOffset = LabelOffset - Offset, - - % Split into upper 20 bits and lower 12 bits - % RISC-V encodes: target = PC + (upper20 << 12) + sign_ext(lower12) - % If lower12 >= 0x800, it's negative when sign-extended, so add 1 to upper - Upper20 = (PCRelOffset + 16#800) bsr 12, - Lower12 = PCRelOffset band 16#FFF, - % Sign-extend lower 12 bits for JALR immediate - Lower12Signed = - if - Lower12 >= 16#800 -> Lower12 - 16#1000; - true -> Lower12 - end, - - % Encode AUIPC and JALR with computed offsets - I1 = jit_riscv32_asm:auipc(a3, Upper20), - I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed), - % Map to 8 bytes - JumpTableEntry = <>, - case byte_size(JumpTableEntry) of - 6 -> <>; - 8 -> JumpTableEntry end end, - Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + StreamModule:replace(Stream, Offset, NewInstr). 
+ +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), any()}] +) -> {stream(), [{integer(), non_neg_integer(), any()}]}. +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). + +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Type} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state()) -> state(). 
+update_branches(#state{branches = []} = State) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Type, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- @@ -3053,5 +3071,60 @@ add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label %% @return Updated backend state %%----------------------------------------------------------------------------- -spec add_label(state(), integer() | reference(), integer()) -> state(). +add_label( + #state{ + stream_module = StreamModule, + stream = Stream0, + jump_table_start = JumpTableStart, + branches = Branches, + labels = Labels + } = State, + Label, + LabelOffset +) when is_integer(Label) -> + % Patch the jump table entry immediately + % Each jump table entry is AUIPC + JALR (8 bytes) + JumpTableEntryOffset = JumpTableStart + Label * 8, + + % Calculate PC-relative offset from AUIPC instruction to target + PCRelOffset = LabelOffset - JumpTableEntryOffset, + + % Split into upper 20 bits and lower 12 bits + % RISC-V encodes: target = PC + (upper20 << 12) + sign_ext(lower12) + % If lower12 >= 0x800, it's negative when sign-extended, so add 1 to upper + Upper20 = (PCRelOffset + 16#800) bsr 12, + Lower12 = PCRelOffset band 16#FFF, + % Sign-extend lower 12 bits for JALR immediate + Lower12Signed = + if + Lower12 >= 16#800 -> Lower12 - 16#1000; + true -> Lower12 + end, + + % Encode AUIPC and JALR with computed offsets + I1 = jit_riscv32_asm:auipc(a3, Upper20), + I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed), + % Create 8-byte jump table entry + JumpTableEntry = <>, + PaddedEntry = + case byte_size(JumpTableEntry) of + 6 -> <>; + 8 -> JumpTableEntry + end, + 
+ Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, PaddedEntry), + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 815dc40d95..1e45c720b3 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -358,6 +358,58 @@ jump_table0( Stream1 = StreamModule:append(Stream0, I1), jump_table0(State#state{stream = Stream1}, N + 1, LabelsCount). +%%----------------------------------------------------------------------------- +%% @doc Patch a single branch in the stream +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param Offset offset of the branch to patch +%% @param Size size of the branch in bits +%% @param LabelOffset target label offset +%% @return Updated stream +%%----------------------------------------------------------------------------- +-spec patch_branch(module(), stream(), non_neg_integer(), non_neg_integer(), non_neg_integer()) -> + stream(). +patch_branch(StreamModule, Stream, Offset, Size, LabelOffset) -> + StreamModule:map(Stream, Offset, Size div 8, fun(<>) -> + <<(Delta + LabelOffset - Offset):Size/little>> + end). 
+ +%%----------------------------------------------------------------------------- +%% @doc Patch all branches targeting a specific label and return remaining branches +%% @end +%% @param StreamModule stream module +%% @param Stream stream state +%% @param TargetLabel label to patch branches for +%% @param LabelOffset offset of the target label +%% @param Branches list of pending branches +%% @return {UpdatedStream, RemainingBranches} +%%----------------------------------------------------------------------------- +-spec patch_branches_for_label( + module(), + stream(), + integer(), + non_neg_integer(), + [{integer(), non_neg_integer(), non_neg_integer()}] +) -> {stream(), [{integer(), non_neg_integer(), non_neg_integer()}]}. +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Branches, []). + +patch_branches_for_label(_StreamModule, Stream, _TargetLabel, _LabelOffset, [], Acc) -> + {Stream, lists:reverse(Acc)}; +patch_branches_for_label( + StreamModule, + Stream0, + TargetLabel, + LabelOffset, + [{Label, Offset, Size} | Rest], + Acc +) when Label =:= TargetLabel -> + Stream1 = patch_branch(StreamModule, Stream0, Offset, Size, LabelOffset), + patch_branches_for_label(StreamModule, Stream1, TargetLabel, LabelOffset, Rest, Acc); +patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, [Branch | Rest], Acc) -> + patch_branches_for_label(StreamModule, Stream, TargetLabel, LabelOffset, Rest, [Branch | Acc]). + %%----------------------------------------------------------------------------- %% @doc Rewrite stream to update all branches for labels. 
%% @end @@ -376,9 +428,7 @@ update_branches( } = State ) -> {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), - Stream1 = StreamModule:map(Stream0, Offset, Size div 8, fun(<>) -> - <<(Delta + LabelOffset - Offset):Size/little>> - end), + Stream1 = patch_branch(StreamModule, Stream0, Offset, Size, LabelOffset), update_branches(State#state{stream = Stream1, branches = BranchesT}). %%----------------------------------------------------------------------------- @@ -2093,6 +2143,7 @@ add_label( stream_module = StreamModule, stream = Stream0, jump_table_start = JumpTableStart, + branches = Branches, labels = Labels } = State, Label, @@ -2104,6 +2155,18 @@ add_label( RelativeOffset = LabelOffset - JumpTableEntryOffset, {_RelocOffset, JmpInstruction} = jit_x86_64_asm:jmp_rel32(RelativeOffset), Stream1 = StreamModule:replace(Stream0, JumpTableEntryOffset, JmpInstruction), - State#state{stream = Stream1, labels = [{Label, LabelOffset} | Labels]}; + + % Eagerly patch any branches targeting this label + {Stream2, RemainingBranches} = patch_branches_for_label( + StreamModule, + Stream1, + Label, + LabelOffset, + Branches + ), + + State#state{ + stream = Stream2, branches = RemainingBranches, labels = [{Label, LabelOffset} | Labels] + }; add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. 
diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index bc31df75b8..b98e999ffb 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -1333,11 +1333,12 @@ get_list_test() -> is_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(MSt0) -> MSt1 = ?BACKEND:if_block( MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> ?BACKEND:jump_to_label(BSt0, Label) @@ -1354,36 +1355,42 @@ is_integer_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: ffffcf13 not t5,t6\n" - " 8: 0f72 slli t5,t5,0x1c\n" - " a: 020f0f63 beqz t5,0x48\n" - " e: 8f7e mv t5,t6\n" - " 10: 4e8d li t4,3\n" - " 12: 01df7f33 and t5,t5,t4\n" - " 16: 4e89 li t4,2\n" - " 18: 01df0663 beq t5,t4,0x24\n" - " 1c: a0d5 j 0x100\n" - " 1e: 0001 nop\n" - " 20: 00000013 nop\n" - " 24: 4f0d li t5,3\n" - " 26: ffff4f13 not t5,t5\n" - " 2a: 01efffb3 and t6,t6,t5\n" - " 2e: 000faf83 lw t6,0(t6)\n" - " 32: 03f00f13 li t5,63\n" - " 36: 01efffb3 and t6,t6,t5\n" - " 3a: 4f21 li 
t5,8\n" - " 3c: 01ef8663 beq t6,t5,0x48\n" - " 40: a0c1 j 0x100\n" - " 42: 0001 nop\n" - " 44: 00000013 nop" + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 0f868067 jr 248(a3) # 0x100\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: ffffcf13 not t5,t6\n" + " 18: 0f72 slli t5,t5,0x1c\n" + " 1a: 020f0f63 beqz t5,0x58\n" + " 1e: 8f7e mv t5,t6\n" + " 20: 4e8d li t4,3\n" + " 22: 01df7f33 and t5,t5,t4\n" + " 26: 4e89 li t4,2\n" + " 28: 01df0663 beq t5,t4,0x34\n" + " 2c: a8d1 j 0x100\n" + " 2e: 0001 nop\n" + " 30: 00000013 nop\n" + " 34: 4f0d li t5,3\n" + " 36: ffff4f13 not t5,t5\n" + " 3a: 01efffb3 and t6,t6,t5\n" + " 3e: 000faf83 lw t6,0(t6)\n" + " 42: 03f00f13 li t5,63\n" + " 46: 01efffb3 and t6,t6,t5\n" + " 4a: 4f21 li t5,8\n" + " 4c: 01ef8663 beq t6,t5,0x58\n" + " 50: a845 j 0x100\n" + " 52: 0001 nop\n" + " 54: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1395,11 +1402,12 @@ cond_jump_to_label(Cond, Label, MMod, MSt0) -> %% Keep the unoptimized version to test the and case. 
is_number_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, Arg1 = {x_reg, 0}, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, Arg1), - State2 = ?BACKEND:if_block( - State1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, Arg1), + State3 = ?BACKEND:if_block( + State2, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(BSt0) -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), @@ -1416,119 +1424,146 @@ is_number_test() -> ) end ), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: ffffcf13 not t5,t6\n" - " 8: 0f72 slli t5,t5,0x1c\n" - " a: 040f0763 beqz t5,0x58\n" - " e: 8f7e mv t5,t6\n" - " 10: 4e8d li t4,3\n" - " 12: 01df7f33 and t5,t5,t4\n" - " 16: 4e89 li t4,2\n" - " 18: 01df0663 beq t5,t4,0x24\n" - " 1c: a0d5 j 0x100\n" - " 1e: 0001 nop\n" - " 20: 00000013 nop\n" - " 24: 4f0d li t5,3\n" - " 26: ffff4f13 not t5,t5\n" - " 2a: 01efffb3 and t6,t6,t5\n" - " 2e: 000faf83 lw t6,0(t6)\n" - " 32: 8f7e mv t5,t6\n" - " 34: 03f00e93 li t4,63\n" - " 38: 01df7f33 and t5,t5,t4\n" - " 3c: 4ea1 li t4,8\n" - " 3e: 01df0d63 beq t5,t4,0x58\n" - " 42: 03f00f13 li t5,63\n" - " 46: 01efffb3 and t6,t6,t5\n" - " 4a: 4f61 li t5,24\n" - " 4c: 01ef8663 beq t6,t5,0x58\n" - " 50: a845 j 0x100\n" - " 52: 0001 nop\n" - " 54: 00000013 nop" + " 0: ffff .insn 2, 0xffff\n" 
+ " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 0f868067 jr 248(a3) # 0x100\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: ffffcf13 not t5,t6\n" + " 18: 0f72 slli t5,t5,0x1c\n" + " 1a: 040f0763 beqz t5,0x68\n" + " 1e: 8f7e mv t5,t6\n" + " 20: 4e8d li t4,3\n" + " 22: 01df7f33 and t5,t5,t4\n" + " 26: 4e89 li t4,2\n" + " 28: 01df0663 beq t5,t4,0x34\n" + " 2c: a8d1 j 0x100\n" + " 2e: 0001 nop\n" + " 30: 00000013 nop\n" + " 34: 4f0d li t5,3\n" + " 36: ffff4f13 not t5,t5\n" + " 3a: 01efffb3 and t6,t6,t5\n" + " 3e: 000faf83 lw t6,0(t6)\n" + " 42: 8f7e mv t5,t6\n" + " 44: 03f00e93 li t4,63\n" + " 48: 01df7f33 and t5,t5,t4\n" + " 4c: 4ea1 li t4,8\n" + " 4e: 01df0d63 beq t5,t4,0x68\n" + " 52: 03f00f13 li t5,63\n" + " 56: 01efffb3 and t6,t6,t5\n" + " 5a: 4f61 li t5,24\n" + " 5c: 01ef8663 beq t6,t5,0x68\n" + " 60: a045 j 0x100\n" + " 62: 0001 nop\n" + " 64: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#100), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), 
+ Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8963 beq t6,t5,0x1a\n" - " c: 4f2d li t5,11\n" - " e: 01ef8663 beq t6,t5,0x1a\n" - " 12: a0fd j 0x100\n" - " 14: 0001 nop\n" - " 16: 00000013 nop" + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 0f868067 jr 248(a3) # 0x100\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: 04b00f13 li t5,75\n" + " 18: 01ef8963 beq t6,t5,0x2a\n" + " 1c: 4f2d li t5,11\n" + " 1e: 01ef8663 beq t6,t5,0x2a\n" + " 22: a8f9 j 0x100\n" + " 24: 0001 nop\n" + " 26: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_far_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), Label = 1, - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + State1 = ?BACKEND:jump_table(State0, 1), + {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), + State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State3 = ?BACKEND:free_native_registers(State2, [Reg]), - ?BACKEND:assert_all_native_free(State3), - State4 = ?BACKEND:add_label(State3, Label, 16#1000), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State4 = ?BACKEND:free_native_registers(State3, [Reg]), + ?BACKEND:assert_all_native_free(State4), + State5 = ?BACKEND:add_label(State4, Label, 16#1000), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8963 beq t6,t5,0x1a\n" - " c: 4f2d li t5,11\n" - " e: 01ef8663 beq t6,t5,0x1a\n" - " 12: 7ef0006f j 0x1000\n" - " 16: 00000013 nop" + " 0: ffff .insn 
2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00001697 auipc a3,0x1\n" + " c: ff868067 jr -8(a3) # 0x1000\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: 04b00f13 li t5,75\n" + " 18: 01ef8963 beq t6,t5,0x2a\n" + " 1c: 4f2d li t5,11\n" + " 1e: 01ef8663 beq t6,t5,0x2a\n" + " 22: 7df0006f j 0x1000\n" + " 26: 00000013 nop" >>, ?assertEqual(dump_to_bin(Dump), Stream). is_boolean_far_known_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 1), Label = 1, - State1 = ?BACKEND:add_label(State0, Label, 16#1000), - {State2, Reg} = ?BACKEND:move_to_native_register(State1, {x_reg, 0}), - State3 = ?BACKEND:if_block(State2, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> + State2 = ?BACKEND:add_label(State1, Label, 16#1000), + {State3, Reg} = ?BACKEND:move_to_native_register(State2, {x_reg, 0}), + State4 = ?BACKEND:if_block(State3, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) -> ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end) end), - State4 = ?BACKEND:free_native_registers(State3, [Reg]), - ?BACKEND:assert_all_native_free(State4), - State5 = ?BACKEND:update_branches(State4), - Stream = ?BACKEND:stream(State5), + State5 = ?BACKEND:free_native_registers(State4, [Reg]), + ?BACKEND:assert_all_native_free(State5), + State6 = ?BACKEND:update_branches(State5), + Stream = ?BACKEND:stream(State6), Dump = << - " 0: 01852f83 lw t6,24(a0)\n" - " 4: 04b00f13 li t5,75\n" - " 8: 01ef8963 beq t6,t5,0x1a\n" - " c: 4f2d li t5,11\n" - " e: 01ef8663 beq t6,t5,0x1a\n" - " 12: 00001f17 auipc t5,0x1\n" - " 16: feef0067 jr -18(t5) # 0x1000" + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00001697 auipc a3,0x1\n" + " c: ff868067 jr -8(a3) # 0x1000\n" + " 10: 01852f83 lw t6,24(a0)\n" + " 14: 04b00f13 li t5,75\n" + " 18: 01ef8963 beq 
t6,t5,0x2a\n" + " 1c: 4f2d li t5,11\n" + " 1e: 01ef8663 beq t6,t5,0x2a\n" + " 22: 00001f17 auipc t5,0x1\n" + " 26: fdef0067 jr -34(t5) # 0x1000" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -1618,70 +1653,86 @@ wait_test() -> Label = 2, State3 = ?BACKEND:set_continuation_to_label(State2, Label), State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State5 = ?BACKEND:add_label(State4, Label, 16#100), - Stream = ?BACKEND:stream(State4), + Stream = ?BACKEND:stream(State5), Dump = << - " 0: ffffffff .insn 4, 0xffffffff\n" - " 4: ffffffff .insn 4, 0xffffffff\n" - " 6: ffffffff .insn 4, 0xffffffff\n" - " a: ffffffff .insn 4, 0xffffffff\n" - " c: ffffffff .insn 4, 0xffffffff\n" - " 10: ffffffff .insn 4, 0xffffffff\n" - " 12: ffffffff .insn 4, 0xffffffff\n" - " 16: ffffffff .insn 4, 0xffffffff\n" - " 18: ffffffff .insn 4, 0xffffffff\n" - " 1c: ffffffff .insn 4, 0xffffffff\n" - " 1e: ffffffff .insn 4, 0xffffffff\n" - " 22: ffffffff .insn 4, 0xffffffff\n" - " 24: ffffffff .insn 4, 0xffffffff\n" - " 28: ffffffff .insn 4, 0xffffffff\n" - " 2c: 01f5a223 sw t6,4(a1)\n" - " 30: 07462f83 lw t6,116(a2)\n" - " 34: 8f82 jr t6" + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 02868067 jr 40(a3) # 0x30\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 0f068067 jr 240(a3) # 0x100\n" + " 18: ffff .insn 2, 0xffff\n" + " 1a: ffff .insn 2, 0xffff\n" + " 1c: ffff .insn 2, 0xffff\n" + " 1e: ffff .insn 2, 0xffff\n" + " 20: ffff .insn 2, 0xffff\n" + " 22: ffff .insn 2, 0xffff\n" + " 24: ffff .insn 2, 0xffff\n" + " 26: ffff .insn 2, 0xffff\n" + " 28: ffff .insn 2, 0xffff\n" + " 2a: ffff .insn 2, 0xffff\n" + " 2c: ffff .insn 2, 0xffff\n" + " 2e: ffff .insn 2, 0xffff\n" + " 30: 00000f97 auipc t6,0x0\n" + " 34: 0d0f8f93 addi t6,t6,208 # 0x100\n" + " 38: 01f5a223 sw t6,4(a1)\n" + " 3c: 07462f83 lw t6,116(a2)\n" + " 40: 8f82 jr t6" >>, 
?assertEqual(dump_to_bin(Dump), Stream). %% Test return_labels_and_lines/2 function return_labels_and_lines_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), % Test return_labels_and_lines with some sample labels and lines - State1 = ?BACKEND:add_label(State0, 2, 32), - State2 = ?BACKEND:add_label(State1, 1, 16), + State2 = ?BACKEND:add_label(State1, 2, 32), + State3 = ?BACKEND:add_label(State2, 1, 16), % {Line, Offset} pairs SortedLines = [{10, 16}, {20, 32}], - State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), - Stream = ?BACKEND:stream(State3), + State4 = ?BACKEND:return_labels_and_lines(State3, SortedLines), + Stream = ?BACKEND:stream(State4), - % Should have generated auipc + addi + ret + labels table + lines table - % auipc = 4 bytes, addi = 2 bytes (compressed), ret = 2 bytes, labels table = 3*2*2 = 12 bytes, lines table = 3*2*2 = 12 bytes - % Total: 4 + 2 + 2 + 12 + 12 = 32 bytes + % Should have jump table + generated code with label/line tables ?assert(byte_size(Stream) >= 32), - % Expected: auipc a0, 0 + addi a0, a0, 10 + ret + padding + labels table + lines table - % The data tables start at offset 0xa (10) because of alignment padding + % Expected: jump table (3 entries, 24 bytes) + auipc + addi + ret + padding + labels table + lines table Dump = << - " 0: 00000517 auipc a0,0x0\n" - " 4: 0529 addi a0,a0,10 # 0xa\n" - " 6: 8082 ret\n" - " 8: 0200ffff .insn 4, 0x0200ffff\n" - " c: 0100 addi s0,sp,128\n" - " e: 0000 unimp\n" - " 10: 1000 addi s0,sp,32\n" - " 12: 0200 addi s0,sp,256\n" - " 14: 0000 unimp\n" - " 16: 2000 fld fs0,0(s0)\n" - " 18: 0200 addi s0,sp,256\n" - " 1a: 0a00 addi s0,sp,272\n" - " 1c: 0000 unimp\n" - " 1e: 1000 addi s0,sp,32\n" - " 20: 1400 addi s0,sp,544\n" - " 22: 0000 unimp\n" - " 24: 2000 fld fs0,0(s0)" + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 
00000697 auipc a3,0x0\n" + " c: 00868067 jr 8(a3) # 0x10\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 01068067 jr 16(a3) # 0x20\n" + " 18: 00000517 auipc a0,0x0\n" + " 1c: 0529 addi a0,a0,10 # 0x22\n" + " 1e: 8082 ret\n" + " 20: ffff .insn 2, 0xffff\n" + " 22: 0200 addi s0,sp,256\n" + " 24: 0100 addi s0,sp,128\n" + " 26: 0000 unimp\n" + " 28: 1000 addi s0,sp,32\n" + " 2a: 0200 addi s0,sp,256\n" + " 2c: 0000 unimp\n" + " 2e: 2000 fld fs0,0(s0)\n" + " 30: 0200 addi s0,sp,256\n" + " 32: 0a00 addi s0,sp,272\n" + " 34: 0000 unimp\n" + " 36: 1000 addi s0,sp,32\n" + " 38: 1400 addi s0,sp,544\n" + " 3a: 0000 unimp\n" + " 3c: 2000 fld fs0,0(s0)" >>, ?assertEqual(dump_to_bin(Dump), Stream). @@ -2335,8 +2386,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: x_reg to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), - State2 = setelement(7, State1, [a3, t3]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), + State2 = setelement(8, State1, [a3, t3]), [a3, t3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1), Stream = ?BACKEND:stream(State3), @@ -2351,8 +2402,8 @@ move_to_array_element_test_() -> end), %% move_to_array_element/5: imm to reg[x+offset] ?_test(begin - State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), - State2 = setelement(7, State1, [a3, t3]), + State1 = setelement(7, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), + State2 = setelement(8, State1, [a3, t3]), [a3, t3] = ?BACKEND:used_regs(State2), State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1), Stream = ?BACKEND:stream(State3), From 36db01a7174d418cb5556098f4fab8e0e380a160 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Fri, 24 Oct 2025 20:13:32 +0200 Subject: [PATCH 25/28] JIT: directly emit code if label address is known This reduces memory usage when call_or_schedule_next or set_continuation_to_label are 
invoked with known labels Signed-off-by: Paul Guyot --- libs/jit/src/jit_aarch64.erl | 49 ++++++++++++---- libs/jit/src/jit_riscv32.erl | 35 ++++++----- libs/jit/src/jit_x86_64.erl | 63 +++++++++++++++----- tests/libs/jit/jit_aarch64_tests.erl | 67 ++++++++++++++++++++- tests/libs/jit/jit_riscv32_tests.erl | 82 +++++++++++++++++++++++++- tests/libs/jit/jit_x86_64_tests.erl | 87 ++++++++++++++++++++++++++++ 6 files changed, 339 insertions(+), 44 deletions(-) diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 0814497a5b..b96d9f97ad 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -1905,17 +1905,30 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _], - branches = Branches + branches = Branches, + labels = Labels } = State, Label ) -> Offset = StreamModule:offset(Stream0), - I1 = jit_aarch64_asm:adr(Temp, 0), - Reloc = {Label, Offset, {adr, Temp}}, - I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1, branches = [Reloc | Branches]}. + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct adr without relocation + Rel = LabelOffset - Offset, + I1 = jit_aarch64_asm:adr(Temp, Rel), + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + I1 = jit_aarch64_asm:adr(Temp, 0), + Reloc = {Label, Offset, {adr, Temp}}, + I2 = jit_aarch64_asm:str(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]} + end. 
%%----------------------------------------------------------------------------- %% @doc Set the continuation address to the current offset, creating a @@ -2202,6 +2215,7 @@ call_only_or_schedule_next( stream_module = StreamModule, stream = Stream0, branches = Branches, + labels = Labels, available_regs = [Temp | _] } = State0, Label @@ -2214,11 +2228,22 @@ call_only_or_schedule_next( I3 = jit_aarch64_asm:str_w(Temp, ?JITSTATE_REDUCTIONCOUNT), Stream1 = StreamModule:append(Stream0, <>), BNEOffset = StreamModule:offset(Stream1), - % Branch to label if reduction count is not zero - I4 = jit_aarch64_asm:bcc(ne, 0), - Reloc1 = {Label, BNEOffset, {bcc, ne}}, - Stream2 = StreamModule:append(Stream1, I4), - State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]}, + + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct branch with calculated offset + % Calculate relative offset (must be 4-byte aligned) + Rel = LabelOffset - BNEOffset, + I4 = jit_aarch64_asm:bcc(ne, Rel), + Stream2 = StreamModule:append(Stream1, I4), + State1 = State0#state{stream = Stream2}; + false -> + % Label not yet known, emit placeholder and add relocation + I4 = jit_aarch64_asm:bcc(ne, 0), + Reloc1 = {Label, BNEOffset, {bcc, ne}}, + Stream2 = StreamModule:append(Stream1, I4), + State1 = State0#state{stream = Stream2, branches = [Reloc1 | Branches]} + end, State2 = set_continuation_to_label(State1, Label), call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). 
diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index afbb74eaaa..7c4198f111 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -2379,23 +2379,32 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _], - branches = Branches + branches = Branches, + labels = Labels } = State, Label ) -> - % Similar to AArch64: use pc_relative_address with a relocation that will be - % resolved to point directly to the label's actual address (not the jump table entry) Offset = StreamModule:offset(Stream0), - % Emit placeholder for pc_relative_address (auipc + addi) - % Reserve 8 bytes (2 x 32-bit instructions) with all-1s placeholder for flash programming - % The relocation will replace these with the correct offset - I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, - Reloc = {Label, Offset, {adr, Temp}}, - % Store continuation (jit_state is in a1) - I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1, branches = [Reloc | Branches]}. 
+ case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct pc-relative address without relocation + Rel = LabelOffset - Offset, + I1 = pc_relative_address(Temp, Rel), + I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + % Reserve 8 bytes (2 x 32-bit instructions) with all-1s placeholder for flash programming + % The relocation will replace these with the correct offset + I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>, + Reloc = {Label, Offset, {adr, Temp}}, + I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]} + end. %% @doc Set the contination to a given offset %% Return a reference so the offset will be updated with update_branches diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 1e45c720b3..00559a4ec8 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -1845,17 +1845,31 @@ set_continuation_to_label( stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _], - branches = Branches + branches = Branches, + labels = Labels } = State, Label ) -> Offset = StreamModule:offset(Stream0), - {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), - Reloc = {Label, Offset + RewriteLEAOffset, 32}, - I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State#state{stream = Stream1, branches = [Reloc | Branches]}. 
+ case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct leaq without relocation + % leaq instruction is 7 bytes, RIP points to next instruction + RelOffset = LabelOffset - (Offset + 7), + I1 = jit_x86_64_asm:leaq({rip, RelOffset}, Temp), + I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + {RewriteLEAOffset, I1} = jit_x86_64_asm:leaq_rel32({-4, rip}, Temp), + Reloc = {Label, Offset + RewriteLEAOffset, 32}, + I2 = jit_x86_64_asm:movq(Temp, ?JITSTATE_CONTINUATION), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1, branches = [Reloc | Branches]} + end. set_continuation_to_offset( #state{ @@ -2000,19 +2014,38 @@ call_only_or_schedule_next( #state{ stream_module = StreamModule, stream = Stream0, - branches = Branches + branches = Branches, + labels = Labels } = State0, Label ) -> Offset = StreamModule:offset(Stream0), I1 = jit_x86_64_asm:decl(?JITSTATE_REMAINING_REDUCTIONS), - {RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(1), - I2 = jit_x86_64_asm:jz(byte_size(I3) + 2), - Sz = byte_size(I1) + byte_size(I2), - Reloc1 = {Label, Offset + Sz + RewriteJMPOffset, 32}, - Code = <>, - Stream1 = StreamModule:append(Stream0, Code), - State1 = State0#state{stream = Stream1, branches = [Reloc1 | Branches]}, + I1Size = byte_size(I1), + + case lists:keyfind(Label, 1, Labels) of + {Label, LabelOffset} -> + % Label is already known, emit direct jmp with calculated offset + % jz is 2 bytes, jmp_rel32 is 5 bytes + JmpSize = 5, + I2 = jit_x86_64_asm:jz(JmpSize + 2), + I2Size = byte_size(I2), + % Calculate relative offset: target - current + RelOffset = LabelOffset - (Offset + I1Size + I2Size), + {_RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(RelOffset), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + 
State1 = State0#state{stream = Stream1}; + false -> + % Label not yet known, emit placeholder and add relocation + {RewriteJMPOffset, I3} = jit_x86_64_asm:jmp_rel32(1), + I2 = jit_x86_64_asm:jz(byte_size(I3) + 2), + Sz = I1Size + byte_size(I2), + Reloc1 = {Label, Offset + Sz + RewriteJMPOffset, 32}, + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State1 = State0#state{stream = Stream1, branches = [Reloc1 | Branches]} + end, State2 = set_continuation_to_label(State1, Label), call_primitive_last(State2, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]). diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 106aa43790..715b1d0f42 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -933,6 +933,38 @@ call_only_or_schedule_next_and_label_relocation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_only_or_schedule_next_known_label_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:add_label(State2, 2, 16#2c), + State4 = ?BACKEND:call_only_or_schedule_next(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: 1400000d b 0x34\n" + " 4: 14000002 b 0xc\n" + " 8: 14000009 b 0x2c\n" + " c: b9401027 ldr w7, [x1, #16]\n" + " 10: f10004e7 subs x7, x7, #0x1\n" + " 14: b9001027 str w7, [x1, #16]\n" + " 18: 540000a1 b.ne 0x2c // b.any\n" + " 1c: 10000087 adr x7, 0x2c\n" + " 20: f9000427 str x7, [x1, #8]\n" + " 24: f9400847 ldr x7, [x2, #16]\n" + " 28: d61f00e0 br x7\n" + " 2c: f9400047 ldr x7, [x2]\n" + " 30: d61f00e0 br x7\n" + " 34: f9400447 ldr x7, [x2, #8]\n" + " 38: 
d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_bif_with_large_literal_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), @@ -1215,16 +1247,45 @@ wait_test() -> Label = 2, State3 = ?BACKEND:set_continuation_to_label(State2, Label), State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), - Stream = ?BACKEND:stream(State4), + Stream = ?BACKEND:stream(State6), Dump = << " 0: 14000000 b 0x0\n" " 4: 14000005 b 0x18\n" - " 8: 14000000 b 0x8\n" + " 8: 1400003e b 0x100\n" + " c: 14000000 b 0xc\n" + " 10: 14000000 b 0x10\n" + " 14: 14000000 b 0x14\n" + " 18: 10000747 adr x7, 0x100\n" + " 1c: f9000427 str x7, [x1, #8]\n" + " 20: f9407447 ldr x7, [x2, #232]\n" + " 24: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test set_continuation_to_label with known label +wait_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:add_label(State2, Label, 16#100), + State4 = ?BACKEND:set_continuation_to_label(State3, Label), + State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = << + " 0: 14000000 b 0x0\n" + " 4: 14000005 b 0x18\n" + " 8: 1400003e b 0x100\n" " c: 14000000 b 0xc\n" " 10: 14000000 b 0x10\n" " 14: 14000000 b 0x14\n" - " 18: 10000007 adr x7, 0x18\n" + " 18: 10000747 adr x7, 0x100\n" " 1c: f9000427 str x7, [x1, #8]\n" " 20: f9407447 ldr x7, [x2, #232]\n" " 24: d61f00e0 br x7" diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index b98e999ffb..a36420c525 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -1188,6 +1188,42 @@ call_only_or_schedule_next_and_label_relocation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). 
+call_only_or_schedule_next_known_label_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:add_label(State2, 2, 16#36), + State4 = ?BACKEND:call_only_or_schedule_next(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: 00000697 auipc a3,0x0\n" + " 4: 03c68067 jr 60(a3) # 0x3c\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01068067 jr 16(a3) # 0x18\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 02668067 jr 38(a3) # 0x36\n" + " 18: 0085af83 lw t6,8(a1)\n" + " 1c: 1ffd addi t6,t6,-1\n" + " 1e: 01f5a423 sw t6,8(a1)\n" + " 22: 000f9a63 bnez t6,0x36\n" + " 26: 00000f97 auipc t6,0x0\n" + " 2a: 0fc1 addi t6,t6,16 # 0x36\n" + " 2c: 01f5a223 sw t6,4(a1)\n" + " 30: 00862f83 lw t6,8(a2)\n" + " 34: 8f82 jr t6\n" + " 36: 00062f83 lw t6,0(a2)\n" + " 3a: 8f82 jr t6\n" + " 3c: 00462f83 lw t6,4(a2)\n" + " 40: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ %% Test with large gap (256+ bytes) to force mov_immediate path call_only_or_schedule_next_and_label_relocation_large_gap_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -1654,8 +1690,52 @@ wait_test() -> State3 = ?BACKEND:set_continuation_to_label(State2, Label), State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), - Stream = ?BACKEND:stream(State5), + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: ffff .insn 2, 0xffff\n" + " 2: ffff .insn 2, 0xffff\n" + " 4: ffff .insn 2, 0xffff\n" + " 6: ffff .insn 2, 0xffff\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 02868067 jr 40(a3) # 0x30\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 0f068067 jr 240(a3) # 0x100\n" + " 18: ffff .insn 2, 0xffff\n" + " 1a: ffff .insn 2, 0xffff\n" + " 1c: ffff .insn 2, 0xffff\n" + " 1e: ffff .insn 2, 0xffff\n" + " 20: ffff .insn 2, 0xffff\n" + " 22: ffff .insn 2, 0xffff\n" + " 24: ffff .insn 2, 0xffff\n" + " 26: ffff .insn 2, 0xffff\n" + " 28: ffff .insn 2, 0xffff\n" + " 2a: ffff .insn 2, 0xffff\n" + " 2c: ffff .insn 2, 0xffff\n" + " 2e: ffff .insn 2, 0xffff\n" + " 30: 00000f97 auipc t6,0x0\n" + " 34: 0d0f8f93 addi t6,t6,208 # 0x100\n" + " 38: 01f5a223 sw t6,4(a1)\n" + " 3c: 07462f83 lw t6,116(a2)\n" + " 40: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test set_continuation_to_label with known label +wait_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:add_label(State2, Label, 16#100), + State4 = ?BACKEND:set_continuation_to_label(State3, Label), + State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), Dump = << " 0: ffff .insn 2, 0xffff\n" diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index b6a92365ad..50a175a016 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -893,6 +893,37 @@ call_only_or_schedule_next_and_label_relocation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_only_or_schedule_next_known_label_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 2), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:add_label(State2, 2, 16#2a), + State4 = ?BACKEND:call_only_or_schedule_next(State3, 2), + State5 = ?BACKEND:call_primitive_last(State4, 0, [ctx, jit_state]), + % OP_INT_CALL_END + State6 = ?BACKEND:add_label(State5, 0), + State7 = ?BACKEND:call_primitive_last(State6, 1, [ctx, jit_state]), + State8 = ?BACKEND:update_branches(State7), + Stream = ?BACKEND:stream(State8), + Dump = + << + " 0: e9 2a 00 00 00 jmpq 0x2f\n" + " 5: e9 05 00 00 00 jmpq 0xf\n" + " a: e9 1b 00 00 00 jmpq 0x2a\n" + " f: ff 4e 10 decl 0x10(%rsi)\n" + " 12: 74 05 je 0x19\n" + " 14: e9 11 00 00 00 jmpq 0x2a\n" + " 19: 48 8d 05 0a 00 00 00 lea 0xa(%rip),%rax # 0x2a\n" + " 20: 48 89 46 08 mov %rax,0x8(%rsi)\n" + " 24: 48 8b 42 10 mov 0x10(%rdx),%rax\n" + " 28: ff e0 jmpq *%rax\n" + " 2a: 48 8b 02 mov (%rdx),%rax\n" + " 2d: ff e0 jmpq *%rax\n" + " 2f: 48 
8b 42 08 mov 0x8(%rdx),%rax\n" + " 33: ff e0 jmpq *%rax\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_bif_with_large_literal_integer_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, FuncPtr} = ?BACKEND:call_primitive(State0, 8, [jit_state, 2]), @@ -1601,6 +1632,62 @@ jump_to_continuation_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +%% Test set_continuation_to_label with unknown label +wait_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:set_continuation_to_label(State2, Label), + State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State5 = ?BACKEND:add_label(State4, Label, 16#100), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 14 00 00 00 jmpq 0x1e\n" + " a: e9 f1 00 00 00 jmpq 0x100\n" + " f: e9 ff ff ff ff jmpq 0x13\n" + " 14: e9 ff ff ff ff jmpq 0x18\n" + " 19: e9 ff ff ff ff jmpq 0x1d\n" + " 1e: 48 8d 05 db 00 00 00 lea 0xdb(%rip),%rax\n" + " 25: 48 89 46 08 mov %rax,0x8(%rsi)\n" + " 29: 48 8b 82 e8 00 00 00 mov 0xe8(%rdx),%rax\n" + " 30: ff e0 jmpq *%rax" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test set_continuation_to_label with known label +wait_known_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:add_label(State2, Label, 16#100), + State4 = ?BACKEND:set_continuation_to_label(State3, Label), + State5 = ?BACKEND:call_primitive_last(State4, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + State6 = ?BACKEND:update_branches(State5), + + Stream = ?BACKEND:stream(State6), + Dump = + << + " 0: e9 ff ff ff ff jmpq 0x4\n" + " 5: e9 14 00 00 00 jmpq 0x1e\n" + " a: e9 f1 00 00 00 jmpq 0x100\n" + " f: e9 ff ff ff ff jmpq 0x13\n" + " 14: e9 ff ff ff ff jmpq 0x18\n" + " 19: e9 ff ff ff ff jmpq 0x1d\n" + " 1e: 48 8d 05 db 00 00 00 lea 0xdb(%rip),%rax\n" + " 25: 48 89 46 08 mov %rax,0x8(%rsi)\n" + " 29: 48 8b 82 e8 00 00 00 mov 0xe8(%rdx),%rax\n" + " 30: ff e0 jmpq *%rax" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). 
From 388f63cfb448863fae8875b486363ad81d4de74d Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Thu, 23 Oct 2025 22:39:11 +0200 Subject: [PATCH 26/28] riscv32: embedded JIT on esp32 Signed-off-by: Paul Guyot --- .../esp32/components/avm_sys/CMakeLists.txt | 3 +- .../avm_sys/jit_stream_flash_platform.c | 141 ++++++++++++++++++ ...am_flash.c => jit_stream_flash_platform.h} | 29 ++-- 3 files changed, 162 insertions(+), 11 deletions(-) create mode 100644 src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c rename src/platforms/esp32/components/avm_sys/{jit_stream_flash.c => jit_stream_flash_platform.h} (64%) diff --git a/src/platforms/esp32/components/avm_sys/CMakeLists.txt b/src/platforms/esp32/components/avm_sys/CMakeLists.txt index 8156bb2ac8..2f942073f7 100644 --- a/src/platforms/esp32/components/avm_sys/CMakeLists.txt +++ b/src/platforms/esp32/components/avm_sys/CMakeLists.txt @@ -25,7 +25,8 @@ set(AVM_SYS_COMPONENT_SRCS "sys.c" "platform_nifs.c" "platform_defaultatoms.c" - "jit_stream_flash.c" + "jit_stream_flash_platform.c" + "../../../../libAtomVM/jit_stream_flash.c" "../../../../libAtomVM/inet.c" "../../../../libAtomVM/otp_crypto.c" "../../../../libAtomVM/otp_net.c" diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c new file mode 100644 index 0000000000..bfaed52215 --- /dev/null +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.c @@ -0,0 +1,141 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "jit_stream_flash.h" + +#include +#include + +#include "esp32_sys.h" + +#if ESP_IDF_VERSION_MAJOR >= 5 +#include +#endif + +#ifdef CONFIG_IDF_TARGET_ARCH_RISCV +#include +#endif + +struct JSFlashPlatformContext +{ + const esp_partition_t *partition; +}; + +struct JSFlashPlatformContext *jit_stream_flash_platform_init(void) +{ + const esp_partition_t *partition = esp_partition_find_first( + ESP_PARTITION_TYPE_DATA, ESP_PARTITION_SUBTYPE_ANY, JIT_PARTITION_NAME); + if (IS_NULL_PTR(partition)) { + fprintf(stderr, "Failed to find partition '%s' for JIT cache\n", JIT_PARTITION_NAME); + return NULL; + } + + struct JSFlashPlatformContext *pf_ctx = malloc(sizeof(struct JSFlashPlatformContext)); + if (IS_NULL_PTR(pf_ctx)) { + return NULL; + } + + pf_ctx->partition = partition; + return pf_ctx; +} + +void jit_stream_flash_platform_destroy(struct JSFlashPlatformContext *ctx) +{ + free(ctx); +} + +bool jit_stream_flash_platform_erase_sector(struct JSFlashPlatformContext *ctx, uintptr_t addr) +{ + if (UNLIKELY(!ctx || !ctx->partition)) { + return false; + } + + size_t flash_offset = spi_flash_cache2phys((const void *) addr); + if (UNLIKELY(flash_offset == SPI_FLASH_CACHE2PHYS_FAIL)) { + fprintf(stderr, "Failed to convert cache address 0x%lx to physical address\n", (unsigned long) addr); + return false; + } + + esp_err_t err = esp_partition_erase_range(ctx->partition, + flash_offset - ctx->partition->address, FLASH_SECTOR_SIZE); + if (UNLIKELY(err != ESP_OK)) 
{ + fprintf(stderr, "Failed to erase sector at offset 0x%lx: %d\n", (unsigned long) flash_offset, err); + return false; + } + + return true; +} + +bool jit_stream_flash_platform_write_page(struct JSFlashPlatformContext *ctx, uintptr_t addr, const uint8_t *data) +{ + if (UNLIKELY(!ctx || !ctx->partition)) { + return false; + } + + size_t flash_offset = spi_flash_cache2phys((const void *) addr); + if (UNLIKELY(flash_offset == SPI_FLASH_CACHE2PHYS_FAIL)) { + fprintf(stderr, "Failed to convert cache address 0x%lx to physical address\n", (unsigned long) addr); + return false; + } + + esp_err_t err = esp_partition_write(ctx->partition, + flash_offset - ctx->partition->address, data, FLASH_PAGE_SIZE); + if (UNLIKELY(err != ESP_OK)) { + fprintf(stderr, "Failed to write page at offset 0x%lx: %d\n", (unsigned long) flash_offset, err); + return false; + } + + return true; +} + +uintptr_t jit_stream_flash_platform_ptr_to_executable(uintptr_t addr) +{ + // Convert data cache address to instruction cache address for RISC-V targets + // On ESP32-C3/C6/H2, flash is mapped to both DBUS (0x3C...) and IBUS (0x42...) 
+ // but only IBUS addresses are executable +#ifdef CONFIG_IDF_TARGET_ARCH_RISCV + if ((addr & ~SOC_MMU_VADDR_MASK) == SOC_MMU_DBUS_VADDR_BASE) { + return (addr & SOC_MMU_VADDR_MASK) | SOC_MMU_IBUS_VADDR_BASE; + } + return addr; +#else + return addr; +#endif +} + +uintptr_t jit_stream_flash_platform_executable_to_ptr(uintptr_t addr) +{ + // Convert instruction cache address to data cache address for RISC-V targets + // This is the reverse of ptr_to_executable +#ifdef CONFIG_IDF_TARGET_ARCH_RISCV + if ((addr & ~SOC_MMU_VADDR_MASK) == SOC_MMU_IBUS_VADDR_BASE) { + return (addr & SOC_MMU_VADDR_MASK) | SOC_MMU_DBUS_VADDR_BASE; + } + return addr; +#else + return addr; +#endif +} + +REGISTER_NIF_COLLECTION(jit_stream_flash, jit_stream_flash_init, NULL, jit_stream_flash_get_nif) + +#endif // AVM_NO_JIT diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h similarity index 64% rename from src/platforms/esp32/components/avm_sys/jit_stream_flash.c rename to src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h index 77dfcca908..6f8d9bffc5 100644 --- a/src/platforms/esp32/components/avm_sys/jit_stream_flash.c +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash_platform.h @@ -18,17 +18,26 @@ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later */ -#ifndef AVM_NO_JIT +#ifndef _JIT_STREAM_FLASH_PLATFORM_H_ +#define _JIT_STREAM_FLASH_PLATFORM_H_ -#include "context.h" -#include "jit.h" -#include "term.h" +#include +#include +#include -ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) -{ - UNUSED(ctx); - UNUSED(jit_stream); - return NULL; -} +#ifdef __cplusplus +extern "C" { +#endif + +// ESP32 flash constants +#define FLASH_SECTOR_SIZE 4096 +#define FLASH_PAGE_SIZE 256 +// JIT code is stored in main.avm partition +#define JIT_PARTITION_NAME "main.avm" + +#ifdef __cplusplus +} #endif + +#endif // _JIT_STREAM_FLASH_PLATFORM_H_ From 
6a6decc7634e4415aede72f5b1094c7d2f88badf Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 25 Oct 2025 16:44:09 +0200 Subject: [PATCH 27/28] riscv32: Implement support for private_append Signed-off-by: Paul Guyot --- libs/jit/src/jit_riscv32.erl | 34 ++++++++++++++++++---------- tests/libs/jit/jit_riscv32_tests.erl | 14 ++++++------ 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl index 7c4198f111..9515c8f85b 100644 --- a/libs/jit/src/jit_riscv32.erl +++ b/libs/jit/src/jit_riscv32.erl @@ -87,6 +87,7 @@ -include_lib("jit.hrl"). -include("primitives.hrl"). +-include("term.hrl"). -define(ASSERT(Expr), true = Expr). @@ -1258,7 +1259,7 @@ if_block_cond( I1 = jit_riscv32_asm:mv(Temp, Reg), Stream1 = StreamModule:append(Stream0, I1), State1 = State0#state{stream = Stream1}, - State2 = and_(State1#state{available_regs = AT}, Temp, Mask), + {State2, Temp} = and_(State1#state{available_regs = AT}, {free, Temp}, Mask), Stream2 = State2#state.stream, %% Compare Temp with Val and branch if equal (NOT != Val) case Val of @@ -1304,7 +1305,7 @@ if_block_cond( ) when ?IS_GPR(Reg) -> %% RISC-V: AND with mask, then compare with value OffsetBefore = StreamModule:offset(Stream0), - State1 = and_(State0, Reg, Mask), + {State1, Reg} = and_(State0, RegTuple, Mask), Stream1 = State1#state.stream, %% Compare Reg with Val and branch if equal (NOT != Val) case Val of @@ -2460,14 +2461,14 @@ get_module_index( %% JIT currentl calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to %% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool %% by using BICS for -4. 
-and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) -> +and_(#state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, 16#FFFFFF) -> I1 = jit_riscv32_asm:slli(Reg, Reg, 8), I2 = jit_riscv32_asm:srli(Reg, Reg, 8), Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>), - State0#state{stream = Stream1}; + {State0#state{stream = Stream1}, Reg}; and_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, - Reg, + {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)), @@ -2476,20 +2477,20 @@ and_( I1 = jit_riscv32_asm:not_(Temp, Temp), I2 = jit_riscv32_asm:and_(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>), - State1#state{available_regs = [Temp | AT], stream = Stream2}; + {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg}; and_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, - Reg, + {free, Reg}, Val ) -> State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val), Stream1 = State1#state.stream, I = jit_riscv32_asm:and_(Reg, Reg, Temp), Stream2 = StreamModule:append(Stream1, I), - State1#state{available_regs = [Temp | AT], stream = Stream2}; + {State1#state{available_regs = [Temp | AT], stream = Stream2}, Reg}; and_( #state{stream_module = StreamModule, available_regs = []} = State0, - Reg, + {free, Reg}, Val ) when Val < 0 andalso Val >= -256 -> % No available registers, use a0 as temp and save it to t3 @@ -2507,10 +2508,10 @@ and_( % Restore a0 from t3 Restore = jit_riscv32_asm:mv(a0, ?IP_REG), Stream4 = StreamModule:append(Stream3, Restore), - State0#state{stream = Stream4}; + {State0#state{stream = Stream4}, Reg}; and_( #state{stream_module = StreamModule, available_regs = []} = State0, - Reg, + {free, Reg}, Val ) -> % No available registers, use a0 as temp and save it to t3 @@ -2527,7 +2528,16 @@ and_( % Restore a0 from t3 Restore = jit_riscv32_asm:mv(a0,
?IP_REG), Stream4 = StreamModule:append(Stream3, Restore), - State0#state{stream = Stream4}. + {State0#state{stream = Stream4}, Reg}; +and_( + #state{stream_module = StreamModule, available_regs = [ResultReg | AT], used_regs = UR} = + State0, + Reg, + ?TERM_PRIMARY_CLEAR_MASK +) -> + I = jit_riscv32_asm:andi(ResultReg, Reg, -4), + Stream1 = StreamModule:append(State0#state.stream, I), + {State0#state{stream = Stream1, available_regs = AT, used_regs = [ResultReg | UR]}, ResultReg}. or_( #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl index a36420c525..35f83cef69 100644 --- a/tests/libs/jit/jit_riscv32_tests.erl +++ b/tests/libs/jit/jit_riscv32_tests.erl @@ -124,7 +124,7 @@ call_primitive_6_args_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), % Get bin_ptr from x_reg 0 (similar to get_list_test pattern) {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK), + {State2, RegA} = ?BACKEND:and_(State1, {free, RegA}, ?TERM_PRIMARY_CLEAR_MASK), % Get another register for the last parameter to test {free, Reg} handling {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}), % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments @@ -1346,7 +1346,7 @@ call_bif_with_large_literal_integer_test() -> get_list_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:and_(State1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {State2, Reg} = ?BACKEND:and_(State1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), State3 = ?BACKEND:move_array_element(State2, Reg, 1, {y_reg, 1}), State4 = ?BACKEND:move_array_element(State3, Reg, 0, {y_reg, 0}), State5 = ?BACKEND:free_native_registers(State4, [Reg]), @@ -1380,7 +1380,7 
@@ is_integer_test() -> ?BACKEND:jump_to_label(BSt0, Label) end ), - MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt2, Reg} = ?BACKEND:and_(MSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg), ?BACKEND:if_block( MSt3, @@ -1447,7 +1447,7 @@ is_number_test() -> BSt1 = cond_jump_to_label( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0 ), - BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), + {BSt2, Reg} = ?BACKEND:and_(BSt1, {free, Reg}, ?TERM_PRIMARY_CLEAR_MASK), BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg), cond_jump_to_label( {'and', [ @@ -1941,7 +1941,7 @@ call_fun_test() -> ]) end ), - State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + {State5, RegCopy} = ?BACKEND:and_(State4, {free, RegCopy}, ?TERM_PRIMARY_CLEAR_MASK), State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), State7 = ?BACKEND:if_block( State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> @@ -2945,7 +2945,7 @@ and_register_exhaustion_negative_test() -> {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), % Test negative immediate (-4) which should use NOT+AND with t0 as temp - StateResult = ?BACKEND:and_(StateNoRegs, t6, -4), + {StateResult, t6} = ?BACKEND:and_(StateNoRegs, {free, t6}, -4), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << " 0: 01852f83 lw t6,24(a0)\n" @@ -2970,7 +2970,7 @@ and_register_exhaustion_positive_test() -> {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), % Test positive immediate (0x3F) which should use AND with t0 as temp - StateResult = ?BACKEND:and_(StateNoRegs, t6, 16#3F), + {StateResult, t6} = ?BACKEND:and_(StateNoRegs, {free, t6}, 16#3F), Stream = ?BACKEND:stream(StateResult), ExpectedDump = << " 0: 
01852f83 lw t6,24(a0)\n" From 4e7dc5cdccca029773d5d4cd28fc796db1a9d4a1 Mon Sep 17 00:00:00 2001 From: Paul Guyot Date: Sat, 25 Oct 2025 16:48:05 +0200 Subject: [PATCH 28/28] Dump largest refc binaries on oom crashes Signed-off-by: Paul Guyot --- src/libAtomVM/context.c | 2 +- src/libAtomVM/refc_binary.c | 83 +++++++++++++++++++++++++++++++++++++ src/libAtomVM/refc_binary.h | 10 +++++ 3 files changed, 94 insertions(+), 1 deletion(-) diff --git a/src/libAtomVM/context.c b/src/libAtomVM/context.c index f682e8c34c..89f8c94567 100644 --- a/src/libAtomVM/context.c +++ b/src/libAtomVM/context.c @@ -1297,7 +1297,7 @@ COLD_FUNC void context_dump(Context *ctx) fprintf(stderr, "process_count = %zu\n", process_count); fprintf(stderr, "ports_count = %zu\n", ports_count); fprintf(stderr, "atoms_count = %zu\n", atom_table_count(glb->atom_table)); - fprintf(stderr, "refc_binary_total_size = %zu\n", refc_binary_total_size(ctx)); + refc_binary_dump_info(ctx); } fprintf(stderr, "\n\n**End Of Crash Report**\n"); } diff --git a/src/libAtomVM/refc_binary.c b/src/libAtomVM/refc_binary.c index 61e4b05b8f..159d6f2ba9 100644 --- a/src/libAtomVM/refc_binary.c +++ b/src/libAtomVM/refc_binary.c @@ -141,3 +141,86 @@ size_t refc_binary_total_size(Context *ctx) synclist_unlock(&ctx->global->refc_binaries); return size; } + +COLD_FUNC void refc_binary_dump_info(Context *ctx) +{ + struct ListHead *item; + struct ListHead *refc_binaries = synclist_rdlock(&ctx->global->refc_binaries); + + // Note: This only counts non-const refc binaries (ones that allocate memory). + // Const binaries (created by term_from_const_binary) point to existing data + // and are never added to the global refc_binaries list, so they don't appear here. 
+ + // First pass: count and calculate total size + size_t count = 0; + size_t total_size = 0; + LIST_FOR_EACH (item, refc_binaries) { + struct RefcBinary *refc = GET_LIST_ENTRY(item, struct RefcBinary, head); + count++; + total_size += refc->size; + } + + fprintf(stderr, "refc_binary_count = %zu\n", count); + fprintf(stderr, "refc_binary_total_size = %zu\n", total_size); + + if (count == 0) { + synclist_unlock(&ctx->global->refc_binaries); + return; + } + +// Find the TOP_N largest binaries (top[] is kept sorted by descending size) +#define TOP_N 5 + struct RefcBinary *top[TOP_N] = { NULL }; + size_t top_indices[TOP_N] = { 0 }; + + size_t index = 0; + LIST_FOR_EACH (item, refc_binaries) { + struct RefcBinary *refc = GET_LIST_ENTRY(item, struct RefcBinary, head); + + // Insert at the first slot that is empty or holds a smaller binary + for (size_t i = 0; i < TOP_N; i++) { + if (top[i] == NULL || refc->size > top[i]->size) { + // Shift smaller entries down by one; the last one falls off + for (size_t j = TOP_N - 1; j > i; j--) { + top[j] = top[j - 1]; + top_indices[j] = top_indices[j - 1]; + } + top[i] = refc; + top_indices[i] = index; + break; + } + } + index++; + } + + // Display top binaries; sizes are printed as size_t (%zu) and the share is 0.0 when total_size is 0 + fprintf(stderr, "\nTop %d largest refc binaries:\n", TOP_N); + for (size_t i = 0; i < TOP_N && top[i] != NULL; i++) { + struct RefcBinary *refc = top[i]; + fprintf(stderr, " [%zu] size=%zu bytes (%.1f%%), refcount=%d", + top_indices[i], + (size_t) refc->size, + total_size > 0 ? (double) refc->size * 100.0 / (double) total_size : 0.0, + (int) refc->ref_count); + + if (refc->resource_type) { + fprintf(stderr, " [resource]"); + } + + // Print first 32 bytes as hex + fprintf(stderr, "\n data: "); + size_t print_size = refc->size < 32 ?
refc->size : 32; + for (size_t j = 0; j < print_size; j++) { + fprintf(stderr, "%02x", refc->data[j]); + if (j % 4 == 3 && j < print_size - 1) { + fprintf(stderr, " "); + } + } + if (refc->size > 32) { + fprintf(stderr, "..."); + } + fprintf(stderr, "\n"); + } + + synclist_unlock(&ctx->global->refc_binaries); +} diff --git a/src/libAtomVM/refc_binary.h b/src/libAtomVM/refc_binary.h index 3fc1784bd8..7ff38f545e 100644 --- a/src/libAtomVM/refc_binary.h +++ b/src/libAtomVM/refc_binary.h @@ -142,6 +142,16 @@ term refc_binary_create_binary_info(Context *ctx); */ size_t refc_binary_total_size(Context *ctx); +/** + * @brief Dump detailed information about reference counted binaries + * + * @details This function prints diagnostic information including the count, + * total size, and details about the top 5 largest binaries including + * their first bytes. Used for debugging memory issues. + * @param ctx the context + */ +COLD_FUNC void refc_binary_dump_info(Context *ctx); + #ifdef __cplusplus } #endif