diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml index fcc45fd033..5a983dc3e2 100644 --- a/.github/workflows/build-and-test.yaml +++ b/.github/workflows/build-and-test.yaml @@ -321,7 +321,7 @@ jobs: cmake_opts_other: "-DAVM_DISABLE_JIT=OFF" jit_target_arch: "aarch64" - # armhf build + # armhf builds - os: "ubuntu-24.04" cc: "arm-linux-gnueabihf-gcc" cxx: "arm-linux-gnueabihf-g++" @@ -336,6 +336,21 @@ jobs: arch: "armhf" library-arch: arm-linux-gnueabihf + - os: "ubuntu-24.04" + cc: "arm-linux-gnueabihf-gcc" + cxx: "arm-linux-gnueabihf-g++" + # -D_FILE_OFFSET_BITS=64 is required for making atomvm:posix_readdir/1 test work + # otherwise readdir will fail due to 64 bits inode numbers with 32 bit ino_t + cflags: "-mcpu=cortex-a7 -mfloat-abi=hard -O2 -mthumb -mthumb-interwork -D_FILE_OFFSET_BITS=64" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: "-DAVM_DISABLE_JIT=OFF -DAVM_JIT_TARGET_ARCH=armv6m -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/armhf_toolchain.cmake" + compiler_pkgs: "crossbuild-essential-armhf libc6-dbg:armhf zlib1g-dev:armhf libmbedtls-dev:armhf qemu-user qemu-user-binfmt binfmt-support" + arch: "armhf" + library-arch: arm-linux-gnueabihf + jit_target_arch: "armv6m" + # s390x build - os: "ubuntu-24.04" cc: "s390x-linux-gnu-gcc" @@ -351,6 +366,19 @@ jobs: arch: "s390x" library-arch: s390x-linux-gnu + # riscv32-ilp32 build + - os: "ubuntu-24.04" + cc: "riscv32-unknown-linux-gnu-gcc" + cxx: "riscv32-unknown-linux-gnu-g++" + cflags: "-O2" + otp: "28" + elixir_version: "1.17" + rebar3_version: "3.24.0" + cmake_opts_other: "-DAVM_WARNINGS_ARE_ERRORS=ON -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake" + compiler_pkgs: "qemu-user qemu-user-binfmt binfmt-support" + arch: "riscv32" + library-arch: riscv32-linux-gnu-ilp32 + env: ImageOS: ${{ matrix.container == 'ubuntu:20.04' && 'ubuntu20' || matrix.os == 'ubuntu-20.04' && 'ubuntu20' || matrix.os == 'ubuntu-22.04' && 
'ubuntu22' || matrix.os == 'ubuntu-24.04' && 'ubuntu24' || 'ubuntu24' }} CC: ${{ matrix.cc }} @@ -371,7 +399,7 @@ jobs: run: sudo dpkg --add-architecture i386 - name: "Setup cross compilation architecture" - if: matrix.library-arch != '' + if: matrix.library-arch != '' && matrix.library-arch != 'riscv32-linux-gnu-ilp32' run: | sudo dpkg --add-architecture ${{ matrix.arch }} cat > ${RUNNER_TEMP}/cross-compile-sources.list <> $GITHUB_PATH + + # Install the libs + sudo dpkg -i libc6-ilp32_2.39-0ubuntu1_riscv32.deb + sudo dpkg -i libc6-dev-ilp32_2.39-0ubuntu1_riscv32.deb + sudo dpkg -i libc6-dbg-ilp32_2.39-0ubuntu1_riscv32.deb + + sudo dpkg -i zlib1g-ilp32_1.3.1-0ubuntu1_riscv32.deb + sudo dpkg -i zlib1g-dev-ilp32_1.3.1-0ubuntu1_riscv32.deb + + sudo dpkg -i libmbedcrypto7-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedtls-dev-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedtls14-ilp32_2.28.8-0ubuntu1_riscv32.deb + sudo dpkg -i libmbedx509-1-ilp32_2.28.8-0ubuntu1_riscv32.deb + + sudo sed -i '/Types: deb/a Architectures: amd64' /etc/apt/sources.list.d/ubuntu.sources + + cat > ${RUNNER_TEMP}/${{ matrix.arch }}_toolchain.cmake <> $GITHUB_PATH + - name: "Git config safe.directory for codeql" run: git config --global --add safe.directory /__w/AtomVM/AtomVM @@ -74,7 +94,7 @@ jobs: set -euo pipefail mkdir build cd build - cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} + cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.platform }} ${{ matrix.jit }} ninja - name: "Perform CodeQL Analysis" @@ -97,7 +117,7 @@ jobs: mkdir build.nosmp cd build.nosmp # TODO: fix all warnings and enable -DAVM_WARNINGS_ARE_ERRORS=ON - cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} -DAVM_DISABLE_SMP=1 + cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.jit }} -DAVM_DISABLE_SMP=1 cmake --build . 
--target=rp2_tests - name: Run tests with rp2040js @@ -112,7 +132,7 @@ jobs: npx tsx run-tests.ts ../build.nosmp/tests/rp2_tests.uf2 ../build.nosmp/tests/test_erl_sources/rp2_test_modules.uf2 - name: Build atomvmlib.uf2 - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' shell: bash run: | set -euo pipefail @@ -122,7 +142,7 @@ jobs: make atomvmlib-${{ matrix.board }}.uf2 - name: Rename AtomVM and write sha256sum - if: startsWith(github.ref, 'refs/tags/') + if: startsWith(github.ref, 'refs/tags/') && matrix.platform == '' && matrix.jit == '' shell: bash run: | pushd src/platforms/rp2/build @@ -137,7 +157,7 @@ jobs: popd - name: Rename atomvmlib and write sha256sum - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' shell: bash run: | pushd build/libs @@ -148,7 +168,7 @@ jobs: - name: Release (Pico & Pico2) uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == '' with: draft: true fail_on_unmatched_files: true @@ -160,7 +180,7 @@ jobs: - name: Release (PicoW) uses: softprops/action-gh-release@v1 - if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w' + if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w' && matrix.platform == '' && matrix.jit == '' with: draft: true fail_on_unmatched_files: true diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ba351373d..875a16349b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,7 @@ option(AVM_DISABLE_SMP "Disable SMP." OFF) option(AVM_DISABLE_TASK_DRIVER "Disable task driver support." OFF) option(AVM_DISABLE_JIT "Disable just in time compilation." 
ON) option(AVM_ENABLE_PRECOMPILED "Enable execution of precompiled code, even if JIT is disabled." OFF) +option(AVM_DISABLE_JIT_DWARF "Disable DWARF debug and profiling info for JIT." ON) option(AVM_USE_32BIT_FLOAT "Use 32 bit floats." OFF) option(AVM_VERBOSE_ABORT "Print module and line number on VM abort" OFF) option(AVM_RELEASE "Build an AtomVM release" OFF) @@ -57,12 +58,14 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH) set(AVM_JIT_TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR}) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64|aarch64$") set(AVM_JIT_TARGET_ARCH "aarch64") + elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$") + set(AVM_JIT_TARGET_ARCH "armv6m") else() - message(FATAL "JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") + message(FATAL_ERROR "JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") endif() endif() -set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") +set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32;riscv32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON") if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR diff --git a/CMakeModules/BuildErlang.cmake b/CMakeModules/BuildErlang.cmake index e19fb6d622..986a2900a1 100644 --- a/CMakeModules/BuildErlang.cmake +++ b/CMakeModules/BuildErlang.cmake @@ -22,7 +22,6 @@ macro(pack_archive avm_name) set(multiValueArgs ERLC_FLAGS MODULES) cmake_parse_arguments(PACK_ARCHIVE "" "" "${multiValueArgs}" ${ARGN}) - list(JOIN PACK_ARCHIVE_ERLC_FLAGS " " PACK_ARCHIVE_ERLC_FLAGS) foreach(module_name IN LISTS ${PACK_ARCHIVE_MODULES} PACK_ARCHIVE_MODULES PACK_ARCHIVE_UNPARSED_ARGUMENTS) add_custom_command( OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam @@ -77,10 +76,13 @@ macro(pack_precompiled_archive avm_name) else() set(jit_deps "jit") endif() - foreach(jit_target_arch 
${AVM_PRECOMPILED_TARGETS}) + foreach(jit_target_arch_variant ${AVM_PRECOMPILED_TARGETS}) set(pack_precompile_archive_${avm_name}_beams "") + # Extract base architecture for module dependencies + string(REGEX REPLACE "\\+.*$" "" jit_target_arch "${jit_target_arch_variant}") set(jit_compiler_modules ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit.beam + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_precompile.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_stream_binary.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${jit_target_arch}.beam @@ -89,14 +91,14 @@ macro(pack_precompiled_archive avm_name) foreach(module_name IN LISTS ${PACK_ARCHIVE_MODULES} PACK_ARCHIVE_MODULES PACK_ARCHIVE_UNPARSED_ARGUMENTS) add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/${module_name}.beam - COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/ - && erl -pa ${CMAKE_BINARY_DIR}/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${jit_target_arch} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/ ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/${module_name}.beam + COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/ + && erl -pa ${CMAKE_BINARY_DIR}/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${jit_target_arch_variant} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/ ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam ${jit_compiler_modules} ${jit_deps} - COMMENT "Compiling ${module_name}.beam to ${jit_target_arch}" + COMMENT "Compiling ${module_name}.beam to ${jit_target_arch_variant}" VERBATIM ) - set(pack_precompile_archive_${avm_name}_beams ${pack_precompile_archive_${avm_name}_beams} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/${module_name}.beam) + 
set(pack_precompile_archive_${avm_name}_beams ${pack_precompile_archive_${avm_name}_beams} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/${module_name}.beam) endforeach() if(AVM_RELEASE) @@ -106,20 +108,20 @@ macro(pack_precompiled_archive avm_name) endif() add_custom_command( - OUTPUT ${avm_name}-${jit_target_arch}.avm + OUTPUT ${avm_name}-${jit_target_arch_variant}.avm DEPENDS ${pack_precompile_archive_${avm_name}_beams} PackBEAM - COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch}.avm ${pack_precompile_archive_${avm_name}_beams} - COMMENT "Packing archive ${avm_name}-${jit_target_arch}.avm" + COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch_variant}.avm ${pack_precompile_archive_${avm_name}_beams} + COMMENT "Packing archive ${avm_name}-${jit_target_arch_variant}.avm" VERBATIM ) add_custom_target( - ${avm_name}_${jit_target_arch} ALL - DEPENDS ${avm_name}-${jit_target_arch}.avm + ${avm_name}_${jit_target_arch_variant} ALL + DEPENDS ${avm_name}-${jit_target_arch_variant}.avm ) # Ensure source beams are built before precompilation - add_dependencies(${avm_name}_${jit_target_arch} ${avm_name}_emu) + add_dependencies(${avm_name}_${jit_target_arch_variant} ${avm_name}_emu) # Make main target depend on precompiled targets - add_dependencies(${avm_name} ${avm_name}_${jit_target_arch}) + add_dependencies(${avm_name} ${avm_name}_${jit_target_arch_variant}) endforeach() endif() endmacro() @@ -159,23 +161,23 @@ macro(pack_lib avm_name) set(target_deps ${avm_name}.avm) if(NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) - foreach(jit_target_arch ${AVM_PRECOMPILED_TARGETS}) + foreach(jit_target_arch_variant ${AVM_PRECOMPILED_TARGETS}) # Build JIT archives list for this specific target architecture - set(pack_lib_${avm_name}_jit_archives_${jit_target_arch} ${CMAKE_BINARY_DIR}/libs/jit/src/jit-${jit_target_arch}.avm) + 
set(pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant} ${CMAKE_BINARY_DIR}/libs/jit/src/jit-${jit_target_arch_variant}.avm) foreach(archive_name ${ARGN}) if(${archive_name} STREQUAL "estdlib") - set(pack_lib_${avm_name}_jit_archives_${jit_target_arch} ${pack_lib_${avm_name}_jit_archives_${jit_target_arch}} ${CMAKE_BINARY_DIR}/libs/${archive_name}/src/${archive_name}-${jit_target_arch}.avm) + set(pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant} ${pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant}} ${CMAKE_BINARY_DIR}/libs/${archive_name}/src/${archive_name}-${jit_target_arch_variant}.avm) endif() endforeach() add_custom_command( - OUTPUT ${avm_name}-${jit_target_arch}.avm + OUTPUT ${avm_name}-${jit_target_arch_variant}.avm DEPENDS ${pack_lib_${avm_name}_archive_targets} PackBEAM - COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch}.avm ${pack_lib_${avm_name}_jit_archives_${jit_target_arch}} ${pack_lib_${avm_name}_archives} - COMMENT "Packing lib ${avm_name}-${jit_target_arch}.avm" + COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch_variant}.avm ${pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant}} ${pack_lib_${avm_name}_archives} + COMMENT "Packing lib ${avm_name}-${jit_target_arch_variant}.avm" VERBATIM ) - set(target_deps ${target_deps} ${avm_name}-${jit_target_arch}.avm) + set(target_deps ${target_deps} ${avm_name}-${jit_target_arch_variant}.avm) endforeach() endif() add_custom_command( @@ -194,6 +196,24 @@ macro(pack_lib avm_name) ) set(target_deps ${target_deps} ${avm_name}-pico.uf2 ${avm_name}-pico2.uf2) + if((NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) AND ("armv6m" IN_LIST AVM_PRECOMPILED_TARGETS OR "armv6m+float32" IN_LIST AVM_PRECOMPILED_TARGETS)) + add_custom_command( + OUTPUT ${avm_name}-armv6m-pico.uf2 + DEPENDS ${avm_name}-armv6m.avm UF2Tool + COMMAND ${CMAKE_BINARY_DIR}/tools/uf2tool/uf2tool create -o 
${avm_name}-armv6m-pico.uf2 -s 0x10100000 ${avm_name}-armv6m.avm + COMMENT "Creating UF2 file ${avm_name}-armv6m.uf2" + VERBATIM + ) + add_custom_command( + OUTPUT ${avm_name}-armv6m-pico2.uf2 + DEPENDS ${avm_name}-armv6m.avm UF2Tool + COMMAND ${CMAKE_BINARY_DIR}/tools/uf2tool/uf2tool create -o ${avm_name}-armv6m-pico2.uf2 -f data -s 0x10100000 ${avm_name}-armv6m.avm + COMMENT "Creating UF2 file ${avm_name}-armv6m.uf2" + VERBATIM + ) + set(target_deps ${target_deps} ${avm_name}-armv6m-pico.uf2 ${avm_name}-armv6m-pico2.uf2) + endif() + add_custom_target( ${avm_name} ALL DEPENDS ${target_deps} diff --git a/libs/estdlib/src/code_server.erl b/libs/estdlib/src/code_server.erl index 427d5fa529..69aa359327 100644 --- a/libs/estdlib/src/code_server.erl +++ b/libs/estdlib/src/code_server.erl @@ -174,11 +174,12 @@ load(Module) -> BackendModule, BackendState0 ), - Stream1 = BackendModule:stream(BackendState1), + BackendState2 = BackendModule:flush(BackendState1), + Stream1 = BackendModule:stream(BackendState2), code_server:set_native_code(Module, LabelsCount, Stream1), End = erlang:system_time(millisecond), io:format("~B ms (bytecode: ~B bytes, native code: ~B bytes)\n", [ - End - Start, byte_size(Code), BackendModule:offset(BackendState1) + End - Start, byte_size(Code), BackendModule:offset(BackendState2) ]) catch T:V:S -> diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl index 427fa40aec..81ff1c42c2 100644 --- a/libs/jit/include/jit.hrl +++ b/libs/jit/include/jit.hrl @@ -22,7 +22,10 @@ -define(JIT_ARCH_X86_64, 1). -define(JIT_ARCH_AARCH64, 2). +-define(JIT_ARCH_ARMV6M, 3). +-define(JIT_ARCH_RISCV32, 4). -define(JIT_VARIANT_PIC, 1). +-define(JIT_VARIANT_FLOAT32, 2). -define(MAX_REG, 16). 
diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt index a5810feff9..df155f9d0d 100644 --- a/libs/jit/src/CMakeLists.txt +++ b/libs/jit/src/CMakeLists.txt @@ -24,16 +24,27 @@ include(BuildErlang) set(ERLANG_MODULES jit + jit_dwarf jit_precompile jit_stream_binary jit_stream_mmap jit_aarch64 jit_aarch64_asm + jit_armv6m + jit_armv6m_asm + jit_riscv32 + jit_riscv32_asm jit_x86_64 jit_x86_64_asm ) -pack_precompiled_archive(jit ${ERLANG_MODULES}) +if (NOT AVM_DISABLE_JIT_DWARF) + set(erlc_flags -DJIT_DWARF) +else() + set(erlc_flags) +endif() + +pack_precompiled_archive(jit ERLC_FLAGS ${erlc_flags} MODULES ${ERLANG_MODULES}) include(../../../version.cmake) diff --git a/libs/jit/src/compact_term.hrl b/libs/jit/src/compact_term.hrl new file mode 100644 index 0000000000..3739b4404d --- /dev/null +++ b/libs/jit/src/compact_term.hrl @@ -0,0 +1,52 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + + +-define(COMPACT_LITERAL, 0). +-define(COMPACT_INTEGER, 1). +-define(COMPACT_ATOM, 2). +-define(COMPACT_XREG, 3). +-define(COMPACT_YREG, 4). +-define(COMPACT_LABEL, 5). +-define(COMPACT_EXTENDED, 7). +-define(COMPACT_LARGE_LITERAL, 8). +-define(COMPACT_LARGE_INTEGER, 9). +-define(COMPACT_LARGE_ATOM, 10). +-define(COMPACT_LARGE_XREG, 11). +-define(COMPACT_LARGE_YREG, 12). + +% OTP-20+ format +-define(COMPACT_EXTENDED_LIST, 16#17). 
+-define(COMPACT_EXTENDED_FP_REGISTER, 16#27). +-define(COMPACT_EXTENDED_ALLOCATION_LIST, 16#37). +-define(COMPACT_EXTENDED_LITERAL, 16#47). +% https://github.com/erlang/otp/blob/master/lib/compiler/src/beam_asm.erl#L433 +-define(COMPACT_EXTENDED_TYPED_REGISTER, 16#57). + +-define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_WORDS, 0). +-define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FLOATS, 1). +-define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FUNS, 2). + +-define(COMPACT_LARGE_IMM_MASK, 16#18). +-define(COMPACT_11BITS_VALUE, 16#8). +-define(COMPACT_NBITS_VALUE, 16#18). + +-define(COMPACT_LARGE_INTEGER_11BITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_11BITS_VALUE)). +-define(COMPACT_LARGE_INTEGER_NBITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_NBITS_VALUE)). diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl index d30f52e7ed..83cb0c0d8d 100644 --- a/libs/jit/src/jit.erl +++ b/libs/jit/src/jit.erl @@ -24,13 +24,15 @@ stream/1, backend/1, beam_chunk_header/3, - compile/6 + compile/6, + decode_value64/1 ]). % NIFs -export([ stream_module/0, - backend_module/0 + backend_module/0, + variant/0 ]). -export_type([ @@ -45,38 +47,7 @@ -include("opcodes.hrl"). -include("primitives.hrl"). -include("term.hrl"). - --define(COMPACT_LITERAL, 0). --define(COMPACT_INTEGER, 1). --define(COMPACT_ATOM, 2). --define(COMPACT_XREG, 3). --define(COMPACT_YREG, 4). --define(COMPACT_LABEL, 5). --define(COMPACT_EXTENDED, 7). --define(COMPACT_LARGE_LITERAL, 8). --define(COMPACT_LARGE_INTEGER, 9). --define(COMPACT_LARGE_ATOM, 10). --define(COMPACT_LARGE_XREG, 11). --define(COMPACT_LARGE_YREG, 12). - -% OTP-20+ format --define(COMPACT_EXTENDED_LIST, 16#17). --define(COMPACT_EXTENDED_FP_REGISTER, 16#27). --define(COMPACT_EXTENDED_ALLOCATION_LIST, 16#37). --define(COMPACT_EXTENDED_LITERAL, 16#47). -% https://github.com/erlang/otp/blob/master/lib/compiler/src/beam_asm.erl#L433 --define(COMPACT_EXTENDED_TYPED_REGISTER, 16#57). - --define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_WORDS, 0). 
--define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FLOATS, 1). --define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FUNS, 2). - --define(COMPACT_LARGE_IMM_MASK, 16#18). --define(COMPACT_11BITS_VALUE, 16#8). --define(COMPACT_NBITS_VALUE, 16#18). - --define(COMPACT_LARGE_INTEGER_11BITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_11BITS_VALUE)). --define(COMPACT_LARGE_INTEGER_NBITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_NBITS_VALUE)). +-include("compact_term.hrl"). -define(BOXED_FUN_SIZE, 3). -define(FLOAT_SIZE_64, 2). @@ -99,7 +70,8 @@ labels_count :: pos_integer(), atom_resolver :: fun((integer()) -> atom()), literal_resolver :: fun((integer()) -> any()), - type_resolver :: fun((integer()) -> any()) + type_resolver :: fun((integer()) -> any()), + tail_cache :: [{tuple(), non_neg_integer()}] }). -type stream() :: any(). @@ -112,6 +84,20 @@ -define(ASSERT_ALL_NATIVE_FREE(St), ok). -define(ASSERT(Expr), ok). +-ifdef(JIT_DWARF). +-define(DWARF_OPCODE(MMod, MSt, Opcode), MMod:dwarf_opcode(MSt, Opcode)). +-define(DWARF_LABEL(MMod, MSt, Label), MMod:dwarf_label(MSt, Label)). +-define(DWARF_FUNCTION(MMod, MSt, FunctionName, Arity), + MMod:dwarf_function(MSt, (State0#state.atom_resolver)(FunctionName), Arity) +). +-define(DWARF_LINE(MMod, MSt, Line), MMod:dwarf_line(MSt, Line)). +-else. +-define(DWARF_OPCODE(_MMod, MSt, _Opcode), MSt). +-define(DWARF_LABEL(MMod, MSt, _Label), MSt). +-define(DWARF_FUNCTION(_MMod, MSt, _FunctionName, _Arity), MSt). +-define(DWARF_LINE(_MMod, MSt, _Line), MSt). +-endif. 
+ %%----------------------------------------------------------------------------- %% @param LabelsCount number of labels %% @param Arch code for the architecture @@ -141,7 +127,8 @@ compile( labels_count = LabelsCount, atom_resolver = AtomResolver, literal_resolver = LiteralResolver, - type_resolver = TypeResolver + type_resolver = TypeResolver, + tail_cache = [] }, {State1, MSt2} = first_pass(Opcodes, MMod, MSt1, State0), MSt3 = second_pass(MMod, MSt2, State1), @@ -159,32 +146,46 @@ compile(CodeChunk, _AtomResolver, _LiteralResolver, _TypeResolver, _MMod, _MSt) error(badarg, [CodeChunk]). % 1 -first_pass( - <>, MMod, MSt0, State0 -) -> +first_pass(<>, MMod, MSt, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_literal(Rest0), ?TRACE("OP_LABEL ~p\n", [Label]), + MSt0 = ?DWARF_LABEL(MMod, MSt, Label), MSt1 = MMod:add_label(MSt0, Label), ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest1, MMod, MSt1, State0); % 2 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, #state{tail_cache = TC} = State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"func_info/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_ModuleAtom, Rest1} = decode_atom(Rest0), {_FunctionName, Rest2} = decode_atom(Rest1), {_Arity, Rest3} = decode_literal(Rest2), ?TRACE("OP_FUNC_INFO ~p, ~p, ~p\n", [_ModuleAtom, _FunctionName, _Arity]), - % Implement function clause at the previous label. (TODO: optimize it out to save space) - MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RAISE_ERROR, [ - ctx, jit_state, offset, ?FUNCTION_CLAUSE_ATOM - ]), - ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest3, MMod, MSt1, State0); + % Implement function clause at the previous label. 
+ Offset = MMod:offset(MSt0), + {MSt1, OffsetReg} = MMod:move_to_native_register(MSt0, Offset), + TailCacheKey = {call_primitive_last, ?PRIM_RAISE_ERROR, [OffsetReg, ?FUNCTION_CLAUSE_ATOM]}, + State1 = + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + MSt3 = MMod:call_primitive_last(MSt1, ?PRIM_RAISE_ERROR, [ + ctx, jit_state, {free, OffsetReg}, ?FUNCTION_CLAUSE_ATOM + ]), + State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, CacheOffset} -> + MSt2 = MMod:jump_to_offset(MSt1, CacheOffset), + MSt3 = MMod:free_native_registers(MSt2, [OffsetReg]), + State0 + end, + MSt4 = ?DWARF_FUNCTION(MMod, MSt3, _FunctionName, _Arity), + ?ASSERT_ALL_NATIVE_FREE(MSt4), + first_pass(Rest3, MMod, MSt4, State1); % 3 first_pass( - <>, MMod, MSt0, #state{labels_count = LabelsCount} = State + <>, MMod, MSt, #state{labels_count = LabelsCount} = State ) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"int_call_end/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_INT_CALL_END\n", []), MSt1 = MMod:add_label(MSt0, LabelsCount), @@ -193,7 +194,8 @@ first_pass( ]), {State, MSt2}; % 4 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), @@ -202,28 +204,61 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest2, MMod, MSt1, State0); % 5 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, #state{tail_cache = TC} = State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_last/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), {NWords, Rest3} = decode_literal(Rest2), ?TRACE("OP_CALL_LAST ~p, ~p, ~p\n", [_Arity, Label, NWords]), - MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}), - MSt2 = MMod:increment_sp(MSt1, NWords + 1), - MSt3 = MMod:call_only_or_schedule_next(MSt2, Label), + TailCacheKey0 
= {op_call_last, NWords, Label}, + case lists:keyfind(TailCacheKey0, 1, TC) of + false -> + Offset0 = MMod:offset(MSt0), + MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}), + MSt2 = MMod:increment_sp(MSt1, NWords + 1), + TailCacheKey1 = {op_call_only, Label}, + case lists:keyfind(TailCacheKey1, 1, TC) of + false -> + Offset1 = MMod:offset(MSt2), + MSt3 = MMod:call_only_or_schedule_next(MSt2, Label), + State1 = State0#state{ + tail_cache = [{TailCacheKey1, Offset1}, {TailCacheKey0, Offset0} | TC] + }; + {TailCacheKey1, Offset1} -> + MSt3 = MMod:jump_to_offset(MSt2, Offset1), + State1 = State0#state{ + tail_cache = [{TailCacheKey0, Offset0} | TC] + } + end; + {TailCacheKey0, Offset0} -> + MSt3 = MMod:jump_to_offset(MSt0, Offset0), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt3), - first_pass(Rest3, MMod, MSt3, State0); + first_pass(Rest3, MMod, MSt3, State1); % 6 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, #state{tail_cache = TC} = State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_only/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Arity, Rest1} = decode_literal(Rest0), {Label, Rest2} = decode_label(Rest1), ?TRACE("OP_CALL_ONLY ~p, ~p\n", [_Arity, Label]), - MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + TailCacheKey = {op_call_only, Label}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt0), + MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + State1 = State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, Offset} -> + MSt1 = MMod:jump_to_offset(MSt0, Offset), + State1 = State0 + end, ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest2, MMod, MSt1, State0); + first_pass(Rest2, MMod, MSt1, State1); % 7 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_ext/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Arity, Rest1} = decode_literal(Rest0), {Index, Rest2} = decode_literal(Rest1), @@ -235,7 +270,8 @@ first_pass(<>, MMod, 
MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 8 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_ext_last/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Arity, Rest1} = decode_literal(Rest0), {Index, Rest2} = decode_literal(Rest1), @@ -248,7 +284,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest3, MMod, MSt2, State0); % 9 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bif0/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Bif, Rest1} = decode_literal(Rest0), {MSt1, FuncPtr} = MMod:call_primitive(MSt0, ?PRIM_GET_IMPORTED_BIF, [ @@ -264,7 +301,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest2, MMod, MSt5, State0); % 10 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bif1/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Bif, Rest2} = decode_literal(Rest1), @@ -281,7 +319,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 11 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bif2/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Bif, Rest2} = decode_literal(Rest1), @@ -299,7 +338,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest5, MMod, MSt6, State0); % 12 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"allocate/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {StackNeed, Rest1} = decode_literal(Rest0), {Live, Rest2} = decode_literal(Rest1), @@ -311,7 +351,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 13 -first_pass(<>, 
MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"allocate_heap/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {StackNeed, Rest1} = decode_literal(Rest0), {HeapNeed, Rest2} = decode_allocator_list(MMod, Rest1), @@ -324,7 +365,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest3, MMod, MSt2, State0); % 16 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"test_heap/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {HeapNeed, Rest1} = decode_allocator_list(MMod, Rest0), {Live, Rest2} = decode_literal(Rest1), @@ -336,7 +378,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 18 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"deallocate/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {NWords, Rest1} = decode_literal(Rest0), ?TRACE("OP_DEALLOCATE ~p\n", [NWords]), @@ -347,16 +390,45 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 19 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, #state{tail_cache = TC} = State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"return/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_RETURN\n", []), - MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RETURN, [ - ctx, jit_state - ]), - ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest, MMod, MSt1, State0); + % Optimized return: check if returning within same module + {MSt1, CpReg0} = MMod:move_to_native_register(MSt0, cp), + {MSt2, ModuleIndexReg} = MMod:get_module_index(MSt1), + % Extract module index from cp (upper 8 bits: cp >> 24) + {MSt3, CpReg1} = MMod:shift_right(MSt2, CpReg0, 24), + % Compare extracted module index with current module index + MSt4 = MMod:if_block( + MSt3, + {{free, CpReg1}, '==', {free, ModuleIndexReg}}, + % Same module: fast intra-module return + 
fun(BSt0) -> + % Mask to get lower 24 bits and shift right by 2 for offset + BSt1 = MMod:and_(BSt0, CpReg0, 16#FFFFFF), + {BSt3, CPReg1} = MMod:shift_right(BSt1, {free, CpReg0}, 2), + % Jump to continuation (this is a tail call) + MMod:jump_to_continuation(BSt3, {free, CPReg1}) + end + ), + MSt5 = MMod:free_native_registers(MSt4, [CpReg0]), + % Different module: use existing slow path + TailCacheKey = {call_primitive_last, ?PRIM_RETURN}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt5), + MSt6 = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]), + State1 = State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}; + {TailCacheKey, Offset} -> + MSt6 = MMod:jump_to_offset(MSt5, Offset), + State1 = State0 + end, + ?ASSERT_ALL_NATIVE_FREE(MSt6), + first_pass(Rest, MMod, MSt6, State1); % 20 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"send/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_SEND\n", []), {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_SEND, [ @@ -366,7 +438,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest, MMod, MSt2, State0); % 21 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"remove_message/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_REMOVE_MESSAGE\n", []), {MSt1, Reg1} = MMod:call_primitive(MSt0, ?PRIM_CANCEL_TIMEOUT, [ @@ -384,7 +457,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest, MMod, MSt6, State0); % 22 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"timeout/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_TIMEOUT\n", []), {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_TIMEOUT, [ @@ -394,7 +468,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest0, MMod, MSt2, State0); % 
23 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"loop_rec/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ @@ -410,7 +485,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest2, MMod, MSt7, State0); % 24 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"loop_rec_end/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), ?TRACE("OP_LOOP_REC_END ~p\n", [Label]), @@ -426,7 +502,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest1, MMod, MSt5, State0); % 25 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"wait/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), ?TRACE("OP_WAIT ~p\n", [Label]), @@ -435,7 +512,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 26 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"wait_timeout/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, OffsetRef0} = MMod:set_continuation_to_offset(MSt0), @@ -461,7 +539,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt9), first_pass(Rest2, MMod, MSt9, State0); % 39 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_lt/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -477,7 +556,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 40 -first_pass(<>, MMod, MSt0, State0) -> 
+first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_ge/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -493,7 +573,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 41 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_eq/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -512,7 +593,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 42 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_ne/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -526,7 +608,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 43 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_eq_exact/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -555,7 +638,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 44 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_ne_exact/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -580,7 +664,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest3, MMod, MSt5, State0); % 45 -first_pass(<>, MMod, MSt0, 
State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_integer/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -591,7 +676,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 46 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_float/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -600,7 +686,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 47 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_number/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -633,7 +720,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest2, MMod, MSt4, State0); % 48 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_atom/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -645,7 +733,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 49 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_pid/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -656,7 +745,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 50 -first_pass(<>, 
MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_reference/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -678,7 +768,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest2, MMod, MSt8, State0); % 51 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_port/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -689,7 +780,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 52 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_nil/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -700,7 +792,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest2, MMod, MSt4, State0); % 53 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_binary/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -710,7 +803,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 55 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_list/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -728,7 +822,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 56 
-first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_nonempty_list/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -740,7 +835,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 57 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_tuple/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -749,7 +845,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 58 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"test_arity/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -758,13 +855,13 @@ first_pass(<>, MMod, MSt0, State0) -> {MSt2, Reg} = MMod:move_to_native_register(MSt1, Arg1), MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK), MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg), - MSt5 = MMod:shift_right(MSt4, Reg, 6), - MSt6 = cond_jump_to_label({Reg, '!=', Arity}, Label, MMod, MSt5), - MSt7 = MMod:free_native_registers(MSt6, [Reg]), - ?ASSERT_ALL_NATIVE_FREE(MSt7), - first_pass(Rest3, MMod, MSt7, State0); + {MSt5, ArityReg} = MMod:shift_right(MSt4, {free, Reg}, 6), + MSt6 = cond_jump_to_label({{free, ArityReg}, '!=', Arity}, Label, MMod, MSt5), + ?ASSERT_ALL_NATIVE_FREE(MSt6), + first_pass(Rest3, MMod, MSt6, State0); % 59 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"select_val/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), 
{DefaultLabel, Rest2} = decode_label(Rest1), @@ -794,7 +891,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest4, MMod, MSt3, State0); % 60 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"select_tuple_arity/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {DefaultLabel, Rest2} = decode_label(Rest1), @@ -818,16 +916,27 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 61 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, #state{tail_cache = TC} = State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"jump/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), ?TRACE("OP_JUMP ~p\n", [Label]), - MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), - ?ASSERT_ALL_NATIVE_FREE(MSt1), - first_pass(Rest1, MMod, MSt1, State0); + TailCacheKey = {op_call_only, Label}, + case lists:keyfind(TailCacheKey, 1, TC) of + false -> + Offset = MMod:offset(MSt0), + MSt1 = MMod:call_only_or_schedule_next(MSt0, Label), + ?ASSERT_ALL_NATIVE_FREE(MSt1), + first_pass(Rest1, MMod, MSt1, State0#state{tail_cache = [{TailCacheKey, Offset} | TC]}); + {TailCacheKey, Offset} -> + MSt1 = MMod:jump_to_offset(MSt0, Offset), + ?ASSERT_ALL_NATIVE_FREE(MSt1), + first_pass(Rest1, MMod, MSt1, State0) + end; % 62 % Same implementation as OP_TRY, to confirm. 
-first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"catch/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), {Label, Rest2} = decode_label(Rest1), @@ -836,7 +945,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 63 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"catch_end/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_CATCH_END ~p\n", [Dest]), @@ -847,7 +957,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest1, MMod, MSt5, State0); % 64 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"move/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Source, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -857,7 +968,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest2, MMod, MSt4, State0); % 65 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_list/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, List, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, HeadDest, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -873,7 +985,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt10), first_pass(Rest3, MMod, MSt10, State0); % 66 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_tuple_element/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Source, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {Element, Rest2} = decode_literal(Rest1), @@ -886,7 +999,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, 
MMod, MSt6, State0); % 67 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"set_tuple_element/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, NewElement, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Tuple, Rest2} = decode_compact_term(Rest1, MMod, MSt1, State0), @@ -899,7 +1013,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, MMod, MSt6, State0); % 69 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_list/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Head, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Tail, Rest2} = decode_compact_term(Rest1, MMod, MSt1, State0), @@ -914,7 +1029,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest3, MMod, MSt7, State0); % 72 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"badmatch/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), ?TRACE("OP_BADMATCH ~p\n", [Arg1]), @@ -924,7 +1040,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 73 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"if_end/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_IF_END\n", []), MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RAISE_ERROR, [ @@ -933,7 +1050,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest0, MMod, MSt1, State0); % 74 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"case_end/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), ?TRACE("OP_CASE_END ~p\n", [Arg1]), @@ -943,7 +1061,8 @@ first_pass(<>, 
MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 75 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_fun/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {ArgsCount, Rest1} = decode_literal(Rest0), ?TRACE("OP_CALL_FUN ~p\n", [ArgsCount]), @@ -956,7 +1075,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest1, MMod, MSt4, State0); % 77 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_function/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -965,7 +1085,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 78 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_ext_only/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Arity, Rest1} = decode_literal(Rest0), {Index, Rest2} = decode_literal(Rest1), @@ -975,7 +1096,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 96 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fmove/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FPRegIndex, Rest1} = decode_literal(Rest0), {MSt1, Dest, Rest2} = decode_dest(Rest1, MMod, MSt0), @@ -985,7 +1107,8 @@ first_pass(<>, MMod, MSt MSt4 = MMod:free_native_registers(MSt3, [ResultReg, Dest]), ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest2, MMod, MSt4, State0); -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fmove/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {FPReg, Rest2} = decode_fp_register(Rest1), @@ -998,7 +1121,8 @@ 
first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest2, MMod, MSt6, State0); % 97 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fconv/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Reg} = MMod:move_to_native_register(MSt1, SrcValue), @@ -1019,23 +1143,28 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest2, MMod, MSt8, State0); % 98 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fadd/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), first_pass_float3(?PRIM_FADD, Rest0, MMod, MSt0, State0); % 99 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fsub/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), first_pass_float3(?PRIM_FSUB, Rest0, MMod, MSt0, State0); % 100 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fmul/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), first_pass_float3(?PRIM_FMUL, Rest0, MMod, MSt0, State0); % 101 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fdiv/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), first_pass_float3(?PRIM_FDIV, Rest0, MMod, MSt0, State0); % 102 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fnegate/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Label, Rest1} = decode_label(Rest0), {{fp_reg, FPRegIndex1}, Rest2} = decode_fp_register(Rest1), @@ -1048,7 +1177,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest3, MMod, MSt2, State0); % 104 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = 
decode_dest(Rest0, MMod, MSt0), {Label, Rest2} = decode_label(Rest1), @@ -1057,7 +1187,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 105 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try_end/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_TRY_END ~p\n", [Dest]), @@ -1066,7 +1197,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest1, MMod, MSt3, State0); % 106 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try_case/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_TRY_CASE ~p\n", [Dest]), @@ -1075,7 +1207,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest1, MMod, MSt3, State0); % 107 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try_case_end/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), ?TRACE("OP_TRY_CASE_END ~p\n", [Arg1]), @@ -1085,7 +1218,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 108 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"raise/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Stacktrace, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, ExcValue, Rest2} = decode_compact_term(Rest1, MMod, MSt1, State0), @@ -1096,7 +1230,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 112 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"apply/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Arity, 
Rest1} = decode_literal(Rest0), {MSt1, Module} = read_any_xreg(Arity, MMod, MSt0), @@ -1111,7 +1246,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest1, MMod, MSt6, State0); % 113 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"apply_last/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Arity, Rest1} = decode_literal(Rest0), {NWords, Rest2} = decode_literal(Rest1), @@ -1129,7 +1265,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest2, MMod, MSt8, State0); % 114 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_boolean/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1142,7 +1279,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt4), first_pass(Rest2, MMod, MSt4, State0); % 115 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_function2/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1156,23 +1294,25 @@ first_pass(<>, MMod, MSt0, State0) -> MSt6, {IndexOrModuleReg, '&', ?TERM_IMMED2_TAG_MASK, '!=', ?TERM_IMMED2_ATOM}, fun(BSt0) -> - BSt1 = MMod:shift_right(BSt0, IndexOrModuleReg, 4), + {BSt1, IndexReg} = MMod:shift_right(BSt0, {free, IndexOrModuleReg}, 4), {BSt2, FunArity} = MMod:call_primitive(BSt1, ?PRIM_MODULE_GET_FUN_ARITY, [ - ModuleReg, IndexOrModuleReg + ModuleReg, {free, IndexReg} ]), cond_jump_to_label({'(int)', {free, FunArity}, '!=', Arity}, Label, MMod, BSt2) end, fun(BSt0) -> - {BSt1, FunArity} = MMod:get_array_element(BSt0, FuncPtr, 3), - BSt2 = MMod:shift_right(BSt1, FunArity, 4), - cond_jump_to_label({'(int)', {free, FunArity}, '!=', Arity}, Label, MMod, BSt2) + BSt1 = 
MMod:free_native_registers(BSt0, [IndexOrModuleReg]), + {BSt2, FunArity} = MMod:get_array_element(BSt1, FuncPtr, 3), + {BSt3, FunArityReg} = MMod:shift_right(BSt2, {free, FunArity}, 4), + cond_jump_to_label({'(int)', {free, FunArityReg}, '!=', Arity}, Label, MMod, BSt3) end ), - MSt8 = MMod:free_native_registers(MSt7, [FuncPtr, IndexOrModuleReg, ModuleReg, Arity]), + MSt8 = MMod:free_native_registers(MSt7, [FuncPtr, ModuleReg, Arity]), ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest3, MMod, MSt8, State0); % 117 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_integer2/7">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1210,7 +1350,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt18), first_pass(Rest7, MMod, MSt18, State0); % 118 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_float2/7">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1247,7 +1388,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt17), first_pass(Rest7, MMod, MSt17, State0); % 119 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_binary2/7">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1257,7 +1399,7 @@ first_pass(<>, MMod, MSt0, State0) -> {FlagsValue, Rest6} = decode_literal(Rest5), {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, Src), {MSt4, BSBinaryReg0} = MMod:get_array_element(MSt3, MatchStateRegPtr, 1), - {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2), + {MSt5, BSOffsetReg0} = 
MMod:get_array_element(MSt4, MatchStateRegPtr, 2), MSt6 = if Unit =/= 8 -> @@ -1271,22 +1413,22 @@ first_pass(<>, MMod, MSt0, State0) -> true -> MSt5 end, - MSt7 = MMod:if_block(MSt6, {BSOffsetReg, '&', 16#7, '!=', 0}, fun(BlockSt) -> + MSt7 = MMod:if_block(MSt6, {BSOffsetReg0, '&', 16#7, '!=', 0}, fun(BlockSt) -> MMod:call_primitive_last(BlockSt, ?PRIM_RAISE_ERROR, [ctx, jit_state, offset, ?BADARG_ATOM]) end), - MSt8 = MMod:shift_right(MSt7, BSOffsetReg, 3), + {MSt8, BSOffsetReg1} = MMod:shift_right(MSt7, {free, BSOffsetReg0}, 3), MSt9 = MMod:and_(MSt8, BSBinaryReg0, ?TERM_PRIMARY_CLEAR_MASK), {MSt10, SizeReg} = MMod:get_array_element(MSt9, {free, BSBinaryReg0}, 1), {MSt13, SizeValue} = if Size =:= ?ALL_ATOM -> - MSt11 = MMod:sub(MSt10, SizeReg, BSOffsetReg), + MSt11 = MMod:sub(MSt10, SizeReg, BSOffsetReg1), {MSt11, SizeReg}; is_integer(Size) -> % SizeReg is binary size % SizeVal is a constant MSt11 = MMod:sub(MSt10, SizeReg, Size bsl 4), - MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg}, Fail, MMod, MSt11), + MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg1}, Fail, MMod, MSt11), {MSt12, Size bsl 4}; true -> {MSt11, SizeValReg} = MMod:move_to_native_register(MSt10, Size), @@ -1294,20 +1436,20 @@ first_pass(<>, MMod, MSt0, State0) -> MSt11, {SizeValReg, '==', ?ALL_ATOM}, fun(BSt0) -> - BSt1 = MMod:sub(BSt0, SizeReg, BSOffsetReg), + BSt1 = MMod:sub(BSt0, SizeReg, BSOffsetReg1), MMod:free_native_registers(BSt1, [SizeValReg]) end, fun(BSt0) -> {BSt1, SizeValReg} = term_to_int(SizeValReg, 0, MMod, BSt0), BSt2 = MMod:sub(BSt1, SizeReg, SizeValReg), - BSt3 = cond_jump_to_label({SizeReg, '<', BSOffsetReg}, Fail, MMod, BSt2), + BSt3 = cond_jump_to_label({SizeReg, '<', BSOffsetReg1}, Fail, MMod, BSt2), BSt4 = MMod:move_to_native_register(BSt3, SizeValReg, SizeReg), MMod:free_native_registers(BSt4, [SizeValReg]) end ), {MSt12, SizeReg} end, - {MSt14, NewOffsetReg} = MMod:copy_to_native_register(MSt13, BSOffsetReg), + {MSt14, NewOffsetReg} = 
MMod:copy_to_native_register(MSt13, BSOffsetReg1), MSt15 = MMod:add(MSt14, NewOffsetReg, SizeValue), MSt16 = MMod:shift_left(MSt15, NewOffsetReg, 3), % Write new offset @@ -1324,7 +1466,7 @@ first_pass(<>, MMod, MSt0, State0) -> BSBinaryReg1, Live, {free, HeapSizeReg}, MMod, MSt23 ), {MSt25, ResultTerm} = MMod:call_primitive(MSt24, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ - ctx, {free, BSBinaryReg2}, {free, BSOffsetReg}, {free, SizeValue} + ctx, {free, BSBinaryReg2}, {free, BSOffsetReg1}, {free, SizeValue} ]), {MSt26, Dest, Rest7} = decode_dest(Rest6, MMod, MSt25), ?TRACE("OP_BS_GET_BINARY2 ~p,~p,~p,~p,~p,~p,~p\n", [ @@ -1335,7 +1477,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt28), first_pass(Rest7, MMod, MSt28, State0); % 120 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_bits2/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1365,7 +1508,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt15), first_pass(Rest5, MMod, MSt15, State0); % 121 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_test_tail2/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1383,7 +1527,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt10), first_pass(Rest3, MMod, MSt10, State0); % 124 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"gc_bif1/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Live, Rest2} = decode_literal(Rest1), @@ -1408,7 +1553,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest5, MMod, MSt7, State0); % 125 -first_pass(<>, MMod, MSt0, 
State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"gc_bif2/6">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Live, Rest2} = decode_literal(Rest1), @@ -1434,7 +1580,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest6, MMod, MSt8, State0); % 129 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_bitstr/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1460,7 +1607,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt8), first_pass(Rest2, MMod, MSt8, State0); % 132 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_match_string/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0), @@ -1480,7 +1628,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt9), first_pass(Rest4, MMod, MSt9, State0); % 133 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_init_writable/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), ?TRACE("OP_BS_INIT_WRITABLE\n", []), HeapSize = term_binary_heap_size(0, MMod), @@ -1497,7 +1646,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest0, MMod, MSt6, State0); % 136 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"trim/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {NWords, Rest1} = decode_literal(Rest0), {_NRemaining, Rest2} = decode_literal(Rest1), @@ -1506,7 +1656,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest2, MMod, MSt1, State0); % 138 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, 
MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_utf8/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1521,7 +1672,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest5, MMod, MSt6, State0); % 139 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_utf8/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1533,7 +1685,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest4, MMod, MSt3, State0); % 140 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_utf16/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1550,7 +1703,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest5, MMod, MSt6, State0); % 141 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_utf16/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1564,7 +1718,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest4, MMod, MSt3, State0); % 142 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_utf32/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1581,7 +1736,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest5, MMod, MSt6, State0); % 143 -first_pass(<>, MMod, 
MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_utf32/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1595,7 +1751,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest4, MMod, MSt3, State0); % 152 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"gc_bif3/7">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FailLabel, Rest1} = decode_label(Rest0), {Live, Rest2} = decode_literal(Rest1), @@ -1630,12 +1787,14 @@ first_pass( ) -> {Line, Rest1} = decode_literal(Rest0), ?TRACE("OP_LINE ~p\n", [Line]), - Offset = MMod:offset(MSt), - first_pass(Rest1, MMod, MSt, State0#state{ + MSt0 = ?DWARF_LINE(MMod, MSt, Line), + Offset = MMod:offset(MSt0), + first_pass(Rest1, MMod, MSt0, State0#state{ line_offsets = [{Line, Offset} | AccLines] }); % 154 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_map_assoc/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Label, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1716,7 +1875,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt18), first_pass(Rest6, MMod, MSt18, State0); % 155 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_map_exact/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {_Label, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1790,7 +1950,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt14), first_pass(Rest6, MMod, MSt14, State0); % 156 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_map/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = 
decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1799,7 +1960,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest2, MMod, MSt2, State0); % 157 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"has_map_fields/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1845,7 +2007,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest5, MMod, MSt7, State0); % 158 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_map_elements/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1902,8 +2065,9 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(Rest6, MMod, MSt14, State0); % 159 first_pass( - <>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0 + <>, MMod, MSt, #state{atom_resolver = AtomResolver} = State0 ) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_tagged_tuple/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Label, Rest1} = decode_label(Rest0), {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -1915,13 +2079,13 @@ first_pass( {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2 ), MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt5, TagReg} = MMod:get_array_element(MSt4, Reg, 0), + {MSt5, TagReg0} = MMod:get_array_element(MSt4, Reg, 0), MSt6 = cond_jump_to_label( - {TagReg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_TUPLE}, Label, MMod, MSt5 + {TagReg0, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_TUPLE}, Label, MMod, MSt5 ), - MSt7 = MMod:shift_right(MSt6, TagReg, 6), - MSt8 = cond_jump_to_label({TagReg, '!=', Arity}, Label, MMod, MSt7), - MSt9 = MMod:free_native_registers(MSt8, 
[TagReg]), + {MSt7, TagReg1} = MMod:shift_right(MSt6, {free, TagReg0}, 6), + MSt8 = cond_jump_to_label({TagReg1, '!=', Arity}, Label, MMod, MSt7), + MSt9 = MMod:free_native_registers(MSt8, [TagReg1]), MSt10 = MMod:move_array_element(MSt9, Reg, 1, Reg), {MSt11, AtomReg} = case maps:find(AtomResolver(AtomIndex), ?DEFAULT_ATOMS) of @@ -1938,7 +2102,8 @@ first_pass( ?ASSERT_ALL_NATIVE_FREE(MSt14), first_pass(Rest4, MMod, MSt14, State0); % 160 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"build_stacktrace/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_STACKTRACE_BUILD, [ctx]), MSt2 = MMod:move_to_vm_register(MSt1, ResultReg, {x_reg, 0}), @@ -1946,7 +2111,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest0, MMod, MSt3, State0); % 161 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"raw_raise/0">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, ExClassReg} = MMod:move_to_native_register(MSt0, {x_reg, 0}), MSt2 = MMod:if_block(MSt1, {ExClassReg, '==', ?ERROR_ATOM}, fun(BSt0) -> @@ -1962,7 +2128,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest0, MMod, MSt5, State0); % 162 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_hd/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1), @@ -1974,7 +2141,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, MMod, MSt6, State0); % 163 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_tl/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, 
Rest3} = decode_dest(Rest1, MMod, MSt1), @@ -1986,7 +2154,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, MMod, MSt6, State0); % 164 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_tuple2/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), {ListSize, Rest2} = decode_extended_list_header(Rest1), @@ -2011,7 +2180,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest3, MMod, MSt7, State0); % 165 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_tail/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Src, Rest1} = decode_typed_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -2034,7 +2204,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt14), first_pass(Rest3, MMod, MSt14, State0); % 166 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_start_match3/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -2047,7 +2218,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 167 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_position/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Src, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Dest, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -2063,7 +2235,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt9), first_pass(Rest3, MMod, MSt9, State0); % 168 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_set_position/2">>), 
?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Src, Rest1} = decode_typed_compact_term(Rest0, MMod, MSt0, State0), {MSt2, Pos, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt1, State0), @@ -2075,7 +2248,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest2, MMod, MSt6, State0); % 169 -first_pass(<>, MMod, MSt0, State) -> +first_pass(<>, MMod, MSt, State) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"swap/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, ArgA, Rest1} = decode_dest(Rest0, MMod, MSt0), {MSt2, ArgB, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -2087,7 +2261,8 @@ first_pass(<>, MMod, MSt0, State) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest2, MMod, MSt6, State); % 170 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_start_match4/4">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_atom_or_label(Rest0, State0), {Live, Rest2} = decode_literal(Rest1), @@ -2108,7 +2283,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt5), first_pass(Rest4, MMod, MSt5, State0); % 171 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"make_fun3/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {FunIndex, Rest1} = decode_literal(Rest0), {MSt1, Dest, Rest2} = decode_dest(Rest1, MMod, MSt0), @@ -2136,7 +2312,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt7), first_pass(Rest4, MMod, MSt7, State0); % 172 -first_pass(<>, MMod, MSt0, State) -> +first_pass(<>, MMod, MSt, State) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"init_yregs/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {ListSize, Rest1} = decode_extended_list_header(Rest0), ?TRACE("OP_INIT_YREGS ~p\n", [ListSize]), @@ -2153,7 +2330,8 @@ first_pass(<>, MMod, MSt0, State) -> ?ASSERT_ALL_NATIVE_FREE(MSt1), first_pass(Rest2, MMod, MSt1, State); % 173 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = 
?DWARF_OPCODE(MMod, MSt, <<"recv_marker_bind/2">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, RegA, Rest1} = decode_dest(Rest0, MMod, MSt0), {MSt2, RegB, Rest2} = decode_dest(Rest1, MMod, MSt1), @@ -2162,7 +2340,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest2, MMod, MSt3, State0); % 174 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_clear/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, RegA, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_RECV_MARKER_CLEAR ~p\n", [RegA]), @@ -2170,7 +2349,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt2), first_pass(Rest1, MMod, MSt2, State0); % 175 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_reserve/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_RECV_MARKER_RESERVE ~p\n", [Dest]), @@ -2180,7 +2360,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt3), first_pass(Rest1, MMod, MSt3, State0); % 176 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_use/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, RegA, Rest1} = decode_dest(Rest0, MMod, MSt0), ?TRACE("OP_RECV_MARKER_USE ~p\n", [RegA]), @@ -2189,8 +2370,9 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(Rest1, MMod, MSt2, State0); % 177 first_pass( - <>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0 + <>, MMod, MSt, #state{atom_resolver = AtomResolver} = State0 ) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_create_bin/6">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {Alloc, Rest2} = decode_allocator_list(MMod, Rest1), @@ -2262,15 +2444,19 @@ first_pass( {MSt7, (BinaryTotalSize div 8), term_binary_heap_size((BinaryTotalSize div 8), MMod) + Alloc}; true -> - MSt8 = 
MMod:shift_right(MSt7, BinaryTotalSize, 3), - {MSt9, BinaryTotalSize0} = MMod:copy_to_native_register(MSt8, BinaryTotalSize), - {MSt10, AllocSizeReg} = term_binary_heap_size({free, BinaryTotalSize0}, MMod, MSt9), + {MSt8, BinaryTotalSizeBytes} = MMod:shift_right(MSt7, {free, BinaryTotalSize}, 3), + {MSt9, BinaryTotalSizeBytes0} = MMod:copy_to_native_register( + MSt8, BinaryTotalSizeBytes + ), + {MSt10, AllocSizeReg} = term_binary_heap_size( + {free, BinaryTotalSizeBytes0}, MMod, MSt9 + ), case Alloc of 0 -> - {MSt10, BinaryTotalSize, AllocSizeReg}; + {MSt10, BinaryTotalSizeBytes, AllocSizeReg}; _ -> MSt11 = MMod:add(MSt10, AllocSizeReg, Alloc), - {MSt11, BinaryTotalSize, AllocSizeReg} + {MSt11, BinaryTotalSizeBytes, AllocSizeReg} end end, {MSt13, MemoryEnsureFreeReg} = MMod:call_primitive( @@ -2318,7 +2504,8 @@ first_pass( ?ASSERT_ALL_NATIVE_FREE(MSt19), first_pass(Rest7, MMod, MSt19, State1); % 178 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_fun2/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Tag, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), {ArgsCount, Rest2} = decode_literal(Rest1), @@ -2334,7 +2521,8 @@ first_pass(<>, MMod, MSt0, State0) -> ?ASSERT_ALL_NATIVE_FREE(MSt6), first_pass(Rest3, MMod, MSt6, State0); % 180 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"badrecord/1">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0), ?TRACE("OP_BADRECORD ~p\n", [Arg1]), @@ -2345,8 +2533,9 @@ first_pass(<>, MMod, MSt0, State0) -> first_pass(Rest1, MMod, MSt2, State0); % 181 first_pass( - <>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0 + <>, MMod, MSt, #state{atom_resolver = AtomResolver} = State0 ) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"update_record/5">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {HintAtomIndex, Rest1} = decode_atom(Rest0), Hint = 
AtomResolver(HintAtomIndex), @@ -2422,7 +2611,8 @@ first_pass( ?ASSERT_ALL_NATIVE_FREE(MSt11), first_pass(Rest6, MMod, MSt11, State0); % 182 -first_pass(<>, MMod, MSt0, State0) -> +first_pass(<>, MMod, MSt, State0) -> + MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_match/3">>), ?ASSERT_ALL_NATIVE_FREE(MSt0), {Fail, Rest1} = decode_label(Rest0), {MSt1, MatchState, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0), @@ -2879,33 +3069,32 @@ first_pass_bs_match_binary( ]) end, MatchedBytes = MatchedBits div 8, - {MSt2, BSOffseBytesReg} = MMod:copy_to_native_register(MSt1, BSOffsetReg), - MSt3 = MMod:shift_right(MSt2, BSOffseBytesReg, 3), - {MSt4, RemainingBytesReg} = MMod:get_array_element(MSt3, BSBinaryReg, 1), - MSt5 = MMod:sub(MSt4, RemainingBytesReg, BSOffseBytesReg), - MSt6 = cond_jump_to_label({RemainingBytesReg, '<', MatchedBytes}, Fail, MMod, MSt5), - MSt7 = MMod:free_native_registers(MSt6, [RemainingBytesReg]), - {MSt8, HeapSizeReg} = MMod:call_primitive(MSt7, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ + {MSt2, BSOffseBytesReg} = MMod:shift_right(MSt1, BSOffsetReg, 3), + {MSt3, RemainingBytesReg} = MMod:get_array_element(MSt2, BSBinaryReg, 1), + MSt4 = MMod:sub(MSt3, RemainingBytesReg, BSOffseBytesReg), + MSt5 = cond_jump_to_label({RemainingBytesReg, '<', MatchedBytes}, Fail, MMod, MSt4), + MSt6 = MMod:free_native_registers(MSt5, [RemainingBytesReg]), + {MSt7, HeapSizeReg} = MMod:call_primitive(MSt6, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ BSBinaryReg, MatchedBytes ]), - {MSt9, NewMatchState} = memory_ensure_free_with_extra_root( - MatchState, Live, {free, HeapSizeReg}, MMod, MSt8 + {MSt8, NewMatchState} = memory_ensure_free_with_extra_root( + MatchState, Live, {free, HeapSizeReg}, MMod, MSt7 ), % Restore BSBinaryReg as it may have been gc'd as well - {MSt10, MatchStateReg0} = MMod:copy_to_native_register(MSt9, NewMatchState), - MSt11 = MMod:and_(MSt10, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), - MSt12 = MMod:move_array_element(MSt11, MatchStateReg0, 1, BSBinaryReg), - MSt13 
= MMod:free_native_registers(MSt12, [MatchStateReg0]), - {MSt14, ResultTerm} = MMod:call_primitive(MSt13, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ + {MSt9, MatchStateReg0} = MMod:copy_to_native_register(MSt8, NewMatchState), + MSt10 = MMod:and_(MSt9, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), + MSt11 = MMod:move_array_element(MSt10, MatchStateReg0, 1, BSBinaryReg), + MSt12 = MMod:free_native_registers(MSt11, [MatchStateReg0]), + {MSt13, ResultTerm} = MMod:call_primitive(MSt12, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ ctx, BSBinaryReg, {free, BSOffseBytesReg}, MatchedBytes ]), - MSt15 = MMod:and_(MSt14, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt16, Dest, Rest5} = decode_dest(Rest4, MMod, MSt15), + MSt14 = MMod:and_(MSt13, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt15, Dest, Rest5} = decode_dest(Rest4, MMod, MSt14), ?TRACE("~p},", [Dest]), - MSt17 = MMod:move_to_vm_register(MSt16, ResultTerm, Dest), - MSt18 = MMod:free_native_registers(MSt17, [ResultTerm]), - MSt19 = MMod:add(MSt18, BSOffsetReg, MatchedBits), - {J0 - 5, Rest5, NewMatchState, BSOffsetReg, MSt19}. + MSt16 = MMod:move_to_vm_register(MSt15, ResultTerm, Dest), + MSt17 = MMod:free_native_registers(MSt16, [ResultTerm]), + MSt18 = MMod:add(MSt17, BSOffsetReg, MatchedBits), + {J0 - 5, Rest5, NewMatchState, BSOffsetReg, MSt18}. 
first_pass_bs_match_get_tail(MatchState, BSBinaryReg, BSOffsetReg, J0, Rest0, MMod, MSt0) -> {Live, Rest1} = decode_literal(Rest0), @@ -2925,32 +3114,31 @@ do_get_tail( MatchState, Live, BSOffsetReg, BSBinaryReg, MMod, MSt0 ) -> MSt1 = cond_raise_badarg({BSOffsetReg, '&', 2#111, '!=', 0}, MMod, MSt0), - {MSt2, BSOffseBytesReg} = MMod:copy_to_native_register(MSt1, BSOffsetReg), - MSt3 = MMod:shift_right(MSt2, BSOffseBytesReg, 3), - {MSt4, TailBytesReg0} = MMod:get_array_element(MSt3, BSBinaryReg, 1), - MSt5 = MMod:sub(MSt4, TailBytesReg0, BSOffseBytesReg), - {MSt6, HeapSizeReg} = MMod:call_primitive(MSt5, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ + {MSt2, BSOffseBytesReg} = MMod:shift_right(MSt1, BSOffsetReg, 3), + {MSt3, TailBytesReg0} = MMod:get_array_element(MSt2, BSBinaryReg, 1), + MSt4 = MMod:sub(MSt3, TailBytesReg0, BSOffseBytesReg), + {MSt5, HeapSizeReg} = MMod:call_primitive(MSt4, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [ BSBinaryReg, {free, TailBytesReg0} ]), - {MSt7, NewMatchState} = memory_ensure_free_with_extra_root( - MatchState, Live, {free, HeapSizeReg}, MMod, MSt6 + {MSt6, NewMatchState} = memory_ensure_free_with_extra_root( + MatchState, Live, {free, HeapSizeReg}, MMod, MSt5 ), % Restore BSBinaryReg as it may have been gc'd as well - {MSt8, MatchStateReg0} = MMod:copy_to_native_register(MSt7, NewMatchState), - MSt9 = MMod:and_(MSt8, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), - MSt10 = MMod:move_array_element(MSt9, MatchStateReg0, 1, BSBinaryReg), - MSt11 = MMod:free_native_registers(MSt10, [MatchStateReg0]), - MSt12 = MMod:and_(MSt11, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), - {MSt13, TailBytesReg1} = MMod:get_array_element(MSt12, BSBinaryReg, 1), - MSt14 = MMod:sub(MSt13, TailBytesReg0, BSOffseBytesReg), - MSt15 = MMod:add(MSt14, BSBinaryReg, ?TERM_PRIMARY_BOXED), - {MSt16, ResultTerm} = MMod:call_primitive(MSt15, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ + {MSt7, MatchStateReg0} = MMod:copy_to_native_register(MSt6, NewMatchState), + MSt8 = MMod:and_(MSt7, 
MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK), + MSt9 = MMod:move_array_element(MSt8, MatchStateReg0, 1, BSBinaryReg), + MSt10 = MMod:free_native_registers(MSt9, [MatchStateReg0]), + MSt11 = MMod:and_(MSt10, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK), + {MSt12, TailBytesReg1} = MMod:get_array_element(MSt11, BSBinaryReg, 1), + MSt13 = MMod:sub(MSt12, TailBytesReg0, BSOffseBytesReg), + MSt14 = MMod:add(MSt13, BSBinaryReg, ?TERM_PRIMARY_BOXED), + {MSt15, ResultTerm} = MMod:call_primitive(MSt14, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [ ctx, BSBinaryReg, {free, BSOffseBytesReg}, TailBytesReg1 ]), - MSt17 = MMod:shift_left(MSt16, TailBytesReg1, 3), - MSt18 = MMod:add(MSt17, BSOffsetReg, TailBytesReg1), - MSt19 = MMod:free_native_registers(MSt18, [TailBytesReg1]), - {MSt19, ResultTerm, NewMatchState}. + MSt16 = MMod:shift_left(MSt15, TailBytesReg1, 3), + MSt17 = MMod:add(MSt16, BSOffsetReg, TailBytesReg1), + MSt18 = MMod:free_native_registers(MSt17, [TailBytesReg1]), + {MSt18, ResultTerm, NewMatchState}. first_pass_bs_match_equal_colon_equal( Fail, MatchState, BSBinaryReg, BSOffsetReg, J0, Rest0, MMod, MSt0 @@ -2978,9 +3166,8 @@ first_pass_bs_match_equal_colon_equal( {MSt5, IntValue} = MMod:get_array_element(MSt4, {free, Result}, 1), cond_jump_to_label({{free, IntValue}, '!=', PatternValue}, Fail, MMod, MSt5); _ -> - MSt4 = MMod:shift_right(MSt3, Result, 4), - MSt5 = cond_jump_to_label({Result, '!=', PatternValue}, Fail, MMod, MSt4), - MMod:free_native_registers(MSt5, [Result]) + {MSt4, ResultInt} = MMod:shift_right(MSt3, {free, Result}, 4), + cond_jump_to_label({{free, ResultInt}, '!=', PatternValue}, Fail, MMod, MSt4) end, MSt7 = MMod:add(MSt6, BSOffsetReg, Size), {J0 - 3, Rest3, MatchState, BSOffsetReg, MSt7}. 
@@ -3221,8 +3408,8 @@ term_to_int({literal, Val}, _FailLabel, _MMod, MSt0) when is_integer(Val) -> % Optimized case: when we have type information showing this is an integer, skip the type check term_to_int({typed, Term, {t_integer, _Range}}, _FailLabel, MMod, MSt0) -> {MSt1, Reg} = MMod:move_to_native_register(MSt0, Term), - MSt2 = MMod:shift_right(MSt1, Reg, 4), - {MSt2, Reg}; + {MSt2, IntReg} = MMod:shift_right(MSt1, {free, Reg}, 4), + {MSt2, IntReg}; term_to_int({typed, Term, _NonIntegerType}, FailLabel, MMod, MSt0) -> % Type information shows it's not an integer, fall back to generic path term_to_int(Term, FailLabel, MMod, MSt0); @@ -3231,8 +3418,8 @@ term_to_int(Term, FailLabel, MMod, MSt0) -> MSt2 = cond_raise_badarg_or_jump_to_fail_label( {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, FailLabel, MMod, MSt1 ), - MSt3 = MMod:shift_right(MSt2, Reg, 4), - {MSt3, Reg}. + {MSt3, IntReg} = MMod:shift_right(MSt2, {free, Reg}, 4), + {MSt3, IntReg}. first_pass_float3(Primitive, Rest0, MMod, MSt0, State0) -> {Label, Rest1} = decode_label(Rest0), @@ -3590,8 +3777,8 @@ term_get_tuple_arity(Tuple, MMod, MSt0) -> end, MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK), MSt3 = MMod:move_array_element(MSt2, Reg, 0, Reg), - MSt4 = MMod:shift_right(MSt3, Reg, 6), - {MSt4, Reg}. + {MSt4, ArityReg} = MMod:shift_right(MSt3, {free, Reg}, 6), + {MSt4, ArityReg}. 
term_get_map_size(Map, MMod, MSt0) -> {MSt1, MapKeys} = term_get_map_keys(Map, MMod, MSt0), @@ -3638,7 +3825,7 @@ term_binary_heap_size({free, Reg}, MMod, MSt0) -> {Reg, '<', ?REFC_BINARY_MIN_32}, fun(BSt0) -> BSt1 = MMod:add(BSt0, Reg, 3), - BSt2 = MMod:shift_right(BSt1, Reg, 2), + {BSt2, Reg} = MMod:shift_right(BSt1, {free, Reg}, 2), MMod:add(BSt2, Reg, 1 + ?BINARY_HEADER_SIZE) end, fun(BSt0) -> @@ -3654,7 +3841,7 @@ term_binary_heap_size({free, Reg}, MMod, MSt0) -> {Reg, '<', ?REFC_BINARY_MIN_64}, fun(BSt0) -> BSt1 = MMod:add(BSt0, Reg, 7), - BSt2 = MMod:shift_right(BSt1, Reg, 3), + {BSt2, Reg} = MMod:shift_right(BSt1, {free, Reg}, 3), MMod:add(BSt2, Reg, 1 + ?BINARY_HEADER_SIZE) end, fun(BSt0) -> @@ -3702,9 +3889,16 @@ stream(MaxSize) -> backend_module() -> erlang:nif_error(undefined). +%% @doc Get the JIT variant suitable for runtime compilation +%% @return The JIT variant for this platform and float precision +-spec variant() -> non_neg_integer(). +variant() -> + erlang:nif_error(undefined). + %% @doc Instantiate backend for this platform %% @return A tuple with the backend module and the backend state for this platform backend({StreamModule, Stream}) -> BackendModule = ?MODULE:backend_module(), - BackendState = BackendModule:new(?JIT_VARIANT_PIC, StreamModule, Stream), + Variant = ?MODULE:variant(), + BackendState = BackendModule:new(Variant, StreamModule, Stream), {BackendModule, BackendState}. diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl index 3449a0a997..9b35569f5d 100644 --- a/libs/jit/src/jit_aarch64.erl +++ b/libs/jit/src/jit_aarch64.erl @@ -37,6 +37,8 @@ call_primitive_with_cp/3, return_if_not_equal_to_ctx/2, jump_to_label/2, + jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -70,10 +72,26 @@ add_label/3 ]). +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2, + dwarf_ctx_register/0 +]). +-endif. + +-compile([warnings_as_errors]). 
+ -include_lib("jit.hrl"). -include("primitives.hrl"). +-ifdef(JIT_DWARF). +-include("jit_dwarf.hrl"). +-endif. + %-define(ASSERT(Expr), true = Expr). -define(ASSERT(_Expr), ok). @@ -133,7 +151,8 @@ branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], available_regs :: [aarch64_register()], used_regs :: [aarch64_register()], - labels :: [{integer() | reference(), integer()}] + labels :: [{integer() | reference(), integer()}], + variant :: non_neg_integer() }). -type state() :: #state{}. @@ -155,7 +174,8 @@ | {'(int)', maybe_free_aarch64_register(), '!=', aarch64_register() | integer()} | {'(bool)', maybe_free_aarch64_register(), '==', false} | {'(bool)', maybe_free_aarch64_register(), '!=', false} - | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', integer()}. + | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, aarch64_register()}, '==', {free, aarch64_register()}}. % ctx->e is 0x28 % ctx->x is 0x30 @@ -167,6 +187,13 @@ -define(X_REG(N), {?CTX_REG, 16#30 + (N * ?WORD_SIZE)}). -define(CP, {?CTX_REG, 16#B8}). -define(FP_REGS, {?CTX_REG, 16#C0}). +-define(FP_REG_OFFSET(State, F), + (F * + case (State)#state.variant band ?JIT_VARIANT_FLOAT32 of + 0 -> 8; + _ -> 4 + end) +). -define(BS, {?CTX_REG, 16#C8}). -define(BS_OFFSET, {?CTX_REG, 16#D0}). -define(JITSTATE_MODULE, {?JITSTATE_REG, 0}). @@ -188,6 +215,8 @@ -define(PARAMETER_REGS, [r0, r1, r2, r3, r4, r5]). -define(SCRATCH_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6, r17]). +-include("jit_backend_dwarf_impl.hrl"). + %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. %% sizeof(uintptr_t) @@ -216,7 +245,7 @@ word_size() -> ?WORD_SIZE. %% @return New backend state %%----------------------------------------------------------------------------- -spec new(any(), module(), stream()) -> state(). 
-new(_Variant, StreamModule, Stream) -> +new(Variant, StreamModule, Stream) -> #state{ stream_module = StreamModule, stream = Stream, @@ -224,7 +253,8 @@ new(_Variant, StreamModule, Stream) -> offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], - labels = [] + labels = [], + variant = Variant }. %%----------------------------------------------------------------------------- @@ -520,6 +550,47 @@ jump_to_label( State#state{stream = Stream1, branches = [Reloc | AccBranches]} end. +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + Rel = TargetOffset - Offset, + I1 = jit_aarch64_asm:b(Rel), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Jump to a continuation address stored in a register. +%% This is used for optimized intra-module returns. +%% @end +%% @param State current backend state +%% @param OffsetReg register containing the continuation offset +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_continuation( + #state{ + stream_module = StreamModule, + stream = Stream0, + offset = BaseOffset, + available_regs = [TempReg | _] + } = State, + {free, OffsetReg} +) -> + % Calculate absolute address: native_code_base + target_offset + % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) + CurrentStreamOffset = StreamModule:offset(Stream0), + NetOffset = BaseOffset - CurrentStreamOffset, + + % Get native code base address into temporary register + I1 = jit_aarch64_asm:adr(TempReg, NetOffset), + % Add target offset to get final absolute address + I2 = jit_aarch64_asm:add(TempReg, TempReg, OffsetReg), + % Indirect branch to the calculated absolute address + I3 = jit_aarch64_asm:br(TempReg), + + Code = <>, + Stream1 = 
StreamModule:append(Stream0, Code), + % Free all registers since this is a tail jump + State#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}. + %% @private -spec rewrite_branch_instruction( jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, integer() @@ -783,6 +854,20 @@ if_block_cond( State1 = if_block_free_reg(RegOrTuple, State0), State2 = State1#state{stream = Stream1}, {State2, ne, byte_size(I1)}; +if_block_cond( + #state{stream_module = StreamModule, stream = Stream0} = State0, + {{free, Reg1}, '==', {free, Reg2}} +) -> + % Compare two free registers + I1 = jit_aarch64_asm:cmp(Reg1, Reg2), + I2 = jit_aarch64_asm:bcc(ne, 0), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + % Free both registers + State1 = if_block_free_reg({free, Reg1}, State0), + State2 = if_block_free_reg({free, Reg2}, State1), + State3 = State2#state{stream = Stream1}, + {State3, ne, byte_size(I1)}; if_block_cond( #state{stream_module = StreamModule, stream = Stream0} = State0, {'(bool)', RegOrTuple, '==', false} @@ -924,13 +1009,29 @@ merge_used_regs(State, []) -> %% @param Shift number of bits to shift %% @return new state %%----------------------------------------------------------------------------- --spec shift_right(state(), aarch64_register(), non_neg_integer()) -> state(). -shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when +-spec shift_right(#state{}, maybe_free_aarch64_register(), non_neg_integer()) -> + {#state{}, aarch64_register()}. +shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when ?IS_GPR(Reg) andalso is_integer(Shift) -> I = jit_aarch64_asm:lsr(Reg, Reg, Shift), Stream1 = StreamModule:append(Stream0, I), - State#state{stream = Stream1}. 
+ {State#state{stream = Stream1}, Reg}; +shift_right( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [ResultReg | T], + used_regs = UR + } = State, + Reg, + Shift +) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I = jit_aarch64_asm:lsr(ResultReg, Reg, Shift), + Stream1 = StreamModule:append(Stream0, I), + {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. %%----------------------------------------------------------------------------- %% @doc Emit a shift register left by a fixed number of bits, effectively @@ -1273,7 +1374,7 @@ move_to_vm_register( ) -> I1 = jit_aarch64_asm:ldr(Reg, {Reg, ?WORD_SIZE}), I2 = jit_aarch64_asm:ldr(Temp, ?FP_REGS), - I3 = jit_aarch64_asm:str(Reg, {Temp, F * ?WORD_SIZE}), + I3 = jit_aarch64_asm:str(Reg, {Temp, ?FP_REG_OFFSET(State0, F)}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State1 = free_native_register(State0, Reg), @@ -1550,7 +1651,19 @@ move_to_array_element( %% @param Value value to move (can be an immediate, vm register, pointer, or native register) %% @return Tuple of {Updated backend state, Native register containing the value} %%----------------------------------------------------------------------------- --spec move_to_native_register(state(), value()) -> {state(), aarch64_register()}. +-spec move_to_native_register(state(), value() | cp) -> {state(), aarch64_register()}. 
+move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + cp +) -> + I1 = jit_aarch64_asm:ldr(Reg, ?CP), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; move_to_native_register(State, Reg) when is_atom(Reg) -> {State, Reg}; move_to_native_register( @@ -2230,3 +2343,14 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) -spec add_label(state(), integer() | reference(), integer()) -> state(). add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. + +-ifdef(JIT_DWARF). +%%----------------------------------------------------------------------------- +%% @doc Return the DWARF register number for the ctx parameter +%% @returns The DWARF register number where ctx is passed (x0/r0 in aarch64) +%% @end +%%----------------------------------------------------------------------------- +-spec dwarf_ctx_register() -> non_neg_integer(). +dwarf_ctx_register() -> + ?DWARF_X0_REG_AARCH64. +-endif. diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl new file mode 100644 index 0000000000..7343473b18 --- /dev/null +++ b/libs/jit/src/jit_armv6m.erl @@ -0,0 +1,3178 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. 
+% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_armv6m). + +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/1, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + jump_to_continuation/2, + jump_to_offset/2, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + continuation_entry_point/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/2, + add_label/2, + add_label/3 +]). + +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2, + dwarf_ctx_register/0 +]). +-endif. + +-compile([warnings_as_errors]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). + +-ifdef(JIT_DWARF). +-include("jit_dwarf.hrl"). +-endif. + +-define(ASSERT(Expr), true = Expr). + +%% ARMv6-M AAPCS32 ABI: r0-r3 are used for argument passing and return value. +%% r0-r1 form a double-word for 64-bit returns, additional args passed on stack. +%% r4-r11 are callee-saved registers (must be preserved across calls), +%% r12 (IP) is intra-procedure-call scratch register, +%% r13 (SP) is stack pointer, +%% r14 (LR) is link register, +%% r15 (PC) is program counter. +%% ARMv6-M has no floating-point unit, so no FP registers available. 
%%
%% See: Arm® Architecture Procedure Call Standard (AAPCS32)
%% https://developer.arm.com/documentation/ihi0042/latest/
%%
%% Registers used by the JIT backend (ARMv6-M Thumb):
%% - Argument/return: r0-r3
%% - Callee-saved: r4-r11 (must preserve)
%% - Scratch: r12 (IP) - intra-procedure call
%% - Stack pointer: r13 (SP)
%% - Link register: r14 (LR)
%% - Program counter: r15 (PC)
%% - Allocated by the JIT for temporaries: the low registers listed in
%%   ?AVAILABLE_REGS; r12 (?IP_REG) is used as an additional scratch register
%%
%% Note: ARMv6-M Thumb instructions are mostly 16-bit with limited
%% register access (many instructions only work with r0-r7).
%%
%% For more details, refer to the AAPCS32 Procedure Call Standard.

-type armv6m_register() ::
    r0
    | r1
    | r2
    | r3
    | r4
    | r5
    | r6
    | r7
    | r8
    | r9
    | r10
    | r11
    | r12
    | r13
    | r14
    | r15.

-define(IS_GPR(Reg),
    (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse
        Reg =:= r5 orelse Reg =:= r6 orelse Reg =:= r7 orelse Reg =:= r8 orelse Reg =:= r9 orelse
        Reg =:= r10 orelse Reg =:= r11 orelse Reg =:= r12 orelse Reg =:= r13 orelse Reg =:= r14 orelse
        Reg =:= r15)
).

-type stream() :: any().

%% Kind of pending relocation stored in #state.branches. These are the three
%% shapes update_branches/1 knows how to patch.
-type branch_type() ::
    {adr, armv6m_register()}
    | {far_branch, pos_integer(), armv6m_register()}
    | {jump_table_data, non_neg_integer()}.

-record(state, {
    % Module implementing the stream API (offset/1, append/2, replace/3)
    stream_module :: module(),
    stream :: stream(),
    % Stream offset at the time the state was created
    offset :: non_neg_integer(),
    % Pending relocations: {Label, OffsetOfCodeToPatch, Kind}
    branches :: [{integer() | reference(), non_neg_integer(), branch_type()}],
    available_regs :: [armv6m_register()],
    used_regs :: [armv6m_register()],
    % Known labels: {Label, OffsetInStream}
    labels :: [{integer() | reference(), integer()}],
    variant :: non_neg_integer(),
    literal_pool :: [{non_neg_integer(), armv6m_register(), non_neg_integer()}]
}).

-type state() :: #state{}.
-type immediate() :: non_neg_integer().
-type vm_register() ::
    {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, armv6m_register()}.
-type value() :: immediate() | vm_register() | armv6m_register() | {ptr, armv6m_register()}.
-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}.

-type maybe_free_armv6m_register() ::
    {free, armv6m_register()} | armv6m_register().

-type condition() ::
    {armv6m_register(), '<', integer()}
    | {maybe_free_armv6m_register(), '<', armv6m_register()}
    | {maybe_free_armv6m_register(), '==', integer()}
    | {maybe_free_armv6m_register(), '!=', armv6m_register() | integer()}
    | {'(int)', maybe_free_armv6m_register(), '==', integer()}
    | {'(int)', maybe_free_armv6m_register(), '!=', armv6m_register() | integer()}
    | {'(bool)', maybe_free_armv6m_register(), '==', false}
    | {'(bool)', maybe_free_armv6m_register(), '!=', false}
    | {maybe_free_armv6m_register(), '&', non_neg_integer(), '!=', integer()}
    | {{free, armv6m_register()}, '==', {free, armv6m_register()}}.

% Offsets into the context structure, relative to ?CTX_REG, as used by this
% 32-bit backend: the Y registers pointer is read at 16#14 and the X registers
% start at 16#18, one 4-byte word each.
% NOTE(review): presumably these mirror ctx->e and ctx->x of the C Context
% struct on 32-bit targets - confirm against the C definition.
-define(CTX_REG, r0).
-define(NATIVE_INTERFACE_REG, r2).
-define(Y_REGS, {?CTX_REG, 16#14}).
-define(X_REG(N), {?CTX_REG, 16#18 + ((N) * 4)}).
-define(CP, {?CTX_REG, 16#5C}).
-define(FP_REGS, {?CTX_REG, 16#60}).
-define(BS, {?CTX_REG, 16#64}).
-define(BS_OFFSET, {?CTX_REG, 16#68}).
% JITSTATE is on stack, accessed via stack offset
% These macros expect a register that contains the jit_state pointer
-define(JITSTATE_MODULE(Reg), {Reg, 0}).
-define(JITSTATE_CONTINUATION(Reg), {Reg, 16#4}).
-define(JITSTATE_REDUCTIONCOUNT(Reg), {Reg, 16#8}).
-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, (N) * 4}).
-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}).

% Size in bytes of one jump table entry: four 16-bit instructions followed by
% one 32-bit offset word.
-define(JUMP_TABLE_ENTRY_SIZE, 12).

%% ARMv6-M register mappings

%% IP can be used as an additional scratch register
-define(IP_REG, r12).

%% Stack offset for function prolog: push {r1,r4,r5,r6,r7,lr}
%% r1 (JITSTATE_REG) is at SP+0 after push
-define(STACK_OFFSET_JITSTATE, 0).

% Integer range tests. Bodies are parenthesised so that they combine safely
% with any surrounding operator at the expansion site.
-define(IS_SINT8_T(X), (is_integer(X) andalso X >= -128 andalso X =< 127)).
-define(IS_SINT32_T(X), (is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000)).
-define(IS_UINT8_T(X), (is_integer(X) andalso X >= 0 andalso X =< 255)).
% Integer range tests. Bodies are parenthesised so that they combine safely
% with any surrounding operator at the expansion site.
-define(IS_UINT32_T(X), (is_integer(X) andalso X >= 0 andalso X < 16#100000000)).
-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X),
    (is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000)
).

%% ARMv6-M register allocation:
%% - r0: context pointer (reserved)
%% - r1, r3: available (r1 saved/restored, r3 can be parameter)
%% - r2: parameter register (not available for scratch)
%% - r4-r7: callee-saved (saved/restored on entry/exit)
%% - r8-r11: high registers, limited Thumb access
%% - r12: intra-procedure call scratch
%% - r13 (SP), r14 (LR), r15 (PC): special purpose
%% Reorder to match AArch64 test expectations (r7 first)
-define(AVAILABLE_REGS, [r7, r6, r5, r4, r3, r1]).
-define(PARAMETER_REGS, [r0, r1, r2, r3]).
-define(SCRATCH_REGS, [r7, r6, r5, r4, r3, r2, r1, r0, r12]).

-include("jit_backend_dwarf_impl.hrl").

%%-----------------------------------------------------------------------------
%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e.
%% sizeof(uintptr_t)
%%
%% C code equivalent is:
%% #if UINTPTR_MAX == UINT32_MAX
%%    #define TERM_BYTES 4
%% #elif UINTPTR_MAX == UINT64_MAX
%%    #define TERM_BYTES 8
%% #else
%%    #error "Term size must be either 32 bit or 64 bit."
%% #endif
%%
%% This backend targets a 32-bit architecture, so the result is always 4.
%% @end
%% @return Word size in bytes
%%-----------------------------------------------------------------------------
-spec word_size() -> 4 | 8.
word_size() -> 4.

%%-----------------------------------------------------------------------------
%% @doc Create a new backend state for provided variant, module and stream.
%% @end
%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC)
%% @param StreamModule module to stream instructions
%% @param Stream stream state
%% @return New backend state
%%-----------------------------------------------------------------------------
-spec new(any(), module(), stream()) -> state().
new(Variant, StreamModule, Stream) ->
    % Remember where this module's code starts in the stream
    InitialOffset = StreamModule:offset(Stream),
    #state{
        variant = Variant,
        stream_module = StreamModule,
        stream = Stream,
        offset = InitialOffset,
        available_regs = ?AVAILABLE_REGS,
        used_regs = [],
        branches = [],
        labels = [],
        literal_pool = []
    }.

%%-----------------------------------------------------------------------------
%% @doc Return the stream object held by the backend state.
%% @end
%% @param State current backend state
%% @return The stream object
%%-----------------------------------------------------------------------------
-spec stream(state()) -> stream().
stream(#state{} = State) ->
    State#state.stream.

%%-----------------------------------------------------------------------------
%% @doc Return the current offset in the stream, as reported by the stream
%% module.
%% @end
%% @param State current backend state
%% @return The current offset
%%-----------------------------------------------------------------------------
-spec offset(state()) -> non_neg_integer().
offset(#state{} = State) ->
    StreamModule = State#state.stream_module,
    StreamModule:offset(State#state.stream).

%%-----------------------------------------------------------------------------
%% @doc Emit a breakpoint instruction (`bkpt 0'). This is used for debugging
%% and not in production.
%% @end
%% @param State current backend state
%% @return The updated backend state
%%-----------------------------------------------------------------------------
-spec debugger(state()) -> state().
debugger(#state{} = State) ->
    #state{stream_module = StreamModule, stream = Stream} = State,
    Breakpoint = jit_armv6m_asm:bkpt(0),
    State#state{stream = StreamModule:append(Stream, Breakpoint)}.

%%-----------------------------------------------------------------------------
%% @doc Return the list of currently used native registers. This is used for
%% debugging and not in production.
%% @end
%% @param State current backend state
%% @return The list of used registers
%%-----------------------------------------------------------------------------
-spec used_regs(state()) -> [armv6m_register()].
used_regs(#state{} = State) ->
    State#state.used_regs.

%%-----------------------------------------------------------------------------
%% @doc Return the native scratch registers that are currently free. This
%% is used for debugging and not in production.
%% @end
%% @param State current backend state
%% @return The list of available registers
%%-----------------------------------------------------------------------------
-spec available_regs(state()) -> [armv6m_register()].
available_regs(#state{} = State) ->
    State#state.available_regs.

%%-----------------------------------------------------------------------------
%% @doc Release every native register mentioned in a list of values. Entries
%% may be registers or `{ptr, Register}' tuples; anything else is ignored.
%% @end
%% @param State current backend state
%% @param Regs list of registers or other values
%% @return The updated backend state
%%-----------------------------------------------------------------------------
-spec free_native_registers(state(), [value()]) -> state().
free_native_registers(State, Regs) ->
    lists:foldl(
        fun(Reg, AccState) -> free_native_register(AccState, Reg) end,
        State,
        Regs
    ).

%% Release a single value: a `{ptr, Reg}' is unwrapped, a register atom is
%% handed to free_reg/3, and any other value leaves the state untouched.
-spec free_native_register(state(), value()) -> state().
free_native_register(State, {ptr, Reg}) ->
    free_native_register(State, Reg);
free_native_register(#state{available_regs = Avail, used_regs = InUse} = State, Reg) when
    is_atom(Reg)
->
    {NewAvail, NewInUse} = free_reg(Avail, InUse, Reg),
    State#state{available_regs = NewAvail, used_regs = NewInUse};
free_native_register(State, _NotARegister) ->
    State.

%%-----------------------------------------------------------------------------
%% @doc Check that no native scratch register is currently allocated, i.e.
%% that the available list is exactly ?AVAILABLE_REGS and the used list is
%% empty. Any other state fails to match. This is used for debugging and not
%% in production.
%% @end
%% @param State current backend state
%% @return ok
%%-----------------------------------------------------------------------------
-spec assert_all_native_free(state()) -> ok.
assert_all_native_free(#state{used_regs = [], available_regs = ?AVAILABLE_REGS}) ->
    ok.

%%-----------------------------------------------------------------------------
%% @doc Emit the jump table at the beginning of the module. The offset word
%% of each entry is patched afterwards by update_branches/1. Entries are
%% emitted for labels 0 (special entry for lines and labels information) to
%% LabelsCount included (special entry for OP_INT_CALL_END).
%%
%% On this platform, each jump table entry is 12 bytes:
%% ```
%% ldr r3, [pc, #4]
%% push {r1, r4, r5, r6, r7, lr}
%% add pc, r3
%% nop
%% .word offset_to_label
%% '''
%%
%% @end
%% @param State current backend state
%% @param LabelsCount number of labels in the module.
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec jump_table(state(), pos_integer()) -> state().
jump_table(State, LabelsCount) ->
    FirstLabel = 0,
    jump_table0(State, FirstLabel, LabelsCount).

%% Emit the jump table entries for labels N..LabelsCount (inclusive) and
%% record one `jump_table_data' relocation per entry.
jump_table0(State, N, LabelsCount) when N > LabelsCount ->
    State;
jump_table0(
    #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State,
    N,
    LabelsCount
) ->
    % Entry layout (?JUMP_TABLE_ENTRY_SIZE bytes): four 16-bit instructions
    % followed by a 32-bit offset word, emitted as 0 and patched later.
    I1 = jit_armv6m_asm:ldr(r3, {pc, 4}),
    I2 = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]),
    I3 = jit_armv6m_asm:add(pc, r3),
    I4 = jit_armv6m_asm:nop(),

    JumpEntry = <<I1/binary, I2/binary, I3/binary, I4/binary, 0:32/little>>,
    Stream1 = StreamModule:append(Stream0, JumpEntry),

    % The relocation targets the trailing data word so update_branches/1 can
    % patch the jump target
    DataOffset = StreamModule:offset(Stream1) - 4,
    % The add instruction is the 3rd instruction, at offset 4 from entry start
    EntryStartOffset = StreamModule:offset(Stream1) - ?JUMP_TABLE_ENTRY_SIZE,
    AddInstrOffset = EntryStartOffset + 4,
    DataReloc = {N, DataOffset, {jump_table_data, AddInstrOffset}},
    UpdatedState = State#state{stream = Stream1, branches = [DataReloc | Branches]},

    jump_table0(UpdatedState, N + 1, LabelsCount).

%%-----------------------------------------------------------------------------
%% @doc Rewrite stream to update all branches for labels.
%% @end
%% @param State current backend state
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec update_branches(state()) -> state().
update_branches(#state{branches = []} = State) ->
    State;
update_branches(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        branches = [{Label, Offset, Type} | BranchesT],
        labels = Labels
    } = State
) ->
    % Every recorded branch must refer to a known label; crash otherwise.
    {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
    Rel = LabelOffset - Offset,
    NewInstr =
        case Type of
            {adr, Reg} when Rel rem 4 =:= 0 -> jit_armv6m_asm:adr(Reg, Rel);
            {adr, Reg} when Rel rem 4 =:= 2 -> jit_armv6m_asm:adr(Reg, Rel + 2);
            {far_branch, Size, TempReg} ->
                % Check if branch can now be optimized to near branch
                if
                    Rel >= -2044 andalso Rel =< 2050 andalso (Rel rem 2) =:= 0 ->
                        % Optimize to near branch: b + nops to fill original size
                        DirectBranch = jit_armv6m_asm:b(Rel),
                        % Fill remaining bytes with NOPs
                        NopCount = (Size - 2) div 2,
                        Nops = <<<<(jit_armv6m_asm:nop())/binary>> || _ <- lists:seq(1, NopCount)>>,
                        <<DirectBranch/binary, Nops/binary>>;
                    true ->
                        % Keep far branch sequence: re-emit it with the final
                        % literal word.
                        %
                        % The literal is added to pc and the result used by bx,
                        % so it must be odd (Thumb bit set): that is why 1 less
                        % is subtracted than the pc read-ahead alone requires.
                        % ldr (literal) uses the word-aligned pc, which is why a
                        % nop pads the variants whose literal would otherwise be
                        % misaligned; add rx, pc reads pc + 4 whatever the
                        % alignment.
                        case {TempReg, Size} of
                            {?IP_REG, 18} ->
                                % 18-byte sequence (no free low register), with
                                % padding nop. push is at an unaligned offset,
                                % ldr at an aligned one.
                                I1 = jit_armv6m_asm:push([r0]),
                                I2 = jit_armv6m_asm:ldr(r0, {pc, 8}),
                                I3 = jit_armv6m_asm:mov(?IP_REG, r0),
                                I4 = jit_armv6m_asm:pop([r0]),
                                I5 = jit_armv6m_asm:add(?IP_REG, pc),
                                I6 = jit_armv6m_asm:bx(?IP_REG),
                                I7 = jit_armv6m_asm:nop(),
                                RelativeOffset = LabelOffset - Offset - 11,
                                I8 = <<RelativeOffset:32/little>>,
                                <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary, I6/binary,
                                    I7/binary, I8/binary>>;
                            {?IP_REG, 16} ->
                                % 16-byte sequence (no free low register),
                                % without padding. push is at an aligned offset,
                                % ldr at an unaligned one.
                                I1 = jit_armv6m_asm:push([r0]),
                                I2 = jit_armv6m_asm:ldr(r0, {pc, 8}),
                                I3 = jit_armv6m_asm:mov(?IP_REG, r0),
                                I4 = jit_armv6m_asm:pop([r0]),
                                I5 = jit_armv6m_asm:add(?IP_REG, pc),
                                I6 = jit_armv6m_asm:bx(?IP_REG),
                                RelativeOffset = LabelOffset - Offset - 11,
                                I7 = <<RelativeOffset:32/little>>,
                                <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary, I6/binary,
                                    I7/binary>>;
                            {_, 12} ->
                                % 12-byte sequence: ldr at an aligned offset,
                                % padding nop before the literal
                                I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}),
                                I2 = jit_armv6m_asm:add(TempReg, pc),
                                I3 = jit_armv6m_asm:bx(TempReg),
                                I4 = jit_armv6m_asm:nop(),
                                RelativeOffset = LabelOffset - Offset - 5,
                                I5 = <<RelativeOffset:32/little>>,
                                <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
                            {_, 10} ->
                                % 10-byte sequence: ldr at an unaligned offset,
                                % no padding needed
                                I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}),
                                I2 = jit_armv6m_asm:add(TempReg, pc),
                                I3 = jit_armv6m_asm:bx(TempReg),
                                RelativeOffset = LabelOffset - Offset - 5,
                                I4 = <<RelativeOffset:32/little>>,
                                <<I1/binary, I2/binary, I3/binary, I4/binary>>
                        end
                end;
            {jump_table_data, AddInstrOffset} ->
                % The data word holds the distance from the pc value seen by
                % the entry's 'add pc, r3' (its own offset + 4) to the label.
                AddPC = AddInstrOffset + 4,
                RelativeOffset = LabelOffset - AddPC,
                <<RelativeOffset:32/little>>
        end,
    Stream1 = StreamModule:replace(Stream0, Offset, NewInstr),
    update_branches(State#state{stream = Stream1, branches = BranchesT}).

%%-----------------------------------------------------------------------------
%% @doc Generate code to load a primitive function pointer into a register
%% @param Primitive index to the primitive to call
%% @param TargetReg register to load the function pointer into
%% @return Binary instruction sequence
%%-----------------------------------------------------------------------------
-spec load_primitive_ptr(non_neg_integer(), armv6m_register()) -> binary().
load_primitive_ptr(Primitive, TargetReg) ->
    case Primitive of
        N when N * 4 =< 124 ->
            % Byte offset fits the immediate form of ldr (this covers N = 0)
            jit_armv6m_asm:ldr(TargetReg, ?PRIMITIVE(N));
        N when N * 4 < 256 ->
            % Can encode N * 4 directly in movs instruction (8-bit immediate limit)
            I1 = jit_armv6m_asm:movs(TargetReg, N * 4),
            I2 = jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, TargetReg}),
            <<I1/binary, I2/binary>>;
        N ->
            % For very large primitive numbers, load N and shift left by 2 (multiply by 4)
            I1 = jit_armv6m_asm:movs(TargetReg, N),
            I2 = jit_armv6m_asm:lsls(TargetReg, TargetReg, 2),
            I3 = jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, TargetReg}),
            <<I1/binary, I2/binary, I3/binary>>
    end.

%%-----------------------------------------------------------------------------
%% @doc Emit a call (call with return) to a primitive with arguments. This
%% function converts arguments and pass them following the backend ABI
%% convention. It also saves scratch registers we need to preserve.
%% @end
%% @param State current backend state
%% @param Primitive index to the primitive to call
%% @param Args arguments to pass to the primitive
%% @return Tuple of {Updated backend state, Native register holding the result}
%%-----------------------------------------------------------------------------
-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), armv6m_register()}.
call_primitive(#state{available_regs = []} = State, Primitive, Args) ->
    % No scratch register left: let call_func_ptr/3 resolve the primitive
    call_func_ptr(State, {primitive, Primitive}, Args);
call_primitive(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [PtrReg | FreeRegs],
        used_regs = InUse
    } = State,
    Primitive,
    Args
) ->
    % Load the function pointer into the first available register, mark that
    % register as used, and let call_func_ptr/3 free it after the call.
    LoadPtr = load_primitive_ptr(Primitive, PtrReg),
    WithPtr = State#state{
        stream = StreamModule:append(Stream0, LoadPtr),
        available_regs = FreeRegs,
        used_regs = [PtrReg | InUse]
    },
    call_func_ptr(WithPtr, {free, PtrReg}, Args).

%%-----------------------------------------------------------------------------
%% @doc Emit a jump (call without return) to a primitive with arguments. This
%% function converts arguments and pass them following the backend ABI
%% convention. With five or six arguments the primitive is called with blx
%% and the prolog registers are popped afterwards; otherwise the second
%% argument must be `jit_state' and a tail call is emitted. All native
%% registers are marked free on return and the literal pool is flushed.
%% @end
%% @param State current backend state
%% @param Primitive index to the primitive to call
%% @param Args arguments to pass to the primitive
%% @return Updated backend state
%%-----------------------------------------------------------------------------
call_primitive_last(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State0,
    Primitive,
    Args
) ->
    % We need a register for the function pointer that should not be used as a
    % parameter. Since we're not returning, any scratch register will do.
    ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)),
    ArgsRegs = args_regs(Args),
    % NOTE(review): `--' is right-associative in Erlang, so this evaluates as
    % ?AVAILABLE_REGS -- (ArgsRegs -- ParamRegs). Confirm that this, rather
    % than (?AVAILABLE_REGS -- ArgsRegs) -- ParamRegs, is what is intended.
    ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs,
    [Temp | AvailableRegs1] = ScratchRegs,
    UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1,
    Stream1 = StreamModule:append(Stream0, load_primitive_ptr(Primitive, Temp)),

    State1 = State0#state{
        stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs
    },

    % The special `offset' argument stands for the stream offset right after
    % the function pointer load
    ResolveOffset = fun
        (offset) -> StreamModule:offset(Stream1);
        (Arg) -> Arg
    end,
    Args1 = lists:map(ResolveOffset, Args),

    Emitted =
        case Args1 of
            [Arg1, Arg2, Arg3, Arg4, Arg5 | Arg6L] ->
                % 5 or 6 arguments: the extra ones go on the stack and the
                % primitive is called directly, without register preservation
                StackState =
                    case Arg6L of
                        [Arg6] -> set_stack_args(State1, Arg5, Arg6);
                        [] -> set_stack_args(State1, Arg5, undefined)
                    end,
                RegsState = set_registers_args(StackState, [Arg1, Arg2, Arg3, Arg4], 8),
                CallAndReturn = [
                    % Call the function pointer directly
                    jit_armv6m_asm:blx(Temp),
                    % Deallocate the stack space used by the stack arguments
                    jit_armv6m_asm:add(sp, sp, 8),
                    % Return: pop prolog registers, the saved lr goes to pc
                    jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc])
                ],
                StreamAfter = lists:foldl(
                    fun(Instr, StreamAcc) -> StreamModule:append(StreamAcc, Instr) end,
                    RegsState#state.stream,
                    CallAndReturn
                ),
                RegsState#state{stream = StreamAfter};
            [FirstArg, jit_state | ArgsT] ->
                % For 4 or fewer args, use tail call
                ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT],
                RegsState = set_registers_args(State1, ArgsForTailCall, 0),
                tail_call_with_jit_state_registers_only(RegsState, Temp)
        end,
    % Control never comes back here, so every register is free again
    flush_literal_pool(Emitted#state{available_regs = ?AVAILABLE_REGS, used_regs = []}).

%%-----------------------------------------------------------------------------
%% @doc Tail call to address in register, restoring prolog registers including
%% jit_state in r1. Only use when target function expects jit_state as second parameter.
%% Function prolog saves: push {r1,r4,r5,r6,r7,lr}
%% @end
%% @param State current backend state
%% @param Reg register containing the target address
%% @return Updated backend state
%%-----------------------------------------------------------------------------
tail_call_with_jit_state_registers_only(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    Reg
) ->
    % Standard tail call for 4 or fewer arguments.
    % The saved lr sits in the last slot of the prolog frame (sp + 20). It is
    % moved back into lr so the target can return to our caller, and its stack
    % slot is overwritten with the target address so the final pop loads that
    % address into pc.
    % Choose temp register to avoid conflict with Reg; it is overwritten by
    % the final pop anyway.
    TempReg =
        case Reg of
            r7 -> r6;
            _ -> r7
        end,
    % Load saved LR to temp
    RestoreLRToTemp = jit_armv6m_asm:ldr(TempReg, {sp, 20}),
    % Store function pointer in the slot that will be popped into pc
    OverwriteLR = jit_armv6m_asm:str(Reg, {sp, 20}),
    % Move saved LR to LR register
    RestoreLR = jit_armv6m_asm:mov(lr, TempReg),
    % Pop prolog registers: this restores jit_state in r1 and branches to the
    % target via pc
    PopCode = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]),

    Code = <<RestoreLRToTemp/binary, OverwriteLR/binary, RestoreLR/binary, PopCode/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a return of a value if it's not equal to ctx.
%% This logic is used to break out to the scheduler, typically after signal
%% messages have been processed.
%% @end
%% @param State current backend state
%% @param Reg register to compare to (should be {free, Reg} as it's always freed)
%% @return Updated backend state
%%-----------------------------------------------------------------------------
return_if_not_equal_to_ctx(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    {free, Reg}
) ->
    % Emitted sequence:
    %   cmp Reg, ctx
    %   beq skip
    %   mov r0, Reg        (omitted when Reg is already r0)
    %   pop {r1, r4, r5, r6, r7, pc}
    % skip:
    I1 = jit_armv6m_asm:cmp(Reg, ?CTX_REG),
    I3 =
        case Reg of
            % Return value is already in r0
            r0 -> <<>>;
            % Move to r0 (return register)
            _ -> jit_armv6m_asm:mov(r0, Reg)
        end,
    I4 = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]),
    % The branch skips itself (2 bytes) plus the return sequence, hence it is
    % built after I3 and I4 whose sizes it depends on.
    I2 = jit_armv6m_asm:bcc(eq, 2 + byte_size(I3) + byte_size(I4)),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    {AvailableRegs1, UsedRegs1} = free_reg(
        AvailableRegs0, UsedRegs0, Reg
    ),
    State#state{
        stream = Stream1,
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1
    }.

%%-----------------------------------------------------------------------------
%% @doc Emit a jump to a label. If the label is not known yet, the offset of
%% the relocation is saved and will be updated with `update_branches/1'.
%% The literal pool is flushed after the jump.
%% @end
%% @param State current backend state
%% @param Label to jump to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_label(
    #state{stream_module = StreamModule, stream = Stream0, labels = Labels} = State0, Label
) ->
    LabelLookupResult = lists:keyfind(Label, 1, Labels),
    Offset = StreamModule:offset(Stream0),
    {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult),
    Stream1 = StreamModule:append(Stream0, CodeBlock),
    State2 = State1#state{stream = Stream1},
    flush_literal_pool(State2).

%%-----------------------------------------------------------------------------
%% @doc Emit a jump to a known offset in the stream, then flush the literal
%% pool.
%% @end
%% @param State current backend state
%% @param TargetOffset offset in the stream to jump to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) ->
    Offset = StreamModule:offset(Stream0),
    CodeBlock = branch_to_offset_code(State, Offset, TargetOffset),
    Stream1 = StreamModule:append(Stream0, CodeBlock),
    State2 = State#state{stream = Stream1},
    flush_literal_pool(State2).

%%-----------------------------------------------------------------------------
%% @doc Jump to address in continuation pointer register
%% The continuation points to a function prologue, so we need to compute
%% the target address using PIC and use function epilogue to jump.
%% @end
%% @param State current backend state
%% @param {free, OffsetReg} register containing the offset value
%% @return Updated backend state
%%-----------------------------------------------------------------------------
jump_to_continuation(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        offset = BaseOffset
    } = State0,
    {free, OffsetReg}
) ->
    % ARM v6-M PIC implementation using one temp register:
    % 1. Use ADR to get a pc-derived address into the temp register
    % 2. Add it to OffsetReg to get an intermediate value
    % 3. Load the (assembly-time) correction immediate into temp
    % 4. Add the correction to get the final target address
    % 5. Use function epilogue pattern to jump

    AdrOffset = StreamModule:offset(Stream0),
    % NOTE(review): the correction computed below assumes that this ADR leaves
    % the word-aligned pc (instruction offset + 4, rounded down to a multiple
    % of 4) in Temp - confirm against jit_armv6m_asm:adr/2.
    I1 = jit_armv6m_asm:adr(Temp, 4),

    % Add PC to OffsetReg: OffsetReg = OffsetReg + PC
    I2 = jit_armv6m_asm:adds(OffsetReg, OffsetReg, Temp),

    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),

    % PC is aligned down to 4-byte boundary
    AdrPC = (AdrOffset + 4) band (bnot 3),

    % Calculate what we need to add: BaseOffset - AdrPC + 1 for thumb bit
    ImmediateValue = BaseOffset - AdrPC + 1,

    % Generate mov_immediate to load the calculated correction into Temp
    State1 = mov_immediate(State0#state{stream = Stream1}, Temp, ImmediateValue),

    % Add the correction to get the final target address
    I3 = jit_armv6m_asm:adds(OffsetReg, OffsetReg, Temp),

    % Function epilogue pattern:
    % Load saved LR to temp register (LR is at sp+20)
    I4 = jit_armv6m_asm:ldr(Temp, {sp, 20}),
    % Store target address to LR position on stack
    I5 = jit_armv6m_asm:str(OffsetReg, {sp, 20}),
    % Move saved LR to LR register
    I6 = jit_armv6m_asm:mov(lr, Temp),
    % Pop prolog registers: the slot that held lr now holds the target address
    % and is popped into pc. This also restores jit_state in r1.
    I7 = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]),

    Code = <<I3/binary, I4/binary, I5/binary, I6/binary, I7/binary>>,
    Stream2 = StreamModule:append(State1#state.stream, Code),
    % Free all registers as this is a terminal instruction
    State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []},
    flush_literal_pool(State2).

%% Build the code for a branch from Offset to the known TargetOffset: a single
%% `b' when the target is in range, otherwise a pc-relative load/add/bx
%% sequence that needs one available register.
branch_to_offset_code(_State, Offset, TargetOffset) when
    TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044
->
    % Near branch: use direct B instruction
    Rel = TargetOffset - Offset,
    jit_armv6m_asm:b(Rel);
branch_to_offset_code(
    #state{available_regs = [TempReg | _]}, Offset, TargetOffset
) ->
    % Far branch: use register-based sequence, need temporary register.
    % The literal is added to pc (offset of the add + 4) and must be odd for
    % bx to stay in Thumb mode, hence TargetOffset - Offset - 5.
    if
        Offset rem 4 =:= 0 ->
            % Sequence starts at an aligned offset: a nop pads the literal
            I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}),
            I2 = jit_armv6m_asm:add(TempReg, pc),
            I3 = jit_armv6m_asm:bx(TempReg),
            I4 = jit_armv6m_asm:nop(),
            LiteralValue = TargetOffset - Offset - 5,
            I5 = <<LiteralValue:32/little>>,
            <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
        true ->
            % Sequence starts at an unaligned offset: no padding needed
            I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}),
            I2 = jit_armv6m_asm:add(TempReg, pc),
            I3 = jit_armv6m_asm:bx(TempReg),
            LiteralValue = TargetOffset - Offset - 5,
            I4 = <<LiteralValue:32/little>>,
            <<I1/binary, I2/binary, I3/binary, I4/binary>>
    end.

%% Build the code for a branch from Offset to Label. The last argument is the
%% result of looking the label up in the known labels. When the label is not
%% known yet, a far-branch sequence with a zero literal is emitted and a
%% `far_branch' relocation is recorded for update_branches/1.
%% Returns {UpdatedState, Code}.
branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) ->
    CodeBlock = branch_to_offset_code(State, Offset, LabelOffset),
    {State, CodeBlock};
branch_to_label_code(
    #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false
) ->
    {CodeBlock, SequenceSize} =
        if
            Offset rem 4 =:= 0 ->
                % Sequence starts at an aligned offset: a nop pads the literal
                I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}),
                I2 = jit_armv6m_asm:add(TempReg, pc),
                I3 = jit_armv6m_asm:bx(TempReg),
                I4 = jit_armv6m_asm:nop(),
                % Placeholder offset
                I5 = <<0:32/little>>,
                Seq = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>,
                {Seq, byte_size(Seq)};
            true ->
                % Sequence starts at an unaligned offset: no padding needed
                I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}),
                I2 = jit_armv6m_asm:add(TempReg, pc),
                I3 = jit_armv6m_asm:bx(TempReg),
                % Placeholder offset
                I4 = <<0:32/little>>,
                Seq = <<I1/binary, I2/binary, I3/binary, I4/binary>>,
                {Seq, byte_size(Seq)}
        end,
    % Add relocation entry
    Reloc = {Label, Offset, {far_branch, SequenceSize, TempReg}},
    State1 = State0#state{branches = [Reloc | Branches]},
    {State1, CodeBlock};
branch_to_label_code(
    #state{available_regs = [], branches = Branches} = State0, Offset, Label, false
) ->
    % No register is available: go through ?IP_REG, which is loaded via r0
    % (saved and restored around the load).
    {CodeBlock, SequenceSize} =
        if
            Offset rem 4 =/= 0 ->
                % push is at an unaligned offset, so ldr is aligned and a nop
                % pads the literal
                I1 = jit_armv6m_asm:push([r0]),
                I2 = jit_armv6m_asm:ldr(r0, {pc, 8}),
                I3 = jit_armv6m_asm:mov(?IP_REG, r0),
                I4 = jit_armv6m_asm:pop([r0]),
                I5 = jit_armv6m_asm:add(?IP_REG, pc),
                I6 = jit_armv6m_asm:bx(?IP_REG),
                I7 = jit_armv6m_asm:nop(),
                % Placeholder offset
                I8 = <<0:32/little>>,
                Seq =
                    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary, I6/binary,
                        I7/binary, I8/binary>>,
                {Seq, byte_size(Seq)};
            true ->
                % push is at an aligned offset, so ldr is unaligned and no
                % padding is needed
                I1 = jit_armv6m_asm:push([r0]),
                I2 = jit_armv6m_asm:ldr(r0, {pc, 8}),
                I3 = jit_armv6m_asm:mov(?IP_REG, r0),
                I4 = jit_armv6m_asm:pop([r0]),
                I5 = jit_armv6m_asm:add(?IP_REG, pc),
                I6 = jit_armv6m_asm:bx(?IP_REG),
                % Placeholder offset
                I7 = <<0:32/little>>,
                Seq =
                    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary, I6/binary,
                        I7/binary>>,
                {Seq, byte_size(Seq)}
        end,
    % Add relocation entry
    Reloc = {Label, Offset, {far_branch, SequenceSize, ?IP_REG}},
    State1 = State0#state{branches = [Reloc | Branches]},
    {State1, CodeBlock};
branch_to_label_code(#state{available_regs = []}, _Offset, _Label, LabelLookup) ->
    error({no_available_registers, LabelLookup}).

%%-----------------------------------------------------------------------------
%% @doc Emit an if block, i.e. emit a test of a condition and conditionally
%% execute a block.
%% @end
%% @param State current backend state
%% @param Cond condition to test
%% @param BlockFn function to emit the block that may be executed
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state().
if_block(#state{stream_module = StreamModule} = State0, {'and', CondList}, BlockFn) ->
    % Emit every test in order. For each one, remember the absolute offset of
    % its placeholder conditional branch and the condition code to re-encode.
    EmitCond = fun(Cond, {Pending, StateAcc}) ->
        Start = StreamModule:offset(StateAcc#state.stream),
        {StateNext, CC, Delta} = if_block_cond(StateAcc, Cond),
        {[{Start + Delta, CC} | Pending], StateNext}
    end,
    {PendingBranches, StateCond} = lists:foldl(EmitCond, {[], State0}, CondList),
    StateBody = BlockFn(StateCond),
    BodyStream = StateBody#state.stream,
    EndOffset = StreamModule:offset(BodyStream),
    % Every placeholder now branches to the first byte after the block
    PatchBranch = fun({BranchAt, CC}, StreamAcc) ->
        Branch = jit_armv6m_asm:bcc(CC, EndOffset - BranchAt),
        StreamModule:replace(StreamAcc, BranchAt, Branch)
    end,
    PatchedStream = lists:foldl(PatchBranch, BodyStream, PendingBranches),
    merge_used_regs(StateBody#state{stream = PatchedStream}, StateCond#state.used_regs);
if_block(#state{stream_module = StreamModule, stream = Stream0} = State0, Cond, BlockFn) ->
    Start = StreamModule:offset(Stream0),
    {StateCond, CC, Delta} = if_block_cond(State0, Cond),
    BranchAt = Start + Delta,
    StateBody = BlockFn(StateCond),
    BodyStream = StateBody#state.stream,
    EndOffset = StreamModule:offset(BodyStream),
    %% Patch the conditional branch instruction to jump to the end of the block
    Branch = jit_armv6m_asm:bcc(CC, EndOffset - BranchAt),
    PatchedStream = StreamModule:replace(BodyStream, BranchAt, Branch),
    merge_used_regs(StateBody#state{stream = PatchedStream}, StateCond#state.used_regs).

%%-----------------------------------------------------------------------------
%% @doc Emit an if else block, i.e. emit a test of a condition and
%% conditionally execute a block or another block.
%% @end
%% @param State current backend state
%% @param Cond condition to test
%% @param BlockTrueFn function to emit the block that is executed if condition is true
%% @param BlockFalseFn function to emit the block that is executed if condition is false
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) ->
    state().
if_else_block(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    Cond,
    BlockTrueFn,
    BlockFalseFn
) ->
    Start = StreamModule:offset(Stream0),
    {StateCond, CC, Delta} = if_block_cond(State0, Cond),
    CondBranchAt = Start + Delta,
    StateTrue = BlockTrueFn(StateCond),
    TrueStream = StateTrue#state.stream,
    %% The true block ends with an unconditional branch over the else block;
    %% its displacement is only known once the else block has been emitted.
    SkipElseAt = StreamModule:offset(TrueStream),
    StreamWithSkip = StreamModule:append(TrueStream, jit_armv6m_asm:b(0)),
    %% The else block starts right after that branch: point the conditional
    %% branch emitted by the test at it.
    ElseStart = StreamModule:offset(StreamWithSkip),
    CondBranch = jit_armv6m_asm:bcc(CC, ElseStart - CondBranchAt),
    StreamCondPatched = StreamModule:replace(StreamWithSkip, CondBranchAt, CondBranch),
    %% The else block starts from the register allocation that was in effect
    %% right after the test, not from the one left by the true block.
    StateElse0 = StateTrue#state{
        stream = StreamCondPatched,
        used_regs = StateCond#state.used_regs,
        available_regs = StateCond#state.available_regs
    },
    StateElse = BlockFalseFn(StateElse0),
    ElseStream = StateElse#state.stream,
    EndOffset = StreamModule:offset(ElseStream),
    %% Patch the unconditional branch to jump to the end
    SkipElse = jit_armv6m_asm:b(EndOffset - SkipElseAt),
    FinalStream = StreamModule:replace(ElseStream, SkipElseAt, SkipElse),
    merge_used_regs(StateElse#state{stream = FinalStream}, StateTrue#state.used_regs).

%%-----------------------------------------------------------------------------
%% Emit the test for one `if' condition followed by a placeholder conditional
%% branch (displacement 0) that the caller patches once the end of the guarded
%% block is known. The branch is taken when the condition is FALSE, i.e. it
%% skips the block.
%%
%% Returns {State, CC, BranchDelta} where CC is the condition code of the
%% placeholder branch and BranchDelta is the distance in bytes from the stream
%% offset on entry to the placeholder branch. Clauses that first load an
%% immediate or mask a register include those extra bytes in BranchDelta.
%%
%% Operands written as {free, Reg} are released through if_block_free_reg/2.
%%-----------------------------------------------------------------------------
-spec if_block_cond(state(), condition()) ->
    {
        state(),
        jit_armv6m_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()},
        non_neg_integer()
    }.
if_block_cond(#state{} = State0, {Reg, '<', 0}) ->
    %% Compare register with 0; skip the block if positive (N flag clear)
    if_block_cond_emit(State0, jit_armv6m_asm:cmp(Reg, 0), pl);
if_block_cond(#state{} = State0, {Reg, '<', Val}) when
    is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255
->
    % ge = greater than or equal
    if_block_cond_emit(State0, jit_armv6m_asm:cmp(Reg, Val), ge);
if_block_cond(#state{available_regs = [Temp | _]} = State0, {Reg, '<', Val}) when
    is_atom(Reg), is_integer(Val)
->
    % The immediate is first loaded into Temp. The branch offset reported to
    % the caller must include the size of that load, exactly like the '==' and
    % '!=' clauses that go through a temporary register.
    if_block_cond_immediate(State0, Reg, Temp, Val, ge);
if_block_cond(#state{} = State0, {RegOrTuple, '<', RegB}) when is_atom(RegB) ->
    Test = jit_armv6m_asm:cmp(if_block_cond_reg(RegOrTuple), RegB),
    % ge = greater than or equal
    if_block_cond_emit(if_block_free_reg(RegOrTuple, State0), Test, ge);
if_block_cond(#state{} = State0, {RegOrTuple, '==', 0}) ->
    %% Compare register with 0; skip the block if not equal
    Test = jit_armv6m_asm:cmp(if_block_cond_reg(RegOrTuple), 0),
    if_block_cond_emit(if_block_free_reg(RegOrTuple, State0), Test, ne);
%% Delegate (int) forms to regular forms since we only have 32-bit words
if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) ->
    if_block_cond(State, {RegOrTuple, '==', 0});
if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '==', Val});
if_block_cond(#state{} = State0, {RegOrTuple, '!=', Val}) when
    (is_integer(Val) andalso Val >= 0 andalso Val =< 255) orelse ?IS_GPR(Val)
->
    Test = jit_armv6m_asm:cmp(if_block_cond_reg(RegOrTuple), Val),
    if_block_cond_emit(if_block_free_reg(RegOrTuple, State0), Test, eq);
if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '!=', Val});
if_block_cond(#state{} = State0, {RegOrTuple, '==', Val}) when
    is_integer(Val) andalso Val >= 0 andalso Val =< 255
->
    Test = jit_armv6m_asm:cmp(if_block_cond_reg(RegOrTuple), Val),
    if_block_cond_emit(if_block_free_reg(RegOrTuple, State0), Test, ne);
if_block_cond(#state{} = State0, {{free, RegA}, '==', {free, RegB}}) ->
    % Compare two free registers: cmp RegA, RegB; bne (skip when different)
    {State1, CC, Delta} = if_block_cond_emit(State0, jit_armv6m_asm:cmp(RegA, RegB), ne),
    State2 = if_block_free_reg({free, RegA}, State1),
    State3 = if_block_free_reg({free, RegB}, State2),
    {State3, CC, Delta};
if_block_cond(#state{available_regs = [Temp | _]} = State0, {RegOrTuple, '==', Val}) when
    is_integer(Val)
->
    Reg = if_block_cond_reg(RegOrTuple),
    {State1, CC, Delta} = if_block_cond_immediate(State0, Reg, Temp, Val, ne),
    {if_block_free_reg(RegOrTuple, State1), CC, Delta};
if_block_cond(#state{available_regs = [Temp | _]} = State0, {RegOrTuple, '!=', Val}) when
    is_integer(Val)
->
    Reg = if_block_cond_reg(RegOrTuple),
    {State1, CC, Delta} = if_block_cond_immediate(State0, Reg, Temp, Val, eq),
    {if_block_free_reg(RegOrTuple, State1), CC, Delta};
if_block_cond(
    #state{available_regs = [Temp | _]} = State0, {'(bool)', RegOrTuple, '==', false}
) ->
    % Shift bit 0 into the sign bit; skip the block if negative (bit was 1/true)
    Test = jit_armv6m_asm:lsls(Temp, if_block_cond_reg(RegOrTuple), 31),
    if_block_cond_emit(if_block_free_reg(RegOrTuple, State0), Test, mi);
if_block_cond(
    #state{available_regs = [Temp | _]} = State0, {'(bool)', RegOrTuple, '!=', false}
) ->
    % Shift bit 0 into the sign bit; skip the block if positive (bit was 0/false)
    Test = jit_armv6m_asm:lsls(Temp, if_block_cond_reg(RegOrTuple), 31),
    if_block_cond_emit(if_block_free_reg(RegOrTuple, State0), Test, pl);
if_block_cond(
    #state{available_regs = [Temp | _]} = State0, {RegOrTuple, '&', Val, '!=', 0}
) ->
    Reg = if_block_cond_reg(RegOrTuple),
    % Test bits - optimize for low bits masks that can use lsls
    Test =
        case bit_test_optimization(Val) of
            {low_bits_mask, BitCount} ->
                % Low bits mask: use lsls to shift high bits away
                jit_armv6m_asm:lsls(Temp, Reg, 32 - BitCount);
            no_optimization ->
                % General case: use mov+tst
                Movs = jit_armv6m_asm:movs(Temp, Val),
                Tst = jit_armv6m_asm:tst(Reg, Temp),
                <<Movs/binary, Tst/binary>>
        end,
    % Skip the block if zero (no tested bit was set)
    if_block_cond_emit(if_block_free_reg(RegOrTuple, State0), Test, eq);
if_block_cond(
    #state{available_regs = [Temp | _]} = State0, {Reg, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    % Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG:
    % invert, then shift the four low bits to the top (28 = 32 - 4); the result
    % is zero exactly when all four low bits of Reg were set.
    Mvns = jit_armv6m_asm:mvns(Temp, Reg),
    Lsls = jit_armv6m_asm:lsls(Temp, Temp, 28),
    if_block_cond_emit(State0, <<Mvns/binary, Lsls/binary>>, eq);
if_block_cond(#state{} = State0, {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F}) when
    ?IS_GPR(Reg)
->
    % Same special case, computed in place since Reg is released afterwards
    Mvns = jit_armv6m_asm:mvns(Reg, Reg),
    Lsls = jit_armv6m_asm:lsls(Reg, Reg, 28),
    {State1, CC, Delta} = if_block_cond_emit(State0, <<Mvns/binary, Lsls/binary>>, eq),
    {if_block_free_reg(RegTuple, State1), CC, Delta};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | AT]
    } = State0,
    {Reg, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg) ->
    OffsetBefore = StreamModule:offset(Stream0),
    % Work on a copy so Reg is preserved; Temp is withheld from and_/3
    Mov = jit_armv6m_asm:mov(Temp, Reg),
    State1 = State0#state{stream = StreamModule:append(Stream0, Mov), available_regs = AT},
    % AND with mask
    State2 = and_(State1, Temp, Mask),
    OffsetTest = StreamModule:offset(State2#state.stream),
    % Compare with value
    {State3, CC, CmpSize} = if_block_cond_emit(State2, jit_armv6m_asm:cmp(Temp, Val), eq),
    % Temp is handed back to the available registers
    State4 = State3#state{available_regs = [Temp | State2#state.available_regs]},
    {State4, CC, OffsetTest - OffsetBefore + CmpSize};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {{free, Reg} = RegTuple, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg) ->
    OffsetBefore = StreamModule:offset(Stream0),
    % AND with mask, in place since Reg is released afterwards
    State1 = and_(State0, Reg, Mask),
    OffsetTest = StreamModule:offset(State1#state.stream),
    % Compare with value
    {State2, CC, CmpSize} = if_block_cond_emit(State1, jit_armv6m_asm:cmp(Reg, Val), eq),
    {if_block_free_reg(RegTuple, State2), CC, OffsetTest - OffsetBefore + CmpSize}.

%% Append TestCode followed by a placeholder `bcc CC, 0' to the stream.
%% The returned delta is the size of TestCode, i.e. the position of the
%% placeholder relative to the stream offset on entry.
if_block_cond_emit(#state{stream_module = StreamModule, stream = Stream0} = State, TestCode, CC) ->
    Placeholder = jit_armv6m_asm:bcc(CC, 0),
    Stream1 = StreamModule:append(Stream0, <<TestCode/binary, Placeholder/binary>>),
    {State#state{stream = Stream1}, CC, byte_size(TestCode)}.

%% Load Val into Temp with mov_immediate/3, then emit `cmp Reg, Temp' and the
%% placeholder branch. The returned delta counts the bytes emitted for the
%% immediate load in addition to the cmp instruction.
if_block_cond_immediate(
    #state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Temp, Val, CC
) ->
    Offset0 = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Offset1 = StreamModule:offset(State1#state.stream),
    {State2, CC, CmpSize} = if_block_cond_emit(State1, jit_armv6m_asm:cmp(Reg, Temp), CC),
    {State2, CC, Offset1 - Offset0 + CmpSize}.

%% Register named by a condition operand, with or without the `free' marker.
if_block_cond_reg({free, Reg}) -> Reg;
if_block_cond_reg(Reg) -> Reg.

%% Release the register of a {free, Reg} operand through free_reg/3; a plain
%% general purpose register leaves the state untouched.
-spec if_block_free_reg(armv6m_register() | {free, armv6m_register()}, state()) -> state().
if_block_free_reg({free, Reg}, State0) ->
    #state{available_regs = AvR0, used_regs = UR0} = State0,
    {AvR1, UR1} = free_reg(AvR0, UR0, Reg),
    State0#state{
        available_regs = AvR1,
        used_regs = UR1
    };
if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) ->
    State0.

%% Helper function to determine if a bit test can be optimized using lsls.
%% Returns {low_bits_mask, N} for the recognised masks made of the N lowest
%% bits, no_optimization for anything else.
-spec bit_test_optimization(non_neg_integer()) ->
    {low_bits_mask, non_neg_integer()} | no_optimization.
% ?TERM_PRIMARY_MASK
bit_test_optimization(16#3) -> {low_bits_mask, 2};
% Three lowest bits
bit_test_optimization(16#7) -> {low_bits_mask, 3};
% ?TERM_IMMED_TAG_MASK
bit_test_optimization(16#F) -> {low_bits_mask, 4};
% ?TERM_BOXED_TAG_MASK or ?TERM_IMMED2_TAG_MASK
bit_test_optimization(16#3F) -> {low_bits_mask, 6};
bit_test_optimization(_) -> no_optimization.

-spec merge_used_regs(state(), [armv6m_register()]) -> state().
%% Mark every register of the list as used in State: a register that is not
%% yet in used_regs is prepended to it and removed from available_regs.
merge_used_regs(State0, Regs) ->
    lists:foldl(
        fun(Reg, #state{used_regs = Used, available_regs = Available} = StateAcc) ->
            case lists:member(Reg, Used) of
                true ->
                    StateAcc;
                false ->
                    StateAcc#state{
                        used_regs = [Reg | Used],
                        available_regs = lists:delete(Reg, Available)
                    }
            end
        end,
        State0,
        Regs
    ).

%%-----------------------------------------------------------------------------
%% @doc Emit a shift register right by a fixed number of bits, effectively
%% dividing it by 2^Shift. A `{free, Reg}' operand is shifted in place;
%% otherwise the result goes to the first available register, which becomes
%% used.
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return tuple of the new state and the register holding the result
%%-----------------------------------------------------------------------------
-spec shift_right(#state{}, maybe_free_armv6m_register(), non_neg_integer()) ->
    {#state{}, armv6m_register()}.
shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    Lsrs = jit_armv6m_asm:lsrs(Reg, Reg, Shift),
    {State#state{stream = StreamModule:append(Stream0, Lsrs)}, Reg};
shift_right(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [ResultReg | RemainingRegs],
        used_regs = UsedRegs
    } = State,
    Reg,
    Shift
) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    Lsrs = jit_armv6m_asm:lsrs(ResultReg, Reg, Shift),
    NewState = State#state{
        stream = StreamModule:append(Stream0, Lsrs),
        available_regs = RemainingRegs,
        used_regs = [ResultReg | UsedRegs]
    },
    {NewState, ResultReg}.

%%-----------------------------------------------------------------------------
%% @doc Emit a shift register left by a fixed number of bits, effectively
%% multiplying it by 2^Shift. The register is shifted in place.
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return new state
%%-----------------------------------------------------------------------------
shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when
    is_atom(Reg)
->
    Lsls = jit_armv6m_asm:lsls(Reg, Reg, Shift),
    State#state{stream = StreamModule:append(Stream0, Lsls)}.

%%-----------------------------------------------------------------------------
%% @doc Emit a call to a function pointer with arguments. This function converts
%% arguments and passes them following the backend ABI convention.
%% @end
%% @param State current backend state
%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex}
%% @param Args arguments to pass to the function
%% @return Updated backend state and return register
%%-----------------------------------------------------------------------------
-spec call_func_ptr(state(), {free, armv6m_register()} | {primitive, non_neg_integer()}, [arg()]) ->
    {state(), armv6m_register()}.
call_func_ptr(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State0,
    FuncPtrTuple,
    Args
) ->
    % Registers handed over as {free, _} do not need to survive the call
    FreeRegs = lists:flatmap(
        fun
            ({free, {ptr, Reg}}) -> [Reg];
            ({free, Reg}) when is_atom(Reg) -> [Reg];
            (_) -> []
        end,
        [FuncPtrTuple | Args]
    ),
    UsedRegs1 = UsedRegs0 -- FreeRegs,
    SavedRegsBase = [?CTX_REG, ?NATIVE_INTERFACE_REG | UsedRegs1],

    % Calculate available registers for potential padding: the freed registers
    % that belong to ?AVAILABLE_REGS, then the registers already available
    FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS),
    AvailableRegs1 = FreeGPRegs ++ AvailableRegs0,

    % Add padding register if odd number to maintain 8-byte stack alignment per ARM AAPCS
    SavedRegs =
        case (length(SavedRegsBase) rem 2) =:= 1 of
            true when AvailableRegs1 /= [] ->
                [PaddingReg | _] = AvailableRegs1,
                SavedRegsBase ++ [PaddingReg];
            _ ->
                PaddingReg = undefined,
                SavedRegsBase
        end,

    Stream1 = push_registers(SavedRegs, StreamModule, Stream0),

    % Set up arguments following ARM AAPCS calling convention
    % First four args are passed in r0-r3, but 5th and 6th are passed
    % on the stack.
    % The `offset' pseudo-argument is replaced by the current stream offset.
    Args1 = lists:map(
        fun(Arg) ->
            case Arg of
                offset -> StreamModule:offset(Stream1);
                _ -> Arg
            end
        end,
        Args
    ),
    {RegArgs0, StackArgs} =
        case Args1 of
            [Arg1, Arg2, Arg3, Arg4 | StackArgs0] -> {[Arg1, Arg2, Arg3, Arg4], StackArgs0};
            % Fewer than four arguments: keep the substituted list, otherwise
            % an `offset' argument would reach register setup unresolved
            _ -> {Args1, []}
        end,
    RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0),
    StackArgsRegs = lists:flatmap(fun arg_to_reg_list/1, StackArgs),

    % We pushed registers to stack, so we can use these registers we saved
    % and the currently available registers to push values to the stack.
    SetArgsPushStackAvailableArgs = (UsedRegs1 -- (RegArgsRegs ++ StackArgsRegs)) ++ AvailableRegs0,
    State1 = State0#state{
        available_regs = SetArgsPushStackAvailableArgs,
        used_regs = ?AVAILABLE_REGS -- SetArgsPushStackAvailableArgs,
        stream = Stream1
    },
    State2 =
        case StackArgs of
            [] -> State1;
            [Arg5] -> set_stack_args(State1, Arg5, undefined);
            [Arg5, Arg6] -> set_stack_args(State1, Arg5, Arg6)
        end,

    SetArgsRegsOnlyAvailableArgs = State2#state.available_regs,
    ParameterRegs = parameter_regs(RegArgs0),
    {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} =
        case FuncPtrTuple of
            {free, FuncPtrReg0} ->
                % If FuncPtrReg is in parameter regs, we must swap it with a free reg.
                case lists:member(FuncPtrReg0, ParameterRegs) of
                    true ->
                        case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of
                            [] ->
                                % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0
                                % that is not in ParameterRegs
                                [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs,
                                [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs,
                                MovInstr1 = jit_armv6m_asm:mov(NewArgReg, FuncPtrReg1),
                                MovInstr2 = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0),
                                SetArgsAvailableArgs1 =
                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
                                        [FuncPtrReg0],
                                RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg),
                                {
                                    StreamModule:append(
                                        State2#state.stream,
                                        <<MovInstr1/binary, MovInstr2/binary>>
                                    ),
                                    SetArgsAvailableArgs1,
                                    FuncPtrReg1,
                                    RegArgs1
                                };
                            [FuncPtrReg1 | _] ->
                                MovInstr = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0),
                                SetArgsAvailableArgs1 =
                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
                                        [FuncPtrReg0],
                                {
                                    StreamModule:append(State2#state.stream, MovInstr),
                                    SetArgsAvailableArgs1,
                                    FuncPtrReg1,
                                    RegArgs0
                                }
                        end;
                    false ->
                        SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
                        {State2#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0}
                end;
            {primitive, Primitive} ->
                % Load the primitive pointer into a register that is not
                % needed for a parameter
                [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs,
                SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
                PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0),
                Stream2 = StreamModule:append(State2#state.stream, PrepCall),
                {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0}
        end,

    State3 = State2#state{
        available_regs = SetArgsAvailableRegs,
        used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs,
        stream = Stream3
    },

    % Bytes pushed on the stack so far: saved registers, plus the 8 bytes
    % reserved by set_stack_args/3 when there are stack arguments
    StackOffset =
        case StackArgs of
            [] -> length(SavedRegs) * 4;
            _ -> length(SavedRegs) * 4 + 8
        end,
    State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset),
    Stream4 = State4#state.stream,

    % Call the function pointer (using BLX for call with return)
    Call = jit_armv6m_asm:blx(FuncPtrReg),
    Stream5 = StreamModule:append(Stream4, Call),

    % For result, we need a free register (including FuncPtrReg) but ideally
    % not the one used for padding. If none are available (all 8 registers
    % were pushed to the stack), we write the result to the stack position
    % of FuncPtrReg
    {Stream6, UsedRegs2} =
        case length(SavedRegs) of
            8 when element(1, FuncPtrTuple) =:= free ->
                % We use original FuncPtrReg then as we know it's available.
                % Calculate stack offset: register number * 4 bytes
                ResultReg = element(2, FuncPtrTuple),
                StoreResultStackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4,
                StoreResult = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset}),
                {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]};
            8 when PaddingReg =/= undefined ->
                % We use PaddingReg then as we know it's available.
                % Calculate stack offset: register number * 4 bytes
                ResultReg = PaddingReg,
                StoreResultStackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4,
                StoreResult = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset}),
                {StreamModule:append(Stream5, StoreResult), [PaddingReg | UsedRegs1]};
            _ ->
                % Use any free that is not in SavedRegs
                [ResultReg | _] = AvailableRegs1 -- SavedRegs,
                MoveResult = jit_armv6m_asm:mov(ResultReg, r0),
                {StreamModule:append(Stream5, MoveResult), [ResultReg | UsedRegs1]}
        end,

    % Deallocate stack space if we allocated it for 5+ arguments
    Stream7 =
        case length(Args) >= 5 of
            true ->
                DeallocateArgs = jit_armv6m_asm:add(sp, 8),
                StreamModule:append(Stream6, DeallocateArgs);
            false ->
                Stream6
        end,

    Stream8 = pop_registers(lists:reverse(SavedRegs), StreamModule, Stream7),

    AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1),
    % Keep only members of ?AVAILABLE_REGS, in the order of that list
    AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2),
    {
        State4#state{
            stream = Stream8,
            available_regs = AvailableRegs3,
            used_regs = UsedRegs2
        },
        ResultReg
    }.

%% Native register referenced by an argument, as a list of zero or one element.
arg_to_reg_list({free, {ptr, Reg}}) -> [Reg];
arg_to_reg_list({free, Reg}) when is_atom(Reg) -> [Reg];
arg_to_reg_list(Reg) when is_atom(Reg) -> [Reg];
arg_to_reg_list(_) -> [].

%% Append a push of SavedRegs to the stream; nothing is emitted for [].
push_registers([_ | _] = SavedRegs, StreamModule, Stream0) ->
    StreamModule:append(Stream0, jit_armv6m_asm:push(SavedRegs));
push_registers([], _StreamModule, Stream0) ->
    Stream0.

%% Append a pop of SavedRegs to the stream; nothing is emitted for [].
pop_registers([_ | _] = SavedRegs, StreamModule, Stream0) ->
    StreamModule:append(Stream0, jit_armv6m_asm:pop(SavedRegs));
pop_registers([], _StreamModule, Stream0) ->
    Stream0.

%% @doc Handle 5th and optionally 6th arguments on stack.
%% For 5 args: push 5th arg at sp+0 with 4-byte padding at sp+4 for 8-byte alignment
%% For 6 args: push 5th arg at sp+0, 6th arg at sp+4 (2×4 bytes = 8-byte aligned, no padding)
set_stack_args(
    #state{stream_module = StreamModule, stream = Stream0} = State0, Arg5, Arg6
) ->
    % Both slots are reserved at once by lowering sp by 8 bytes
    Reserve = jit_armv6m_asm:sub(sp, sp, 8),
    State1 = State0#state{stream = StreamModule:append(Stream0, Reserve)},
    % The 6th argument, when there is one, is stored first, at sp+4
    State2 =
        case Arg6 of
            undefined -> State1;
            _ -> store_stack_arg(State1, Arg6, 4)
        end,
    % The 5th argument is always present and always goes at sp+0
    store_stack_arg(State2, Arg5, 0).

%% Store one stack argument at sp+SpOffset and release the register it used.
%% A {free, Reg} argument with an atom Reg is already in a native register and
%% is stored directly; anything else ({free, _} wrappers are unwrapped) is
%% first brought into a native register with move_to_native_register/2.
store_stack_arg(
    #state{stream_module = StreamModule, stream = Stream0} = State0, {free, Reg}, SpOffset
) when is_atom(Reg) ->
    Str = jit_armv6m_asm:str(Reg, {sp, SpOffset}),
    free_native_register(State0#state{stream = StreamModule:append(Stream0, Str)}, Reg);
store_stack_arg(#state{stream_module = StreamModule} = State0, Arg, SpOffset) ->
    Value =
        case Arg of
            {free, Inner} -> Inner;
            Plain -> Plain
        end,
    {State1, Reg} = move_to_native_register(State0, Value),
    Str = jit_armv6m_asm:str(Reg, {sp, SpOffset}),
    Stream2 = StreamModule:append(State1#state.stream, Str),
    free_native_register(State1#state{stream = Stream2}, Reg).

%% Same as set_registers_args/4, with the parameter registers derived from Args.
set_registers_args(State0, Args, StackOffset) ->
    set_registers_args(State0, Args, parameter_regs(Args), StackOffset).

%% Emit the moves that place Args into ParamRegs, then update the register
%% bookkeeping: registers passed as {free, _} stop being used and the
%% parameter registers become used.
set_registers_args(
    #state{used_regs = UsedRegs} = State0,
    Args,
    ParamRegs,
    StackOffset
) ->
    ArgsRegs = args_regs(Args),
    % Scratch registers that are neither a destination, a source, nor in use
    AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs,
    State1 = set_registers_args0(
        State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset
    ),
    FreedRegs = [
        case FreeArg of
            {free, {ptr, Reg}} -> Reg;
            {free, Reg} -> Reg
        end
     || {free, _} = FreeArg <- Args
    ],
    NewUsedRegs = UsedRegs -- FreedRegs,
    % NOTE(review): `--' is right-associative, so the available_regs expression
    % below evaluates as ?AVAILABLE_REGS -- (ParamRegs -- NewUsedRegs); confirm
    % this is the intended grouping.
    State1#state{
        available_regs = ?AVAILABLE_REGS -- ParamRegs -- NewUsedRegs,
        used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs)
    }.

%% Parameter registers assigned to Args, taken in order from ?PARAMETER_REGS.
parameter_regs(Args) ->
    parameter_regs0(Args, ?PARAMETER_REGS, []).

% AAPCS32: 64-bit arguments require double-word alignment (even register number)
% A 64-bit argument takes r0/r1 or r2/r3; when the next free register is r1,
% it is skipped so that the pair starts at r2.
parameter_regs0([], _Remaining, Assigned) ->
    lists:reverse(Assigned);
parameter_regs0([{avm_int64_t, _} | Args], [r0, r1 | Remaining], Assigned) ->
    parameter_regs0(Args, Remaining, [r1, r0 | Assigned]);
parameter_regs0([{avm_int64_t, _} | Args], [r1, r2, r3 | Remaining], Assigned) ->
    parameter_regs0(Args, Remaining, [r3, r2 | Assigned]);
parameter_regs0([{avm_int64_t, _} | Args], [r2, r3 | Remaining], Assigned) ->
    parameter_regs0(Args, Remaining, [r3, r2 | Assigned]);
parameter_regs0([_Word | Args], [Reg | Remaining], Assigned) ->
    parameter_regs0(Args, Remaining, [Reg | Assigned]).

%% Replace an occurrence of register Reg1 in Args with Reg2; the work is done
%% by replace_reg0/4.
replace_reg(Args, Reg1, Reg2) ->
    replace_reg0(Args, Reg1, Reg2, []).
+ +replace_reg0([Reg | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) -> + lists:reverse(Acc, [Replacement | T]); +replace_reg0([Other | T], Reg, Replacement, Acc) -> + replace_reg0(T, Reg, Replacement, [Other | Acc]). + +set_registers_args0(State, [], [], [], _AvailGP, _StackOffset) -> + State; +set_registers_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset) -> + set_registers_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset); +set_registers_args0( + State, [ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset +) -> + set_registers_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); +% Handle 64-bit arguments that need two registers according to AAPCS32 +set_registers_args0( + State, + [{avm_int64_t, Value} | ArgsT], + ArgsRegs, + ParamRegs, + AvailGP, + StackOffset +) when is_integer(Value) -> + LowPart = Value band 16#FFFFFFFF, + HighPart = (Value bsr 32) band 16#FFFFFFFF, + set_registers_args0( + State, [LowPart, HighPart | ArgsT], [imm | ArgsRegs], ParamRegs, AvailGP, StackOffset + ); +% ctx is special as we need it to access x_reg/y_reg/fp_reg and we don't +% want to replace it +set_registers_args0( + State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset +) -> + false = lists:member(?CTX_REG, ArgsRegs), + State1 = set_registers_args1(State, Arg, ?CTX_REG, StackOffset), + set_registers_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset); +set_registers_args0( + #state{stream_module = StreamModule} = State0, + [Arg | ArgsT], + [_ArgReg | ArgsRegsT], + [ParamReg | ParamRegsT], + AvailGP, + StackOffset +) -> + case lists:member(ParamReg, ArgsRegsT) of + false -> + State1 = set_registers_args1(State0, Arg, ParamReg, StackOffset), + set_registers_args0(State1, ArgsT, ArgsRegsT, ParamRegsT, AvailGP, StackOffset); + true -> + [Avail | AvailGPT] 
= AvailGP, + I = jit_armv6m_asm:mov(Avail, ParamReg), + Stream1 = StreamModule:append(State0#state.stream, I), + State1 = set_registers_args1( + State0#state{stream = Stream1}, Arg, ParamReg, StackOffset + ), + NewArgsT = replace_reg(ArgsT, ParamReg, Avail), + set_registers_args0( + State1, NewArgsT, ArgsRegsT, ParamRegsT, AvailGPT, StackOffset + ) + end. + +set_registers_args1(State, Reg, Reg, _Offset) -> + State; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, jit_state, ParamReg, StackOffset +) -> + JitStateOffset = ?STACK_OFFSET_JITSTATE + StackOffset, + I = jit_armv6m_asm:ldr(ParamReg, {sp, JitStateOffset}), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +% For tail calls, jit_state will be restored by pop - skip generating load instruction +set_registers_args1(State, jit_state_tail_call, r1, _StackOffset) -> + State; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, + {x_reg, extra}, + Reg, + _StackOffset +) -> + I = jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, Reg, _StackOffset +) -> + I = jit_armv6m_asm:ldr(Reg, ?X_REG(X)), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Source}, Reg, _StackOffset +) -> + I = jit_armv6m_asm:ldr(Reg, {Source, 0}), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State, + {y_reg, X}, + Reg, + _StackOffset +) -> + Code = ldr_y_reg(Reg, X, AvailRegs), + Stream1 = StreamModule:append(Stream0, Code), + State#state{stream = Stream1}; +set_registers_args1( + #state{stream_module = StreamModule, 
stream = Stream0} = State, ArgReg, Reg, _StackOffset +) when + ?IS_GPR(ArgReg) +-> + I = jit_armv6m_asm:mov(Reg, ArgReg), + Stream1 = StreamModule:append(Stream0, I), + State#state{stream = Stream1}; +set_registers_args1(State, Value, Reg, _StackOffset) when ?IS_SIGNED_OR_UNSIGNED_INT32_T(Value) -> + mov_immediate(State, Reg, Value). + +%%----------------------------------------------------------------------------- +%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg) +%% from an immediate, a native register or another vm register. +%% @end +%% @param State current backend state +%% @param Src value to move to vm register +%% @param Dest vm register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) -> + state(). +% Native register to VM register +move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) -> + I1 = jit_armv6m_asm:str(Src, ?X_REG(?MAX_REG)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) -> + I1 = jit_armv6m_asm:str(Src, ?X_REG(X)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) -> + I1 = jit_armv6m_asm:str(Src, {Reg, 0}), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State0#state{stream = Stream1}; +move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when + is_atom(Src) +-> + Code = str_y_reg(Src, Y, Temp1, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), + State0#state{stream = Stream1}; +% Source is an integer to y_reg (optimized: ldr first, then movs) +move_to_vm_register(#state{available_regs = [Temp1, Temp2 
| AT]} = State0, N, {y_reg, Y}) when + is_integer(N), N >= 0, N =< 255 +-> + I1 = jit_armv6m_asm:movs(Temp2, N), + YCode = str_y_reg(Temp2, Y, Temp1, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, <>), + State0#state{stream = Stream1}; +% Source is an integer (0-255 for movs, negative values need different handling) +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when + is_integer(N), N >= 0, N =< 255 +-> + I1 = jit_armv6m_asm:movs(Temp, N), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +%% Handle large values using simple literal pool (branch-over pattern) +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when + is_integer(N) +-> + State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N), + State2 = move_to_vm_register(State1, Temp, Dest), + State2#state{available_regs = AR0}; +% Source is a VM register +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) -> + I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(?MAX_REG)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) -> + I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(X)), + Stream1 = (State0#state.stream_module):append(State0#state.stream, I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) -> + I1 = jit_armv6m_asm:ldr(Temp, {Reg, 0}), + Stream1 = (State0#state.stream_module):append(State0#state.stream, 
I1), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) -> + Code = ldr_y_reg(Temp, Y, AT), + Stream1 = (State0#state.stream_module):append(State0#state.stream, Code), + State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest), + State1#state{available_regs = AR0}; +% term_to_float +move_to_vm_register( + #state{ + stream_module = StreamModule, + available_regs = [Temp1, Temp2 | _], + stream = Stream0, + variant = Variant + } = + State0, + {free, {ptr, Reg, 1}}, + {fp_reg, F} +) -> + I1 = jit_armv6m_asm:ldr(Temp1, ?FP_REGS), + I2 = jit_armv6m_asm:ldr(Temp2, {Reg, 4}), + case Variant band ?JIT_VARIANT_FLOAT32 of + 0 -> + % Double precision: write both 32-bit parts + I3 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8}), + I4 = jit_armv6m_asm:ldr(Temp2, {Reg, 8}), + I5 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8 + 4}), + Code = <>; + _ -> + % Single precision: write only first 32-bit part + I3 = jit_armv6m_asm:str(Temp2, {Temp1, F * 4}), + Code = <> + end, + Stream1 = StreamModule:append(Stream0, Code), + State1 = free_native_register(State0, Reg), + State1#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a move of an array element (reg[x]) to a vm or a native register. +%% @end +%% @param State current backend state +%% @param Reg base register of the array +%% @param Index index in the array, as an integer or a native register +%% @param Dest vm or native register to move to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec move_array_element( + state(), + armv6m_register(), + non_neg_integer() | armv6m_register(), + vm_register() | armv6m_register() +) -> state(). 
% Constant index, x register destination: load the element into a scratch
% register, then store it into the x register slot.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Reg,
    Index,
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
    I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}),
    I2 = jit_armv6m_asm:str(Temp, ?X_REG(X)),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
% Constant index, destination is the word pointed to by a native register.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Reg,
    Index,
    {ptr, Dest}
) when is_atom(Reg) andalso is_integer(Index) ->
    I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}),
    I2 = jit_armv6m_asm:str(Temp, {Dest, 0}),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
% Constant index, y register destination: the value goes through Temp2 while
% Temp1 and the remaining free registers are handed to str_y_reg as scratch.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} =
        State,
    Reg,
    Index,
    {y_reg, Y}
) when is_atom(Reg) andalso is_integer(Index) ->
    I1 = jit_armv6m_asm:ldr(Temp2, {Reg, Index * 4}),
    YCode = str_y_reg(Temp2, Y, Temp1, AT),
    Code = <<I1/binary, YCode/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1};
% Constant index, y register destination, base register marked {free, Reg}:
% the base register is overwritten with the loaded element.
% NOTE(review): this clause does not update available_regs/used_regs for Reg;
% presumably the caller releases it - confirm.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} =
        State,
    {free, Reg},
    Index,
    {y_reg, Y}
) when is_integer(Index) ->
    I1 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}),
    YCode = str_y_reg(Reg, Y, Temp, AT),
    Code = <<I1/binary, YCode/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1};
% Constant index, native register destination: a single load.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest
) when is_atom(Dest) andalso is_integer(Index) ->
    I1 = jit_armv6m_asm:ldr(Dest, {Reg, Index * 4}),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
% Index held in a register that may be consumed, x register destination:
% scale the index to a byte offset in place, load over it, store, then
% release the index register.
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(IndexReg) ->
    I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2),
    I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}),
    I3 = jit_armv6m_asm:str(IndexReg, ?X_REG(X)),
    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    };
% Same as above, destination is the word pointed to by PtrReg.
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {ptr, PtrReg}
) when is_atom(IndexReg) ->
    I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2),
    I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}),
    I3 = jit_armv6m_asm:str(IndexReg, {PtrReg, 0}),
    {AvailableRegs1, UsedRegs1} = free_reg(
        AvailableRegs0, UsedRegs0, IndexReg
    ),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    };
% Same as above, y register destination; the store sequence comes from
% str_y_reg, which receives the free registers as scratch.
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | AT] = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {y_reg, Y}
) when is_atom(IndexReg) ->
    I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2),
    I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}),
    I3 = str_y_reg(IndexReg, Y, Temp, AT),
    {AvailableRegs1, UsedRegs1} = free_reg(
        AvailableRegs0, UsedRegs0, IndexReg
    ),
    Stream1 = StreamModule:append(
        Stream0, <<I1/binary, I2/binary, I3/binary>>
    ),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    }.

%% @doc move reg[x] to a vm or native register
-spec get_array_element(state(), armv6m_register() | {free, armv6m_register()}, non_neg_integer()) ->
    {state(), armv6m_register()}.
% Base register may be consumed: the element is loaded over the base register,
% which is returned as the result register. Register bookkeeping is unchanged.
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    {free, Reg},
    Index
) ->
    I1 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}),
    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
    {State#state{stream = Stream1}, Reg};
% Base register must be preserved: take the first available register, mark it
% as used and load the element into it.
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [ElemReg | AvailableT],
        used_regs = UsedRegs0
    } = State,
    Reg,
    Index
) ->
    I1 = jit_armv6m_asm:ldr(ElemReg, {Reg, Index * 4}),
    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
    {
        State#state{
            stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0]
        },
        ElemReg
    }.

%% @doc move an integer, a vm or native register to reg[x]
%% The index is either a constant or a native register holding the index.
-spec move_to_array_element(
    state(),
    integer() | vm_register() | armv6m_register(),
    armv6m_register(),
    non_neg_integer() | armv6m_register()
) -> state().
% Native value, constant index: a single store at byte offset Index * 4.
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    ValueReg,
    Reg,
    Index
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
    I1 = jit_armv6m_asm:str(ValueReg, {Reg, Index * 4}),
    Stream1 = StreamModule:append(Stream0, I1),
    State0#state{stream = Stream1};
% Native value, index in a register: the index is copied to a scratch register
% and scaled there, so IndexReg itself is left untouched.
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    ValueReg,
    Reg,
    IndexReg
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) ->
    I1 = jit_armv6m_asm:mov(Temp, IndexReg),
    I2 = jit_armv6m_asm:lsls(Temp, Temp, 2),
    I3 = jit_armv6m_asm:str(ValueReg, {Reg, Temp}),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    State0#state{stream = Stream1};
% Any other value (immediate or vm register): materialise it in a native
% register first, store it, then release that register.
move_to_array_element(
    State0,
    Value,
    Reg,
    Index
) ->
    {State1, Temp} = copy_to_native_register(State0, Value),
    State2 = move_to_array_element(State1, Temp, Reg, Index),
    free_native_register(State2, Temp).
% Store Value into BaseReg[IndexReg + Offset], Offset being counted in
% elements (the register-index clauses below scale IndexReg + Offset by 4).
%
% Constant index and constant offset: fold them and reuse the 4-arity version.
% The previous guard (Offset div 8 =:= 0) only matched small offsets and then
% added Offset div 8, i.e. always 0, so the offset was silently dropped.
move_to_array_element(
    State,
    Value,
    BaseReg,
    IndexReg,
    Offset
) when is_integer(IndexReg) andalso is_integer(Offset) ->
    move_to_array_element(State, Value, BaseReg, IndexReg + Offset);
% Native value, index in a register: compute (IndexReg + Offset) * 4 in a
% scratch register and use it as a register offset for the store.
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    ValueReg,
    BaseReg,
    IndexReg,
    Offset
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
    I1 = jit_armv6m_asm:adds(Temp, IndexReg, Offset),
    I2 = jit_armv6m_asm:lsls(Temp, Temp, 2),
    I3 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp}),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    State#state{stream = Stream1};
% Any other value: materialise it in a native register first. The scratch
% register is picked after that allocation so the two cannot collide.
move_to_array_element(
    State0,
    Value,
    BaseReg,
    IndexReg,
    Offset
) ->
    {State1, ValueReg} = copy_to_native_register(State0, Value),
    [Temp | _] = State1#state.available_regs,
    I1 = jit_armv6m_asm:adds(Temp, IndexReg, Offset),
    I2 = jit_armv6m_asm:lsls(Temp, Temp, 2),
    I3 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp}),
    Stream1 = (State1#state.stream_module):append(
        State1#state.stream, <<I1/binary, I2/binary, I3/binary>>
    ),
    State2 = State1#state{stream = Stream1},
    free_native_register(State2, ValueReg).

-spec move_to_native_register(state(), value() | cp) -> {state(), armv6m_register()}.
% Continuation pointer: allocate the first available register and load ?CP.
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    cp
) ->
    I1 = jit_armv6m_asm:ldr(Reg, ?CP),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
% Already a native register: nothing to emit. This clause must stay after the
% cp clause, as cp is an atom as well.
move_to_native_register(State, Reg) when is_atom(Reg) ->
    {State, Reg};
% Pointer held in a native register: dereference in place, reusing the register.
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}
) when is_atom(Reg) ->
    I1 = jit_armv6m_asm:ldr(Reg, {Reg, 0}),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1}, Reg};
% Immediate: allocate a register, then delegate to the 3-arity version.
move_to_native_register(
    #state{
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State0,
    Imm
) when
    is_integer(Imm)
->
    State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT},
    {move_to_native_register(State1, Imm, Reg), Reg};
% The extra x register lives in slot ?MAX_REG.
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {x_reg, extra}
) ->
    I1 = jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {x_reg, X}
) when
    X < ?MAX_REG
->
    I1 = jit_armv6m_asm:ldr(Reg, ?X_REG(X)),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
% y register: ldr_y_reg builds the load sequence and receives the remaining
% free registers.
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {y_reg, Y}
) ->
    Code = ldr_y_reg(Reg, Y, AvailT),
    Stream1 = StreamModule:append(Stream0, Code),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg};
% Floating point register: loaded as two 32-bit words into a register pair.
% RegB first holds the base of the fp register array and is overwritten last
% with the second word. The result is {fp, RegA, RegB}, not a plain register.
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [RegA, RegB | AvailT],
        used_regs = Used
    } = State,
    {fp_reg, F}
) ->
    I1 = jit_armv6m_asm:ldr(RegB, ?FP_REGS),
    I2 = jit_armv6m_asm:ldr(RegA, {RegB, F * 8}),
    I3 = jit_armv6m_asm:ldr(RegB, {RegB, F * 8 + 4}),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {
        State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]},
        {fp, RegA, RegB}
    }.

% Move a value into a register chosen by the caller. Register bookkeeping
% (available_regs / used_regs) is not modified by these clauses.
-spec move_to_native_register(state(), value(), armv6m_register()) -> state().
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst
) when is_atom(RegSrc) ->
    I = jit_armv6m_asm:mov(RegDst, RegSrc),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1};
move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) ->
    mov_immediate(State, RegDst, ValSrc);
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst
) when ?IS_GPR(Reg) ->
    I1 = jit_armv6m_asm:ldr(RegDst, {Reg, 0}),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst
) ->
    I1 = jit_armv6m_asm:ldr(RegDst, ?X_REG(?MAX_REG)),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst
) when
    X < ?MAX_REG
->
    I1 = jit_armv6m_asm:ldr(RegDst, ?X_REG(X)),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = AT} = State,
    {y_reg, Y},
    RegDst
) ->
    Code = ldr_y_reg(RegDst, Y, AT),
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1};
% Floating point register into a caller-provided register pair.
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    {fp_reg, F},
    {fp, RegA, RegB}
) ->
    I1 = jit_armv6m_asm:ldr(RegB, ?FP_REGS),
    I2 = jit_armv6m_asm:ldr(RegA, {RegB, F * 8}),
    I3 = jit_armv6m_asm:ldr(RegB, {RegB, F * 8 + 4}),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

% Like move_to_native_register/2, but the result is always a freshly allocated
% register: a native register source is copied, and a pointer source is
% dereferenced into the new register instead of being overwritten.
-spec copy_to_native_register(state(), value()) -> {state(), armv6m_register()}.
copy_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [SaveReg | AvailT],
        used_regs = Used
    } = State,
    Reg
) when is_atom(Reg) ->
    I1 = jit_armv6m_asm:mov(SaveReg, Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg};
copy_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [SaveReg | AvailT],
        used_regs = Used
    } = State,
    {ptr, Reg}
) when is_atom(Reg) ->
    I1 = jit_armv6m_asm:ldr(SaveReg, {Reg, 0}),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg};
copy_to_native_register(State, Reg) ->
    move_to_native_register(State, Reg).

% Copy a y register into the continuation pointer slot (?CP), going through
% the first available register, which is only used as scratch.
move_to_cp(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | AvailT]} = State,
    {y_reg, Y}
) ->
    I1 = ldr_y_reg(Reg, Y, AvailT),
    I2 = jit_armv6m_asm:str(Reg, ?CP),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

% Add Offset words (Offset * 4 bytes) to the value stored at ?Y_REGS with a
% load / add / store sequence on a scratch register.
increment_sp(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State,
    Offset
) ->
    I1 = jit_armv6m_asm:ldr(Reg, ?Y_REGS),
    I2 = jit_armv6m_asm:adds(Reg, Offset * 4),
    I3 = jit_armv6m_asm:str(Reg, ?Y_REGS),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.
% Store the address of the jump table entry of Label, with the Thumb bit set,
% as the continuation in the jit state. The address is built at run time as
% the PC-relative value from ADR plus a constant computed at code generation.
set_continuation_to_label(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        offset = JumpTableOffset,
        available_regs = [Temp1, Temp2 | _]
    } = State,
    Label
) ->
    % Calculate jump table entry offset
    JumpTableEntryOffset = (Label * ?JUMP_TABLE_ENTRY_SIZE) + JumpTableOffset,

    AdrOffset = StreamModule:offset(Stream0),
    % ADR Temp, +.4 means we're storing PC value in Temp1.
    % For example, if AdrOffset is 0x0808034c, Temp1 will contain 0x08080350
    I1 = jit_armv6m_asm:adr(Temp1, 4),
    Stream1 = StreamModule:append(Stream0, I1),

    % Value Temp1 holds at run time, relative to the start of the stream
    AdrPC = (AdrOffset + 4) band (bnot 3),

    % Calculate what we need to load: JumpTableEntryOffset - AdrPC + 1 (for thumb bit)
    ImmediateValue = JumpTableEntryOffset + 1 - AdrPC,

    % Generate mov_immediate to load the calculated offset
    State1 = mov_immediate(State#state{stream = Stream1}, Temp2, ImmediateValue),

    % Add PC + offset (with thumb bit set), load jit_state, and store continuation
    I2 = jit_armv6m_asm:adds(Temp2, Temp2, Temp1),
    I3 = jit_armv6m_asm:ldr(Temp1, {sp, ?STACK_OFFSET_JITSTATE}),
    I4 = jit_armv6m_asm:str(Temp2, ?JITSTATE_CONTINUATION(Temp1)),

    Code = <<I2/binary, I3/binary, I4/binary>>,
    Stream2 = StreamModule:append(State1#state.stream, Code),
    State1#state{stream = Stream2}.

%% @doc Set the continuation to a given offset
%% Return a reference so the offset will be updated with update_branches
%% This is only used with OP_WAIT_TIMEOUT and the offset is after the current
%% code and not too far, so on Thumb we can use adr instruction.
set_continuation_to_offset(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp, TempJitState | _],
        branches = Branches
    } = State
) ->
    OffsetRef = make_ref(),
    Offset = StreamModule:offset(Stream0),
    % Placeholder ADR: the relocation recorded below identifies it by its
    % position and target register so it can be rewritten later.
    I1 = jit_armv6m_asm:adr(Temp, 4),
    Reloc = {OffsetRef, Offset, {adr, Temp}},
    % Set thumb bit (LSB = 1) by adding 1 to the 4-byte aligned address
    I2 = jit_armv6m_asm:adds(Temp, Temp, 1),
    % Load jit_state pointer from stack, then store continuation
    I3 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}),
    I4 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)),
    Code = <<I1/binary, I2/binary, I3/binary, I4/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}.

%% @doc Implement a continuation entry point.
%% The entry point is padded to a 4-byte boundary and starts with a prolog
%% pushing r1, r4-r7 and lr.
-spec continuation_entry_point(#state{}) -> #state{}.
continuation_entry_point(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State
) ->
    % Align if required. Offsets are expected to be even; any other remainder
    % crashes with a case_clause error.
    Offset = StreamModule:offset(Stream0),
    Stream1 =
        case Offset rem 4 of
            0 -> Stream0;
            2 -> StreamModule:append(Stream0, <<0:16>>)
        end,
    Prolog = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]),
    Stream2 = StreamModule:append(Stream1, Prolog),
    State#state{stream = Stream2}.

% Load the module index into a newly allocated register and return it.
% The second available register only holds the jit_state pointer temporarily
% and stays available afterwards.
get_module_index(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg, TempJitState | AvailableT],
        used_regs = UsedRegs0
    } = State
) ->
    % Load jit_state pointer from stack, then load module
    I1a = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}),
    I1b = jit_armv6m_asm:ldr(Reg, ?JITSTATE_MODULE(TempJitState)),
    I2 = jit_armv6m_asm:ldr(Reg, ?MODULE_INDEX(Reg)),
    Code = <<I1a/binary, I1b/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {
        State#state{
            stream = Stream1,
            available_regs = [TempJitState | AvailableT],
            used_regs = [Reg | UsedRegs0]
        },
        Reg
    }.
%% @doc Perform an AND of a register with an immediate.
%% JIT currently calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to
%% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). We can avoid any literal pool
%% by using BICS for -4.
% Keep the low 24 bits: shift left then logical shift right by 8, no scratch
% register needed.
and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) ->
    I1 = jit_armv6m_asm:lsls(Reg, Reg, 8),
    I2 = jit_armv6m_asm:lsrs(Reg, Reg, 8),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State0#state{stream = Stream1};
% Small negative mask: bnot(Val) is in 0..255, so it is loaded without the
% literal pool and cleared from Reg with BICS.
and_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) when Val < 0 andalso Val >= -256 ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
    Stream1 = State1#state.stream,
    I = jit_armv6m_asm:bics(Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2};
% General case with a scratch register available.
and_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_armv6m_asm:ands(Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2};
% NOTE(review): in the two clauses below, if Reg is r0 the final restore
% overwrites the result of the operation - confirm callers never pass r0.
and_(
    #state{stream_module = StreamModule, available_regs = []} = State0,
    Reg,
    Val
) when Val < 0 andalso Val >= -256 ->
    % No available registers, use r0 as temp and save it to r12
    Stream0 = State0#state.stream,
    % Save r0 to r12
    Save = jit_armv6m_asm:mov(?IP_REG, r0),
    Stream1 = StreamModule:append(Stream0, Save),
    % Load immediate value into r0
    State1 = mov_immediate(State0#state{stream = Stream1}, r0, bnot (Val)),
    Stream2 = State1#state.stream,
    % Perform BICS operation
    I = jit_armv6m_asm:bics(Reg, r0),
    Stream3 = StreamModule:append(Stream2, I),
    % Restore r0 from r12
    Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
    Stream4 = StreamModule:append(Stream3, Restore),
    % Continue from State1 so anything mov_immediate recorded is kept
    State1#state{stream = Stream4};
and_(
    #state{stream_module = StreamModule, available_regs = []} = State0,
    Reg,
    Val
) ->
    % No available registers, use r0 as temp and save it to r12
    Stream0 = State0#state.stream,
    % Save r0 to r12
    Save = jit_armv6m_asm:mov(?IP_REG, r0),
    Stream1 = StreamModule:append(Stream0, Save),
    % Load immediate value into r0
    State1 = mov_immediate(State0#state{stream = Stream1}, r0, Val),
    Stream2 = State1#state.stream,
    % Perform ANDS operation
    I = jit_armv6m_asm:ands(Reg, r0),
    Stream3 = StreamModule:append(Stream2, I),
    % Restore r0 from r12
    Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
    Stream4 = StreamModule:append(Stream3, Restore),
    % mov_immediate may have queued a literal pool entry for a large Val;
    % returning State0 here would drop it and leave the load unpatched.
    State1#state{stream = Stream4}.

% OR a register with an immediate, loaded into a scratch register that is
% returned to the available list afterwards.
or_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_armv6m_asm:orrs(Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.

% Add an immediate or a register to Reg. Values in 0..255 and register
% operands are passed straight to ADDS; anything else is first loaded into a
% scratch register.
add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when
    (Val >= 0 andalso Val =< 255) orelse is_atom(Val)
->
    I = jit_armv6m_asm:adds(Reg, Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State0#state{stream = Stream1};
add(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_armv6m_asm:adds(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.
% Load an immediate into Reg.
% 0..255 uses a single MOVS, -255..-1 uses MOVS of the absolute value followed
% by NEGS. Any other value emits a placeholder PC-relative load and records
% {InstructionOffset, Reg, Val} in the literal pool; flush_literal_pool/1
% later lays out the value and patches the load.
mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
    Val >= 0 andalso Val =< 255
->
    I = jit_armv6m_asm:movs(Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1};
mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
    Val >= -255 andalso Val < 0
->
    I1 = jit_armv6m_asm:movs(Reg, -Val),
    I2 = jit_armv6m_asm:negs(Reg, Reg),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mov_immediate(
    #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State, Reg, Val
) ->
    LdrInstructionAddr = StreamModule:offset(Stream0),
    % Offset 0 is a placeholder, rewritten when the pool is flushed
    I1 = jit_armv6m_asm:ldr(Reg, {pc, 0}),
    Stream1 = StreamModule:append(Stream0, <<I1/binary>>),
    State#state{stream = Stream1, literal_pool = [{LdrInstructionAddr, Reg, Val} | LP]}.

% Emit all pending literals at the current position and patch the loads that
% reference them. Does nothing when the pool is empty.
flush_literal_pool(#state{literal_pool = []} = State) ->
    State;
flush_literal_pool(
    #state{stream_module = StreamModule, stream = Stream0, literal_pool = LP} = State
) ->
    % Align to 4 bytes, padding with one zero halfword if needed
    Offset = StreamModule:offset(Stream0),
    Stream1 =
        if
            Offset rem 4 =:= 0 -> Stream0;
            true -> StreamModule:append(Stream0, <<0:16>>)
        end,
    % Lay all values and update ldr instructions.
    % The pool is built by prepending, so it is reversed to emit literals in
    % the order the loads were generated.
    Stream2 = lists:foldl(
        fun({LdrInstructionAddr, Reg, Val}, AccStream) ->
            LiteralPosition = StreamModule:offset(AccStream),
            % Base the load is relative to: instruction address aligned down
            % to 4 bytes, plus 4
            LdrPC = (LdrInstructionAddr band (bnot 3)) + 4,
            LiteralOffset = LiteralPosition - LdrPC,
            LdrInstruction = jit_armv6m_asm:ldr(Reg, {pc, LiteralOffset}),
            AccStream1 = StreamModule:append(AccStream, <<Val:32/little>>),
            StreamModule:replace(
                AccStream1, LdrInstructionAddr, LdrInstruction
            )
        end,
        Stream1,
        lists:reverse(LP)
    ),
    State#state{stream = Stream2, literal_pool = []}.
% Subtract an immediate or a register from Reg. Values in 0..255 and register
% operands are passed straight to SUBS; anything else is first loaded into a
% scratch register.
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
    (Val >= 0 andalso Val =< 255) orelse is_atom(Val)
->
    I1 = jit_armv6m_asm:subs(Reg, Reg, Val),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
sub(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_armv6m_asm:subs(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.

% Multiply Reg by a constant.
% Small constants are strength-reduced to shifts combined with one add or
% subtract (or to two chained reductions); every other value is loaded into a
% scratch register and multiplied with MULS.
mul(State, _Reg, 1) ->
    State;
mul(State, Reg, 2) ->
    shift_left(State, Reg, 1);
mul(#state{available_regs = [Temp | _]} = State, Reg, 3) ->
    % 3x = 2x + x
    I1 = jit_armv6m_asm:lsls(Temp, Reg, 1),
    I2 = jit_armv6m_asm:adds(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 4) ->
    shift_left(State, Reg, 2);
mul(#state{available_regs = [Temp | _]} = State, Reg, 5) ->
    % 5x = 4x + x
    I1 = jit_armv6m_asm:lsls(Temp, Reg, 2),
    I2 = jit_armv6m_asm:adds(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State0, Reg, 6) ->
    State1 = mul(State0, Reg, 3),
    mul(State1, Reg, 2);
mul(#state{available_regs = [Temp | _]} = State, Reg, 7) ->
    % 7x = 8x - x
    I1 = jit_armv6m_asm:lsls(Temp, Reg, 3),
    I2 = jit_armv6m_asm:subs(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 8) ->
    shift_left(State, Reg, 3);
mul(#state{available_regs = [Temp | _]} = State, Reg, 9) ->
    % 9x = 8x + x
    I1 = jit_armv6m_asm:lsls(Temp, Reg, 3),
    I2 = jit_armv6m_asm:adds(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State0, Reg, 10) ->
    State1 = mul(State0, Reg, 5),
    mul(State1, Reg, 2);
mul(#state{available_regs = [Temp | _]} = State, Reg, 15) ->
    % 15x = 16x - x
    I1 = jit_armv6m_asm:lsls(Temp, Reg, 4),
    I2 = jit_armv6m_asm:subs(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 16) ->
    shift_left(State, Reg, 4);
mul(State, Reg, 32) ->
    shift_left(State, Reg, 5);
mul(State, Reg, 64) ->
    shift_left(State, Reg, 6);
mul(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    % No shortcut for this constant: load it into a scratch register and
    % multiply with MULS
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_armv6m_asm:muls(Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}.

%%
%% Analysis of AArch64 pattern and ARM Thumb mapping:
%%
%% AArch64 layout (from call_ext_only_test):
%%   0x0-0x8:   Decrement reductions, store back
%%   0xc:       b.ne 0x20          ; Branch if reductions != 0 to continuation
%%   0x10-0x1c: adr/str/ldr/br sequence for scheduling next process
%%   0x20:      [CONTINUATION POINT] - Actual function starts here
%%
%% ARM Thumb equivalent should be:
%%   0x0-0x6:   Decrement reductions, store back
%%   0x8:       bne continuation_after_prolog  ; Branch OVER the prolog if reductions != 0
%%   0xa-0x?:   adr/str/ldr/blx sequence for scheduling
%%   continuation: push {r1,r4-r7,lr}  ; PROLOG (only executed when scheduled)
%%   continuation_after_prolog: [actual function body]
%%
%% Key insight: When reductions != 0, we branch PAST the prolog directly to the function.
%% When reductions == 0, we schedule next process, and when we resume, we execute the prolog
%% then continue to the function body.
%%
-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().
decrement_reductions_and_maybe_schedule_next(
    #state{
        stream_module = StreamModule, stream = Stream0, available_regs = [Temp, TempJitState | _]
    } = State0
) ->
    % Load jit_state pointer from stack
    I0 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}),
    % Load reduction count
    I1 = jit_armv6m_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)),
    % Decrement reduction count
    I2 = jit_armv6m_asm:subs(Temp, Temp, 1),
    % Store back the decremented value
    I3 = jit_armv6m_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)),
    Stream1 = StreamModule:append(Stream0, <<I0/binary, I1/binary, I2/binary, I3/binary>>),
    BNEOffset = StreamModule:offset(Stream1),
    % Branch if reduction count is not zero (placeholder target, rewritten below)
    I4 = jit_armv6m_asm:bcc(ne, 0),
    % Set continuation to the next instruction (placeholder offset, rewritten below)
    ADROffset = BNEOffset + byte_size(I4),
    I5 = jit_armv6m_asm:adr(Temp, 4),
    I6 = jit_armv6m_asm:adds(Temp, Temp, 1),
    I7 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)),
    % Append the instructions to the stream
    Stream2 = StreamModule:append(Stream1, <<I4/binary, I5/binary, I6/binary, I7/binary>>),
    State1 = State0#state{stream = Stream2},
    State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]),
    % Add the prolog at the continuation point (where scheduled execution resumes)
    #state{stream = Stream3} = State2,
    CurrentOffset = StreamModule:offset(Stream3),
    % Ensure continuation point is 4-byte aligned by adding NOP if necessary
    {AlignedContinuationOffset, Stream3_5} =
        case CurrentOffset rem 4 of
            % Already 4-byte aligned
            0 ->
                {CurrentOffset, Stream3};
            2 ->
                % Add NOP to achieve 4-byte alignment
                NOPPadded = StreamModule:append(Stream3, jit_armv6m_asm:nop()),
                {StreamModule:offset(NOPPadded), NOPPadded};
            _ ->
                error({unexpected_alignment, CurrentOffset})
        end,
    Prolog = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]),
    Stream4 = StreamModule:append(Stream3_5, Prolog),
    % Calculate offsets for rewriting
    ContinuationAfterPrologOffset = StreamModule:offset(Stream4),
    % Rewrite the branch to skip over the prolog (branch to continuation_after_prolog)
    NewI4 = jit_armv6m_asm:bcc(ne, ContinuationAfterPrologOffset - BNEOffset),
    % Rewrite the adr to point to the aligned continuation point (prolog location)
    % The ADR instruction uses PC aligned down to 4-byte boundary
    ADRAlignedOffset = ADROffset band (bnot 3),
    ADRImmediate = AlignedContinuationOffset - ADRAlignedOffset,
    NewI5 = jit_armv6m_asm:adr(Temp, ADRImmediate),
    % The branch and the adr are adjacent, so both are patched in one replace
    Stream5 = StreamModule:replace(
        Stream4, BNEOffset, <<NewI4/binary, NewI5/binary>>
    ),
    merge_used_regs(State2#state{stream = Stream5}, State1#state.used_regs).

% Set the continuation pointer, then jump to Label or yield to the scheduler;
% the cp value emitted by set_cp/1 is patched once the following code is laid out.
-spec call_or_schedule_next(state(), non_neg_integer()) -> state().
call_or_schedule_next(State0, Label) ->
    {State1, RewriteOffset, TempReg} = set_cp(State0),
    State2 = call_only_or_schedule_next(State1, Label),
    rewrite_cp_offset(State2, RewriteOffset, TempReg).

% Decrement the reduction count. While it is not zero, branch to Label;
% once it reaches zero, record Label as the continuation and call the
% schedule-next primitive.
call_only_or_schedule_next(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp, TempJitState | _]
    } = State0,
    Label
) ->
    % Load jit_state pointer from stack
    I0 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}),
    % Load reduction count
    I1 = jit_armv6m_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)),
    % Decrement reduction count
    I2 = jit_armv6m_asm:subs(Temp, Temp, 1),
    % Store back the decremented value
    I3 = jit_armv6m_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)),
    Stream1 = StreamModule:append(Stream0, <<I0/binary, I1/binary, I2/binary, I3/binary>>),
    % Use trampoline technique: branch if zero (eq) to skip over the long branch
    % If not zero, we want to continue execution at Label
    % If zero, we want to fall through to scheduling code

    % Look up label once to avoid duplicate lookup in helper
    LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels),

    BccOffset = StreamModule:offset(Stream1),

    State4 =
        case LabelLookupResult of
            {Label, LabelOffset} ->
                % Label is known, check if we can optimize the conditional branch
                % Displacement relative to the bcc instruction
                Rel = LabelOffset - BccOffset,

                if
                    Rel >= -252 andalso Rel =< 258 andalso (Rel rem 2) =:= 0 ->
                        % Near branch: use direct conditional branch

                        % Branch if NOT zero (ne)
                        I4 = jit_armv6m_asm:bcc(ne, Rel),
                        Stream2 = StreamModule:append(Stream1, I4),
                        State0#state{stream = Stream2};
                    true ->
                        % Far branch: use trampoline with helper
                        % Get the code block size for the far branch sequence that will follow
                        FarSeqOffset = BccOffset + 2,
                        {State1, FarCodeBlock} = branch_to_label_code(
                            State0, FarSeqOffset, Label, LabelLookupResult
                        ),
                        FarSeqSize = byte_size(FarCodeBlock),
                        % Skip over the far branch sequence if zero (eq)
                        I4 = jit_armv6m_asm:bcc(eq, FarSeqSize + 2),
                        Stream2 = StreamModule:append(Stream1, I4),
                        Stream3 = StreamModule:append(Stream2, FarCodeBlock),
                        State1#state{stream = Stream3}
                end;
            false ->
                % Label not known, get the far branch size for the skip
                FarSeqOffset = BccOffset + 2,
                {State1, FarCodeBlock} = branch_to_label_code(State0, FarSeqOffset, Label, false),
                FarSeqSize = byte_size(FarCodeBlock),
                I4 = jit_armv6m_asm:bcc(eq, FarSeqSize + 2),
                Stream2 = StreamModule:append(Stream1, I4),
                Stream3 = StreamModule:append(Stream2, FarCodeBlock),
                State1#state{stream = Stream3}
        end,
    State5 = set_continuation_to_label(State4, Label),
    call_primitive_last(State5, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).

% Set the continuation pointer, tail-call a primitive, then patch the cp value
% emitted by set_cp/1.
call_primitive_with_cp(State0, Primitive, Args) ->
    {State1, RewriteOffset, TempReg} = set_cp(State0),
    State2 = call_primitive_last(State1, Primitive, Args),
    rewrite_cp_offset(State2, RewriteOffset, TempReg).

-spec set_cp(state()) -> {state(), non_neg_integer(), armv6m_register()}.
+set_cp(State0) -> + % get module index (dynamically) + { + #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State1, + Reg + } = get_module_index( + State0 + ), + % Get a temporary register from available registers + [TempReg | _] = AvailRegs, + + Offset = StreamModule:offset(Stream0), + % build cp with module_index << 24 + I1 = jit_armv6m_asm:lsls(Reg, Reg, 24), + % Emit a single nop as placeholder for offset load instruction + I2 = jit_armv6m_asm:nop(), + MOVOffset = Offset + byte_size(I1), + % OR the module index with the offset (loaded in temp register) + I3 = jit_armv6m_asm:orrs(Reg, TempReg), + I4 = jit_armv6m_asm:str(Reg, ?CP), + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + State2 = State1#state{stream = Stream1}, + State3 = free_native_register(State2, Reg), + {State3, MOVOffset, TempReg}. + +-spec rewrite_cp_offset(state(), non_neg_integer(), armv6m_register()) -> state(). +rewrite_cp_offset( + #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0, + RewriteOffset, + TempReg +) -> + CurrentOffset = StreamModule:offset(Stream0), + AlignedOffset = (CurrentOffset + 3) band (bnot 3), + PaddingSize = AlignedOffset - CurrentOffset, + % Execution should resume at an aligned offset + + Delta0 = AlignedOffset - CodeOffset, + OffsetImm0 = Delta0 bsl 2, + + % Check if offset fits in movs immediate (0-255) + {NewMoveInstr, Stream1} = + if + OffsetImm0 =< 255 -> + PaddedStream = + if + PaddingSize > 0 -> + StreamModule:append(Stream0, <<0:16>>); + true -> + Stream0 + end, + {jit_armv6m_asm:movs(TempReg, OffsetImm0), PaddedStream}; + true -> + % Need to emit literal pool with proper alignment + Delta1 = Delta0 + 4, + OffsetImm1 = Delta1 bsl 2, + % Emit the 32-bit literal to point to position after + % the pool + StreamWithLiteral = StreamModule:append( + Stream0, <<0:(PaddingSize * 8), OffsetImm1:32/little>> + ), + + % Compute PC-relative offset for ldr instruction + PCValue = 
(RewriteOffset + 4) band (bnot 3), + PCRelOffset = AlignedOffset - PCValue, + LdrInstr = jit_armv6m_asm:ldr(TempReg, {pc, PCRelOffset}), + {LdrInstr, StreamWithLiteral} + end, + Stream2 = StreamModule:replace(Stream1, RewriteOffset, NewMoveInstr), + Prolog = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]), + Stream3 = StreamModule:append(Stream2, Prolog), + State0#state{stream = Stream3}. + +set_bs( + #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0, + TermReg +) -> + I1 = jit_armv6m_asm:str(TermReg, ?BS), + I2 = jit_armv6m_asm:movs(Temp, 0), + I3 = jit_armv6m_asm:str(Temp, ?BS_OFFSET), + Stream1 = StreamModule:append(Stream0, <>), + State0#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @param State current state +%% @param SortedLines line information, sorted by offset +%% @doc Build labels and line tables and encode a function that returns it. +%% In this case, the function returns the effective address of what immediately +%% follows. 
+%% @end +%% @return New state +%%----------------------------------------------------------------------------- +return_labels_and_lines( + #state{ + stream_module = StreamModule, + stream = Stream0, + labels = Labels + } = State, + SortedLines +) -> + SortedLabels = lists:keysort(2, [ + {Label, LabelOffset} + || {Label, LabelOffset} <- Labels, is_integer(Label) + ]), + + % Check if current offset is 4-byte aligned + CurrentOffset = StreamModule:offset(Stream0), + + {I1, Padding} = + case CurrentOffset rem 4 of + 0 -> + % Aligned - use offset 4 + {jit_armv6m_asm:adr(r0, 4), <<>>}; + _ -> + % Unaligned - use offset 8 with 2-byte padding + {jit_armv6m_asm:adr(r0, 8), <<0:16>>} + end, + I2 = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]), + LabelsTable = <<<> || {Label, Offset} <- SortedLabels>>, + LinesTable = <<<> || {Line, Offset} <- SortedLines>>, + Stream1 = StreamModule:append( + Stream0, + <> + ), + State#state{stream = Stream1}. + +%% Helper function to generate str instruction with y_reg offset, handling large offsets +str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 -> + % Small offset - use immediate addressing + I1 = jit_armv6m_asm:ldr(TempReg, ?Y_REGS), + I2 = jit_armv6m_asm:str(SrcReg, {TempReg, Y * 4}), + <>; +str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) -> + % Large offset - use register arithmetic with second available register + Offset = Y * 4, + I1 = jit_armv6m_asm:ldr(TempReg1, ?Y_REGS), + I2 = jit_armv6m_asm:movs(TempReg2, Offset), + I3 = jit_armv6m_asm:add(TempReg2, TempReg1), + I4 = jit_armv6m_asm:str(SrcReg, {TempReg2, 0}), + <>; +str_y_reg(SrcReg, Y, TempReg1, []) -> + % Large offset - no additional registers available, use IP_REG as second temp + Offset = Y * 4, + I1 = jit_armv6m_asm:ldr(TempReg1, ?Y_REGS), + I2 = jit_armv6m_asm:mov(?IP_REG, TempReg1), + I3 = jit_armv6m_asm:movs(TempReg1, Offset), + I4 = jit_armv6m_asm:add(TempReg1, ?IP_REG), + I5 = jit_armv6m_asm:str(SrcReg, {TempReg1, 0}), + <>. 
+ +%% Helper function to generate ldr instruction with y_reg offset, handling large offsets +ldr_y_reg(DstReg, Y, [TempReg | _]) when Y * 4 =< 124 -> + % Small offset - use immediate addressing + I1 = jit_armv6m_asm:ldr(TempReg, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(DstReg, {TempReg, Y * 4}), + <>; +ldr_y_reg(DstReg, Y, [TempReg | _]) -> + % Large offset - use DstReg as second temp register for arithmetic + Offset = Y * 4, + I1 = jit_armv6m_asm:ldr(TempReg, ?Y_REGS), + I2 = jit_armv6m_asm:movs(DstReg, Offset), + I3 = jit_armv6m_asm:add(DstReg, TempReg), + I4 = jit_armv6m_asm:ldr(DstReg, {DstReg, 0}), + <>; +ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 -> + % Small offset, no registers available - use DstReg as temp + I1 = jit_armv6m_asm:ldr(DstReg, ?Y_REGS), + I2 = jit_armv6m_asm:ldr(DstReg, {DstReg, Y * 4}), + <>; +ldr_y_reg(DstReg, Y, []) -> + % Large offset, no registers available - use IP_REG as temp register + % Note: IP_REG (r12) can only be used with mov, not ldr directly + Offset = Y * 4, + I1 = jit_armv6m_asm:ldr(DstReg, ?Y_REGS), + I2 = jit_armv6m_asm:mov(?IP_REG, DstReg), + I3 = jit_armv6m_asm:movs(DstReg, Offset), + I4 = jit_armv6m_asm:add(DstReg, ?IP_REG), + I5 = jit_armv6m_asm:ldr(DstReg, {DstReg, 0}), + <>. + +free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) -> + AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []), + true = lists:member(Reg, UsedRegs0), + UsedRegs1 = lists:delete(Reg, UsedRegs0), + {AvailableRegs1, UsedRegs1}. + +free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) -> + lists:reverse(Acc, [Reg | PrevRegs0]); +free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) -> + free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]); +free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) -> + free_reg0(SortedT, PrevRegs, Reg, Acc). 

%% Map every argument descriptor of a primitive call to the register (or
%% placeholder atom) that stands for it:
%%   - integer immediates, `offset' and `{avm_int64_t, _}' become `imm';
%%   - `ctx' and x/y/fp register references (freed or not) become ?CTX_REG;
%%   - `jit_state' and `jit_state_tail_call' both become `jit_state';
%%   - `{ptr, Reg}', `{free, Reg}' and `{free, {ptr, Reg}}' yield `Reg';
%%   - any other atom is returned unchanged.
%% An unrecognised descriptor raises `function_clause'.
args_regs(Args) ->
    Classify = fun
        ({free, {ptr, PtrReg}}) -> PtrReg;
        ({free, FreedReg}) when is_atom(FreedReg) -> FreedReg;
        ({free, FreedImm}) when is_integer(FreedImm) -> imm;
        ({free, {x_reg, _}}) -> ?CTX_REG;
        ({free, {y_reg, _}}) -> ?CTX_REG;
        ({free, {fp_reg, _}}) -> ?CTX_REG;
        ({ptr, PtrReg}) -> PtrReg;
        ({x_reg, _}) -> ?CTX_REG;
        ({y_reg, _}) -> ?CTX_REG;
        ({fp_reg, _}) -> ?CTX_REG;
        ({avm_int64_t, _}) -> imm;
        (offset) -> imm;
        (ctx) -> ?CTX_REG;
        (jit_state) -> jit_state;
        (jit_state_tail_call) -> jit_state;
        (stack) -> stack;
        (OtherAtom) when is_atom(OtherAtom) -> OtherAtom;
        (Int) when is_integer(Int) -> imm
    end,
    [Classify(Arg) || Arg <- Args].

%%-----------------------------------------------------------------------------
%% @doc Record a label at the current stream offset. When the offset is not a
%% multiple of 4, a single 2-byte nop is emitted first and the label is
%% recorded 2 bytes further.
%% @end
%% @param State current backend state
%% @param Label the label number or reference
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec add_label(state(), integer() | reference()) -> state().
add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) ->
    Here = StreamModule:offset(Stream),
    case Here rem 4 of
        0 ->
            add_label(State, Label, Here);
        _Unaligned ->
            Padded = StreamModule:append(Stream, jit_armv6m_asm:nop()),
            add_label(State#state{stream = Padded}, Label, Here + 2)
    end.

%%-----------------------------------------------------------------------------
%% @doc Record a label at an explicit offset. The new `{Label, Offset}' pair is
%% prepended to the label list; nothing is emitted to the stream.
%% @end
%% @param State current backend state
%% @param Label the label number or reference
%% @param Offset the explicit offset for this label
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec add_label(state(), integer() | reference(), integer()) -> state().
add_label(#state{labels = KnownLabels} = State, Label, Offset) ->
    NewLabels = [{Label, Offset} | KnownLabels],
    State#state{labels = NewLabels}.
+ +-ifdef(JIT_DWARF). +%%----------------------------------------------------------------------------- +%% @doc Return the DWARF register number for the ctx parameter +%% @returns The DWARF register number where ctx is passed (r0 in ARM) +%% @end +%%----------------------------------------------------------------------------- +-spec dwarf_ctx_register() -> non_neg_integer(). +dwarf_ctx_register() -> + ?DWARF_R0_REG_ARMV6M. +-endif. diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl new file mode 100644 index 0000000000..6410e03952 --- /dev/null +++ b/libs/jit/src/jit_armv6m_asm.erl @@ -0,0 +1,668 @@ +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + +-module(jit_armv6m_asm). + +-export([ + add/2, + add/3, + adds/2, + adds/3, + sub/2, + sub/3, + subs/2, + subs/3, + muls/2, + b/1, + bcc/2, + bkpt/1, + blx/1, + bx/1, + cmp/2, + ands/2, + bics/2, + negs/2, + rsbs/3, + orrs/2, + ldr/2, + lsls/2, + lsls/3, + lsrs/2, + lsrs/3, + mov/2, + movs/2, + mvns/2, + nop/0, + str/2, + tst/2, + adr/2, + push/1, + pop/1, + reg_to_num/1 +]). + +-export_type([ + cc/0 +]). + +-type arm_gpr_register() :: + r0 + | r1 + | r2 + | r3 + | r4 + | r5 + | r6 + | r7 + | r8 + | r9 + | r10 + | r11 + | r12 + | r13 + | r14 + | r15 + | sp + | lr + | pc. + +-type cc() :: eq | ne | cs | cc | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al. 

%%-----------------------------------------------------------------------------
%% Helper functions
%%-----------------------------------------------------------------------------

%% Convert a register atom to its register number for instruction encoding.
%% Accepts r0..r15 as well as the aliases sp (r13), lr (r14) and pc (r15).
%% Any other term raises `function_clause'.
-spec reg_to_num(atom()) -> 0..15.
reg_to_num(r0) -> 0;
reg_to_num(r1) -> 1;
reg_to_num(r2) -> 2;
reg_to_num(r3) -> 3;
reg_to_num(r4) -> 4;
reg_to_num(r5) -> 5;
reg_to_num(r6) -> 6;
reg_to_num(r7) -> 7;
reg_to_num(r8) -> 8;
reg_to_num(r9) -> 9;
reg_to_num(r10) -> 10;
reg_to_num(r11) -> 11;
reg_to_num(r12) -> 12;
reg_to_num(r13) -> 13;
reg_to_num(r14) -> 14;
reg_to_num(r15) -> 15;
%% Stack pointer (SP) is r13
reg_to_num(sp) -> 13;
%% Link register (LR) is r14
reg_to_num(lr) -> 14;
%% Program counter (PC) is r15
reg_to_num(pc) -> 15.

%% Convert a condition atom to its 4-bit condition code number.
%% NOTE(review): in the 16-bit conditional branch encoding the values 14 and 15
%% are not branch conditions (UDF and SVC per the ARMv6-M ARM) - confirm before
%% passing `nv' to an encoder; `al' should go through an unconditional branch.
-spec cond_to_num(atom()) -> 0..15.
% Equal (Z set)
cond_to_num(eq) -> 0;
% Not equal (Z clear)
cond_to_num(ne) -> 1;
% Carry set
cond_to_num(cs) -> 2;
% Carry clear
cond_to_num(cc) -> 3;
% Minus (N set)
cond_to_num(mi) -> 4;
% Plus (N clear)
cond_to_num(pl) -> 5;
% Overflow set
cond_to_num(vs) -> 6;
% Overflow clear
cond_to_num(vc) -> 7;
% Higher (unsigned)
cond_to_num(hi) -> 8;
% Lower or same (unsigned)
cond_to_num(ls) -> 9;
% Greater than or equal (signed)
cond_to_num(ge) -> 10;
% Less than (signed)
cond_to_num(lt) -> 11;
% Greater than (signed)
cond_to_num(gt) -> 12;
% Less than or equal (signed)
cond_to_num(le) -> 13;
% Always
cond_to_num(al) -> 14;
% Never
cond_to_num(nv) -> 15.

%% Guard-safe test for the low registers r0..r7, the only ones most 16-bit
%% encodings in this module accept.
-define(IS_LOW_REGISTER(Reg),
    (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse
        Reg =:= r5 orelse Reg =:= r6 orelse Reg =:= r7)
).
+ +%% Emit an ADD instruction (Thumb encoding, high register form) +%% ADD Rd, Rm - adds register value to register (supports high registers including PC) +%% Encoding: 01000100 DN RmNum[3:0] RdLow3[2:0] +%% ADD SP, #imm - adds immediate value to stack pointer +-spec add + (arm_gpr_register(), arm_gpr_register()) -> binary(); + (sp, integer()) -> binary(). +add(Rd, Rm) when is_atom(Rd), is_atom(Rm) -> + RdNum = reg_to_num(Rd), + RmNum = reg_to_num(Rm), + % Extract bit 3 of Rd + DN = (RdNum bsr 3) band 1, + RdLow3 = RdNum band 7, + % Build 16-bit instruction: 01000100 DN RmNum[3:0] RdLow3[2:0] + Instr = (2#01000100 bsl 8) bor (DN bsl 7) bor (RmNum bsl 3) bor RdLow3, + <>; +add(sp, Imm) when is_integer(Imm), Imm >= 0, Imm =< 508, (Imm rem 4) =:= 0 -> + %% Thumb ADD SP, SP, #imm7*4 encoding: 10110000 0iiiiiii + Imm7 = Imm div 4, + <<(16#B000 bor (Imm7 band 127)):16/little>>; +add(sp, Imm) when is_integer(Imm) -> + error({unencodable_immediate, Imm}). + +%% ADD SP, SP, #imm - adds immediate value to stack pointer (3-operand form) +-spec add(sp, sp, integer()) -> binary(). +add(sp, sp, Imm) -> + add(sp, Imm). + +%% Emit an ADDS instruction (Thumb encoding) +%% ADDS Rd, #imm - adds immediate value to register and sets flags (2-operand form) +-spec adds(arm_gpr_register(), integer()) -> binary(). +adds(Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 -> + adds(Rd, Rd, Imm); +adds(Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm) -> + error({unencodable_immediate, Imm}). + +%% ADDS Rd, Rn, #imm - adds immediate value to register and sets flags (3-operand form) +-spec adds(arm_gpr_register(), arm_gpr_register(), integer()) -> binary(). 

%% ADDS with three operands (low registers only, sets flags). The clause is
%% chosen from the operand shapes:
%%   - same source and destination with an immediate 0..255: 8-bit form;
%%   - distinct registers with an immediate 0..7: 3-bit form;
%%   - any other integer immediate: error({unencodable_immediate, Imm});
%%   - three registers: register form.
adds(Rdn, Rdn, Imm8) when ?IS_LOW_REGISTER(Rdn), is_integer(Imm8), Imm8 >= 0, Imm8 =< 255 ->
    %% Thumb ADDS (immediate, 8-bit): 00110dddiiiiiiii
    D = reg_to_num(Rdn) band 7,
    Word = 16#3000 bor (D bsl 8) bor (Imm8 band 255),
    <<Word:16/little>>;
adds(Rd, Rn, Imm3) when
    ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(Imm3), Imm3 >= 0, Imm3 =< 7
->
    %% Thumb ADDS (immediate, 3-bit): 0001110iiinnnddd
    D = reg_to_num(Rd) band 7,
    N = reg_to_num(Rn) band 7,
    Word = 16#1C00 bor ((Imm3 band 7) bsl 6) bor (N bsl 3) bor D,
    <<Word:16/little>>;
adds(Rd, Rn, BadImm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(BadImm) ->
    error({unencodable_immediate, BadImm});
adds(Rd, Rn, Rm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm) ->
    %% Thumb ADDS (register): 0001100mmmnnnddd
    D = reg_to_num(Rd) band 7,
    N = reg_to_num(Rn) band 7,
    M = reg_to_num(Rm) band 7,
    Word = 16#1800 bor (M bsl 6) bor (N bsl 3) bor D,
    <<Word:16/little>>.

%% Emit an unconditional branch (B) instruction (16-bit Thumb encoding).
%% Offset is in bytes, measured from the address of the branch instruction
%% itself; 4 is subtracted before encoding because the encoded displacement is
%% relative to PC, which reads as the instruction address plus 4.
%% Accepted range: even offsets from -2044 to 2050. Anything else raises
%% error({unencodable_offset, Offset}).
-spec b(integer()) -> binary().
b(Offset) when is_integer(Offset), Offset >= -2044, Offset =< 2050, (Offset rem 2) =:= 0 ->
    %% Thumb B (unconditional): 11100iiiiiiiiiii, imm11 = signed halfword count
    Imm11 = ((Offset - 4) div 2) band 16#7FF,
    Word = 16#E000 bor Imm11,
    <<Word:16/little>>;
b(BadOffset) when is_integer(BadOffset) ->
    error({unencodable_offset, BadOffset}).

%% Emit a branch with link and exchange (BLX) to a register (Thumb encoding)
%% Register is the register atom (r0-r15)
-spec blx(arm_gpr_register()) -> binary().
%% BLX Rm: branch to the address held in the register and set LR.
blx(Reg) when is_atom(Reg) ->
    RegNum = reg_to_num(Reg),
    %% Thumb BLX (register) encoding: 010001111mmmm000
    <<(16#4780 bor (RegNum bsl 3)):16/little>>.

%% Emit a branch and exchange (BX) instruction (Thumb encoding)
%% Register is the register atom (r0-r15); LR is left untouched.
-spec bx(arm_gpr_register()) -> binary().
bx(Reg) when is_atom(Reg) ->
    RegNum = reg_to_num(Reg),
    %% Thumb BX (branch exchange) encoding: 010001110mmmm000
    <<(16#4700 bor (RegNum bsl 3)):16/little>>.

%% Emit a BKPT (breakpoint) instruction with an 8-bit immediate (0-255).
-spec bkpt(byte()) -> binary().
bkpt(Imm) when is_integer(Imm), Imm >= 0, Imm =< 16#FF ->
    %% Thumb BKPT encoding: 1011 1110 iiii iiii
    <<(16#BE00 bor (Imm band 16#FF)):16/little>>.

%% Emit a load register (LDR) instruction. The address operand is a 2-tuple:
%%   {Rn, Imm}  low base register, immediate 0-124 (multiple of 4)
%%   {sp, Imm}  SP-relative, immediate 0-1020 (multiple of 4)
%%   {pc, Imm}  PC-relative, immediate 0-1020 (multiple of 4)
%%   {Rn, Rm}   low base register plus low offset register
%% Rt must be a low register. Operands matching none of these forms raise
%% `function_clause'.
-spec ldr(arm_gpr_register(), {arm_gpr_register(), integer() | arm_gpr_register()}) -> binary().
%% LDR Rt, [Rn, #imm5*4] - 16-bit immediate offset (0-124, multiple of 4)
ldr(Rt, {Rn, Imm}) when
    ?IS_LOW_REGISTER(Rt),
    ?IS_LOW_REGISTER(Rn),
    is_integer(Imm),
    Imm >= 0,
    Imm =< 124,
    (Imm rem 4) =:= 0
->
    RtNum = reg_to_num(Rt),
    RnNum = reg_to_num(Rn),
    Imm5 = Imm div 4,
    %% Thumb LDR immediate: 01101iiiiinnnttt
    <<(16#6800 bor (Imm5 bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>;
%% LDR Rt, [SP, #imm8*4] - SP-relative load (0-1020, multiple of 4)
ldr(Rt, {sp, Imm}) when
    ?IS_LOW_REGISTER(Rt),
    is_integer(Imm),
    Imm >= 0,
    Imm =< 1020,
    (Imm rem 4) =:= 0
->
    RtNum = reg_to_num(Rt),
    Imm8 = Imm div 4,
    %% Thumb LDR SP-relative: 10011tttiiiiiiii
    <<(16#9800 bor (RtNum bsl 8) bor Imm8):16/little>>;
%% LDR Rt, [PC, #imm8*4] - PC-relative load (0-1020, multiple of 4)
ldr(Rt, {pc, Imm}) when
    ?IS_LOW_REGISTER(Rt),
    is_integer(Imm),
    Imm >= 0,
    Imm =< 1020,
    (Imm rem 4) =:= 0
->
    RtNum = reg_to_num(Rt),
    Imm8 = Imm div 4,
    %% Thumb LDR PC-relative: 01001tttiiiiiiii
    <<(16#4800 bor (RtNum bsl 8) bor Imm8):16/little>>;
%% LDR Rt, [Rn, Rm] - register offset
ldr(Rt, {Rn, Rm}) when
    ?IS_LOW_REGISTER(Rt),
    ?IS_LOW_REGISTER(Rn),
    ?IS_LOW_REGISTER(Rm)
->
    RtNum = reg_to_num(Rt),
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% Thumb LDR register: 0101100mmmnnnttt
    <<(16#5800 bor (RmNum bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>.

%% ARMv6-M Thumb MOVS instruction (sets flags), low registers only.
-spec movs(arm_gpr_register(), integer() | arm_gpr_register()) -> binary().
%% MOVS immediate - 8-bit immediates only (0-255)
movs(Rd, Imm) when
    ?IS_LOW_REGISTER(Rd),
    is_integer(Imm),
    Imm >= 0,
    Imm =< 255
->
    RdNum = reg_to_num(Rd),
    %% Thumb MOVS immediate: 00100dddiiiiiiii
    <<(16#2000 bor (RdNum bsl 8) bor Imm):16/little>>;
%% MOVS register - low registers only (both must be r0-r7)
movs(Rd, Rm) when
    ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm)
->
    RdNum = reg_to_num(Rd),
    RmNum = reg_to_num(Rm),
    %% Encoded with a zero opcode prefix: 0000000000mmmddd
    <<(16#0000 bor (RmNum bsl 3) bor RdNum):16/little>>.

%% MVNS Rd, Rm - bitwise NOT of Rm into Rd (low registers only)
-spec mvns(arm_gpr_register(), arm_gpr_register()) -> binary().
mvns(Rd, Rm) when
    ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm)
->
    RdNum = reg_to_num(Rd),
    RmNum = reg_to_num(Rm),
    %% Thumb MVNS register: 0100001111mmmddd
    <<(16#43C0 bor (RmNum bsl 3) bor RdNum):16/little>>.

%% ARMv6-M Thumb MOV instruction - register-to-register move only. Either
%% operand may be a high register (r8-r15): bit 3 of each register number is
%% carried in the D and M bits of the encoding.
-spec mov(arm_gpr_register(), arm_gpr_register()) -> binary().
mov(Rd, Rm) when is_atom(Rd), is_atom(Rm) ->
    RdNum = reg_to_num(Rd),
    RmNum = reg_to_num(Rm),
    D =
        if
            RdNum >= 8 -> 1;
            true -> 0
        end,
    M =
        if
            RmNum >= 8 -> 1;
            true -> 0
        end,
    RdLow = RdNum band 7,
    RmLow = RmNum band 7,
    <<(16#4600 bor (D bsl 7) bor (M bsl 6) bor (RmLow bsl 3) bor RdLow):16/little>>.

%% Emit a store register (STR) instruction. The address operand is a 2-tuple:
%%   {Rn, Imm}  low base register, immediate 0-124 (multiple of 4)
%%   {sp, Imm}  SP-relative, immediate 0-1020 (multiple of 4)
%%   {Rn, Rm}   low base register plus low offset register
%% Rt must be a low register. Operands matching none of these forms raise
%% `function_clause'.
-spec str(arm_gpr_register(), {arm_gpr_register(), integer() | arm_gpr_register()}) -> binary().
%% ARMv6-M Thumb STR immediate offset (0-124, multiple of 4)
str(Rt, {Rn, Imm}) when
    ?IS_LOW_REGISTER(Rt),
    ?IS_LOW_REGISTER(Rn),
    is_integer(Imm),
    Imm >= 0,
    Imm =< 124,
    (Imm rem 4) =:= 0
->
    RtNum = reg_to_num(Rt),
    RnNum = reg_to_num(Rn),
    Imm5 = Imm div 4,
    %% Thumb STR immediate: 01100iiiiinnnttt
    <<(16#6000 bor (Imm5 bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>;
%% SP-relative STR (0-1020, multiple of 4)
str(Rt, {sp, Imm}) when
    ?IS_LOW_REGISTER(Rt),
    is_integer(Imm),
    Imm >= 0,
    Imm =< 1020,
    (Imm rem 4) =:= 0
->
    RtNum = reg_to_num(Rt),
    Imm8 = Imm div 4,
    %% Thumb STR SP relative: 10010tttiiiiiiii
    <<(16#9000 bor (RtNum bsl 8) bor Imm8):16/little>>;
%% STR Rt, [Rn, Rm] - register offset
str(Rt, {Rn, Rm}) when
    ?IS_LOW_REGISTER(Rt),
    ?IS_LOW_REGISTER(Rn),
    ?IS_LOW_REGISTER(Rm)
->
    RtNum = reg_to_num(Rt),
    RnNum = reg_to_num(Rn),
    RmNum = reg_to_num(Rm),
    %% Thumb STR register: 0101000mmmnnnttt
    <<(16#5000 bor (RmNum bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>.

%% Emit a conditional branch instruction.
%% Offset is in bytes, measured from the address of the branch instruction
%% itself; 4 is subtracted before encoding. Accepted range for a real
%% condition: even offsets from -252 to 258; anything else raises
%% error({unencodable_offset, Offset}).
-spec bcc(cc(), integer()) -> binary().
%% Special case: 'al' (always) is emitted as an unconditional branch, which
%% therefore follows the range rules of b/1.
bcc(al, Offset) when is_integer(Offset) ->
    b(Offset);
bcc(Cond, Offset) when
    is_atom(Cond), is_integer(Offset), Offset >= -252, Offset =< 258, (Offset rem 2) =:= 0
->
    CondNum = cond_to_num(Cond),
    %% Thumb conditional branch encoding (ARMv6-M): 1101cccciiiiiiii
    %% imm8 is the signed halfword count, i.e. (Offset - 4) / 2 in 8 bits
    AdjustedOffset = Offset - 4,
    Offset8 = AdjustedOffset div 2,
    <<(16#D000 bor (CondNum bsl 8) bor (Offset8 band 16#FF)):16/little>>;
bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) ->
    error({unencodable_offset, Offset}).

%% CMP - compare and set flags (ARMv6-M Thumb, low registers only).
%% The second operand is either a low register or an 8-bit immediate (0-255);
%% any other integer raises error({unencodable_immediate, Imm}).
-spec cmp(arm_gpr_register(), arm_gpr_register() | integer()) -> binary().
cmp(Rn, Rm) when ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm) ->
    %% Thumb CMP register: 0100001010mmmnnn
    N = reg_to_num(Rn),
    M = reg_to_num(Rm),
    Word = 16#4280 bor (M bsl 3) bor N,
    <<Word:16/little>>;
cmp(Rn, Imm8) when ?IS_LOW_REGISTER(Rn), is_integer(Imm8), Imm8 >= 0, Imm8 =< 255 ->
    %% Thumb CMP immediate: 00101nnniiiiiiii
    N = reg_to_num(Rn),
    Word = 16#2800 bor (N bsl 8) bor Imm8,
    <<Word:16/little>>;
cmp(Rn, BadImm) when ?IS_LOW_REGISTER(Rn), is_integer(BadImm) ->
    error({unencodable_immediate, BadImm}).

%% ANDS Rdn, Rm - bitwise AND, register form only (no immediate encoding).
-spec ands(arm_gpr_register(), arm_gpr_register()) -> binary().
ands(Rdn, Rm) when ?IS_LOW_REGISTER(Rdn), ?IS_LOW_REGISTER(Rm) ->
    %% Thumb ANDS: 0100000000mmmddd
    D = reg_to_num(Rdn),
    M = reg_to_num(Rm),
    Word = 16#4000 bor (M bsl 3) bor D,
    <<Word:16/little>>.

%% BICS Rdn, Rm - bit clear (bitwise AND with the complement of Rm).
-spec bics(arm_gpr_register(), arm_gpr_register()) -> binary().
bics(Rdn, Rm) when ?IS_LOW_REGISTER(Rdn), ?IS_LOW_REGISTER(Rm) ->
    %% Thumb BICS: 0100001110mmmddd
    D = reg_to_num(Rdn),
    M = reg_to_num(Rm),
    Word = 16#4380 bor (M bsl 3) bor D,
    <<Word:16/little>>.

%% NEGS Rd, Rm - arithmetic negation, emitted as RSBS Rd, Rm, #0.
-spec negs(arm_gpr_register(), arm_gpr_register()) -> binary().
negs(Rd, Rm) ->
    rsbs(Rd, Rm, 0).

%% RSBS Rd, Rn, #0 - reverse subtract from zero; 0 is the only immediate
%% this encoder accepts.
-spec rsbs(arm_gpr_register(), arm_gpr_register(), 0) -> binary().
rsbs(Rd, Rn, 0) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn) ->
    %% Thumb RSBS: 0100001001nnnddd
    D = reg_to_num(Rd),
    N = reg_to_num(Rn),
    Word = 16#4240 bor (N bsl 3) bor D,
    <<Word:16/little>>.

%% ORRS Rdn, Rm - bitwise OR, register form only (low registers, sets flags).
-spec orrs(arm_gpr_register(), arm_gpr_register()) -> binary().
orrs(Rdn, Rm) when ?IS_LOW_REGISTER(Rdn), ?IS_LOW_REGISTER(Rm) ->
    %% Thumb ORRS: 0100001100mmmddd
    D = reg_to_num(Rdn),
    M = reg_to_num(Rm),
    Word = 16#4300 bor (M bsl 3) bor D,
    <<Word:16/little>>.

%% Logical shift left (LSLS), low registers only.
-spec lsls(arm_gpr_register(), arm_gpr_register(), integer()) -> binary().
%% LSLS Rd, Rm, #imm5 - shift by an immediate amount of 1 to 31
lsls(Rd, Rm, Shift) when
    ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm), is_integer(Shift), Shift >= 1, Shift =< 31
->
    %% Thumb LSLS immediate: 00000iiiiimmmddd
    D = reg_to_num(Rd),
    M = reg_to_num(Rm),
    Word = 16#0000 bor (Shift bsl 6) bor (M bsl 3) bor D,
    <<Word:16/little>>.

-spec lsls(arm_gpr_register(), arm_gpr_register()) -> binary().
%% LSLS Rdn, Rm - shift Rdn left by the amount held in Rm
lsls(Rdn, Rm) when ?IS_LOW_REGISTER(Rdn), ?IS_LOW_REGISTER(Rm) ->
    %% Thumb LSLS register: 0100000010mmmddd
    D = reg_to_num(Rdn),
    M = reg_to_num(Rm),
    Word = 16#4080 bor (M bsl 3) bor D,
    <<Word:16/little>>.

%% Logical shift right (LSRS), low registers only.
-spec lsrs(arm_gpr_register(), arm_gpr_register(), integer()) -> binary().
%% LSRS Rd, Rm, #imm5 - shift by an immediate amount of 1 to 32
lsrs(Rd, Rm, Shift) when
    ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm), is_integer(Shift), Shift >= 1, Shift =< 32
->
    %% Thumb LSRS immediate: 00001iiiiimmmddd; a shift by 32 is encoded as imm5 = 0
    Imm5 =
        case Shift of
            32 -> 0;
            _ -> Shift
        end,
    D = reg_to_num(Rd),
    M = reg_to_num(Rm),
    Word = 16#0800 bor (Imm5 bsl 6) bor (M bsl 3) bor D,
    <<Word:16/little>>.

-spec lsrs(arm_gpr_register(), arm_gpr_register()) -> binary().
%% LSRS Rdn, Rm - shift Rdn right by the amount held in Rm (low registers only)
lsrs(Rdn, Rm) when ?IS_LOW_REGISTER(Rdn), ?IS_LOW_REGISTER(Rm) ->
    %% Thumb LSRS register: 0100000011mmmddd
    D = reg_to_num(Rdn),
    M = reg_to_num(Rm),
    Word = 16#40C0 bor (M bsl 3) bor D,
    <<Word:16/little>>.

%% TST Rn, Rm - AND the two registers for the flags only (register form,
%% low registers only).
-spec tst(arm_gpr_register(), arm_gpr_register()) -> binary().
tst(Rn, Rm) when ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm) ->
    %% Thumb TST: 0100001000mmmnnn
    N = reg_to_num(Rn),
    M = reg_to_num(Rm),
    Word = 16#4200 bor (M bsl 3) bor N,
    <<Word:16/little>>.

%% SUBS Rd, #imm - subtract an immediate from a low register and set flags
%% (2-operand form). Immediates 0-255 are delegated to the 3-operand form with
%% the same source and destination; any other integer raises
%% error({unencodable_immediate, Imm}).
-spec subs(arm_gpr_register(), integer()) -> binary().
subs(Rdn, Imm8) when ?IS_LOW_REGISTER(Rdn), is_integer(Imm8), Imm8 >= 0, Imm8 =< 255 ->
    subs(Rdn, Rdn, Imm8);
subs(Rdn, BadImm) when ?IS_LOW_REGISTER(Rdn), is_integer(BadImm) ->
    error({unencodable_immediate, BadImm}).

%% SUBS Rd, Rn, #imm - subtracts immediate value from register and sets flags (3-operand form)
-spec subs(arm_gpr_register(), arm_gpr_register(), integer()) -> binary().
%% SUBS with three operands (low registers only, sets flags). The clause is
%% chosen from the operand shapes:
%%   - same source and destination with an immediate 0..255: 8-bit form;
%%   - distinct registers with an immediate 0..7: 3-bit form;
%%   - any other integer immediate: error({unencodable_immediate, Imm});
%%   - three registers: register form.
subs(Rdn, Rdn, Imm8) when ?IS_LOW_REGISTER(Rdn), is_integer(Imm8), Imm8 >= 0, Imm8 =< 255 ->
    %% Thumb SUBS (immediate, 8-bit): 00111dddiiiiiiii
    D = reg_to_num(Rdn) band 7,
    Word = 16#3800 bor (D bsl 8) bor (Imm8 band 255),
    <<Word:16/little>>;
subs(Rd, Rn, Imm3) when
    ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(Imm3), Imm3 >= 0, Imm3 =< 7
->
    %% Thumb SUBS (immediate, 3-bit): 0001111iiinnnddd
    D = reg_to_num(Rd) band 7,
    N = reg_to_num(Rn) band 7,
    Word = 16#1E00 bor ((Imm3 band 7) bsl 6) bor (N bsl 3) bor D,
    <<Word:16/little>>;
subs(Rd, Rn, BadImm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(BadImm) ->
    error({unencodable_immediate, BadImm});
subs(Rd, Rn, Rm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm) ->
    %% Thumb SUBS (register): 0001101mmmnnnddd
    D = reg_to_num(Rd) band 7,
    N = reg_to_num(Rn) band 7,
    M = reg_to_num(Rm) band 7,
    Word = 16#1A00 bor (M bsl 6) bor (N bsl 3) bor D,
    <<Word:16/little>>.

%% SUB SP, #imm - subtract an immediate from the stack pointer (2-operand form).
%% The immediate must be a multiple of 4 between 0 and 508; any other integer
%% raises error({unencodable_immediate, Imm}).
-spec sub(sp, integer()) -> binary().
sub(sp, Bytes) when is_integer(Bytes), Bytes >= 0, Bytes =< 508, (Bytes rem 4) =:= 0 ->
    %% Thumb SUB SP, SP, #imm7*4: 10110000 1iiiiiii
    Imm7 = (Bytes div 4) band 127,
    Word = 16#B080 bor Imm7,
    <<Word:16/little>>;
sub(sp, BadImm) when is_integer(BadImm) ->
    error({unencodable_immediate, BadImm}).

%% SUB SP, SP, #imm - 3-operand spelling of the same instruction.
-spec sub(sp, sp, integer()) -> binary().
sub(sp, sp, Bytes) ->
    sub(sp, Bytes).

%% ARMv6-M Thumb address calculation (ADR) instruction
%% ADR is implemented as ADD Rd, PC, #imm8*4 in Thumb
%% In Thumb, PC = current_instruction_address + 4, so adr(Rd, N) means:
%% Rd = (current_pc + 4) + immediate = current_pc + (N - 4) + 4 = current_pc + N
-spec adr(arm_gpr_register(), integer()) -> binary().
+adr(Rd, Offset) when
+    ?IS_LOW_REGISTER(Rd),
+    is_integer(Offset),
+    Offset >= 4,
+    Offset =< 1024,
+    (Offset rem 4) =:= 0
+->
+    DestField = reg_to_num(Rd) bsl 8,
+    %% PC already reads 4 bytes ahead, so only Offset - 4 is encoded, as a word count
+    Imm8 = (Offset - 4) div 4,
+    %% Thumb ADR (ADD PC-relative): 10100dddiiiiiiii
+    <<(16#A000 bor DestField bor Imm8):16/little>>.
+
+%% Emit a MULS instruction (Thumb encoding)
+%% MULS Rd, Rm - multiply Rd by Rm, store result in Rd (sets flags)
+-spec muls(arm_gpr_register(), arm_gpr_register()) -> binary().
+muls(Rd, Rm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm) ->
+    %% Thumb MULS encoding: 0100001101mmmddd (Rd is both an operand and the destination)
+    DestField = reg_to_num(Rd),
+    FactorField = reg_to_num(Rm) bsl 3,
+    <<(16#4340 bor FactorField bor DestField):16/little>>.
+
+%% ARMv6-M Thumb PUSH instruction
+%% PUSH {register_list} - push registers to stack (low registers + optional LR)
+-spec push([arm_gpr_register()]) -> binary().
+push(RegList) when is_list(RegList) ->
+    %% process_reglist/2 returns the low-register mask and a 0/1 flag for LR
+    {Mask, WithLR} = process_reglist(RegList, lr),
+    %% Thumb PUSH encoding: 1011010Mrrrrrrrr where M=LR bit, rrrrrrrr=low register mask
+    Encoding = 16#B400 bor (WithLR bsl 8) bor Mask,
+    <<Encoding:16/little>>.
+
+%% ARMv6-M Thumb POP instruction
+%% POP {register_list} - pop registers from stack (low registers + optional PC)
+-spec pop([arm_gpr_register()]) -> binary().
+pop(RegList) when is_list(RegList) ->
+    %% process_reglist/2 returns the low-register mask and a 0/1 flag for PC
+    {Mask, WithPC} = process_reglist(RegList, pc),
+    %% Thumb POP encoding: 1011110Prrrrrrrr where P=PC bit, rrrrrrrr=low register mask
+    Encoding = 16#BC00 bor (WithPC bsl 8) bor Mask,
+    <<Encoding:16/little>>.
+
+%% ARMv6-M Thumb NOP instruction
+%% NOP - no operation (encoded as mov r8, r8)
+-spec nop() -> binary().
+nop() ->
+    <<16#46C0:16/little>>.
+
+%% Generic helper function to process register lists for PUSH/POP.
+%% RegList is the list of registers to encode and SpecialReg the single
+%% non-low register the instruction accepts (lr for PUSH, pc for POP).
+%% Returns {LowRegMask, SpecialBit}: the 8-bit mask of low registers and 1 or 0
+%% depending on whether SpecialReg is present.
+%% Raises error({invalid_register, Bits}) when the list contains any other
+%% register; Bits is the mask of the offending registers.
+process_reglist(RegList, SpecialReg) ->
+    RegBits = lists:foldl(
+        fun(Reg, Acc) ->
+            %% bor rather than +: a register listed twice must not carry into
+            %% the bit of the next register
+            Acc bor (1 bsl reg_to_num(Reg))
+        end,
+        0,
+        RegList
+    ),
+    LowRegsBits = RegBits band 2#11111111,
+    SpecialRegBit = RegBits band (1 bsl reg_to_num(SpecialReg)),
+    if
+        RegBits =/= LowRegsBits + SpecialRegBit ->
+            error({invalid_register, RegBits - LowRegsBits - SpecialRegBit});
+        SpecialRegBit =/= 0 ->
+            {LowRegsBits, 1};
+        true ->
+            {LowRegsBits, 0}
+    end.
diff --git a/libs/jit/src/jit_backend_dwarf_impl.hrl b/libs/jit/src/jit_backend_dwarf_impl.hrl
new file mode 100644
index 0000000000..cfba532531
--- /dev/null
+++ b/libs/jit/src/jit_backend_dwarf_impl.hrl
@@ -0,0 +1,39 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-ifdef(JIT_DWARF).
+
+dwarf_opcode(#state{stream = Stream0} = State, OpCode) ->
+    Stream1 = jit_dwarf:opcode(Stream0, OpCode),
+    State#state{stream = Stream1}.
+
+dwarf_label(#state{stream = Stream0} = State, Label) ->
+    Stream1 = jit_dwarf:label(Stream0, Label),
+    State#state{stream = Stream1}.
+
+dwarf_line(#state{stream = Stream0} = State, Line) ->
+    Stream1 = jit_dwarf:line(Stream0, Line),
+    State#state{stream = Stream1}.
+
+% Forward a function record request to jit_dwarf and store the updated stream
+dwarf_function(#state{stream = Stream0} = State, FunctionName, Arity) ->
+    Stream1 = jit_dwarf:function(Stream0, FunctionName, Arity),
+    State#state{stream = Stream1}.
+
+-endif.
diff --git a/libs/jit/src/jit_dwarf.erl b/libs/jit/src/jit_dwarf.erl
new file mode 100644
index 0000000000..b68d20f681
--- /dev/null
+++ b/libs/jit/src/jit_dwarf.erl
@@ -0,0 +1,1766 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_dwarf).
+
+-include("jit_dwarf.hrl").
+
+% State of the DWARF recorder. It wraps another stream (stream_module/stream)
+% and records, at the stream offset current at the time of each call, the
+% opcodes, labels, functions and source lines that are being emitted.
+-record(dwarf, {
+    % Backend module (jit_armv6m, etc.)
+    backend :: module(),
+    % Current module being compiled
+    module_name :: module(),
+    % Recorded opcodes, most recent first. Size stays 0 until the next
+    % opcode or label is recorded and the distance to it is known.
+    opcodes = [] :: [{Offset :: non_neg_integer(), Opcode :: atom(), Size :: non_neg_integer()}],
+    % Recorded labels, most recent first
+    labels = [] :: [{Offset :: non_neg_integer(), Label :: non_neg_integer()}],
+    % Recorded functions, most recent first
+    functions = [] :: [
+        {Offset :: non_neg_integer(), FunctionName :: atom(), Arity :: non_neg_integer()}
+    ],
+    % Recorded source lines, most recent first
+    lines = [] :: [
+        {Offset :: non_neg_integer(), Filename :: binary(), LineNumber :: pos_integer()}
+    ],
+    % Module implementing the wrapped stream
+    stream_module :: module(),
+    % Wrapped stream, only ever handled through stream_module
+    stream :: any(),
+    % Maps a line reference to a file name and line number, or false when unknown
+    line_resolver :: fun((non_neg_integer()) -> false | {ok, binary(), pos_integer()})
+}).
+
+-type state() :: #dwarf{}.
+
+% Recording API and ELF generation
+-export([
+    new/5,
+    opcode/2,
+    label/2,
+    function/3,
+    line/2,
+    stream/1,
+    elf/2
+]).
+
+% jit_stream interface
+-export([
+    offset/1,
+    append/2,
+    replace/3,
+    map/4
+]).
+
+%%-----------------------------------------------------------------------------
+%% @param Backend backend module (jit_armv6m, etc.)
+%% @param ModuleName module being compiled
+%% @param StreamModule module implementing the proxied stream
+%% @param MaxSize size handed to `StreamModule:new/1'
+%% @param LineResolver fun resolving a line reference to a file and line
+%% @returns A new state
+%% @doc Create a new state with the proxied stream. The opcode list starts
+%% with a `jump_table' entry at offset 0 whose size is filled in later.
+%% @end
+%%-----------------------------------------------------------------------------
+-spec new(module(), module(), module(), pos_integer(), fun(
+    (non_neg_integer()) -> false | {ok, binary(), pos_integer()}
+)) -> state().
+new(Backend, ModuleName, StreamModule, MaxSize, LineResolver) ->
+    #dwarf{
+        backend = Backend,
+        module_name = ModuleName,
+        % Jump table symbol at offset 0, size will be calculated
+        opcodes = [{0, jump_table, 0}],
+        stream_module = StreamModule,
+        stream = StreamModule:new(MaxSize),
+        line_resolver = LineResolver
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @param Stream stream to get the offset from
+%% @returns The current offset
+%% @doc Get the current offset in the proxied stream
+%% @end
+%%-----------------------------------------------------------------------------
+-spec offset(state()) -> non_neg_integer().
+offset(#dwarf{stream_module = Mod, stream = Inner}) ->
+    Mod:offset(Inner).
+
+%%-----------------------------------------------------------------------------
+%% @param Stream stream to append to
+%% @param Binary binary to append to the stream
+%% @returns The updated stream
+%% @doc Append a binary to the proxied stream
+%% @end
+%%-----------------------------------------------------------------------------
+-spec append(state(), binary()) -> state().
+append(#dwarf{stream_module = Mod, stream = Inner} = State, Binary) ->
+    State#dwarf{stream = Mod:append(Inner, Binary)}.
+
+%%-----------------------------------------------------------------------------
+%% @param Stream stream to update
+%% @param Offset offset to update from
+%% @param Replacement binary to write at offset
+%% @returns The updated stream
+%% @doc Replace bytes of the proxied stream at a given offset
+%% @end
+%%-----------------------------------------------------------------------------
+-spec replace(state(), non_neg_integer(), binary()) -> state().
+replace(#dwarf{stream_module = Mod, stream = Inner} = State, Offset, Replacement) ->
+    State#dwarf{stream = Mod:replace(Inner, Offset, Replacement)}.
+
+%%-----------------------------------------------------------------------------
+%% @param Stream stream to update
+%% @param Offset offset to update from
+%% @param Length length of the section to update
+%% @param MapFunction function that updates the binary
+%% @returns The updated stream
+%% @doc Replace bytes of the proxied stream at a given offset by calling a
+%% map function
+%% @end
+%%-----------------------------------------------------------------------------
+-spec map(state(), non_neg_integer(), pos_integer(), fun((binary()) -> binary())) -> state().
+map(#dwarf{stream_module = Mod, stream = Inner} = State, Offset, Length, MapFunction) ->
+    State#dwarf{stream = Mod:map(Inner, Offset, Length, MapFunction)}.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param Opcode the opcode to record
+%% @returns The updated state with opcode recorded at current offset
+%% @doc Record an opcode at the current stream offset. Anything that is not
+%% a DWARF state is returned unchanged.
+%% @end
+%%-----------------------------------------------------------------------------
+-spec opcode(state(), binary()) -> state();
+            (any(), binary()) -> any().
+opcode(#dwarf{stream_module = Mod, stream = Inner, opcodes = Recorded} = State, Opcode) ->
+    Here = Mod:offset(Inner),
+    % Close the previous opcode at the current offset; the size of the new
+    % one stays 0 until something is recorded after it
+    Closed = update_previous_opcode_size(Recorded, Here),
+    State#dwarf{opcodes = [{Here, Opcode, 0} | Closed]};
+opcode(NotDwarfState, _Opcode) ->
+    NotDwarfState.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param Label the label number to record
+%% @returns The updated state with label recorded at current offset
+%% @doc Record a label at the current stream offset. Anything that is not a
+%% DWARF state is returned unchanged.
+%% @end
+%%-----------------------------------------------------------------------------
+-spec label(state(), non_neg_integer()) -> state();
+           (any(), non_neg_integer()) -> any().
+label(
+    #dwarf{stream_module = Mod, stream = Inner, labels = Known, opcodes = Recorded} = State,
+    Label
+) ->
+    Here = Mod:offset(Inner),
+    % A label also ends the opcode that precedes it
+    State#dwarf{
+        labels = [{Here, Label} | Known],
+        opcodes = update_previous_opcode_size(Recorded, Here)
+    };
+label(NotDwarfState, _Label) ->
+    NotDwarfState.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param FunctionName the function name atom to record
+%% @param Arity the function arity
+%% @returns The updated state with function recorded at current offset
+%% @doc Record a function at the current stream offset. Anything that is not
+%% a DWARF state is returned unchanged.
+%% @end
+%%-----------------------------------------------------------------------------
+-spec function(state(), atom(), non_neg_integer()) -> state();
+              (any(), atom(), non_neg_integer()) -> any().
+function(
+    #dwarf{stream_module = Mod, stream = Inner, functions = Known} = State,
+    FunctionName,
+    Arity
+) ->
+    Entry = {Mod:offset(Inner), FunctionName, Arity},
+    State#dwarf{functions = [Entry | Known]};
+function(NotDwarfState, _FunctionName, _Arity) ->
+    NotDwarfState.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param Line the line reference to record
+%% @returns The updated state with line recorded at current offset
+%% @doc Record a line number at the current stream offset. The reference is
+%% passed to the line resolver of the state; when it answers `false' nothing
+%% is recorded. Anything that is not a DWARF state is returned unchanged.
+%% @end
+%%-----------------------------------------------------------------------------
+-spec line(state(), pos_integer()) -> state();
+          (any(), pos_integer()) -> any().
+line(
+    #dwarf{
+        stream_module = Mod,
+        stream = Inner,
+        lines = Known,
+        line_resolver = Resolve,
+        module_name = ModuleName
+    } = State,
+    LineRef
+) ->
+    Here = Mod:offset(Inner),
+    case Resolve(LineRef) of
+        false ->
+            % No line information available, skip storing this line
+            State;
+        {ok, Filename, LineNumber} ->
+            % First sighting of the module file also yields line 1 at offset 0
+            Seeded = maybe_add_initial_line(Known, ModuleName, Filename),
+            State#dwarf{lines = [{Here, Filename, LineNumber} | Seeded]}
+    end;
+line(NotDwarfState, _LineRef) ->
+    NotDwarfState.
+
+%% Helper function: prepend "line 1 at offset 0" (the jump table) when Filename
+%% is the file of the module itself (basename "<ModuleName>.erl") and no entry
+%% at offset 0 exists yet. Otherwise Lines is returned unchanged.
+maybe_add_initial_line(Lines, ModuleName, Filename) ->
+    ModuleFile = <<(atom_to_binary(ModuleName, utf8))/binary, ".erl">>,
+    IsModuleFile = lists:last(binary:split(Filename, <<"/">>, [global])) =:= ModuleFile,
+    % andalso keeps the scan of Lines restricted to the module file case
+    NeedsEntry =
+        IsModuleFile andalso
+            not lists:any(fun({Offset, _, _}) -> Offset =:= 0 end, Lines),
+    case NeedsEntry of
+        true -> [{0, Filename, 1} | Lines];
+        false -> Lines
+    end.
+
+%% Helper function to set the size of the most recent opcode.
+%% Only a head entry whose size is still 0 is updated (size = NewOffset - Offset);
+%% an empty list, a head with a known size or any other term is returned as is.
+update_previous_opcode_size([{Offset, Opcode, 0} | Rest], NewOffset) ->
+    [{Offset, Opcode, NewOffset - Offset} | Rest];
+update_previous_opcode_size(Opcodes, _NewOffset) ->
+    Opcodes.
+
+%% Return the proxied stream held by the state
+-spec stream(state()) -> any().
+stream(#dwarf{stream = Inner}) ->
+    Inner.
+
+%%-----------------------------------------------------------------------------
+%% @param State DWARF state containing debug information
+%% @param NativeCode native code binary
+%% @returns {ok, binary(), binary()} with ELF structure containing DWARF info,
+%%          (without and with native code in .text) or false if not compiled
+%%          with JIT_DWARF
+%% @doc Generate ELF binaries with DWARF debug sections
+%% @end
+%%-----------------------------------------------------------------------------
+% NOTE(review): the JIT_DWARF clause returns {ok, TextSectionOffset, CombinedELF};
+% the second element looks like an offset rather than a binary - confirm and
+% align the spec and the @returns text with it.
+-spec elf(state(), binary()) -> {ok, binary(), binary()} | false.
+-ifdef(JIT_DWARF).
+elf(#dwarf{module_name = ModuleName, backend = Backend} = State, NativeCode) ->
+    SourceFile = <<(atom_to_binary(ModuleName, utf8))/binary, ".erl">>,
+
+    % DWARF sections
+    Info = generate_debug_info_section_with_opcodes(State, SourceFile),
+    Line = generate_debug_line_section(State, SourceFile),
+    Abbrev = generate_debug_abbrev_section_with_opcodes(),
+    Str = generate_debug_str_section(State, SourceFile),
+    Aranges = generate_debug_aranges_section(State),
+
+    % Symbol table sections for function names
+    {Symtab, Strtab} = generate_symbol_table(State, Backend),
+
+    % Only the armv6m backend carries an additional ARM attributes section
+    BackendSections =
+        case Backend of
+            jit_armv6m -> [{<<".ARM.attributes">>, generate_arm_attributes_section()}];
+            _ -> []
+        end,
+    Sections =
+        [
+            {<<".debug_info">>, Info},
+            {<<".debug_line">>, Line},
+            {<<".debug_abbrev">>, Abbrev},
+            {<<".debug_str">>, Str},
+            {<<".debug_aranges">>, Aranges},
+            {<<".symtab">>, Symtab},
+            {<<".strtab">>, Strtab}
+        ] ++ BackendSections,
+
+    % Complete ELF with text section and debug sections
+    {CombinedELF, TextSectionOffset} = create_elf_with_text_and_debug_sections(
+        Backend, Sections, NativeCode
+    ),
+    {ok, TextSectionOffset, CombinedELF}.
+-else.
+% Built without JIT_DWARF: no ELF can be produced
+elf(_State, _NativeCode) ->
+    false.
+-endif.
+
+-ifdef(JIT_DWARF).
+
+%% Map JIT backend to ELF machine type (e_machine)
+backend_to_machine_type(jit_x86_64) -> ?EM_X86_64;
+backend_to_machine_type(jit_aarch64) -> ?EM_AARCH64;
+backend_to_machine_type(jit_armv6m) -> ?EM_ARM;
+backend_to_machine_type(jit_riscv32) -> ?EM_RISCV.
+
+%% Map JIT backend to ELF flags (e_flags); only armv6m sets any
+backend_to_elf_flags(jit_armv6m) ->
+    ?EF_ARM_EABI_VER5 bor ?EF_ARM_ABI_FLOAT_SOFT bor ?EF_ARM_ARCH_V6M;
+backend_to_elf_flags(_) ->
+    0.
+
+%% Find section index by name.
+%% Returns the 0-based position of SectionName in SectionNames and raises
+%% error({section_not_found}) when the name is absent.
+find_section_index(SectionName, SectionNames) ->
+    find_section_index_helper(SectionName, SectionNames, 0).
+
+find_section_index_helper(_, [], _) ->
+    error({section_not_found});
+find_section_index_helper(SectionName, [SectionName | _], Index) ->
+    Index;
+find_section_index_helper(SectionName, [_ | Rest], Index) ->
+    find_section_index_helper(SectionName, Rest, Index + 1).
+
+%% Generate ARM attributes section for ARMv6-M
+%% Layout (ARM IHI 0045E build attributes):
+%%   'A' <subsection length:uint32> "aeabi" 0 <file tag = 1> <length:uint32> <tag value>*
+generate_arm_attributes_section() ->
+    % Tag-value pairs of the file attributes
+    TagValuePairs = <<
+        % CPU_arch attribute: ARMv6-M (value 11)
+        6, 11,
+        % CPU_arch_profile attribute: 'M' profile (value 77 = 'M')
+        7, 77,
+        % ARM_ISA_use attribute: No ARM ISA (value 0)
+        8, 0,
+        % THUMB_ISA_use attribute: Thumb-1 only (value 1)
+        9, 1,
+        % FP_arch attribute: No FP (value 0)
+        10, 0,
+        % ABI_PCS_wchar_t attribute: 4 bytes (value 2)
+        18, 2,
+        % ABI_enum_size attribute: int-sized (value 2)
+        26, 2,
+        % ABI_align_needed attribute: 8-byte alignment (value 1)
+        24, 1,
+        % ABI_align_preserved attribute: 8-byte alignment (value 1)
+        25, 1
+    >>,
+
+    % File attributes: tag (1) + length field (4) + tag-value pairs; the
+    % length covers the tag byte and the length field themselves
+    FileAttributesLength = 1 + 4 + byte_size(TagValuePairs),
+    FileAttributes = <<1, FileAttributesLength:32/little, TagValuePairs/binary>>,
+
+    % Vendor subsection content ("aeabi" + null + file attributes)
+    VendorContent = <<"aeabi", 0, FileAttributes/binary>>,
+
+    % The subsection length counts its own 4-byte field, the vendor name and
+    % the vendor data, but NOT the leading format-version byte: it is the
+    % section size minus one.
+    SubsectionLength = 4 + byte_size(VendorContent),
+
+    <<
+        % Format version 'A'
+        $A,
+        % Subsection length (4 bytes, little-endian)
+        SubsectionLength:32/little,
+        % Vendor subsection content
+        VendorContent/binary
+    >>.
+
+%% Generate the .debug_str section: a sequence of null-terminated strings
+generate_debug_str_section(#dwarf{module_name = ModuleName}, SourceFile) ->
+    Strings = [
+        % Index 0: empty string
+        <<0>>,
+        % Index 1: source file name
+        SourceFile,
+        <<0>>,
+        % Index 2: producer
+        <<"AtomVM JIT Compiler v0.7.0">>,
+        <<0>>,
+        % Index 3: comp_dir
+        <<"/tmp">>,
+        <<0>>,
+        % Index 4: module name
+        atom_to_binary(ModuleName, utf8),
+        <<0>>
+    ],
+    iolist_to_binary(Strings).
+
+%% Generate the .debug_aranges section: a single set holding one address
+%% range (the range returned by calculate_address_range/1) and the terminator
+generate_debug_aranges_section(#dwarf{backend = Backend} = State) ->
+    % Get word size and calculate address range
+    WordSize = Backend:word_size(),
+    WordSizeInBits = WordSize * 8,
+    {LowPC, HighPC} = calculate_address_range(State),
+    Length = HighPC - LowPC,
+
+    % The first tuple must start at an offset, counted from the beginning of
+    % the set, that is a multiple of the tuple size (2 * address size).
+    % The header in front of it is 12 bytes:
+    % unit_length(4) + version(2) + debug_info_offset(4) + addr_size(1) + seg_size(1)
+    HeaderSize = 12,
+    TupleAlignment = 2 * WordSize,
+    PaddingSize = (TupleAlignment - (HeaderSize rem TupleAlignment)) rem TupleAlignment,
+    Padding = <<0:(PaddingSize * 8)/little>>,
+
+    % Header (without the unit_length field, which is prepended below)
+    Header = <<
+        % DWARF version
+        2:16/little,
+        % Debug info offset (always 0 - first compile unit)
+        0:32/little,
+        % Address size
+        WordSize,
+        % Segment size (0 for flat address space)
+        0
+    >>,
+
+    % Address descriptors
+    Descriptors = <<
+        % Address range descriptor: start address, then length
+        LowPC:WordSizeInBits/little,
+        Length:WordSizeInBits/little,
+        % Terminating entry (two zero addresses)
+        0:WordSizeInBits/little,
+        0:WordSizeInBits/little
+    >>,
+
+    % Combine all parts
+    HeaderAndTable = <<Header/binary, Padding/binary, Descriptors/binary>>,
+
+    % unit_length does not count the length field itself
+    TotalLength = byte_size(HeaderAndTable),
+
+    % Build final section with length prefix
+    <<TotalLength:32/little, HeaderAndTable/binary>>.
+
+%% Generate the .debug_abbrev section.
+%% Each abbreviation is: code, tag, has-children flag, (attribute, form) pairs
+%% and a terminating 0, 0 pair. The table itself ends with a single 0.
+generate_debug_abbrev_section_with_opcodes() ->
+    <<
+        % Abbrev 1: DW_TAG_compile_unit, has children
+        1, ?DW_TAG_compile_unit, 1,
+        % Name attribute
+        ?DW_AT_name, ?DW_FORM_string,
+        % Compilation directory
+        ?DW_AT_comp_dir, ?DW_FORM_string,
+        % Producer
+        ?DW_AT_producer, ?DW_FORM_string,
+        % Language
+        ?DW_AT_language, ?DW_FORM_data4,
+        % Low PC
+        ?DW_AT_low_pc, ?DW_FORM_addr,
+        % High PC
+        ?DW_AT_high_pc, ?DW_FORM_addr,
+        % Statement list
+        ?DW_AT_stmt_list, ?DW_FORM_sec_offset,
+        % End of attributes
+        0, 0,
+
+        % Abbrev 2: DW_TAG_lexical_block (for opcodes), no children
+        2, ?DW_TAG_lexical_block, 0,
+        % Name attribute (opcode name)
+        ?DW_AT_name, ?DW_FORM_string,
+        % Low PC
+        ?DW_AT_low_pc, ?DW_FORM_addr,
+        % End of attributes
+        0, 0,
+
+        % Abbrev 3: DW_TAG_label (for labels), no children
+        3, ?DW_TAG_label, 0,
+        % Name attribute (label name)
+        ?DW_AT_name, ?DW_FORM_string,
+        % Low PC
+        ?DW_AT_low_pc, ?DW_FORM_addr,
+        % End of attributes
+        0, 0,
+
+        % Abbrev 4: DW_TAG_subprogram (for functions), has children (ctx parameter)
+        4, ?DW_TAG_subprogram, 1,
+        % Name attribute (module:function/arity)
+        ?DW_AT_name, ?DW_FORM_string,
+        % Low PC
+        ?DW_AT_low_pc, ?DW_FORM_addr,
+        % High PC
+        ?DW_AT_high_pc, ?DW_FORM_addr,
+        % End of attributes
+        0, 0,
+
+        % Abbrev 5: DW_TAG_formal_parameter (for ctx parameter with type), no children
+        5, ?DW_TAG_formal_parameter, 0,
+        % Name attribute (parameter name)
+        ?DW_AT_name, ?DW_FORM_string,
+        % Type attribute (reference to type DIE)
+        ?DW_AT_type, ?DW_FORM_ref4,
+        % Location attribute (register location)
+        ?DW_AT_location, ?DW_FORM_exprloc,
+        % End of attributes
+        0, 0,
+
+        % Abbrev 6: DW_TAG_base_type (for term/uintptr_t), no children
+        6, ?DW_TAG_base_type, 0,
+        % Name attribute
+        ?DW_AT_name, ?DW_FORM_string,
+        % Byte size
+        ?DW_AT_byte_size, ?DW_FORM_data1,
+        % Encoding
+        ?DW_AT_encoding, ?DW_FORM_data1,
+        % End of attributes
+        0, 0,
+
+        % Abbrev 7: DW_TAG_pointer_type (for Context*), no children
+        7, ?DW_TAG_pointer_type, 0,
+        % Byte size
+        ?DW_AT_byte_size, ?DW_FORM_data1,
+        % Type attribute (points to Context structure)
+        ?DW_AT_type, ?DW_FORM_ref4,
+        % End of attributes
+        0, 0,
+
+        % Abbrev 8: DW_TAG_structure_type (for Context), has children (members)
+        8, ?DW_TAG_structure_type, 1,
+        % Name attribute
+        ?DW_AT_name, ?DW_FORM_string,
+        % Byte size
+        ?DW_AT_byte_size, ?DW_FORM_data4,
+        % End of attributes
+        0, 0,
+
+        % Abbrev 9: DW_TAG_member (for structure members), no children
+        9, ?DW_TAG_member, 0,
+        % Name attribute
+        ?DW_AT_name, ?DW_FORM_string,
+        % Type attribute
+        ?DW_AT_type, ?DW_FORM_ref4,
+        % Data member location (offset from structure start)
+        ?DW_AT_data_member_location, ?DW_FORM_data4,
+        % End of attributes
+        0, 0,
+
+        % Abbrev 10: DW_TAG_array_type (for term x[MAX_REG+1]), has children (subrange)
+        10, ?DW_TAG_array_type, 1,
+        % Type attribute (element type)
+        ?DW_AT_type, ?DW_FORM_ref4,
+        % End of attributes
+        0, 0,
+
+        % Abbrev 11: DW_TAG_subrange_type (for array bounds), no children
+        11, ?DW_TAG_subrange_type, 0,
+        % Upper bound
+        ?DW_AT_upper_bound, ?DW_FORM_data1,
+        % End of attributes
+        0, 0,
+
+        % End of abbreviations
+        0
+    >>.
+
+%% Generate the .debug_info section: one compile unit holding the type DIEs
+%% followed by the DIEs of the recorded functions, opcodes and labels.
+generate_debug_info_section_with_opcodes(
+    #dwarf{
+        functions = Functions,
+        opcodes = Opcodes,
+        labels = Labels,
+        module_name = ModuleName,
+        backend = Backend
+    } = State,
+    SourceFile
+) ->
+    % Calculate address ranges
+    {LowPC, HighPC} = calculate_address_range(State),
+
+    % Get word size from backend and convert to bits
+    WordSize = Backend:word_size(),
+    WordSizeInBits = WordSize * 8,
+
+    % Build content first to calculate actual length
+    CompileUnitContent = <<
+        % DWARF version
+        4:16/little,
+        % Abbreviation offset
+        0:32/little,
+        % Address size
+        WordSize,
+        % Compilation unit DIE (abbreviation 1)
+        1,
+        % DW_AT_name
+        SourceFile/binary,
+        0,
+        % DW_AT_comp_dir
+        "/tmp",
+        0,
+        % DW_AT_producer
+        "AtomVM JIT Compiler v0.7.0",
+        0,
+        % DW_AT_language (for now, we always say Erlang)
+        ?DW_LANG_Erlang:32/little,
+        % DW_AT_low_pc
+        LowPC:WordSizeInBits/little,
+        % DW_AT_high_pc
+        HighPC:WordSizeInBits/little,
+        % DW_AT_stmt_list (offset into .debug_line)
+        0:32/little
+    >>,
+
+    % DW_FORM_ref4 offsets are relative to the start of the compile unit, i.e.
+    % to the length field itself, hence the 4 extra bytes. CompileUnitContent
+    % already includes the header (version + abbrev_offset + addr_size).
+    TypeDIEsBaseOffset = 4 + byte_size(CompileUnitContent),
+
+    % Generate type DIEs and get the Context* type offset
+    {TypeDIEs, ContextPtrTypeOffset} = generate_type_dies(State, TypeDIEsBaseOffset),
+
+    % Generate DIEs for functions, opcodes and labels
+    FunctionDIEs = generate_function_dies_with_module(
+        Functions, ModuleName, State, ContextPtrTypeOffset, HighPC
+    ),
+    OpcodeDIEs = generate_opcode_dies(Opcodes, Backend),
+    LabelDIEs = generate_label_dies(Labels, Backend),
+
+    % End of children marker
+    EndMarker = <<0>>,
+
+    % Everything after the length field. The type DIEs must directly follow
+    % the compile unit DIE because TypeDIEsBaseOffset assumes so.
+    % NOTE(review): the order of the function/opcode/label DIEs is a
+    % reconstruction of a truncated expression - confirm against upstream.
+    Content = <<
+        CompileUnitContent/binary,
+        TypeDIEs/binary,
+        FunctionDIEs/binary,
+        OpcodeDIEs/binary,
+        LabelDIEs/binary,
+        EndMarker/binary
+    >>,
+    UnitLength = byte_size(Content),
+
+    % Build final section with correct length
+    <<UnitLength:32/little, Content/binary>>.
+
+%% Generate the .debug_line section (DWARF version 4 line number program).
+%% The file table is built from the file names found in the recorded lines,
+%% or from SourceFile alone when no line was recorded.
+generate_debug_line_section(#dwarf{lines = Lines}, SourceFile) ->
+    % Fixed-size header fields that follow header_length
+    HeaderFields = <<
+        % Minimum instruction length. This must be 1: the operands emitted for
+        % DW_LNS_advance_pc are byte deltas and consumers multiply them by
+        % this value.
+        1,
+        % Maximum operations per instruction
+        1,
+        % Default is_stmt
+        1,
+        % Line base
+        (-5):8/signed,
+        % Line range
+        14,
+        % Opcode base
+        13
+    >>,
+
+    % Standard opcode lengths (for opcodes 1-12, opcode_base-1 entries)
+    % DW_LNS_copy(1)=0, DW_LNS_advance_pc(2)=1, DW_LNS_advance_line(3)=1, etc.
+    StdOpcodeLengths = <<0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1>>,
+
+    % Build file table with actual filenames from line data
+    UniqueFullPaths =
+        case Lines of
+            [] ->
+                [SourceFile];
+            _ ->
+                % Unique filenames from Lines; SourceFile is not added as it
+                % may be a duplicate
+                lists:usort([Filename || {_Offset, Filename, _LineNum} <- Lines])
+        end,
+
+    % Split paths into directories and filenames, avoiding duplicates.
+    % Directory index 0 means "no directory"; real directories are 1-based.
+    {Directories, FileEntries, _} = lists:foldl(
+        fun(FullPath, {DirAcc, FileAcc, FileSet}) ->
+            {NewDirAcc, FileKey} =
+                case filename:split(binary_to_list(FullPath)) of
+                    [Basename] ->
+                        % Just a filename, no directory
+                        {DirAcc, {Basename, 0}};
+                    PathParts ->
+                        Dir = filename:join(lists:droplast(PathParts)),
+                        Basename = lists:last(PathParts),
+                        % Find or add directory to get proper index (1-based)
+                        case lists:member(Dir, DirAcc) of
+                            true ->
+                                ExistingIndex =
+                                    length(lists:takewhile(fun(D) -> D =/= Dir end, DirAcc)) + 1,
+                                {DirAcc, {Basename, ExistingIndex}};
+                            false ->
+                                {DirAcc ++ [Dir], {Basename, length(DirAcc) + 1}}
+                        end
+                end,
+            case sets:is_element(FileKey, FileSet) of
+                % Skip duplicate
+                true ->
+                    {NewDirAcc, FileAcc, FileSet};
+                false ->
+                    {NewDirAcc, [FileKey | FileAcc], sets:add_element(FileKey, FileSet)}
+            end
+        end,
+        {[], [], sets:new()},
+        UniqueFullPaths
+    ),
+
+    % Build directory table: null-terminated directory names
+    DirectoryTable = lists:foldl(
+        fun(Dir, Acc) ->
+            DirBin = list_to_binary(Dir),
+            <<Acc/binary, DirBin/binary, 0>>
+        end,
+        <<>>,
+        Directories
+    ),
+
+    % Build file table entries with proper ULEB128 encoding for directory index.
+    % NOTE(review): entry layout reconstructed from a truncated expression as
+    % name, 0, directory index, mtime = 0, length = 0 - confirm against upstream.
+    FileTableEntries = lists:foldl(
+        fun({Filename, DirIndex}, Acc) ->
+            FilenameBin = list_to_binary(Filename),
+            DirIndexEncoded = encode_uleb128(DirIndex),
+            <<Acc/binary, FilenameBin/binary, 0, DirIndexEncoded/binary, 0, 0>>
+        end,
+        <<>>,
+        lists:reverse(FileEntries)
+    ),
+
+    FileTable = <<
+        % Directory table
+        DirectoryTable/binary,
+        % End of directory table
+        0,
+        % File table entries
+        FileTableEntries/binary,
+        % End of file table
+        0
+    >>,
+
+    % Line number program - using actual line data with file mapping
+    FileMapping = lists:zip(UniqueFullPaths, lists:seq(1, length(FileEntries))),
+    Program = generate_line_program(Lines, FileMapping),
+
+    % header_length counts everything after the header_length field up to the
+    % end of the file table
+    HeaderLength =
+        byte_size(HeaderFields) + byte_size(StdOpcodeLengths) + byte_size(FileTable),
+
+    % Everything after the unit length field
+    ContentAfterLength = <<
+        % DWARF version
+        4:16/little,
+        % Header length
+        HeaderLength:32/little,
+        HeaderFields/binary,
+        StdOpcodeLengths/binary,
+        FileTable/binary,
+        Program/binary
+    >>,
+    UnitLength = byte_size(ContentAfterLength),
+
+    <<UnitLength:32/little, ContentAfterLength/binary>>.
+
+%% Build an ELF relocatable file made of the ELF header, the data of the given
+%% sections followed by .shstrtab, and the section headers.
+create_elf_header_and_sections(Backend, Sections) ->
+    % ELF format (class and structure sizes) depends on the backend word size
+    WordSize = Backend:word_size(),
+    % 32 or 64 bits
+    WordSizeInBits = WordSize * 8,
+    {ElfClass, ElfHeaderSize, SectionHeaderSize} =
+        case WordSize of
+            % ELF64
+            8 -> {?ELFCLASS64, 64, 64};
+            % ELF32
+            4 -> {?ELFCLASS32, 52, 40}
+        end,
+
+    % Create section name string table (dynamic based on sections)
+    SectionNames =
+        [<<>>] ++ [SectionName || {SectionName, _Section} <- Sections] ++ [<<".shstrtab">>],
+    ShStrTab = create_string_table(SectionNames),
+
+    % null + debug sections + shstrtab
+    SectionCount = length(SectionNames),
+
+    % String table index is the last section
+    ShStrTabIndex = SectionCount - 1,
+
+    % Section data layout: debug sections + string table
+    {SectionData, SectionOffsets} = layout_sections(Sections, ShStrTab, ElfHeaderSize),
+
+    % Section headers start after all section data
+    SectionHeaderOffset = ElfHeaderSize + byte_size(SectionData),
+
+    % Get machine type and flags for this backend
+    MachineType = backend_to_machine_type(Backend),
+    ElfFlags = backend_to_elf_flags(Backend),
+
+    % ELF header
+    ElfHeader = <<
+        % Magic
+        ?EI_MAG0,
+        ?EI_MAG1,
+        ?EI_MAG2,
+        ?EI_MAG3,
+        % ELF class (32-bit or 64-bit)
+        ElfClass,
+        % Little endian
+        ?ELFDATA2LSB,
+        % ELF version
+        ?EV_CURRENT,
+        % OS ABI
+        0,
+        % ABI version
+        0,
+        % Padding (7 bytes, completes the 16-byte identification)
+        0:(7 * 8),
+        % Relocatable file
+        ?ET_REL:16/little,
+        % Architecture from backend
+        MachineType:16/little,
+        % Version
+        1:32/little,
+        % Entry point - 32 or 64 bit depending on word size
+        0:WordSizeInBits/little,
+        % Program header offset - 32 or 64 bit depending on word size
+        0:WordSizeInBits/little,
+        % Section header offset - 32 or 64 bit depending on word size
+        SectionHeaderOffset:WordSizeInBits/little,
+        % Flags
+        ElfFlags:32/little,
+        % ELF header size
+        ElfHeaderSize:16/little,
+        % Program header entry size
+        0:16/little,
+        % Program header count
+        0:16/little,
+        % Section header entry size
+        SectionHeaderSize:16/little,
+        % Section count
+        SectionCount:16/little,
+        % String table index (.shstrtab)
+        ShStrTabIndex:16/little
+    >>,
+
+    % Generate section headers
+    SectionHeaders = create_section_headers_proper(
+        SectionNames, Sections, SectionOffsets, ShStrTab, Backend, WordSizeInBits
+    ),
+
+    % Header, then section data, then section headers (at SectionHeaderOffset)
+    <<ElfHeader/binary, SectionData/binary, SectionHeaders/binary>>.
+
+%% Helper functions
+
+%% Return {LowPC, HighPC} covering every recorded opcode: the smallest offset
+%% and the largest end offset. {0, 0} when nothing was recorded.
+calculate_address_range(#dwarf{opcodes = []}) ->
+    {0, 0};
+calculate_address_range(#dwarf{opcodes = Opcodes}) ->
+    MinOffset = lists:min([Offset || {Offset, _, _} <- Opcodes]),
+    MaxOffset = lists:max([
+        case Size of
+            % Fallback for opcodes without calculated size
+            0 -> Offset + 4;
+            _ -> Offset + Size
+        end
+     || {Offset, _, Size} <- Opcodes
+    ]),
+    {MinOffset, MaxOffset}.
%% Build the DWARF line-number program (the opcode stream that follows the
%% line table header) from a list of `{Offset, Filename, LineNumber}' rows.
%% `FileMapping' is a list of `{Filename, FileIndex}' pairs.
generate_line_program([], _FileMapping) ->
    % No line data: select file 1 and close the sequence straight away.
    <<
        % DW_LNS_set_file (opcode 4) with file index 1
        4,
        1,
        % DW_LNE_end_sequence: extended opcode prefix, length, opcode
        0,
        1,
        1
    >>;
generate_line_program(Lines, FileMapping) ->
    % Rows are emitted by increasing code offset
    SortedLines = lists:sort(
        fun({OffsetA, _, _}, {OffsetB, _, _}) -> OffsetA =< OffsetB end,
        Lines
    ),
    generate_line_program_entries(SortedLines, FileMapping, 0, 1, 0).

%% Emit one row per `{Offset, Filename, LineNumber}' entry, followed by
%% DW_LNE_end_sequence. `LastOffset', `LastLine' and `LastFileIndex' are the
%% state-machine registers before the first entry.
generate_line_program_entries(Rows, FileMapping, LastOffset, LastLine, LastFileIndex) ->
    {RowOpcodes, _FinalState} = lists:mapfoldl(
        fun({Offset, Filename, LineNumber}, {PrevOffset, PrevLine, PrevFileIndex}) ->
            FileIndex =
                case lists:keyfind(Filename, 1, FileMapping) of
                    {Filename, Index} -> Index;
                    % Default to first file if not found
                    false -> 1
                end,
            SetFile =
                if
                    % The file is (re)selected while still at offset 0 and
                    % whenever it changes. The operand of DW_LNS_set_file
                    % (opcode 4) is a ULEB128, not a raw byte.
                    PrevOffset == 0; FileIndex =/= PrevFileIndex ->
                        <<4, (encode_uleb128(FileIndex))/binary>>;
                    true ->
                        <<>>
                end,
            Row = <<
                SetFile/binary,
                % DW_LNS_advance_pc (opcode 2) with ULEB128 delta
                2,
                (encode_uleb128(Offset - PrevOffset))/binary,
                % DW_LNS_advance_line (opcode 3) with SLEB128 delta
                3,
                (encode_sleb128(LineNumber - PrevLine))/binary,
                % DW_LNS_copy (opcode 1) - emit a new row
                1
            >>,
            {Row, {Offset, LineNumber, FileIndex}}
        end,
        {LastOffset, LastLine, LastFileIndex},
        Rows
    ),
    % DW_LNE_end_sequence: extended opcode prefix, length, opcode
    iolist_to_binary([RowOpcodes, <<0, 1, 1>>]).

% Encode unsigned LEB128: 7 bits per byte, least significant group first,
% high bit set on every byte except the last.
encode_uleb128(Value) when Value < 128 ->
    <<Value>>;
encode_uleb128(Value) ->
    Byte = (Value band 16#7F) bor 16#80,
    Rest = encode_uleb128(Value bsr 7),
    <<Byte, Rest/binary>>.

% Encode signed LEB128. Bit 6 of the final byte is the sign bit, so positive
% values cannot simply reuse the unsigned encoding: e.g. 100 would be read
% back as -28.
encode_sleb128(Value) when Value >= -64, Value < 64 ->
    <<(Value band 16#7F)>>;
encode_sleb128(Value) when Value >= 64 ->
    encode_sleb128_positive(Value);
encode_sleb128(Value) ->
    encode_sleb128_negative(Value).

% Positive values end once the remaining value is 0 and the sign bit of the
% last emitted group is clear.
encode_sleb128_positive(Value) ->
    Byte = Value band 16#7F,
    NewValue = Value bsr 7,
    if
        NewValue =:= 0, (Byte band 16#40) =:= 0 ->
            <<Byte>>;
        true ->
            <<(Byte bor 16#80), (encode_sleb128_positive(NewValue))/binary>>
    end.

% Negative values end once the remaining value is -1 (bsr is an arithmetic
% shift) and the sign bit of the last emitted group is set.
encode_sleb128_negative(Value) ->
    Byte = Value band 16#7F,
    NewValue = Value bsr 7,
    if
        NewValue == -1, (Byte band 16#40) =/= 0 ->
            <<Byte>>;
        true ->
            <<(Byte bor 16#80), (encode_sleb128_negative(NewValue))/binary>>
    end.

%% Generate type DIEs for Context structure and return the Context* type offset.
%% The DIEs are laid out back to back starting at `BaseOffset', in the order
%% term, x array, Context struct, Context pointer; every reference below is
%% the offset of an earlier DIE in that layout.
generate_type_dies(#dwarf{backend = Backend}, BaseOffset) ->
    WordSize = Backend:word_size(),

    % Abbrev 6: term base type (unsigned, one machine word)
    TermTypeOffset = BaseOffset,
    TermTypeDIE = <<
        % Abbreviation code
        6,
        % Name
        "term",
        0,
        % Byte size
        WordSize,
        % Encoding
        ?DW_ATE_unsigned
    >>,

    % Abbrev 10: array type for x[MAX_REG+1] (term x[17])
    XArrayTypeOffset = TermTypeOffset + byte_size(TermTypeDIE),
    XArrayTypeDIE = <<
        % Abbreviation code
        10,
        % Element type (term)
        TermTypeOffset:32/little,
        % Child, abbrev 11: subrange with upper bound 16 (array is [0..16])
        11,
        16,
        % End of children
        0
    >>,

    % Abbrev 8: Context structure type
    % Only the x array member is described for now
    XOffset =
        case Backend of
            jit_x86_64 -> 16#30;
            jit_aarch64 -> 16#30;
            % riscv32 and armv6m
            _ -> 16#18
        end,
    % Estimate of the Context size; the actual size varies
    ContextSize = 512,
    ContextStructOffset = XArrayTypeOffset + byte_size(XArrayTypeDIE),
    ContextStructDIE = <<
        % Abbreviation code
        8,
        % Name
        "Context",
        0,
        % Byte size
        ContextSize:32/little,
        % Child, abbrev 9: member "x"
        9,
        "x",
        0,
        % Member type (term array)
        XArrayTypeOffset:32/little,
        % Data member location
        XOffset:32/little,
        % End of children
        0
    >>,

    % Abbrev 7: Context* pointer type
    ContextPtrTypeOffset = ContextStructOffset + byte_size(ContextStructDIE),
    ContextPtrTypeDIE = <<
        % Abbreviation code
        7,
        % Byte size
        WordSize,
        % Pointee type (Context)
        ContextStructOffset:32/little
    >>,

    AllTypes = <<
        TermTypeDIE/binary,
        XArrayTypeDIE/binary,
        ContextStructDIE/binary,
        ContextPtrTypeDIE/binary
    >>,

    {AllTypes, ContextPtrTypeOffset}.

%% Generate DIEs for functions as DW_TAG_subprogram with module:func/arity naming.
%% Each function is taken to extend up to the start of the next one; the last
%% function extends to `CodeSize'.
generate_function_dies_with_module(
    Functions, ModuleName, #dwarf{backend = Backend}, ContextPtrTypeOffset, CodeSize
) ->
    % Keep functions with a known (non-negative) offset, ordered by address
    ValidFunctions = lists:sort([
        {Offset, FunctionName, Arity}
     || {Offset, FunctionName, Arity} <- Functions, Offset >= 0
    ]),

    FunctionsWithSizes =
        case ValidFunctions of
            [] ->
                [];
            [_ | Following] ->
                lists:zipwith(
                    fun
                        ({Offset, Name, Arity}, {NextOffset, _, _}) ->
                            {Offset, Name, Arity, NextOffset - Offset};
                        ({Offset, Name, Arity}, end_of_code) ->
                            {Offset, Name, Arity, CodeSize - Offset}
                    end,
                    ValidFunctions,
                    Following ++ [end_of_code]
                )
        end,

    iolist_to_binary([
        generate_function_die_with_module(
            Offset, FunctionName, Arity, Size, ModuleName, Backend, ContextPtrTypeOffset
        )
     || {Offset, FunctionName, Arity, Size} <- FunctionsWithSizes
    ]).
%% Generate DIE for a single function with module name.
%% The DIE uses abbreviation 4 (DW_TAG_subprogram) and has a single child,
%% the `ctx' formal parameter.
generate_function_die_with_module(
    Offset, FunctionName, Arity, FunctionSize, ModuleName, Backend, ContextPtrTypeOffset
) ->
    % module:function/arity
    QualifiedName = iolist_to_binary(
        io_lib:format("~s:~s/~B", [ModuleName, FunctionName, Arity])
    ),

    % The backend tells which DWARF register holds ctx
    CtxParamDIE = generate_ctx_parameter_die(Backend:dwarf_ctx_register(), ContextPtrTypeOffset),

    % Addresses are one machine word wide
    AddressBits = Backend:word_size() * 8,

    <<
        % Abbreviation code (4 = DW_TAG_subprogram)
        4,
        % DW_AT_name
        QualifiedName/binary,
        0,
        % DW_AT_low_pc
        Offset:AddressBits/little,
        % DW_AT_high_pc (low_pc + size)
        (Offset + FunctionSize):AddressBits/little,
        % Child: ctx parameter
        CtxParamDIE/binary,
        % End of children marker
        0
    >>.

%% Generate DIE for ctx parameter (abbreviation 5, DW_TAG_formal_parameter).
%% The location is a DW_FORM_exprloc: a ULEB128 length followed by an
%% expression stating that the value lives in register `CtxRegNum'.
generate_ctx_parameter_die(CtxRegNum, ContextPtrTypeOffset) ->
    LocationExpr =
        if
            % DW_OP_reg0..DW_OP_reg31 are single-byte opcodes
            CtxRegNum >= 0, CtxRegNum =< 31 ->
                <<(?DW_OP_reg0 + CtxRegNum)>>;
            % Higher registers need DW_OP_regx (16#90) with a ULEB128 operand
            CtxRegNum > 31 ->
                <<16#90, (encode_uleb128(CtxRegNum))/binary>>
        end,
    LocationExprLen = encode_uleb128(byte_size(LocationExpr)),

    <<
        % Abbreviation code (5 = DW_TAG_formal_parameter)
        5,
        % DW_AT_name
        "ctx",
        0,
        % DW_AT_type (reference to Context* type)
        ContextPtrTypeOffset:32/little,
        % DW_AT_location (exprloc: length + expression)
        LocationExprLen/binary,
        LocationExpr/binary
    >>.
%% Generate DIEs for opcodes as DW_TAG_lexical_block.
%% Opcode entries are accepted both as `{Offset, Opcode}' and as
%% `{Offset, Opcode, Size}', the 3-tuple shape used by the rest of this
%% module; previously 3-tuples were silently filtered out.
%% NOTE(review): confirm against the caller that emitting these DIEs for
%% 3-tuple opcode lists is expected.
generate_opcode_dies(Opcodes, Backend) ->
    % Keep entries with a known (non-negative) offset, ordered by address
    ValidOpcodes = lists:sort(
        lists:filtermap(
            fun
                ({Offset, Opcode}) when Offset >= 0 -> {true, {Offset, Opcode}};
                ({Offset, Opcode, _Size}) when Offset >= 0 -> {true, {Offset, Opcode}};
                (_) -> false
            end,
            Opcodes
        )
    ),
    iolist_to_binary([
        generate_opcode_die(Offset, Opcode, Backend)
     || {Offset, Opcode} <- ValidOpcodes
    ]).

%% Generate DIE for a single opcode, named `Opcode@Offset'
generate_opcode_die(Offset, Opcode, Backend) ->
    OpcodeString = iolist_to_binary(io_lib:format("~s@~B", [Opcode, Offset])),
    % Addresses are one machine word wide
    AddressBits = Backend:word_size() * 8,

    <<
        % Abbreviation code (2 = DW_TAG_lexical_block)
        2,
        % DW_AT_name
        OpcodeString/binary,
        0,
        % DW_AT_low_pc
        Offset:AddressBits/little
    >>.

%% Generate DIEs for labels as DW_TAG_label
generate_label_dies(Labels, Backend) ->
    % Keep labels with a known (non-negative) offset, ordered by address
    ValidLabels = lists:sort([{Offset, Label} || {Offset, Label} <- Labels, Offset >= 0]),
    iolist_to_binary([
        generate_label_die(Offset, Label, Backend)
     || {Offset, Label} <- ValidLabels
    ]).

%% Generate DIE for a single label, named `label_N'
generate_label_die(Offset, Label, Backend) ->
    LabelString = iolist_to_binary(io_lib:format("label_~B", [Label])),
    % Addresses are one machine word wide
    AddressBits = Backend:word_size() * 8,

    <<
        % Abbreviation code (3 = DW_TAG_label)
        3,
        % DW_AT_name
        LabelString/binary,
        0,
        % DW_AT_low_pc
        Offset:AddressBits/little
    >>.
%% Generate symbol table for function names and opcode symbols.
%% Returns `{SymtabContent, StrtabContent}'. Symbol names are stored in
%% the string table in the order functions, opcodes, labels, mapping symbol;
%% symbols are stored with local symbols first (null symbol, then the ARM
%% mapping symbol if any) followed by the global ones.
generate_symbol_table(
    #dwarf{functions = Functions, opcodes = Opcodes, labels = Labels, module_name = ModuleName},
    Backend
) ->
    % The symbol entry layout depends on the backend word size
    WordSize = Backend:word_size(),

    % Every entry is {Name, StInfo, Address, Size}.
    % Functions: module:function/arity, STB_GLOBAL << 4 | STT_FUNC, with an
    % estimated size of 100 bytes.
    FunctionEntries = [
        {
            iolist_to_binary(io_lib:format("~s:~s/~B", [ModuleName, FunctionName, Arity])),
            16#12,
            Offset,
            100
        }
     || {Offset, FunctionName, Arity} <- Functions
    ],
    % Opcodes: module:op_opcode@offset, STB_GLOBAL << 4 | STT_NOTYPE, with
    % the calculated opcode size.
    OpcodeEntries = [
        {
            iolist_to_binary(io_lib:format("~s:op_~s@~w", [ModuleName, Opcode, Offset])),
            16#10,
            Offset,
            Size
        }
     || {Offset, Opcode, Size} <- Opcodes
    ],
    % Labels: module:label_X@offset, STB_GLOBAL << 4 | STT_NOTYPE, size 0.
    LabelEntries = [
        {
            iolist_to_binary(io_lib:format("~s:label_~w@~w", [ModuleName, LabelNum, Offset])),
            16#10,
            Offset,
            0
        }
     || {Offset, LabelNum} <- Labels
    ],
    % ARM mapping symbol: local `$t' at address 0 marks .text as Thumb code
    MappingEntries =
        case Backend of
            jit_armv6m -> [{<<"$t">>, 16#00, 0, 0}];
            _ -> []
        end,

    % Append each name (null terminated) to the string table and replace it
    % by its offset in that table. The table starts with the null string.
    AddName = fun({Name, Info, Address, Size}, Strtab) ->
        {{byte_size(Strtab), Info, Address, Size}, <<Strtab/binary, Name/binary, 0>>}
    end,
    {GlobalSymbols, StrtabWithGlobals} = lists:mapfoldl(
        AddName, <<0>>, FunctionEntries ++ OpcodeEntries ++ LabelEntries
    ),
    {LocalSymbols, StrtabContent} = lists:mapfoldl(AddName, StrtabWithGlobals, MappingEntries),

    % Symbol table must have local symbols first, then global symbols.
    % The first entry is always the null symbol; all other symbols belong to
    % section index 1 (.text), and addresses are raw offsets (no Thumb bit).
    SymtabContent = iolist_to_binary([
        encode_elf_symbol(WordSize, 0, 0, 0, 0, 0)
        | [
            encode_elf_symbol(WordSize, NameOffset, Info, 1, Address, Size)
         || {NameOffset, Info, Address, Size} <- LocalSymbols ++ GlobalSymbols
        ]
    ]),

    {SymtabContent, StrtabContent}.

%% Encode one symbol table entry for the given word size (in bytes)
encode_elf_symbol(8, NameOffset, Info, SectionIndex, Value, Size) ->
    % ELF64: 24 bytes - st_name(4) + st_info(1) + st_other(1) + st_shndx(2) + st_value(8) + st_size(8)
    <<
        NameOffset:32/little,
        Info,
        0,
        SectionIndex:16/little,
        Value:64/little,
        Size:64/little
    >>;
encode_elf_symbol(4, NameOffset, Info, SectionIndex, Value, Size) ->
    % ELF32: 16 bytes - st_name(4) + st_value(4) + st_size(4) + st_info(1) + st_other(1) + st_shndx(2)
    <<
        NameOffset:32/little,
        Value:32/little,
        Size:32/little,
        Info,
        0,
        SectionIndex:16/little
    >>.

%% Create string table from list of binaries (each one null terminated)
create_string_table(Binaries) ->
    <<<<Binary/binary, 0>> || Binary <- Binaries>>.

%% Layout sections in memory and calculate offsets.
%% Sections are concatenated in order starting at file offset `BaseOffset',
%% followed by the section name string table. Returns `{Data, Offsets}' where
%% `Offsets' is the string table offset followed by the section offsets in
%% the order of `Sections'.
layout_sections(Sections, ShStrTab, BaseOffset) ->
    {Offsets, Data} = lists:mapfoldl(
        fun({_Name, SectionData}, Acc) ->
            {BaseOffset + byte_size(Acc), <<Acc/binary, SectionData/binary>>}
        end,
        <<>>,
        Sections
    ),
    ShStrTabOffset = BaseOffset + byte_size(Data),
    {<<Data/binary, ShStrTab/binary>>, [ShStrTabOffset | Offsets]}.

%% Create properly formatted section headers: the null header, one header per
%% entry of `Sections' and finally the header of the section name string
%% table (the last section).
create_section_headers_proper(
    SectionNames, Sections, SectionOffsets, ShStrTab, Backend, WordSizeInBits
) ->
    % Null section header (index 0)
    % Size depends on ELF format: 40 bytes (ELF32) or 64 bytes (ELF64)
    SectionHeaderSizeBits =
        case WordSizeInBits of
            64 -> 512;
            32 -> 320
        end,
    NullHeader = <<0:SectionHeaderSizeBits>>,

    % SectionOffsets from layout_sections: string table offset first, then
    % the section offsets in order
    [ShStrTabFileOffset | SectionOffsetsInOrder] = SectionOffsets,

    SectionHeaders = [
        begin
            {Type, Flags, Link, Info, EntrySize} = section_header_attributes(
                SectionName, SectionNames, Backend, WordSizeInBits
            ),
            encode_section_header(
                WordSizeInBits,
                section_name_offset(SectionName, ShStrTab),
                Type,
                Flags,
                FileOffset,
                byte_size(SectionData),
                Link,
                Info,
                EntrySize
            )
        end
     || {{SectionName, SectionData}, FileOffset} <- lists:zip(Sections, SectionOffsetsInOrder)
    ],

    ShStrTabHeader = encode_section_header(
        WordSizeInBits,
        section_name_offset(<<".shstrtab">>, ShStrTab),
        ?SHT_STRTAB,
        0,
        ShStrTabFileOffset,
        byte_size(ShStrTab),
        0,
        0,
        0
    ),

    iolist_to_binary([NullHeader, SectionHeaders, ShStrTabHeader]).

%% Offset of a section name in the section name string table, found by
%% searching for the null-terminated name. Crashes if the name is absent.
section_name_offset(SectionName, ShStrTab) ->
    {NameOffset, _Length} = binary:match(ShStrTab, <<SectionName/binary, 0>>),
    NameOffset.

%% Returns `{Type, Flags, Link, Info, EntrySize}' for a section
section_header_attributes(<<".symtab">>, SectionNames, Backend, WordSizeInBits) ->
    % Link is the index of .strtab, found dynamically
    StrtabIndex = find_section_index(<<".strtab">>, SectionNames),
    % Info is the index of the first non-local symbol
    NumLocalSymbols =
        case Backend of
            % null + $t mapping symbol
            jit_armv6m -> 2;
            % only null symbol
            _ -> 1
        end,
    SymTabEntrySize =
        case WordSizeInBits of
            32 -> 16;
            64 -> 24
        end,
    {?SHT_SYMTAB, 0, StrtabIndex, NumLocalSymbols, SymTabEntrySize};
section_header_attributes(<<".strtab">>, _SectionNames, _Backend, _WordSizeInBits) ->
    {?SHT_STRTAB, 0, 0, 0, 0};
section_header_attributes(<<".ARM.attributes">>, _SectionNames, _Backend, _WordSizeInBits) ->
    {?SHT_ARM_ATTRIBUTES, 0, 0, 0, 0};
section_header_attributes(<<".text">>, _SectionNames, _Backend, _WordSizeInBits) ->
    % Executable code
    {?SHT_PROGBITS, ?SHF_ALLOC bor ?SHF_EXECINSTR, 0, 0, 0};
section_header_attributes(_SectionName, _SectionNames, _Backend, _WordSizeInBits) ->
    % Debug sections and other progbits
    {?SHT_PROGBITS, 0, 0, 0, 0}.

%% Encode one section header. Name, type, link and info are always 32 bits;
%% flags, address, file offset, size, alignment and entry size are 32 or 64
%% bits depending on the word size. Address is 0 and alignment is 1.
encode_section_header(
    WordSizeInBits, NameOffset, Type, Flags, FileOffset, Size, Link, Info, EntrySize
) ->
    <<
        NameOffset:32/little,
        Type:32/little,
        Flags:WordSizeInBits/little,
        % Address
        0:WordSizeInBits/little,
        FileOffset:WordSizeInBits/little,
        Size:WordSizeInBits/little,
        Link:32/little,
        Info:32/little,
        % Address align
        1:WordSizeInBits/little,
        EntrySize:WordSizeInBits/little
    >>.

%% @doc Create complete ELF with .text section and debug sections from the start.
%% The native code becomes the first section (.text). Returns
%% `{ElfBinary, TextSectionOffset}' where `TextSectionOffset' is the file
%% offset of .text, i.e. the size of the ELF header.
create_elf_with_text_and_debug_sections(Backend, DebugSections, NativeCode) ->
    AllSections = [{<<".text">>, NativeCode} | DebugSections],

    % .text is the first section after the ELF header
    TextSectionOffset =
        case Backend:word_size() of
            % ELF64 header size
            8 -> 64;
            % ELF32 header size
            4 -> 52
        end,

    ElfBinary = create_elf_header_and_sections(Backend, AllSections),

    {ElfBinary, TextSectionOffset}.

-endif.
diff --git a/libs/jit/src/jit_dwarf.hrl b/libs/jit/src/jit_dwarf.hrl new file mode 100644 index 0000000000..4c071d5273 --- /dev/null +++ b/libs/jit/src/jit_dwarf.hrl @@ -0,0 +1,104 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +%% DWARF Tag constants +-define(DW_TAG_compile_unit, 16#11). +-define(DW_TAG_subprogram, 16#2e). +-define(DW_TAG_lexical_block, 16#0b). +-define(DW_TAG_label, 16#0a). +-define(DW_TAG_formal_parameter, 16#05). +-define(DW_TAG_pointer_type, 16#0f). +-define(DW_TAG_structure_type, 16#13). +-define(DW_TAG_member, 16#0d). +-define(DW_TAG_array_type, 16#01). +-define(DW_TAG_subrange_type, 16#21). +-define(DW_TAG_base_type, 16#24). + +%% DWARF Attribute constants +-define(DW_AT_name, 16#03). +-define(DW_AT_comp_dir, 16#1b). +-define(DW_AT_producer, 16#25). +-define(DW_AT_language, 16#13). +-define(DW_AT_low_pc, 16#11). +-define(DW_AT_high_pc, 16#12). +-define(DW_AT_stmt_list, 16#10). +-define(DW_AT_type, 16#49). +-define(DW_AT_data_member_location, 16#38). +-define(DW_AT_byte_size, 16#0b). +-define(DW_AT_encoding, 16#3e). +-define(DW_AT_location, 16#02). +-define(DW_AT_upper_bound, 16#2f). + +%% DWARF Form constants +-define(DW_FORM_string, 16#08). +-define(DW_FORM_addr, 16#01). +-define(DW_FORM_data4, 16#06). +-define(DW_FORM_data1, 16#0b). +-define(DW_FORM_udata, 16#0f). +-define(DW_FORM_ref4, 16#13). 
+-define(DW_FORM_sec_offset, 16#17). +-define(DW_FORM_exprloc, 16#18). + +%% DWARF Encoding constants +-define(DW_ATE_unsigned, 16#07). +-define(DW_ATE_signed, 16#05). + +%% DWARF Location expression opcodes +-define(DW_OP_reg0, 16#50). +-define(DW_OP_fbreg, 16#91). + +%% DWARF Language constants +-define(DW_LANG_C, 16#02). +-define(DW_LANG_Erlang, 16#46). +-define(DW_LANG_Elixir, 16#47). +-define(DW_LANG_Gleam, 16#48). + +%% ELF constants +-define(EI_MAG0, 16#7f). +-define(EI_MAG1, $E). +-define(EI_MAG2, $L). +-define(EI_MAG3, $F). +-define(ELFCLASS32, 1). +-define(ELFCLASS64, 2). +-define(ELFDATA2LSB, 1). +-define(EV_CURRENT, 1). +-define(ET_REL, 1). +-define(EM_ARM, 40). +-define(EM_X86_64, 62). +-define(EM_AARCH64, 183). +-define(EM_RISCV, 243). +-define(SHT_PROGBITS, 1). +-define(SHT_SYMTAB, 2). +-define(SHT_STRTAB, 3). +-define(SHT_ARM_ATTRIBUTES, 16#70000003). +-define(SHF_ALLOC, 2). +-define(SHF_EXECINSTR, 4). + +%% ARM EABI flags +-define(EF_ARM_EABI_VER5, 16#05000000). +-define(EF_ARM_ABI_FLOAT_SOFT, 16#00000200). +-define(EF_ARM_ARCH_V6M, 16#00000009). + +%% DWARF register numbers +%% These follow the DWARF register numbering conventions for each architecture +-define(DWARF_RDI_REG_X86_64, 5). % rdi register in x86_64 +-define(DWARF_X0_REG_AARCH64, 0). % x0 register in aarch64 +-define(DWARF_A0_REG_RISCV32, 10). % a0 register in RISC-V +-define(DWARF_R0_REG_ARMV6M, 0). % r0 register in ARM diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl index 151e470c54..bd0476f68d 100644 --- a/libs/jit/src/jit_precompile.erl +++ b/libs/jit/src/jit_precompile.erl @@ -19,19 +19,47 @@ % -module(jit_precompile). --export([start/0, compile/3, atom_resolver/1, type_resolver/1]). +-export([start/0, compile/4, atom_resolver/1, type_resolver/1]). -include_lib("jit.hrl"). +-include("compact_term.hrl"). 
%% @doc Precompile BEAM files on command line.
%% Plain arguments are `Target Dir [-g] File...'.
start() ->
    [Target, Dir | Args] = init:get_plain_arguments(),
    % NOTE(review): both branches enable DWARF, so `-g' is currently only
    % removed from the file list. Confirm whether the default without `-g'
    % should be `false'.
    {Files, Dwarf} =
        case Args of
            ["-g" | Rest] -> {Rest, true};
            _ -> {Args, true}
        end,
    lists:foreach(fun(File) -> compile(Target, Dir, Dwarf, File) end, Files).

%% @doc Parse target string to extract base architecture and requested variant.
%% Variants are appended to the architecture with `+'; the PIC variant is
%% always requested. An unknown variant raises
%% `error({unsupported_variant, Variant})'.
%% Examples:
%%   "armv6m" -> {"armv6m", ?JIT_VARIANT_PIC}
%%   "armv6m+float32" -> {"armv6m", ?JIT_VARIANT_PIC + ?JIT_VARIANT_FLOAT32}
%%   "x86_64" -> {"x86_64", ?JIT_VARIANT_PIC}
parse_target(Target) ->
    % string:split/3 always returns at least one element
    [BaseTarget | Variants] = string:split(Target, "+", all),
    RequestedVariant = lists:foldl(
        fun(Variant, Acc) ->
            case Variant of
                "float32" -> Acc + ?JIT_VARIANT_FLOAT32;
                _ -> error({unsupported_variant, Variant})
            end
        end,
        ?JIT_VARIANT_PIC,
        Variants
    ),
    {BaseTarget, RequestedVariant}.
+ +compile(Target, Dir, Dwarf, Path) -> try {ok, InitialBinary} = file:read_file(Path), - {ok, _Module, InitialChunks} = beam_lib:all_chunks(InitialBinary), + {ok, Module, InitialChunks} = beam_lib:all_chunks(InitialBinary), FilteredChunks0 = lists:keydelete("avmN", 1, InitialChunks), FilteredChunks = lists:keydelete("Code", 1, FilteredChunks0), {"Code", CodeChunk} = lists:keyfind("Code", 1, InitialChunks), @@ -62,27 +90,66 @@ compile(Target, Dir, Path) -> end, TypeResolver = type_resolver(TypesChunk), - Stream0 = jit_stream_binary:new(0), - <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = - CodeChunk, + % Parse line table (Line chunk) for DWARF line information + LineResolver = + case lists:keyfind("Line", 1, InitialChunks) of + {"Line", LineTable} -> + fun(LineRef) -> resolve_line_info(Module, LineTable, LineRef) end; + false -> + io:format("LineResolver -- Line chunk not found\n"), + % No line table - return false + fun(_LineRef) -> false end + end, + + % Parse target to extract arch and variant + {BaseTarget, RequestedVariant} = parse_target(Target), + Backend = list_to_atom("jit_" ++ BaseTarget), Arch = - case Target of + case BaseTarget of "x86_64" -> ?JIT_ARCH_X86_64; "aarch64" -> ?JIT_ARCH_AARCH64; + "armv6m" -> ?JIT_ARCH_ARMV6M; + "riscv32" -> ?JIT_ARCH_RISCV32; _ -> error({unsupported_target, Target}) end, - Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, Arch, ?JIT_VARIANT_PIC) - ), - Backend = list_to_atom("jit_" ++ Target), - Stream2 = Backend:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), + <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = + CodeChunk, + <> = jit:beam_chunk_header(LabelsCount, Arch, RequestedVariant), + + Stream2 = case Dwarf of + true -> + Stream0 = jit_dwarf:new(Backend, Module, jit_stream_binary, 0, LineResolver), + Backend:new(RequestedVariant, jit_dwarf, Stream0); + false -> + Backend:new(RequestedVariant, 
jit_stream_binary, <>) + end, + {LabelsCount, Stream3} = jit:compile( CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2 ), - NativeCode = Backend:stream(Stream3), - UpdatedChunks = FilteredChunks ++ [{"avmN", NativeCode}], + + NewChunks = + case Dwarf of + true -> + DwarfStream = Backend:stream(Stream3), + NativeCode = jit_dwarf:stream(DwarfStream), + + case jit_dwarf:elf(DwarfStream, NativeCode) of + false -> + % No debug info - just store native code with info header + [{"avmN", <>}]; + {ok, TextSectionOffset, ELF} -> + % Update BEAM chunk header structure and combine with ELF. + EmbeddedElfChunk = update_avmn_chunk_with_elf(Info, ELF, TextSectionOffset), + [{"avmN", EmbeddedElfChunk}] + end; + false -> + [{"avmN", Backend:stream(Stream3)}] + end, + + UpdatedChunks = FilteredChunks ++ NewChunks, {ok, Binary} = beam_lib:build_module(UpdatedChunks), Basename = filename:basename(Path), UpdatedFile = filename:join(Dir, Basename), @@ -224,3 +291,133 @@ parse_extra(0, 0, 1, <>, LowerBound, UpperBound, parse_extra(0, 0, 0, Rest, LowerBound, UpperBound, Value + 1); parse_extra(0, 0, 0, Rest, LowerBound, UpperBound, Unit) -> {Rest, LowerBound, UpperBound, Unit}. + +%% @doc Update existing Info by updating offset +update_avmn_chunk_with_elf(Info, ElfBinary, TextSectionOffset) -> + % Parse Info to update the offset: LabelsCount + Version + ArchCount + NativeCodeArch + <> = Info, + + % Calculate new offset: from start of ELF to .text section + NewOffset = TextSectionOffset, + + % Create updated Info with new offset + UpdatedInfo = <>, + + % Build updated chunk: InfoSize + UpdatedInfo + ELF + <<(byte_size(UpdatedInfo)):32, UpdatedInfo/binary, ElfBinary/binary>>. 
+ +%% @doc Resolve a line reference to filename and line number +resolve_line_info( + Module, + <>, + LineRef +) when Version =:= 0, LineRef > 0, LineRef =< NumRefs -> + resolve_line_info0(Module, 1, 0, LineRef, NumRefs, Rest, false); +resolve_line_info(_Module, <>, _) when Version =/= 0 -> + io:format("resolve_line_info -- unknown Line table version (~p)\n", [Version]), + false; +resolve_line_info( + _Module, + <<_Version:32, _Flags:32, _NumInstr:32, _NumRefs:32, _NumFilenames:32, _Rest/binary>>, + 0 +) -> + false; +resolve_line_info( + _Module, + <<_Version:32, _Flags:32, _NumInstr:32, NumRefs:32, _NumFilenames:32, _Rest/binary>>, + LineRef +) -> + io:format("resolve_line_info -- invalid lineref (~p) (NumRefs = ~p)\n", [LineRef, NumRefs]), + false. + +resolve_line_info0( + Module, CurrentLineRef, _CurrentLocationIx, _LineRef, NumRefs, LocationData, {Line, LocationIx} +) when CurrentLineRef > NumRefs -> + resolve_line_info1(Module, LocationIx, LocationData, Line); +resolve_line_info0( + Module, + LineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <<_:4, ?COMPACT_INTEGER:4, _/binary>> = Bin, + false +) -> + {Line, Rest} = jit:decode_value64(Bin), + resolve_line_info0( + Module, LineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, {Line, CurrentLocationIx} + ); +resolve_line_info0( + Module, + CurrentLineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <<_:4, ?COMPACT_INTEGER:4, _/binary>> = Bin, + Acc +) -> + {_Line, Rest} = jit:decode_value64(Bin), + resolve_line_info0(Module, CurrentLineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, Acc); +resolve_line_info0( + Module, + LineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <>, + false +) -> + Line = (Val bsl 8) bor NextByte, + resolve_line_info0( + Module, LineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, {Line, CurrentLocationIx} + ); +resolve_line_info0( + Module, + CurrentLineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <<_Val:3, ?COMPACT_LARGE_INTEGER_11BITS:5, _NextByte, 
Rest/binary>>, + Acc +) -> + resolve_line_info0(Module, CurrentLineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, Acc); +resolve_line_info0( + Module, + LineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <>, + false +) -> + resolve_line_info0( + Module, LineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, {Line, CurrentLocationIx} + ); +resolve_line_info0( + Module, + CurrentLineRef, + CurrentLocationIx, + LineRef, + NumRefs, + <>, + Acc +) -> + resolve_line_info0(Module, CurrentLineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, Acc); +resolve_line_info0( + Module, + CurrentLineRef, + _CurrentLocationIx, + LineRef, + NumRefs, + <<_:4, AtomTag:4, _/binary>> = Bin, + Acc +) when AtomTag =:= ?COMPACT_LARGE_ATOM; AtomTag =:= ?COMPACT_ATOM -> + {NewLocationIx, Rest} = jit:decode_value64(Bin), + resolve_line_info0(Module, CurrentLineRef, NewLocationIx, LineRef, NumRefs, Rest, Acc). + +resolve_line_info1(Module, 0, _LocationData, Line) -> + {ok, <<(atom_to_binary(Module, utf8))/binary, ".erl">>, Line}; +resolve_line_info1(_Module, 1, <>, Line) -> + {ok, Filename, Line}; +resolve_line_info1(Module, N, <>, Line) -> + resolve_line_info1(Module, N - 1, Rest, Line). diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl new file mode 100644 index 0000000000..f27bc35e40 --- /dev/null +++ b/libs/jit/src/jit_riscv32.erl @@ -0,0 +1,3075 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32). + +-export([ + word_size/0, + new/3, + stream/1, + offset/1, + flush/1, + debugger/1, + used_regs/1, + available_regs/1, + free_native_registers/2, + assert_all_native_free/1, + jump_table/2, + update_branches/1, + call_primitive/3, + call_primitive_last/3, + call_primitive_with_cp/3, + return_if_not_equal_to_ctx/2, + jump_to_label/2, + jump_to_continuation/2, + jump_to_offset/2, + if_block/3, + if_else_block/4, + shift_right/3, + shift_left/3, + move_to_vm_register/3, + move_to_native_register/2, + move_to_native_register/3, + move_to_cp/2, + move_array_element/4, + move_to_array_element/4, + move_to_array_element/5, + set_bs/2, + copy_to_native_register/2, + get_array_element/3, + increment_sp/2, + set_continuation_to_label/2, + set_continuation_to_offset/1, + continuation_entry_point/1, + get_module_index/1, + and_/3, + or_/3, + add/3, + sub/3, + mul/3, + decrement_reductions_and_maybe_schedule_next/1, + call_or_schedule_next/2, + call_only_or_schedule_next/2, + call_func_ptr/3, + return_labels_and_lines/2, + add_label/2, + add_label/3 +]). + +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2, + dwarf_ctx_register/0 +]). +-endif. + +-compile([warnings_as_errors]). + +-include_lib("jit.hrl"). + +-include("primitives.hrl"). + +-ifdef(JIT_DWARF). +-include("jit_dwarf.hrl"). +-endif. + +-define(ASSERT(Expr), true = Expr). + +%% RISC-V32 ILP32 ABI: a0-a7 are used for argument passing (8 registers). +%% a0-a1 are used for return values (a0 for 32-bit, a0-a1 for 64-bit returns). +%% s0-s11 are callee-saved registers (must be preserved across calls). +%% t0-t6 are caller-saved temporary registers. +%% sp is the stack pointer. +%% ra is the return address register. +%% zero (x0) is hardwired to constant 0. 
+%% This implementation uses RV32IMC (base + multiply/compressed extensions). +%% +%% See: RISC-V Calling Convention +%% https://riscv.org/wp-content/uploads/2024/12/riscv-calling.pdf +%% +%% Registers used by the JIT backend (RISC-V32): +%% - Argument/return: a0-a7 (up to 8 args in registers) +%% - Callee-saved: s0-s11 (must preserve) +%% - Temporaries: t0-t6 (caller-saved) +%% - Stack pointer: sp +%% - Return address: ra +%% - Zero register: zero (always 0) +%% - Available for JIT scratch: t0-t6 (7 temp registers) +%% +%% Note: base RV32I instructions are 32-bit wide and can address all 32 +%% registers; with the C extension some instructions are emitted in a 16-bit +%% compressed form, so the size of emitted code must not be assumed. +%% +%% For more details, refer to the RISC-V ILP32 Procedure Call Standard. + +-type riscv32_register() :: + a0 + | a1 + | a2 + | a3 + | a4 + | a5 + | a6 + | a7 + | t0 + | t1 + | t2 + | t3 + | t4 + | t5 + | t6 + | s0 + | s1 + | s2 + | s3 + | s4 + | s5 + | s6 + | s7 + | s8 + | s9 + | s10 + | s11 + | sp + | ra. + +-define(IS_GPR(Reg), + (Reg =:= a0 orelse Reg =:= a1 orelse Reg =:= a2 orelse Reg =:= a3 orelse Reg =:= a4 orelse + Reg =:= a5 orelse Reg =:= a6 orelse Reg =:= a7 orelse Reg =:= t0 orelse Reg =:= t1 orelse + Reg =:= t2 orelse Reg =:= t3 orelse Reg =:= t4 orelse Reg =:= t5 orelse Reg =:= t6 orelse + Reg =:= s0 orelse Reg =:= s1 orelse Reg =:= s2 orelse Reg =:= s3 orelse Reg =:= s4 orelse + Reg =:= s5 orelse Reg =:= s6 orelse Reg =:= s7 orelse Reg =:= s8 orelse Reg =:= s9 orelse + Reg =:= s10 orelse Reg =:= s11 orelse Reg =:= sp orelse Reg =:= ra) +). + +-type stream() :: any(). + +-record(state, { + stream_module :: module(), + stream :: stream(), + offset :: non_neg_integer(), + % Pending relocations as {Label, StreamOffset, Kind}; update_branches/1 + % dispatches on Kind to patch the placeholder found at StreamOffset. 
+ branches :: [ + { + integer() | reference(), + non_neg_integer(), + {adr, riscv32_register()} + | {far_branch, riscv32_register()} + | jump_table_auipc_jalr + } + ], + available_regs :: [riscv32_register()], + used_regs :: [riscv32_register()], + labels :: [{integer() | reference(), integer()}], + variant :: non_neg_integer() +}). + +-type state() :: #state{}. +-type immediate() :: non_neg_integer().
+-type vm_register() :: + {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, riscv32_register()}. +-type value() :: immediate() | vm_register() | riscv32_register() | {ptr, riscv32_register()}. +-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}. + +-type maybe_free_riscv32_register() :: + {free, riscv32_register()} | riscv32_register(). + +-type condition() :: + {riscv32_register(), '<', integer()} + | {maybe_free_riscv32_register(), '<', riscv32_register()} + | {maybe_free_riscv32_register(), '==', integer()} + | {maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} + | {'(int)', maybe_free_riscv32_register(), '==', integer()} + | {'(int)', maybe_free_riscv32_register(), '!=', riscv32_register() | integer()} + | {'(bool)', maybe_free_riscv32_register(), '==', false} + | {'(bool)', maybe_free_riscv32_register(), '!=', false} + | {maybe_free_riscv32_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, riscv32_register()}, '==', {free, riscv32_register()}}. + +% Context offsets (32-bit architecture) +% ctx->e is 0x14 +% ctx->x is 0x18 +-define(CTX_REG, a0). +-define(NATIVE_INTERFACE_REG, a2). +-define(Y_REGS, {?CTX_REG, 16#14}). +-define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}). +-define(CP, {?CTX_REG, 16#5C}). +-define(FP_REGS, {?CTX_REG, 16#60}). +-define(BS, {?CTX_REG, 16#64}). +-define(BS_OFFSET, {?CTX_REG, 16#68}). +% JITSTATE is in a1 register (no prolog, following aarch64 model) +-define(JITSTATE_REG, a1). +% Return address register (like LR in AArch64) +-define(RA_REG, ra). +-define(JITSTATE_MODULE_OFFSET, 0). +-define(JITSTATE_CONTINUATION_OFFSET, 16#4). +-define(JITSTATE_REDUCTIONCOUNT_OFFSET, 16#8). +-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}). +-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}). + +-define(JUMP_TABLE_ENTRY_SIZE, 8). + +%% RISC-V32 register mappings + +%% Use t3 as temporary for some operations +-define(IP_REG, t3). 
+ +-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127). +-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000). +-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255). +-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000). +-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X), + is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000 +). + +%% RISC-V32 ILP32 ABI register allocation: +%% - a0: context pointer (reserved, passed as first parameter, ?CTX_REG) +%% - a1: jit state pointer (?JITSTATE_REG) +%% - a2: native interface pointer (reserved, ?NATIVE_INTERFACE_REG) +%% - a0-a7: argument registers for calls to native functions (?PARAMETER_REGS) +%% - t0-t6: temporaries, caller-saved, available for JIT use +%% - s0-s11: callee-saved (would need to be saved/restored) +%% ?SCRATCH_REGS is ?AVAILABLE_REGS without t3, which is also ?IP_REG. +-define(AVAILABLE_REGS, [t6, t5, t4, t3, t2, t1, t0]). +-define(PARAMETER_REGS, [a0, a1, a2, a3, a4, a5, a6, a7]). +-define(SCRATCH_REGS, [t6, t5, t4, t2, t1, t0]). + +-include("jit_backend_dwarf_impl.hrl"). + +%%----------------------------------------------------------------------------- +%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. +%% sizeof(uintptr_t) +%% +%% C code equivalent is: +%% #if UINTPTR_MAX == UINT32_MAX +%% #define TERM_BYTES 4 +%% #elif UINTPTR_MAX == UINT64_MAX +%% #define TERM_BYTES 8 +%% #else +%% #error "Term size must be either 32 bit or 64 bit." +%% #endif +%% +%% This backend always returns 4. +%% +%% @end +%% @return Word size in bytes +%%----------------------------------------------------------------------------- +-spec word_size() -> 4 | 8. +word_size() -> 4. + +%%----------------------------------------------------------------------------- +%% @doc Create a new backend state for provided variant, module and stream.
+%% @end +%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC) +%% @param StreamModule module to stream instructions +%% @param Stream stream state +%% @return New backend state +%%----------------------------------------------------------------------------- +-spec new(any(), module(), stream()) -> state(). +new(Variant, StreamModule, Stream) -> + #state{ + stream_module = StreamModule, + stream = Stream, + branches = [], + offset = StreamModule:offset(Stream), + available_regs = ?AVAILABLE_REGS, + used_regs = [], + labels = [], + variant = Variant + }. + +%%----------------------------------------------------------------------------- +%% @doc Access the stream object. +%% @end +%% @param State current backend state +%% @return The stream object +%%----------------------------------------------------------------------------- +-spec stream(state()) -> stream(). +stream(#state{stream = Stream}) -> + Stream. + +%%----------------------------------------------------------------------------- +%% @doc Get the current offset in the stream +%% @end +%% @param State current backend state +%% @return The current offset +%%----------------------------------------------------------------------------- +-spec offset(state()) -> non_neg_integer(). +offset(#state{stream_module = StreamModule, stream = Stream}) -> + StreamModule:offset(Stream). + +%%----------------------------------------------------------------------------- +%% @doc Flush the stream. The flushed stream is stored back into the backend +%% state, which is what this function returns (not the bare stream). +%% @end +%% @param State current backend state +%% @return The new state +%%----------------------------------------------------------------------------- +-spec flush(state()) -> state(). +flush(#state{stream_module = StreamModule, stream = Stream0} = State) -> + Stream1 = StreamModule:flush(Stream0), + State#state{stream = Stream1}. 
+ +%%----------------------------------------------------------------------------- +%% @doc Emit a debugger or breakpoint instruction. This is used for debugging +%% and not in production.
+%% @end +%% @param State current backend state +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec debugger(state()) -> state(). +debugger(#state{stream_module = StreamModule, stream = Stream0} = State) -> + Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:c_ebreak()), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently used native registers. This is used for +%% debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of used registers +%%----------------------------------------------------------------------------- +-spec used_regs(state()) -> [riscv32_register()]. +used_regs(#state{used_regs = Used}) -> Used. + +%%----------------------------------------------------------------------------- +%% @doc Return the list of currently available native scratch registers. This +%% is used for debugging and not in production. +%% @end +%% @param State current backend state +%% @return The list of available registers +%%----------------------------------------------------------------------------- +-spec available_regs(state()) -> [riscv32_register()]. +available_regs(#state{available_regs = Available}) -> Available. + +%%----------------------------------------------------------------------------- +%% @doc Free native registers. The passed list of registers can contain +%% registers, pointer to registers or other values that are ignored. +%% @end +%% @param State current backend state +%% @param Regs list of registers or other values +%% @return The updated backend state +%%----------------------------------------------------------------------------- +-spec free_native_registers(state(), [value()]) -> state(). 
+free_native_registers(State, []) -> + State; +free_native_registers(State, [Reg | Rest]) -> + State1 = free_native_register(State, Reg), + free_native_registers(State1, Rest). + +% Free a single value: bare register atoms are released, {ptr, Reg} releases +% the underlying register, anything else is ignored. +-spec free_native_register(state(), value()) -> state(). +free_native_register( + #state{available_regs = Available0, used_regs = Used0} = State, + Reg +) when + is_atom(Reg) +-> + {Available1, Used1} = free_reg(Available0, Used0, Reg), + State#state{available_regs = Available1, used_regs = Used1}; +free_native_register(State, {ptr, Reg}) -> + free_native_register(State, Reg); +free_native_register(State, _Other) -> + State. + +%%----------------------------------------------------------------------------- +%% @doc Assert that all native scratch registers are available. This is used +%% for debugging and not in production. +%% @end +%% @param State current backend state +%% @return ok +%%----------------------------------------------------------------------------- +-spec assert_all_native_free(state()) -> ok. +assert_all_native_free(#state{ + available_regs = ?AVAILABLE_REGS, used_regs = [] +}) -> + ok. + +%%----------------------------------------------------------------------------- +%% @doc Emit the jump table at the beginning of the module. Branches will be +%% updated afterwards with update_branches/1. Emit branches for labels from +%% 0 (special entry for lines and labels information) to LabelsCount included +%% (special entry for OP_INT_CALL_END). +%% +%% On this platform, each jump table entry is 8 bytes (?JUMP_TABLE_ENTRY_SIZE). +%% Entries are first emitted as placeholders and later patched to: +%% ``` +%% auipc a3, upper20 +%% jalr zero, a3, lower12 +%% ``` +%% +%% @end +%% @param State current backend state +%% @param LabelsCount number of labels in the module. 
+%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec jump_table(state(), pos_integer()) -> state(). +jump_table(State, LabelsCount) -> + jump_table0(State, 0, LabelsCount).
+ +jump_table0(State, N, LabelsCount) when N > LabelsCount -> + State; +jump_table0( + #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State, + N, + LabelsCount +) -> + % Create jump table entry: AUIPC + JALR (8 bytes total) + % This will be patched later in update_branches/2 + Offset = StreamModule:offset(Stream0), + JumpEntry = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, + Stream1 = StreamModule:append(Stream0, JumpEntry), + + % Record both AUIPC and JALR offsets for patching + Reloc = {N, Offset, jump_table_auipc_jalr}, + UpdatedState = State#state{stream = Stream1, branches = [Reloc | Branches]}, + + jump_table0(UpdatedState, N + 1, LabelsCount). + +%%----------------------------------------------------------------------------- +%% @doc Rewrite stream to update all branches for labels. +%% @end +%% @param State current backend state +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec update_branches(state()) -> state(). 
+update_branches(#state{branches = []} = State) -> + State; +update_branches( + #state{ + stream_module = StreamModule, + stream = Stream0, + branches = [{Label, Offset, Type} | BranchesT], + labels = Labels + } = State +) -> + {Label, LabelOffset} = lists:keyfind(Label, 1, Labels), + Rel = LabelOffset - Offset, + NewInstr = + case Type of + {adr, Reg} when Rel rem 4 =:= 0 -> + % Generate pc_relative_address and pad to 8 bytes with NOP + I = pc_relative_address(Reg, Rel), + case byte_size(I) of + 4 -> <>; + 6 -> <>; + 8 -> I + end; + {adr, Reg} when Rel rem 4 =:= 2; Rel rem 4 =:= -2 -> + % Handle 2-byte aligned offsets and pad to 8 bytes + % Handle both positive and negative offsets (Erlang rem can be negative) + I = pc_relative_address(Reg, Rel), + case byte_size(I) of + 4 -> <>; + 6 -> <>; + 8 -> I + end; + {far_branch, TempReg} -> + % Check if branch can now be optimized to near branch + if + Rel >= -1048576 andalso Rel =< 1048574 andalso (Rel rem 2) =:= 0 -> + % RISC-V jal has ±1MB range + % Optimize to near branch: jal + nops to fill original size + DirectBranch = jit_riscv32_asm:jal(zero, Rel), + case byte_size(DirectBranch) of + 2 -> + <>; + 4 -> + <> + end; + true -> + % Keep far branch sequence: auipc + jalr (PC-relative, 8 bytes) + % Split the relative offset into upper 20 bits and lower 12 bits + Hi20 = (Rel + 16#800) bsr 12, + Lo12 = Rel - (Hi20 bsl 12), + I1 = jit_riscv32_asm:auipc(TempReg, Hi20), + I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12), + Entry = <>, + case byte_size(Entry) of + 6 -> <>; + 8 -> Entry + end + end; + jump_table_auipc_jalr -> + % Calculate PC-relative offset from AUIPC instruction to target + % AUIPC is at Offset, JALR is at Offset+4 + % Target is at LabelOffset + % Offset from AUIPC PC to target + PCRelOffset = LabelOffset - Offset, + + % Split into upper 20 bits and lower 12 bits + % RISC-V encodes: target = PC + (upper20 << 12) + sign_ext(lower12) + % If lower12 >= 0x800, it's negative when sign-extended, so add 1 to upper + 
Upper20 = (PCRelOffset + 16#800) bsr 12, + Lower12 = PCRelOffset band 16#FFF, + % Sign-extend lower 12 bits for JALR immediate + Lower12Signed = + if + Lower12 >= 16#800 -> Lower12 - 16#1000; + true -> Lower12 + end, + + % Encode AUIPC and JALR with computed offsets + I1 = jit_riscv32_asm:auipc(a3, Upper20), + I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed), + % Map to 8 bytes + JumpTableEntry = <>, + case byte_size(JumpTableEntry) of + 6 -> <>; + 8 -> JumpTableEntry + end + end, + Stream1 = StreamModule:replace(Stream0, Offset, NewInstr), + update_branches(State#state{stream = Stream1, branches = BranchesT}). + +%%----------------------------------------------------------------------------- +%% @doc Generate code to load a primitive function pointer into a register +%% @param Primitive index to the primitive to call +%% @param TargetReg register to load the function pointer into +%% @return Binary instruction sequence +%%----------------------------------------------------------------------------- +-spec load_primitive_ptr(non_neg_integer(), riscv32_register()) -> binary(). +load_primitive_ptr(Primitive, TargetReg) -> + case Primitive of + 0 -> + jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, 0); + N when N * 4 =< 124 -> + jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, N * 4); + N when N * 4 < 256 -> + % Can encode N * 4 directly in li instruction + I1 = jit_riscv32_asm:li(TargetReg, N * 4), + I2 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG), + I3 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0), + <>; + N -> + % For very large primitive numbers, load N and shift left by 2 (multiply by 4) + I1 = jit_riscv32_asm:li(TargetReg, N), + I2 = jit_riscv32_asm:slli(TargetReg, TargetReg, 2), + I3 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG), + I4 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0), + <> + end. 
+ +%%----------------------------------------------------------------------------- +%% @doc Emit a call (call with return) to a primitive with arguments. This +%% function converts arguments and passes them following the backend ABI +%% convention. It also saves scratch registers we need to preserve. +%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Tuple of the updated backend state and a native register, as +%% returned by call_func_ptr/3 (presumably the register holding the result) +%%----------------------------------------------------------------------------- +-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), riscv32_register()}. +call_primitive( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [TempReg | RestRegs], + used_regs = UsedRegs + } = State, + Primitive, + Args +) -> + % Load the primitive's function pointer into the first available register; + % it is marked as used here and handed to call_func_ptr/3 as {free, TempReg} + PrepCall = load_primitive_ptr(Primitive, TempReg), + Stream1 = StreamModule:append(Stream0, PrepCall), + StateCall = State#state{ + stream = Stream1, + available_regs = RestRegs, + used_regs = [TempReg | UsedRegs] + }, + call_func_ptr(StateCall, {free, TempReg}, Args); +call_primitive( + #state{available_regs = []} = State, + Primitive, + Args +) -> + % No register is available for the pointer: let call_func_ptr/3 resolve + % the primitive itself + call_func_ptr(State, {primitive, Primitive}, Args). + +%%----------------------------------------------------------------------------- +%% @doc Emit a jump (call without return) to a primitive with arguments. This +%% function converts arguments and pass them following the backend ABI +%% convention.
+%% @end +%% @param State current backend state +%% @param Primitive index to the primitive to call +%% @param Args arguments to pass to the primitive +%% @return Updated backend state +%%----------------------------------------------------------------------------- +call_primitive_last( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State0, + Primitive, + Args +) -> + % We need a register for the function pointer that should not be used as a parameter + % Since we're not returning, we can use all scratch registers except + % registers used for parameters + ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)), + ArgsRegs = args_regs(Args), + ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs, + [Temp | AvailableRegs1] = ScratchRegs, + UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1, + PrepCall = load_primitive_ptr(Primitive, Temp), + Stream1 = StreamModule:append(Stream0, PrepCall), + + State1 = State0#state{ + stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs + }, + + % Preprocess offset special arg + Args1 = lists:map( + fun(Arg) -> + case Arg of + offset -> StreamModule:offset(Stream1); + _ -> Arg + end + end, + Args + ), + + % In RISC-V, all up to 8 arguments fit in registers (a0-a7) + % Always use tail call when calling primitives in tail position + State4 = + case Args1 of + [FirstArg, jit_state | ArgsT] -> + % Use tail call + ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT], + State2 = set_registers_args(State1, ArgsForTailCall, 0), + tail_call_with_jit_state_registers_only(State2, Temp) + end, + State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}. + +%%----------------------------------------------------------------------------- +%% @doc Tail call to address in register. +%% RA is preserved across regular calls (call_func_ptr saves/restores it), +%% so when the called C primitive returns, it returns to opcodesswitch.h. 
+%% @end +%% @param State current backend state +%% @param Reg register containing the target address +%% @return Updated backend state +%%----------------------------------------------------------------------------- +tail_call_with_jit_state_registers_only( + #state{ + stream_module = StreamModule, + stream = Stream0 + } = State, + Reg +) -> + % Jump to address in register (tail call) + I1 = jit_riscv32_asm:jr(Reg), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Emit a return of a value if it's not equal to ctx. +%% This logic is used to break out to the scheduler, typically after signal +%% messages have been processed. +%% @end +%% @param State current backend state +%% @param Reg register to compare to (should be {free, Reg} as it's always freed) +%% @return Updated backend state +%%----------------------------------------------------------------------------- +return_if_not_equal_to_ctx( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = AvailableRegs0, + used_regs = UsedRegs0 + } = State, + {free, Reg} +) -> + % RISC-V doesn't have a separate cmp instruction, use beq directly + I2 = + case Reg of + % Return value is already in a0 + a0 -> <<>>; + % Move to a0 (return register) + _ -> jit_riscv32_asm:mv(a0, Reg) + end, + I3 = jit_riscv32_asm:ret(), + % Branch if equal (skip the return) + % Offset must account for the beq instruction itself (4 bytes) plus I2 and I3 + I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, 4 + byte_size(I2) + byte_size(I3)), + % Emit in execution order: beq (skips the return), optional mv, ret + Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>), + {AvailableRegs1, UsedRegs1} = free_reg( + AvailableRegs0, UsedRegs0, Reg + ), + State#state{ + stream = Stream1, + available_regs = AvailableRegs1, + used_regs = UsedRegs1 + }. 
+ +%%----------------------------------------------------------------------------- +%% @doc Emit a jump to a label.
The offset of the relocation is saved and will +%% be updated with `update_branches/2`. +%% @end +%% @param State current backend state +%% @param Label to jump to +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_label( + #state{stream_module = StreamModule, stream = Stream0, labels = Labels} = State0, Label +) -> + LabelLookupResult = lists:keyfind(Label, 1, Labels), + Offset = StreamModule:offset(Stream0), + {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult), + Stream1 = StreamModule:append(Stream0, CodeBlock), + State1#state{stream = Stream1}. + +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + CodeBlock = branch_to_offset_code(State, Offset, TargetOffset), + Stream1 = StreamModule:append(Stream0, CodeBlock), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Jump to address in continuation pointer register +%% Calculate absolute address and jump to it. 
+%% @end +%% @param State current backend state +%% @param {free, OffsetReg} register containing the offset value +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_continuation( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Temp | _], + offset = BaseOffset + } = State0, + {free, OffsetReg} +) -> + % Calculate absolute address: native_code_base + target_offset + % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) + CurrentStreamOffset = StreamModule:offset(Stream0), + NetOffset = BaseOffset - CurrentStreamOffset, + + % Get native code base address into temporary register + I1 = pc_relative_address(Temp, NetOffset), + % Add target offset to get final absolute address + I2 = jit_riscv32_asm:add(Temp, Temp, OffsetReg), + % Indirect branch to the calculated absolute address + I3 = jit_riscv32_asm:jr(Temp), + + % Emit the three steps in order: base address, add offset, jump + Code = <<I1/binary, I2/binary, I3/binary>>, + Stream1 = StreamModule:append(Stream0, Code), + % Free all registers since this is a tail jump + State0#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}.
+ +% Build the code for an unconditional branch from Offset to TargetOffset (both +% stream offsets): a single J instruction when the distance is small, an +% auipc + jalr pair through a scratch register otherwise. +branch_to_offset_code(_State, Offset, TargetOffset) when + TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044 +-> + % Near branch: use direct J instruction + Rel = TargetOffset - Offset, + jit_riscv32_asm:j(Rel); +branch_to_offset_code( + #state{available_regs = [TempReg | _]}, Offset, TargetOffset +) -> + % Far branch: use auipc + jalr sequence for PC-relative addressing + % This computes: PC + Immediate and jumps to it + + Rel = TargetOffset - Offset, + % Split the relative offset into upper 20 bits and lower 12 bits + % RISC-V PC-relative addressing: target = PC + (imm20 << 12) + sign_extend(imm12) + % Since jalr's imm12 is sign-extended, if bit 11 of Rel is set, + % we need to add 0x800 before splitting to compensate + Hi20 = (Rel + 16#800) bsr 12, + Lo12Unsigned = Rel band 16#FFF, + % Convert to signed 12-bit value: if bit 11 is set, subtract 4096 + Lo12 = + if + Lo12Unsigned >= 16#800 -> Lo12Unsigned - 16#1000; + true -> Lo12Unsigned + end, + + % TempReg = PC + (Hi20 << 12) + I1 = jit_riscv32_asm:auipc(TempReg, Hi20), + % Jump to TempReg + sign_extend(Lo12) + I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12), + <<I1/binary, I2/binary>>.
+ +branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) -> + CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), + {State, CodeBlock}; +branch_to_label_code( + #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false +) -> + % RISC-V: Far branch sequence using PC-relative auipc + jalr (8 bytes) + + % Placeholder: auipc TempReg, 0 + % Placeholder: jalr zero, TempReg, 0 + CodeBlock = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, + % Add relocation entry + Reloc = {Label, Offset, {far_branch, TempReg}}, + State1 = State0#state{branches = [Reloc | Branches]}, + {State1, CodeBlock}; +branch_to_label_code( + #state{available_regs = [], branches = Branches} = State0, Offset, Label, false +) -> + % RISC-V: Use t6 as scratch (caller-saved, safe to clobber) + % Far branch sequence using PC-relative auipc + jalr (8 bytes) + + % Placeholder: auipc t6, 0 + % Placeholder: jalr zero, t6, 0 + CodeBlock = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>, + % Add relocation entry + Reloc = {Label, Offset, {far_branch, t6}}, + State1 = State0#state{branches = [Reloc | Branches]}, + {State1, CodeBlock}; +branch_to_label_code(#state{available_regs = []}, _Offset, _Label, _LabelLookup) -> + error({no_available_registers, _LabelLookup}). + +%%----------------------------------------------------------------------------- +%% @doc Emit an if block, i.e. emit a test of a condition and conditionnally +%% execute a block. +%% @end +%% @param State current backend state +%% @param Cond condition to test +%% @param BlockFn function to emit the block that may be executed +%% @return Updated backend state +%%----------------------------------------------------------------------------- +-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state(). 
if_block(#state{stream_module = StreamModule} = State0, {'and', CondList}, BlockFn) ->
    %% Emit every condition in turn, remembering where each placeholder branch
    %% sits in the stream and how to re-assemble it.
    CollectBranch = fun(Cond, {Pending, StateIn}) ->
        CondStart = StreamModule:offset(StateIn#state.stream),
        {StateOut, BranchSpec, Delta} = if_block_cond(StateIn, Cond),
        {[{CondStart + Delta, BranchSpec} | Pending], StateOut}
    end,
    {PendingBranches, CondState} = lists:foldl(CollectBranch, {[], State0}, CondList),
    BodyState = BlockFn(CondState),
    BodyStream = BodyState#state.stream,
    BlockEnd = StreamModule:offset(BodyStream),
    %% Every placeholder branch is re-assembled to land just after the block.
    PatchBranch = fun({PatchAt, {Mnemonic, Lhs, Rhs}}, StreamAcc) ->
        StreamModule:replace(
            StreamAcc, PatchAt, jit_riscv32_asm:Mnemonic(Lhs, Rhs, BlockEnd - PatchAt)
        )
    end,
    PatchedStream = lists:foldl(PatchBranch, BodyStream, PendingBranches),
    merge_used_regs(BodyState#state{stream = PatchedStream}, CondState#state.used_regs);
if_block(#state{stream_module = StreamModule, stream = Stream0} = State0, Cond, BlockFn) ->
    CondStart = StreamModule:offset(Stream0),
    {CondState, {Mnemonic, Lhs, Rhs}, Delta} = if_block_cond(State0, Cond),
    BodyState = BlockFn(CondState),
    BodyStream = BodyState#state.stream,
    BlockEnd = StreamModule:offset(BodyStream),
    %% The branch emitted by if_block_cond/2 sits Delta bytes after CondStart;
    %% re-assemble it so that it jumps to the end of the block.
    PatchAt = CondStart + Delta,
    PatchedStream = StreamModule:replace(
        BodyStream, PatchAt, jit_riscv32_asm:Mnemonic(Lhs, Rhs, BlockEnd - PatchAt)
    ),
    merge_used_regs(BodyState#state{stream = PatchedStream}, CondState#state.used_regs).

%%-----------------------------------------------------------------------------
%% @doc Emit an if/else construct: test a condition, then run one of two
%% blocks depending on the outcome.
%% @end
%% @param State current backend state
%% @param Cond condition to test
%% @param BlockTrueFn function emitting the block run when the condition holds
%% @param BlockFalseFn function emitting the block run when it does not hold
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) ->
    state().
if_else_block(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    Cond,
    BlockTrueFn,
    BlockFalseFn
) ->
    CondStart = StreamModule:offset(Stream0),
    {CondState, {Mnemonic, Lhs, Rhs}, Delta} = if_block_cond(State0, Cond),
    CondBranchAt = CondStart + Delta,
    TrueState = BlockTrueFn(CondState),
    TrueStream = TrueState#state.stream,
    %% Placeholder jump over the else block, patched once its size is known
    SkipElseAt = StreamModule:offset(TrueStream),
    WithSkip = StreamModule:append(TrueStream, jit_riscv32_asm:j(0)),
    %% The else block begins right after that jump: point the conditional
    %% branch at it.
    ElseStart = StreamModule:offset(WithSkip),
    CondBranch = jit_riscv32_asm:Mnemonic(Lhs, Rhs, ElseStart - CondBranchAt),
    WithCondBranch = StreamModule:replace(WithSkip, CondBranchAt, CondBranch),
    %% The else block starts from the register allocation left by the
    %% condition, not from the one left by the true block.
    ElseState0 = TrueState#state{
        stream = WithCondBranch,
        used_regs = CondState#state.used_regs,
        available_regs = CondState#state.available_regs
    },
    ElseState = BlockFalseFn(ElseState0),
    ElseStream = ElseState#state.stream,
    EndOffset = StreamModule:offset(ElseStream),
    %% Finally make the unconditional jump land after the else block
    SkipElse = jit_riscv32_asm:j(EndOffset - SkipElseAt),
    FinalStream = StreamModule:replace(ElseStream, SkipElseAt, SkipElse),
    merge_used_regs(ElseState#state{stream = FinalStream}, TrueState#state.used_regs).

-spec if_block_cond(state(), condition()) ->
    {
        state(),
        {beq | bne | blt | bge, atom(), atom() | integer()},
        non_neg_integer()
    }.
%% Emit the test for a condition. Every clause appends a conditional branch
%% with a placeholder offset of 0 that is taken when the condition does NOT
%% hold, and returns `{State, {BranchFunc, Reg, Operand}, Delta}' where:
%% - `{BranchFunc, Reg, Operand}' names the jit_riscv32_asm function and its
%%   first two arguments, so that the caller can re-assemble the branch with
%%   the final offset;
%% - `Delta' is the distance in bytes between the stream offset on entry and
%%   the branch instruction (0 when the branch is the first emitted
%%   instruction).
%% Registers passed as `{free, Reg}' are released through if_block_free_reg/2.
if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) ->
    %% RISC-V: bge Reg, zero, offset (branch if Reg >= 0, i.e., NOT negative/NOT less than 0)
    BranchInstr = jit_riscv32_asm:bge(Reg, zero, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = State0#state{stream = Stream1},
    {State1, {bge, Reg, zero}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {Reg, '<', Val}
) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 ->
    % RISC-V: bge Reg, Val, offset (branch if Reg >= Val, i.e., NOT less than)
    % Load immediate into a temp register for comparison
    [Temp | _] = State0#state.available_regs,
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = jit_riscv32_asm:bge(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = State1#state{stream = Stream2},
    {State2, {bge, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {Reg, '<', Val}
) when is_atom(Reg), is_integer(Val) ->
    % RISC-V: bge Reg, Temp, offset (branch if Reg >= Temp, i.e., NOT less than)
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = jit_riscv32_asm:bge(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = State1#state{stream = Stream2},
    {State2, {bge, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {RegOrTuple, '<', RegB}
) when is_atom(RegB) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    % RISC-V: bge Reg, RegB, offset (branch if Reg >= RegB, i.e., NOT less than)
    BranchInstr = jit_riscv32_asm:bge(Reg, RegB, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {bge, Reg, RegB}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: bne Reg, zero, offset (branch if Reg != 0, i.e., NOT equal to 0)
    BranchInstr = jit_riscv32_asm:bne(Reg, zero, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {bne, Reg, zero}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {RegOrTuple, '==', RegB}
) when is_atom(RegB) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: bne Reg, RegB, offset (branch if Reg != RegB, i.e., NOT equal)
    BranchInstr = jit_riscv32_asm:bne(Reg, RegB, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {bne, Reg, RegB}, 0};
%% Delegate (int) forms to regular forms since we only have 32-bit words
if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) ->
    if_block_cond(State, {RegOrTuple, '==', 0});
if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '==', Val});
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '!=', Val}
) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Load immediate into temp, then beq Reg, Temp, offset
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = jit_riscv32_asm:beq(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {beq, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {RegOrTuple, '!=', Val}
) when ?IS_GPR(Val) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: beq Reg, Val, offset (branch if Reg == Val, i.e., NOT not-equal)
    BranchInstr = jit_riscv32_asm:beq(Reg, Val, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream1},
    {State2, {beq, Reg, Val}, 0};
if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) ->
    if_block_cond(State, {RegOrTuple, '!=', Val});
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '==', Val}
) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Load immediate into temp, then bne Reg, Temp, offset
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    BranchInstr = jit_riscv32_asm:bne(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {bne, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    {{free, RegA}, '==', {free, RegB}}
) ->
    %% RISC-V: bne RegA, RegB, offset (branch if RegA != RegB, i.e., NOT equal)
    BranchInstr = jit_riscv32_asm:bne(RegA, RegB, 0),
    Stream1 = StreamModule:append(Stream0, BranchInstr),
    State1 = State0#state{stream = Stream1},
    State2 = if_block_free_reg({free, RegA}, State1),
    State3 = if_block_free_reg({free, RegB}, State2),
    {State3, {bne, RegA, RegB}, 0};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '==', Val}
) when is_integer(Val) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    %% RISC-V: bne Reg, Temp, offset (branch if Reg != Temp, i.e., NOT equal)
    BranchInstr = jit_riscv32_asm:bne(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {bne, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    {RegOrTuple, '!=', Val}
) when is_integer(Val) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = mov_immediate(State0, Temp, Val),
    Stream1 = State1#state.stream,
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    %% RISC-V: beq Reg, Temp, offset (branch if Reg == Temp, i.e., NOT not-equal)
    BranchInstr = jit_riscv32_asm:beq(Reg, Temp, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State2 = if_block_free_reg(RegOrTuple, State1),
    State3 = State2#state{stream = Stream2},
    {State3, {beq, Reg, Temp}, BranchDelta};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {'(bool)', RegOrTuple, '==', false}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Test bit 0 by shifting to MSB, then branch if negative (bit was 1, NOT false)
    I1 = jit_riscv32_asm:slli(Temp, Reg, 31),
    Stream1 = StreamModule:append(Stream0, I1),
    BranchInstr = jit_riscv32_asm:blt(Temp, zero, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream2},
    {State2, {blt, Temp, zero}, byte_size(I1)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {'(bool)', RegOrTuple, '!=', false}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Test bit 0 by shifting to MSB, then branch if non-negative (bit was 0, NOT true)
    I1 = jit_riscv32_asm:slli(Temp, Reg, 31),
    Stream1 = StreamModule:append(Stream0, I1),
    BranchInstr = jit_riscv32_asm:bge(Temp, zero, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream2},
    {State2, {bge, Temp, zero}, byte_size(I1)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {RegOrTuple, '&', Val, '!=', 0}
) ->
    Reg =
        case RegOrTuple of
            {free, Reg0} -> Reg0;
            RegOrTuple -> RegOrTuple
        end,
    %% RISC-V: Test bits using ANDI or li+and
    TestCode =
        if
            Val >= -2048 andalso Val =< 2047 ->
                %% Can use ANDI instruction directly
                jit_riscv32_asm:andi(Temp, Reg, Val);
            true ->
                %% Need to load immediate into temp register first
                TestCode0 = jit_riscv32_asm:li(Temp, Val),
                TestCode1 = jit_riscv32_asm:and_(Temp, Reg, Temp),
                <<TestCode0/binary, TestCode1/binary>>
        end,
    OffsetBefore = StreamModule:offset(Stream0),
    Stream1 = StreamModule:append(Stream0, TestCode),
    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
    %% Branch if result is zero (no bits set, NOT != 0)
    BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = if_block_free_reg(RegOrTuple, State0),
    State2 = State1#state{stream = Stream2},
    {State2, {beq, Temp, zero}, BranchDelta};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _]
    } = State0,
    {Reg, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
    %% ~Reg shifted left by 28 is zero exactly when the low 4 bits of Reg are
    %% all set.
    I1 = jit_riscv32_asm:not_(Temp, Reg),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 28),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = State0#state{stream = Stream2},
    {State1, {beq, Temp, zero}, byte_size(I1) + byte_size(I2)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State0,
    {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F}
) when ?IS_GPR(Reg) ->
    %% RISC-V: Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
    %% Reg is free, so it is overwritten in place instead of using a temp.
    I1 = jit_riscv32_asm:not_(Reg, Reg),
    I2 = jit_riscv32_asm:slli(Reg, Reg, 28),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    BranchInstr = jit_riscv32_asm:beq(Reg, zero, 0),
    Stream2 = StreamModule:append(Stream1, BranchInstr),
    State1 = State0#state{stream = Stream2},
    State2 = if_block_free_reg(RegTuple, State1),
    {State2, {beq, Reg, zero}, byte_size(I1) + byte_size(I2)};
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | AT]
    } = State0,
    {Reg, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg) ->
    %% RISC-V: AND with mask, then compare with value
    %% Reg must be preserved, so the masking is done on a copy in Temp, which
    %% is withheld from the available registers while and_/3 runs.
    OffsetBefore = StreamModule:offset(Stream0),
    I1 = jit_riscv32_asm:mv(Temp, Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    State1 = State0#state{stream = Stream1},
    State2 = and_(State1#state{available_regs = AT}, Temp, Mask),
    Stream2 = State2#state.stream,
    %% Compare Temp with Val and branch if equal (NOT != Val)
    case Val of
        0 ->
            %% Optimize comparison with zero
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0),
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            State3 = State2#state{
                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
            },
            {State3, {beq, Temp, zero}, BranchDelta};
        _ when ?IS_GPR(Val) ->
            %% Val is a register
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Temp, Val, 0),
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            State3 = State2#state{
                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
            },
            {State3, {beq, Temp, Val}, BranchDelta};
        _ ->
            %% Val is an immediate - need second temp register
            %% Reuse the mask register for the comparison value
            [MaskReg | AT2] = AT,
            State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val),
            Stream3 = State3#state.stream,
            BranchDelta = StreamModule:offset(Stream3) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Temp, MaskReg, 0),
            Stream4 = StreamModule:append(Stream3, BranchInstr),
            State4 = State3#state{
                stream = Stream4, available_regs = [Temp, MaskReg | State3#state.available_regs]
            },
            {State4, {beq, Temp, MaskReg}, BranchDelta}
    end;
if_block_cond(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailRegs
    } = State0,
    {{free, Reg} = RegTuple, '&', Mask, '!=', Val}
) when ?IS_GPR(Reg) ->
    %% RISC-V: AND with mask, then compare with value
    %% Reg is free, so it is masked in place.
    OffsetBefore = StreamModule:offset(Stream0),
    State1 = and_(State0, Reg, Mask),
    Stream1 = State1#state.stream,
    %% Compare Reg with Val and branch if equal (NOT != Val)
    case Val of
        0 ->
            %% Optimize comparison with zero
            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Reg, zero, 0),
            Stream2 = StreamModule:append(Stream1, BranchInstr),
            State2 = State1#state{stream = Stream2},
            State3 = if_block_free_reg(RegTuple, State2),
            {State3, {beq, Reg, zero}, BranchDelta};
        _ when ?IS_GPR(Val) ->
            %% Val is a register
            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Reg, Val, 0),
            Stream2 = StreamModule:append(Stream1, BranchInstr),
            State2 = State1#state{stream = Stream2},
            State3 = if_block_free_reg(RegTuple, State2),
            {State3, {beq, Reg, Val}, BranchDelta};
        _ ->
            %% Val is an immediate - need temp register
            %% Reuse the mask register for the comparison value
            [MaskReg | AT] = State1#state.available_regs,
            State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val),
            Stream2 = State2#state.stream,
            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
            BranchInstr = jit_riscv32_asm:beq(Reg, MaskReg, 0),
            Stream3 = StreamModule:append(Stream2, BranchInstr),
            %% Restore the available registers seen on entry before freeing Reg
            State3 = State2#state{stream = Stream3, available_regs = AvailRegs},
            State4 = if_block_free_reg(RegTuple, State3),
            {State4, {beq, Reg, MaskReg}, BranchDelta}
    end.

%% Release the register of a `{free, Reg}' condition operand through
%% free_reg/3; a bare register is left allocated and the state is returned
%% unchanged.
-spec if_block_free_reg(riscv32_register() | {free, riscv32_register()}, state()) -> state().
if_block_free_reg({free, Reg}, State0) ->
    #state{available_regs = AvR0, used_regs = UR0} = State0,
    {AvR1, UR1} = free_reg(AvR0, UR0, Reg),
    State0#state{
        available_regs = AvR1,
        used_regs = UR1
    };
if_block_free_reg(Reg, State0) when ?IS_GPR(Reg) ->
    State0.

%% Make sure every register of the given list is marked as used in the state:
%% registers missing from `used_regs' are added to it and removed from
%% `available_regs'.
-spec merge_used_regs(state(), [riscv32_register()]) -> state().
merge_used_regs(#state{used_regs = UR0, available_regs = AvR0} = State, [
    Reg | T
]) ->
    case lists:member(Reg, UR0) of
        true ->
            merge_used_regs(State, T);
        false ->
            AvR1 = lists:delete(Reg, AvR0),
            UR1 = [Reg | UR0],
            merge_used_regs(
                State#state{used_regs = UR1, available_regs = AvR1}, T
            )
    end;
merge_used_regs(State, []) ->
    State.
%%-----------------------------------------------------------------------------
%% @doc Emit a shift register right by a fixed number of bits, effectively
%% dividing it by 2^Shift. A `{free, Reg}' operand is shifted in place; a bare
%% register is left untouched and the result is written to a newly allocated
%% register.
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return new state and the register holding the shifted value
%%-----------------------------------------------------------------------------
-spec shift_right(#state{}, maybe_free_riscv32_register(), non_neg_integer()) ->
    {#state{}, riscv32_register()}.
shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    I = jit_riscv32_asm:srli(Reg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    {State#state{stream = Stream1}, Reg};
shift_right(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [ResultReg | T],
        used_regs = UR
    } = State,
    Reg,
    Shift
) when
    ?IS_GPR(Reg) andalso is_integer(Shift)
->
    I = jit_riscv32_asm:srli(ResultReg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}.

%%-----------------------------------------------------------------------------
%% @doc Emit a shift register left by a fixed number of bits, effectively
%% multiplying it by 2^Shift. The register is shifted in place.
%% @param State current state
%% @param Reg register to shift
%% @param Shift number of bits to shift
%% @return new state
%%-----------------------------------------------------------------------------
-spec shift_left(#state{}, riscv32_register(), non_neg_integer()) -> #state{}.
shift_left(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when
    is_atom(Reg)
->
    I = jit_riscv32_asm:slli(Reg, Reg, Shift),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a call to a function pointer with arguments. This function converts
%% arguments and passes them following the backend ABI convention.
%% The emitted sequence saves ra, the context registers and the used registers
%% on the stack, sets up the arguments, calls the pointer with jalr, copies
%% the result from a0 and restores the saved registers.
%% @end
%% @param State current backend state
%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex}
%% @param Args arguments to pass to the function
%% @return Updated backend state and return register
%%-----------------------------------------------------------------------------
-spec call_func_ptr(state(), {free, riscv32_register()} | {primitive, non_neg_integer()}, [arg()]) ->
    {state(), riscv32_register()}.
call_func_ptr(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State0,
    FuncPtrTuple,
    Args
) ->
    % Registers handed over as {free, _} no longer need to be preserved
    FreeRegs = lists:flatmap(
        fun
            ({free, {ptr, Reg}}) -> [Reg];
            ({free, Reg}) when is_atom(Reg) -> [Reg];
            (_) -> []
        end,
        [FuncPtrTuple | Args]
    ),
    UsedRegs1 = UsedRegs0 -- FreeRegs,
    % Save RA (like AArch64 saves LR) so it's preserved across jalr calls
    SavedRegs = [?RA_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1],

    % Calculate available registers
    % (freed registers that belong to ?AVAILABLE_REGS, in FreeRegs order)
    FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS),
    AvailableRegs1 = FreeGPRegs ++ AvailableRegs0,

    % Calculate stack space: round up to 16-byte boundary for RISC-V ABI
    NumRegs = length(SavedRegs),
    StackBytes = NumRegs * 4,
    AlignedStackBytes = ((StackBytes + 15) div 16) * 16,

    Stream1 = push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0),

    % Set up arguments following RISC-V ILP32 calling convention
    % Arguments are passed in a0-a7 (up to 8 register arguments)
    % The `offset' pseudo-argument is replaced by the current stream offset
    Args1 = lists:map(
        fun(Arg) ->
            case Arg of
                offset -> StreamModule:offset(Stream1);
                _ -> Arg
            end
        end,
        Args
    ),

    RegArgs0 = Args1,
    RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0),

    % We pushed registers to stack, so we can use these registers we saved
    % and the currently available registers
    SetArgsRegsOnlyAvailableArgs = (UsedRegs1 -- RegArgsRegs) ++ AvailableRegs0,
    State1 = State0#state{
        available_regs = SetArgsRegsOnlyAvailableArgs,
        used_regs = ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs,
        stream = Stream1
    },

    ParameterRegs = parameter_regs(RegArgs0),
    {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} =
        case FuncPtrTuple of
            {free, FuncPtrReg0} ->
                % If FuncPtrReg is in parameter regs, we must swap it with a free reg.
                case lists:member(FuncPtrReg0, ParameterRegs) of
                    true ->
                        case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of
                            [] ->
                                % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0
                                % that is not in ParameterRegs
                                [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs,
                                [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs,
                                MovInstr1 = jit_riscv32_asm:mv(NewArgReg, FuncPtrReg1),
                                MovInstr2 = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0),
                                SetArgsAvailableArgs1 =
                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
                                        [FuncPtrReg0],
                                RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg),
                                {
                                    StreamModule:append(
                                        State1#state.stream,
                                        <<MovInstr1/binary, MovInstr2/binary>>
                                    ),
                                    SetArgsAvailableArgs1,
                                    FuncPtrReg1,
                                    RegArgs1
                                };
                            [FuncPtrReg1 | _] ->
                                MovInstr = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0),
                                SetArgsAvailableArgs1 =
                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
                                        [FuncPtrReg0],
                                {
                                    StreamModule:append(State1#state.stream, MovInstr),
                                    SetArgsAvailableArgs1,
                                    FuncPtrReg1,
                                    RegArgs0
                                }
                        end;
                    false ->
                        SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
                        {State1#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0}
                end;
            {primitive, Primitive} ->
                [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs,
                SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
                PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0),
                Stream2 = StreamModule:append(State1#state.stream, PrepCall),
                {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0}
        end,

    State3 = State1#state{
        available_regs = SetArgsAvailableRegs,
        used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs,
        stream = Stream3
    },

    StackOffset = AlignedStackBytes,
    State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset),
    Stream4 = State4#state.stream,

    % Call the function pointer (using JALR for call with return)
    Call = jit_riscv32_asm:jalr(ra, FuncPtrReg, 0),
    Stream5 = StreamModule:append(Stream4, Call),

    % For result, we need a free register (including FuncPtrReg).
    % If none are available (all registers were pushed to the stack),
    % we write the result to the stack position of FuncPtrReg.
    % Each branch yields the register that actually receives the result, so
    % that the returned register and the register bookkeeping always agree.
    {Stream6, UsedRegs2, ResultReg} =
        case length(SavedRegs) of
            N when N >= 7 andalso element(1, FuncPtrTuple) =:= free ->
                % We use original FuncPtrReg then as we know it's available.
                % Calculate stack offset: find register index in SavedRegs * 4 bytes
                OrigFuncPtrReg = element(2, FuncPtrTuple),
                RegIndex = index_of(OrigFuncPtrReg, SavedRegs),
                case RegIndex >= 0 of
                    true ->
                        % The value is stored in the saved slot, so that
                        % pop_registers/4 loads it into OrigFuncPtrReg
                        StoreResultStackOffset = RegIndex * 4,
                        StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset),
                        {
                            StreamModule:append(Stream5, StoreResult),
                            [OrigFuncPtrReg | UsedRegs1],
                            OrigFuncPtrReg
                        };
                    false ->
                        % FuncPtrReg was not in SavedRegs, use an available register
                        [SpareReg | _] = AvailableRegs1 -- SavedRegs,
                        MoveResult = jit_riscv32_asm:mv(SpareReg, a0),
                        {
                            StreamModule:append(Stream5, MoveResult),
                            [SpareReg | UsedRegs1],
                            SpareReg
                        }
                end;
            _ ->
                % Use any free that is not in SavedRegs
                [SpareReg | _] = AvailableRegs1 -- SavedRegs,
                MoveResult = jit_riscv32_asm:mv(SpareReg, a0),
                {StreamModule:append(Stream5, MoveResult), [SpareReg | UsedRegs1], SpareReg}
        end,

    Stream8 = pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream6),

    AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1),
    % Keep only registers of ?AVAILABLE_REGS, in ?AVAILABLE_REGS order
    AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2),
    {
        State4#state{
            stream = Stream8,
            available_regs = AvailableRegs3,
            used_regs = UsedRegs2
        },
        ResultReg
    }.
%% Return the register an argument refers to, as a zero or one element list.
%% Any bare atom is returned as is; other terms (integers, tuples such as
%% `{x_reg, N}') yield an empty list.
arg_to_reg_list({free, {ptr, Reg}}) -> [Reg];
arg_to_reg_list({free, Reg}) when is_atom(Reg) -> [Reg];
arg_to_reg_list(Reg) when is_atom(Reg) -> [Reg];
arg_to_reg_list(_) -> [].

%% Zero-based position of the first occurrence of Item in List, or -1 when
%% Item is not a member.
index_of(Item, List) -> index_of(Item, List, 0).

index_of(_, [], _) -> -1;
index_of(Item, [Item | _], Index) -> Index;
index_of(Item, [_ | Rest], Index) -> index_of(Item, Rest, Index + 1).

%% Append the code saving SavedRegs on the stack: sp is lowered by
%% AlignedStackBytes, then each register is stored at consecutive 4-byte
%% slots starting at offset 0, in list order. Nothing is emitted for an empty
%% list.
push_registers([_ | _] = SavedRegs, AlignedStackBytes, StreamModule, Stream0) ->
    % RISC-V: addi sp, sp, -AlignedStackBytes then sw reg, offset(sp) for each reg
    StackAdjust = jit_riscv32_asm:addi(sp, sp, -AlignedStackBytes),
    Stream1 = StreamModule:append(Stream0, StackAdjust),
    {Stream2, _} = lists:foldl(
        fun(Reg, {StreamAcc, Offset}) ->
            Store = jit_riscv32_asm:sw(sp, Reg, Offset),
            {StreamModule:append(StreamAcc, Store), Offset + 4}
        end,
        {Stream1, 0},
        SavedRegs
    ),
    Stream2;
push_registers([], _AlignedStackBytes, _StreamModule, Stream0) ->
    Stream0.

%% Append the code restoring SavedRegs from the stack, mirroring
%% push_registers/4: each register is loaded from its 4-byte slot, then sp is
%% raised by AlignedStackBytes. Nothing is emitted for an empty list.
pop_registers([_ | _] = SavedRegs, AlignedStackBytes, StreamModule, Stream0) ->
    % RISC-V: lw reg, offset(sp) for each reg then addi sp, sp, AlignedStackBytes
    {Stream1, _} = lists:foldl(
        fun(Reg, {StreamAcc, Offset}) ->
            Load = jit_riscv32_asm:lw(Reg, sp, Offset),
            {StreamModule:append(StreamAcc, Load), Offset + 4}
        end,
        {Stream0, 0},
        SavedRegs
    ),
    StackAdjust = jit_riscv32_asm:addi(sp, sp, AlignedStackBytes),
    StreamModule:append(Stream1, StackAdjust);
pop_registers([], _AlignedStackBytes, _StreamModule, Stream0) ->
    Stream0.

%% Same as set_registers_args/4, with the parameter registers computed from
%% the arguments by parameter_regs/1.
set_registers_args(State0, Args, StackOffset) ->
    ParamRegs = parameter_regs(Args),
    set_registers_args(State0, Args, ParamRegs, StackOffset).
%% Emit the code moving every argument of Args into the matching register of
%% ParamRegs, and return the state with the register bookkeeping updated:
%% parameter registers are marked used, and registers passed as `{free, _}'
%% are no longer counted as used.
set_registers_args(
    #state{used_regs = UsedRegs} = State0,
    Args,
    ParamRegs,
    StackOffset
) ->
    ArgsRegs = args_regs(Args),
    % Scratch registers usable to resolve conflicts: neither a target, nor a
    % source, nor otherwise in use
    AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs,
    State1 = set_registers_args0(
        State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset
    ),
    NewUsedRegs = lists:foldl(
        fun
            ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed);
            ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed);
            (_, AccUsed) -> AccUsed
        end,
        UsedRegs,
        Args
    ),
    State1#state{
        % `--' is right-associative: without the parentheses this would be
        % ?AVAILABLE_REGS -- (ParamRegs -- NewUsedRegs) and registers still
        % in use would be reported as available.
        available_regs = (?AVAILABLE_REGS -- ParamRegs) -- NewUsedRegs,
        used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs)
    }.

%% Return the list of parameter registers needed to pass Args, taken in order
%% from ?PARAMETER_REGS.
parameter_regs(Args) ->
    parameter_regs0(Args, ?PARAMETER_REGS, []).

% ILP32: a 64-bit argument takes the next two consecutive parameter registers,
% low register first. No even-register alignment is applied (an a1/a2 pair is
% valid), whatever the position of the argument.
% When a single parameter register is left, the argument only gets that one.
parameter_regs0([], _, Acc) ->
    lists:reverse(Acc);
parameter_regs0([{avm_int64_t, _} | T], [LowReg, HighReg | Rest], Acc) ->
    parameter_regs0(T, Rest, [HighReg, LowReg | Acc]);
parameter_regs0([_Other | T], [Reg | Rest], Acc) ->
    parameter_regs0(T, Rest, [Reg | Acc]).

%% Replace the first argument of Args that is Reg1 or `{free, Reg1}' with
%% Reg2. There is no clause for the empty list: the call fails with
%% function_clause if no argument matches.
replace_reg(Args, Reg1, Reg2) ->
    replace_reg0(Args, Reg1, Reg2, []).

replace_reg0([Reg | T], Reg, Replacement, Acc) ->
    lists:reverse(Acc, [Replacement | T]);
replace_reg0([{free, Reg} | T], Reg, Replacement, Acc) ->
    lists:reverse(Acc, [Replacement | T]);
replace_reg0([Other | T], Reg, Replacement, Acc) ->
    replace_reg0(T, Reg, Replacement, [Other | Acc]).
%% Walk the arguments, their source registers and the target parameter
%% registers in lockstep, emitting the code that puts each argument in its
%% target. Scratch holds the registers usable to preserve a target that is
%% still needed as the source of a later argument.
set_registers_args0(State, [], [], [], _Scratch, _StackOffset) ->
    State;
set_registers_args0(State, [{free, Inner} | MoreArgs], ArgRegs, Targets, Scratch, StackOffset) ->
    % The `free' wrapper is irrelevant here, handle the wrapped value
    set_registers_args0(State, [Inner | MoreArgs], ArgRegs, Targets, Scratch, StackOffset);
set_registers_args0(
    State, [ctx | MoreArgs], [?CTX_REG | MoreArgRegs], [?CTX_REG | MoreTargets], Scratch, StackOffset
) ->
    % ctx is already in its target register
    set_registers_args0(State, MoreArgs, MoreArgRegs, MoreTargets, Scratch, StackOffset);
% 64-bit immediates are passed as two 32-bit words, low word first (ILP32)
set_registers_args0(
    State,
    [{avm_int64_t, Value} | MoreArgs],
    ArgRegs,
    Targets,
    Scratch,
    StackOffset
) when is_integer(Value) ->
    % Split into two signed 32-bit halves, as expected by the li instruction
    <<HighWord:32/signed, LowWord:32/signed>> = <<Value:64>>,
    % One more argument than before, hence one more (immediate) source entry
    set_registers_args0(
        State, [LowWord, HighWord | MoreArgs], [imm | ArgRegs], Targets, Scratch, StackOffset
    );
% The ctx register is needed to reach x_reg/y_reg/fp_reg: it may only be
% overwritten if no later argument depends on it
set_registers_args0(
    State, [Arg | MoreArgs], [_ | MoreArgRegs], [?CTX_REG | MoreTargets], Scratch, StackOffset
) ->
    false = lists:member(?CTX_REG, MoreArgRegs),
    NextState = set_registers_args1(State, Arg, ?CTX_REG, StackOffset),
    set_registers_args0(NextState, MoreArgs, MoreArgRegs, MoreTargets, Scratch, StackOffset);
set_registers_args0(
    #state{stream_module = StreamModule} = State0,
    [Arg | MoreArgs],
    [_ | MoreArgRegs],
    [Target | MoreTargets],
    Scratch,
    StackOffset
) ->
    case lists:member(Target, MoreArgRegs) of
        true ->
            % Target still holds the source of a later argument: copy it to a
            % scratch register first and make later arguments read the copy
            [Spare | ScratchRest] = Scratch,
            SaveInstr = jit_riscv32_asm:mv(Spare, Target),
            SavedState = State0#state{
                stream = StreamModule:append(State0#state.stream, SaveInstr)
            },
            NextState = set_registers_args1(SavedState, Arg, Target, StackOffset),
            set_registers_args0(
                NextState,
                replace_reg(MoreArgs, Target, Spare),
                MoreArgRegs,
                MoreTargets,
                ScratchRest,
                StackOffset
            );
        false ->
            NextState = set_registers_args1(State0, Arg, Target, StackOffset),
            set_registers_args0(
                NextState, MoreArgs, MoreArgRegs, MoreTargets, Scratch, StackOffset
            )
    end.

%% Emit the code putting a single argument into the given register.
% Already in place
set_registers_args1(State, Reg, Reg, _StackOffset) ->
    State;
% jit_state is always in a1, so we only need to move it if the target differs
set_registers_args1(State, jit_state, a1, _StackOffset) ->
    State;
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    jit_state,
    Target,
    _StackOffset
) ->
    State#state{stream = StreamModule:append(Stream, jit_riscv32_asm:mv(Target, a1))};
% For tail calls, jit_state is already in a1
set_registers_args1(State, jit_state_tail_call, a1, _StackOffset) ->
    State;
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    {x_reg, extra},
    Target,
    _StackOffset
) ->
    {Base, Displacement} = ?X_REG(?MAX_REG),
    Load = jit_riscv32_asm:lw(Target, Base, Displacement),
    State#state{stream = StreamModule:append(Stream, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    {x_reg, X},
    Target,
    _StackOffset
) ->
    {Base, Displacement} = ?X_REG(X),
    Load = jit_riscv32_asm:lw(Target, Base, Displacement),
    State#state{stream = StreamModule:append(Stream, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    {ptr, Source},
    Target,
    _StackOffset
) ->
    % Load the word Source points to
    Load = jit_riscv32_asm:lw(Target, Source, 0),
    State#state{stream = StreamModule:append(Stream, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream, available_regs = Available} = State,
    {y_reg, Y},
    Target,
    _StackOffset
) ->
    Load = ldr_y_reg(Target, Y, Available),
    State#state{stream = StreamModule:append(Stream, Load)};
set_registers_args1(
    #state{stream_module = StreamModule, stream = Stream} = State,
    SourceReg,
    Target,
    _StackOffset
) when
    ?IS_GPR(SourceReg)
->
    Move = jit_riscv32_asm:mv(Target, SourceReg),
    State#state{stream = StreamModule:append(Stream, Move)};
set_registers_args1(State, Value, Target, _StackOffset) when
    ?IS_SIGNED_OR_UNSIGNED_INT32_T(Value)
->
    mov_immediate(State, Target, Value).

%%-----------------------------------------------------------------------------
%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg)
%% from an immediate, a native register or another vm register.
%% @end
%% @param State current backend state
%% @param Src value to move to vm register
%% @param Dest vm register to move to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) ->
    state().
% Native register to VM register: a single store
move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:sw(BaseReg, Src, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) ->
    {BaseReg, Off} = ?X_REG(X),
    I1 = jit_riscv32_asm:sw(BaseReg, Src, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) ->
    I1 = jit_riscv32_asm:sw(Reg, Src, 0),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State0#state{stream = Stream1};
% Native register to y_reg: str_y_reg needs scratch registers
move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when
    is_atom(Src)
->
    Code = str_y_reg(Src, Y, Temp1, AT),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
    State0#state{stream = Stream1};
% Small integer (0..255) to y_reg: li into Temp2, then store through str_y_reg
% using Temp1 as its scratch register
move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when
    is_integer(N), N >= 0, N =< 255
->
    I1 = jit_riscv32_asm:li(Temp2, N),
    YCode = str_y_reg(Temp2, Y, Temp1, AT),
    Stream1 = (State0#state.stream_module):append(
        State0#state.stream, <<I1/binary, YCode/binary>>
    ),
    State0#state{stream = Stream1};
% Small integer (0..255) to any other destination: li into a scratch register,
% then reuse the native-register clauses. The scratch register is hidden from
% the recursive call and handed back afterwards.
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
    is_integer(N), N >= 0, N =< 255
->
    I1 = jit_riscv32_asm:li(Temp, N),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
% Any other integer: same scheme, with the constant built by mov_immediate/3
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
    is_integer(N)
->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N),
    State2 = move_to_vm_register(State1, Temp, Dest),
    State2#state{available_regs = AR0};
% Source is a VM register: load it into a scratch register, then store it
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:lw(Temp, BaseReg, Off),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) ->
    {XReg, X_REGOffset} = ?X_REG(X),
    I1 = jit_riscv32_asm:lw(Temp, XReg, X_REGOffset),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) ->
    I1 = jit_riscv32_asm:lw(Temp, Reg, 0),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) ->
    Code = ldr_y_reg(Temp, Y, AT),
    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
    State1#state{available_regs = AR0};
% term_to_float: copy the float payload found at Reg + 4 (and Reg + 8 for
% doubles) into slot F of the fp register array, then release Reg
move_to_vm_register(
    #state{
        stream_module = StreamModule,
        available_regs = [Temp1, Temp2 | _],
        stream = Stream0,
        variant = Variant
    } =
        State0,
    {free, {ptr, Reg, 1}},
    {fp_reg, F}
) ->
    {BaseReg, Off} = ?FP_REGS,
    % Temp1 <- pointer to the fp register array
    I1 = jit_riscv32_asm:lw(Temp1, BaseReg, Off),
    % Temp2 <- first payload word
    I2 = jit_riscv32_asm:lw(Temp2, Reg, 4),
    Code =
        case Variant band ?JIT_VARIANT_FLOAT32 of
            0 ->
                % Double precision: write both 32-bit parts
                I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8),
                I4 = jit_riscv32_asm:lw(Temp2, Reg, 8),
                I5 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8 + 4),
                <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
            _ ->
                % Single precision: write only first 32-bit part
                I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 4),
                <<I1/binary, I2/binary, I3/binary>>
        end,
    Stream1 = StreamModule:append(Stream0, Code),
    State1 = free_native_register(State0, Reg),
    State1#state{stream = Stream1}.

%%-----------------------------------------------------------------------------
%% @doc Emit a move of an array element (reg[x]) to a vm or a native register.
%% @end
%% @param State current backend state
%% @param Reg base register of the array
%% @param Index index in the array, as an integer or a native register
%% @param Dest vm or native register to move to
%% @return Updated backend state
%%-----------------------------------------------------------------------------
-spec move_array_element(
    state(),
    riscv32_register(),
    non_neg_integer() | riscv32_register(),
    vm_register() | riscv32_register()
) -> state().
% Constant index, destination is an x register: load to a scratch register,
% then store into the x_reg slot. Elements are 4 bytes wide.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Reg,
    Index,
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4),
    {BaseReg, Off} = ?X_REG(X),
    I2 = jit_riscv32_asm:sw(BaseReg, Temp, Off),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
% Constant index, destination is the word Dest points to
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    Reg,
    Index,
    {ptr, Dest}
) when is_atom(Reg) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4),
    I2 = jit_riscv32_asm:sw(Dest, Temp, 0),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
% Constant index, destination is a y register: Temp2 carries the element,
% Temp1 is the scratch register handed to str_y_reg
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} =
        State,
    Reg,
    Index,
    {y_reg, Y}
) when is_atom(Reg) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:lw(Temp2, Reg, Index * 4),
    YCode = str_y_reg(Temp2, Y, Temp1, AT),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, YCode/binary>>),
    State#state{stream = Stream1};
% Same, but the base register may be overwritten, so the element is loaded
% into it.
% NOTE(review): Reg is not returned to available_regs here although it is
% tagged free -- confirm the caller releases it.
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} =
        State,
    {free, Reg},
    Index,
    {y_reg, Y}
) when is_integer(Index) ->
    I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4),
    YCode = str_y_reg(Reg, Y, Temp, AT),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, YCode/binary>>),
    State#state{stream = Stream1};
% Constant index, destination is a native register: a single load
move_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest
) when is_atom(Dest) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:lw(Dest, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
% Index held in a register that may be clobbered: scale it by 4, add the base,
% load through it, store the element, then release the index register.
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {x_reg, X}
) when X < ?MAX_REG andalso is_atom(IndexReg) ->
    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
    {BaseReg, Off} = ?X_REG(X),
    I4 = jit_riscv32_asm:sw(BaseReg, IndexReg, Off),
    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    };
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {ptr, PtrReg}
) when is_atom(IndexReg) ->
    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
    I4 = jit_riscv32_asm:sw(PtrReg, IndexReg, 0),
    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    };
move_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | AT] = AvailableRegs0,
        used_regs = UsedRegs0
    } = State,
    Reg,
    {free, IndexReg},
    {y_reg, Y}
) when is_atom(IndexReg) ->
    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2),
    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
    YCode = str_y_reg(IndexReg, Y, Temp, AT),
    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
    Stream1 = StreamModule:append(
        Stream0, <<I1/binary, I2/binary, I3/binary, YCode/binary>>
    ),
    State#state{
        available_regs = AvailableRegs1,
        used_regs = UsedRegs1,
        stream = Stream1
    }.
%% @doc Load reg[x] into a native register and return that register.
%% With `{free, Reg}' the base register itself is overwritten with the element
%% and returned; otherwise the first available register is taken and marked as
%% used. Elements are 4 bytes wide.
-spec get_array_element(
    state(), riscv32_register() | {free, riscv32_register()}, non_neg_integer()
) ->
    {state(), riscv32_register()}.
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    {free, Reg},
    Index
) ->
    I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1}, Reg};
get_array_element(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [ElemReg | AvailableT],
        used_regs = UsedRegs0
    } = State,
    Reg,
    Index
) ->
    I1 = jit_riscv32_asm:lw(ElemReg, Reg, Index * 4),
    Stream1 = StreamModule:append(Stream0, I1),
    {
        State#state{
            stream = Stream1, available_regs = AvailableT, used_regs = [ElemReg | UsedRegs0]
        },
        ElemReg
    }.

%% @doc move an integer, a vm or native register to reg[x]
%% The index is either a constant or a native register holding it.
-spec move_to_array_element(
    state(),
    integer() | vm_register() | riscv32_register(),
    riscv32_register(),
    non_neg_integer() | riscv32_register()
) -> state().
% Value in a native register, constant index: a single store at Index * 4
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0} = State0,
    ValueReg,
    Reg,
    Index
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
    I1 = jit_riscv32_asm:sw(Reg, ValueReg, Index * 4),
    Stream1 = StreamModule:append(Stream0, I1),
    State0#state{stream = Stream1};
% Value in a native register, index in a native register: compute
% Reg + IndexReg * 4 in a scratch register (IndexReg is left untouched)
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
    ValueReg,
    Reg,
    IndexReg
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) ->
    I1 = jit_riscv32_asm:mv(Temp, IndexReg),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
    I3 = jit_riscv32_asm:add(Temp, Reg, Temp),
    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    State0#state{stream = Stream1};
% Any other value (immediate or vm register): materialize it in a native
% register first, store it, then release that register
move_to_array_element(
    State0,
    Value,
    Reg,
    Index
) ->
    {State1, Temp} = copy_to_native_register(State0, Value),
    State2 = move_to_array_element(State1, Temp, Reg, Index),
    free_native_register(State2, Temp).
%% move_to_array_element/5: store Value at BaseReg[IndexReg + Offset].
%% Offset is counted in elements: it is added to the index before the index is
%% scaled by the 4-byte element size.
% Constant index and constant offset: fold them and use the 4-argument form.
% The previous guard (`Offset div 8 =:= 0') only accepted offsets in -7..7 and
% then discarded them, which disagreed with the register-index clauses below.
move_to_array_element(
    State,
    Value,
    BaseReg,
    IndexReg,
    Offset
) when is_integer(IndexReg) andalso is_integer(Offset) ->
    move_to_array_element(State, Value, BaseReg, IndexReg + Offset);
% Value and index both in native registers
move_to_array_element(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
    ValueReg,
    BaseReg,
    IndexReg,
    Offset
) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
    I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
    I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp),
    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
    State#state{stream = Stream1};
% Any other value: materialize it in a native register first, then release it.
% The scratch register is picked after the copy so it cannot alias ValueReg.
move_to_array_element(
    State0,
    Value,
    BaseReg,
    IndexReg,
    Offset
) ->
    {State1, ValueReg} = copy_to_native_register(State0, Value),
    [Temp | _] = State1#state.available_regs,
    I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset),
    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
    I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp),
    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
    Stream1 = (State1#state.stream_module):append(
        State1#state.stream, <<I1/binary, I2/binary, I3/binary, I4/binary>>
    ),
    State2 = State1#state{stream = Stream1},
    free_native_register(State2, ValueReg).

% An fp_reg source yields a register pair tagged {fp, RegA, RegB}.
-spec move_to_native_register(state(), value() | cp) ->
    {state(), riscv32_register() | {fp, riscv32_register(), riscv32_register()}}.
% cp: load it into a newly allocated register
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    cp
) ->
    {BaseReg, Off} = ?CP,
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
% Already a native register: nothing to emit
move_to_native_register(State, Reg) when is_atom(Reg) ->
    {State, Reg};
% {ptr, Reg}: dereference in place, Reg now holds the pointed-to word
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}
) when is_atom(Reg) ->
    I1 = jit_riscv32_asm:lw(Reg, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1}, Reg};
% Immediate: allocate a register and load the constant into it
move_to_native_register(
    #state{
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State0,
    Imm
) when
    is_integer(Imm)
->
    State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT},
    {move_to_native_register(State1, Imm, Reg), Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {x_reg, extra}
) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {x_reg, X}
) when
    X < ?MAX_REG
->
    {BaseReg, Offset} = ?X_REG(X),
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Offset),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailT],
        used_regs = Used
    } = State,
    {y_reg, Y}
) ->
    Code = ldr_y_reg(Reg, Y, AvailT),
    Stream1 = StreamModule:append(Stream0, Code),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg};
% fp_reg: two words are loaded into a register pair. RegB first holds the
% pointer to the fp register array and is overwritten last by the second word.
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [RegA, RegB | AvailT],
        used_regs = Used
    } = State,
    {fp_reg, F}
) ->
    {BaseReg, Off} = ?FP_REGS,
    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off),
    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8),
    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {
        State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]},
        {fp, RegA, RegB}
    }.

%% Move a value into a caller-chosen native register (or register pair for an
%% fp_reg source). The register bookkeeping of the state is not modified.
-spec move_to_native_register(
    state(), value(), riscv32_register() | {fp, riscv32_register(), riscv32_register()}
) -> state().
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst
) when is_atom(RegSrc) ->
    I = jit_riscv32_asm:mv(RegDst, RegSrc),
    Stream1 = StreamModule:append(Stream0, I),
    State#state{stream = Stream1};
move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) ->
    mov_immediate(State, RegDst, ValSrc);
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst
) when ?IS_GPR(Reg) ->
    I1 = jit_riscv32_asm:lw(RegDst, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst
) ->
    {BaseReg, Off} = ?X_REG(?MAX_REG),
    I1 = jit_riscv32_asm:lw(RegDst, BaseReg, Off),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst
) when
    X < ?MAX_REG
->
    {XReg, X_REGOffset} = ?X_REG(X),
    I1 = jit_riscv32_asm:lw(RegDst, XReg, X_REGOffset),
    Stream1 = StreamModule:append(Stream0, I1),
    State#state{stream = Stream1};
move_to_native_register(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = AT} = State,
    {y_reg, Y},
    RegDst
) ->
    Code = ldr_y_reg(RegDst, Y, AT),
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1};
move_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0
    } = State,
    {fp_reg, F},
    {fp, RegA, RegB}
) ->
    {BaseReg, Off} = ?FP_REGS,
    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off),
    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8),
    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

%% Like move_to_native_register/2, but a native register (or the word behind a
%% {ptr, Reg}) is copied into a newly allocated register so that the source is
%% left untouched.
-spec copy_to_native_register(state(), value()) ->
    {state(), riscv32_register() | {fp, riscv32_register(), riscv32_register()}}.
copy_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [SaveReg | AvailT],
        used_regs = Used
    } = State,
    Reg
) when is_atom(Reg) ->
    I1 = jit_riscv32_asm:mv(SaveReg, Reg),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg};
copy_to_native_register(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [SaveReg | AvailT],
        used_regs = Used
    } = State,
    {ptr, Reg}
) when is_atom(Reg) ->
    I1 = jit_riscv32_asm:lw(SaveReg, Reg, 0),
    Stream1 = StreamModule:append(Stream0, I1),
    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [SaveReg | Used]}, SaveReg};
copy_to_native_register(State, Reg) ->
    move_to_native_register(State, Reg).

%% Copy y register Y into the ?CP slot, going through a scratch register.
move_to_cp(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | AvailT]} = State,
    {y_reg, Y}
) ->
    I1 = ldr_y_reg(Reg, Y, AvailT),
    {BaseReg, Off} = ?CP,
    I2 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.
%% Add Offset words (Offset * 4 bytes) to the value stored in the ?Y_REGS slot:
%% load it into a scratch register, add, store it back.
increment_sp(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Reg | _]} = State,
    Offset
) ->
    {BaseReg, Off} = ?Y_REGS,
    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
    I2 = jit_riscv32_asm:addi(Reg, Reg, Offset * 4),
    I3 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
    Code = <<I1/binary, I2/binary, I3/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1}.

%% Store the address of Label as the continuation of the jit state.
%% The address computation is emitted as a placeholder and recorded in
%% `branches' so that it is patched once the label offset is known.
set_continuation_to_label(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        branches = Branches
    } = State,
    Label
) ->
    % Similar to AArch64: use pc_relative_address with a relocation that will be
    % resolved to point directly to the label's actual address (not the jump table entry)
    Offset = StreamModule:offset(Stream0),
    % Emit placeholder for pc_relative_address (auipc + addi)
    % Reserve 8 bytes (2 x 32-bit instructions) with all-1s placeholder for flash programming
    % The relocation will replace these with the correct offset
    I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
    Reloc = {Label, Offset, {adr, Temp}},
    % Store continuation (jit_state is in a1)
    I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    State#state{stream = Stream1, branches = [Reloc | Branches]}.

%% @doc Set the continuation to a given offset
%% Return a reference so the offset will be updated with update_branches
%% This is only used with OP_WAIT_TIMEOUT and the offset is after the current
%% code and not too far. The address is computed by an 8-byte placeholder that
%% is patched through an `{adr, Temp}' relocation.
set_continuation_to_offset(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Temp | _],
        branches = Branches
    } = State
) ->
    % The reference identifies the (yet unknown) target offset in `branches'
    OffsetRef = make_ref(),
    Offset = StreamModule:offset(Stream0),
    % Reserve 8 bytes with all-1s placeholder for flash programming
    I1 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
    Reloc = {OffsetRef, Offset, {adr, Temp}},
    % Store continuation (jit_state is in a1)
    I2 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {State#state{stream = Stream1, branches = [Reloc | Branches]}, OffsetRef}.

%% @doc Implement a continuation entry point.
%% Nothing is emitted by this backend: the state is returned unchanged.
-spec continuation_entry_point(#state{}) -> #state{}.
continuation_entry_point(State) ->
    State.

%% Load the module index into a newly allocated register: read the module
%% pointer from the jit state, then the word at offset 0 of the module.
get_module_index(
    #state{
        stream_module = StreamModule,
        stream = Stream0,
        available_regs = [Reg | AvailableT],
        used_regs = UsedRegs0
    } = State
) ->
    % Load module from jit_state (which is in a1)
    I1 = jit_riscv32_asm:lw(Reg, ?JITSTATE_REG, ?JITSTATE_MODULE_OFFSET),
    I2 = jit_riscv32_asm:lw(Reg, Reg, 0),
    Code = <<I1/binary, I2/binary>>,
    Stream1 = StreamModule:append(Stream0, Code),
    {
        State#state{
            stream = Stream1,
            available_regs = AvailableT,
            used_regs = [Reg | UsedRegs0]
        },
        Reg
    }.

%% @doc Perform an AND of a register with an immediate.
%% JIT currently calls this with two values: ?TERM_PRIMARY_CLEAR_MASK (-4) to
%% clear bits and ?TERM_BOXED_TAG_MASK (0x3F). For small negative masks such as
%% -4 the complement of the mask is loaded, inverted with NOT and then ANDed.
% 24-bit mask: clear the top byte with a shift left / logical shift right pair
and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) ->
    I1 = jit_riscv32_asm:slli(Reg, Reg, 8),
    I2 = jit_riscv32_asm:srli(Reg, Reg, 8),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
    State0#state{stream = Stream1};
% Small negative mask: load its complement, invert it, then AND
and_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) when Val < 0 andalso Val >= -256 ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
    Stream1 = State1#state.stream,
    % RISC-V doesn't have bics, use not + and
    I1 = jit_riscv32_asm:not_(Temp, Temp),
    I2 = jit_riscv32_asm:and_(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
    State1#state{available_regs = [Temp | AT], stream = Stream2};
% Any other mask: load it into a scratch register and AND
and_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:and_(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2};
% No scratch register available: borrow a0, preserving it in ?IP_REG.
% NOTE(review): if Reg is a0 the result is overwritten by the restore --
% confirm callers never pass a0 here.
and_(
    #state{stream_module = StreamModule, available_regs = []} = State0,
    Reg,
    Val
) when Val < 0 andalso Val >= -256 ->
    Stream0 = State0#state.stream,
    % Save a0
    Save = jit_riscv32_asm:mv(?IP_REG, a0),
    Stream1 = StreamModule:append(Stream0, Save),
    % Load the complement of the mask into a0
    State1 = mov_immediate(State0#state{stream = Stream1}, a0, bnot (Val)),
    Stream2 = State1#state.stream,
    % not + and
    I1 = jit_riscv32_asm:not_(a0, a0),
    I2 = jit_riscv32_asm:and_(Reg, Reg, a0),
    Stream3 = StreamModule:append(Stream2, <<I1/binary, I2/binary>>),
    % Restore a0
    Restore = jit_riscv32_asm:mv(a0, ?IP_REG),
    Stream4 = StreamModule:append(Stream3, Restore),
    State0#state{stream = Stream4};
and_(
    #state{stream_module = StreamModule, available_regs = []} = State0,
    Reg,
    Val
) ->
    Stream0 = State0#state.stream,
    % Save a0
    Save = jit_riscv32_asm:mv(?IP_REG, a0),
    Stream1 = StreamModule:append(Stream0, Save),
    % Load immediate value into a0
    State1 = mov_immediate(State0#state{stream = Stream1}, a0, Val),
    Stream2 = State1#state.stream,
    I = jit_riscv32_asm:and_(Reg, Reg, a0),
    Stream3 = StreamModule:append(Stream2, I),
    % Restore a0
    Restore = jit_riscv32_asm:mv(a0, ?IP_REG),
    Stream4 = StreamModule:append(Stream3, Restore),
    State0#state{stream = Stream4}.

%% OR a register with an immediate, loaded into a scratch register first.
%% NOTE(review): or_ is called with two operands while and_ takes three --
%% confirm jit_riscv32_asm exports or_/2 with Reg as both destination and
%% first source.
or_(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:or_(Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.

%% Add a small constant (0..255), a register, or an arbitrary immediate to Reg.
add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when
    Val >= 0 andalso Val =< 255
->
    I = jit_riscv32_asm:addi(Reg, Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State0#state{stream = Stream1};
add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when
    is_atom(Val)
->
    I = jit_riscv32_asm:add(Reg, Reg, Val),
    Stream1 = StreamModule:append(Stream0, I),
    State0#state{stream = Stream1};
add(#state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0, Reg, Val) ->
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:add(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{available_regs = [Temp | AT], stream = Stream2}.
%% Load the immediate Val into Reg.
%% Instruction selection is left to jit_riscv32_asm:li/2. According to the
%% notes that accompanied this function, li emits a single addi for 12-bit
%% signed values and lui + addi otherwise, so no literal pool is needed.
mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
    LoadImm = jit_riscv32_asm:li(Reg, Val),
    State#state{stream = StreamModule:append(Stream0, LoadImm)}.

%% Subtract a small constant (0..255), a register, or an arbitrary immediate
%% from Reg.
% Small constant: add its negation
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
    Val >= 0 andalso Val =< 255
->
    AddNeg = jit_riscv32_asm:addi(Reg, Reg, -Val),
    State#state{stream = StreamModule:append(Stream0, AddNeg)};
% Register operand
sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
    is_atom(Val)
->
    Sub = jit_riscv32_asm:sub(Reg, Reg, Val),
    State#state{stream = StreamModule:append(Stream0, Sub)};
% Anything else: load the immediate into a scratch register, which is hidden
% from mov_immediate/3 and handed back afterwards
sub(#state{stream_module = StreamModule, available_regs = [Scratch | Others]} = State0, Reg, Val) ->
    State1 = mov_immediate(State0#state{available_regs = Others}, Scratch, Val),
    Sub = jit_riscv32_asm:sub(Reg, Reg, Scratch),
    State1#state{
        available_regs = [Scratch | Others],
        stream = StreamModule:append(State1#state.stream, Sub)
    }.
%% Multiply Reg by a constant.
%% Common small factors are strength-reduced to shifts and one add/sub through
%% a scratch register; every other factor uses the mul instruction.
mul(State, _Reg, 1) ->
    State;
mul(State, Reg, 2) ->
    shift_left(State, Reg, 1);
% 3 * R = (R << 1) + R
mul(#state{available_regs = [Temp | _]} = State, Reg, 3) ->
    I1 = jit_riscv32_asm:slli(Temp, Reg, 1),
    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 4) ->
    shift_left(State, Reg, 2);
% 5 * R = (R << 2) + R
mul(#state{available_regs = [Temp | _]} = State, Reg, 5) ->
    I1 = jit_riscv32_asm:slli(Temp, Reg, 2),
    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State0, Reg, 6) ->
    State1 = mul(State0, Reg, 3),
    mul(State1, Reg, 2);
% 7 * R = (R << 3) - R
mul(#state{available_regs = [Temp | _]} = State, Reg, 7) ->
    I1 = jit_riscv32_asm:slli(Temp, Reg, 3),
    I2 = jit_riscv32_asm:sub(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 8) ->
    shift_left(State, Reg, 3);
% 9 * R = (R << 3) + R
mul(#state{available_regs = [Temp | _]} = State, Reg, 9) ->
    I1 = jit_riscv32_asm:slli(Temp, Reg, 3),
    I2 = jit_riscv32_asm:add(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State0, Reg, 10) ->
    State1 = mul(State0, Reg, 5),
    mul(State1, Reg, 2);
% 15 * R = (R << 4) - R
mul(#state{available_regs = [Temp | _]} = State, Reg, 15) ->
    I1 = jit_riscv32_asm:slli(Temp, Reg, 4),
    I2 = jit_riscv32_asm:sub(Reg, Temp, Reg),
    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
    State#state{stream = Stream1};
mul(State, Reg, 16) ->
    shift_left(State, Reg, 4);
mul(State, Reg, 32) ->
    shift_left(State, Reg, 5);
mul(State, Reg, 64) ->
    shift_left(State, Reg, 6);
mul(
    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
    Reg,
    Val
) ->
    % General case: load the factor into a scratch register and use mul
    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
    Stream1 = State1#state.stream,
    I = jit_riscv32_asm:mul(Reg, Reg, Temp),
    Stream2 = StreamModule:append(Stream1, I),
    State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}.

%%
%% Analysis of AArch64 pattern and RISC-V32 implementation:
%%
%% AArch64 layout (from call_ext_only_test):
%%   0x0-0x8:  Decrement reductions, store back
%%   0xc:      b.ne 0x20          ; Branch if reductions != 0 to continuation
%%   0x10-0x1c: adr/str/ldr/br sequence for scheduling next process
%%   0x20:     [CONTINUATION POINT] - Actual function starts here
%%
%% RISC-V32 implementation (no prolog/epilog needed due to 32 registers):
%%   0x0-0x8:  Decrement reductions, store back
%%   0xc:      bne continuation   ; Branch if reductions != 0 to continuation
%%   0x10-0x?: adr/sw/ldr/jalr sequence for scheduling next process
%%   continuation: [actual function body]
%%
%% Key insight: With 32 registers, RISC-V32 doesn't need prolog/epilog like ARM Thumb.
%% When reductions != 0, we branch directly to continue execution.
%% When reductions == 0, we schedule the next process, and resume at the continuation point.
%%
-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().
decrement_reductions_and_maybe_schedule_next(
    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0
) ->
    % Load reduction count
    I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
    % Decrement reduction count
    I2 = jit_riscv32_asm:addi(Temp, Temp, -1),
    % Store back the decremented value
    I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
    BNEOffset = StreamModule:offset(Stream1),
    % Branch if reduction count is not zero (target patched below)
    I4 = jit_riscv32_asm:bne(Temp, zero, 0),
    % Set continuation to the next instruction
    ADROffset = BNEOffset + byte_size(I4),
    % Use 8-byte placeholder (2 words of 0xFFFFFFFF) for pc_relative_address
    % This ensures we can always rewrite with either auipc alone (4 bytes) or auipc+addi (8 bytes)
    I5 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
    I6 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
    % Append the instructions to the stream
    Stream2 = StreamModule:append(Stream1, <<I4/binary, I5/binary, I6/binary>>),
    State1 = State0#state{stream = Stream2},
    State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]),
    % Rewrite the branch and adr instructions now that the offset of the code
    % following the scheduling sequence is known
    #state{stream = Stream3} = State2,
    NewOffset = StreamModule:offset(Stream3),
    NewI4 = jit_riscv32_asm:bne(Temp, zero, NewOffset - BNEOffset),
    NewI5Offset = NewOffset - ADROffset,
    % Generate the new pc_relative_address sequence, padded to the 8 bytes
    % reserved by the placeholder.
    % NOTE(review): the padding is encoded literally (nop = addi x0, x0, 0,
    % c.nop = 0x0001); swap in the assembler's nop helpers if it provides them.
    NewI5 =
        case pc_relative_address(Temp, NewI5Offset) of
            I when byte_size(I) =:= 4 ->
                % Only auipc, pad with NOP (4 bytes)
                <<I/binary, 16#00000013:32/little>>;
            I when byte_size(I) =:= 6 ->
                % auipc + c.addi, pad with c.nop (2 bytes)
                <<I/binary, 16#0001:16/little>>;
            I when byte_size(I) =:= 8 ->
                % auipc + addi, no padding needed
                I
        end,
    Stream4 = StreamModule:replace(
        Stream3, BNEOffset, <<NewI4/binary, NewI5/binary>>
    ),
    merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs).
+
+%% Set the continuation pointer, then either jump to Label or yield to the
+%% scheduler, and finally patch the CP value once the return offset is known.
+-spec call_or_schedule_next(state(), non_neg_integer()) -> state().
+call_or_schedule_next(State0, Label) ->
+    {State1, RewriteOffset, TempReg} = set_cp(State0),
+    State2 = call_only_or_schedule_next(State1, Label),
+    rewrite_cp_offset(State2, RewriteOffset, TempReg).
+
+%% Decrement the reduction counter; when it is still non-zero jump to Label,
+%% otherwise record Label as the continuation and tail-call the scheduler.
+call_only_or_schedule_next(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    Label
+) ->
+    % Load reduction count (jit_state is in a1)
+    I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    % Decrement reduction count
+    I2 = jit_riscv32_asm:addi(Temp, Temp, -1),
+    % Store back the decremented value
+    I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    % Use trampoline technique: branch if zero (eq) to skip over the long branch
+    % If not zero, we want to continue execution at Label
+    % If zero, we want to fall through to scheduling code
+
+    % Look up label once to avoid duplicate lookup in helper
+    LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels),
+
+    BccOffset = StreamModule:offset(Stream1),
+
+    State4 =
+        case LabelLookupResult of
+            {Label, LabelOffset} ->
+                % Label is known, check if we can optimize the conditional branch
+                % Displacement is relative to the branch instruction itself
+                Rel = LabelOffset - BccOffset,
+
+                if
+                    Rel >= -4096 andalso Rel =< 4094 andalso (Rel rem 2) =:= 0 ->
+                        % Near branch: use direct conditional branch (RISC-V has ±4KB range)
+
+                        % Branch if NOT zero (temp != 0)
+                        I4 = jit_riscv32_asm:bne(Temp, zero, Rel),
+                        Stream2 = StreamModule:append(Stream1, I4),
+                        State0#state{stream = Stream2};
+                    true ->
+                        % Far branch: use trampoline with helper
+                        % Get the code block size for the far branch sequence that will follow
+
+                        % RISC-V branch is 4 bytes
+                        FarSeqOffset = BccOffset + 4,
+                        {State1, FarCodeBlock} = branch_to_label_code(
+                            State0, FarSeqOffset, Label, LabelLookupResult
+                        ),
+                        FarSeqSize = byte_size(FarCodeBlock),
+                        % Skip over the far branch sequence if zero (temp == 0)
+                        I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4),
+                        Stream2 = StreamModule:append(Stream1, I4),
+                        Stream3 = StreamModule:append(Stream2, FarCodeBlock),
+                        State1#state{stream = Stream3}
+                end;
+            false ->
+                % Label not known, get the far branch size for the skip
+
+                % RISC-V branch is 4 bytes
+                FarSeqOffset = BccOffset + 4,
+                {State1, FarCodeBlock} = branch_to_label_code(State0, FarSeqOffset, Label, false),
+                FarSeqSize = byte_size(FarCodeBlock),
+                I4 = jit_riscv32_asm:beq(Temp, zero, FarSeqSize + 4),
+                Stream2 = StreamModule:append(Stream1, I4),
+                Stream3 = StreamModule:append(Stream2, FarCodeBlock),
+                State1#state{stream = Stream3}
+        end,
+    State5 = set_continuation_to_label(State4, Label),
+    call_primitive_last(State5, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).
+
+%% Set the continuation pointer, tail-call Primitive with Args, then patch the
+%% CP value with the offset that follows the call sequence.
+call_primitive_with_cp(State0, Primitive, Args) ->
+    {State1, RewriteOffset, TempReg} = set_cp(State0),
+    State2 = call_primitive_last(State1, Primitive, Args),
+    rewrite_cp_offset(State2, RewriteOffset, TempReg).
+
+-spec set_cp(state()) -> {state(), non_neg_integer(), riscv32_register()}.
+set_cp(#state{available_regs = [TempReg | AvailT], used_regs = UsedRegs} = State0) ->
+    % Emits code that stores (module_index bsl 24) bor <offset value> into ?CP.
+    % The offset value is not known yet: an 8-byte placeholder is emitted and
+    % its stream offset is returned so that rewrite_cp_offset/3 can patch it.
+    % Reserve a temporary register for the offset BEFORE calling get_module_index
+    % to avoid running out of available registers
+    State0b = State0#state{available_regs = AvailT, used_regs = [TempReg | UsedRegs]},
+    % get module index (dynamically)
+    {
+        #state{stream_module = StreamModule, stream = Stream0} = State1,
+        Reg
+    } = get_module_index(
+        State0b
+    ),
+
+    Offset = StreamModule:offset(Stream0),
+    % build cp with module_index << 24
+    I1 = jit_riscv32_asm:slli(Reg, Reg, 24),
+    % Reserve space for offset load instruction
+    % li can generate 1 instruction (4 bytes) for small immediates (< 2048)
+    % or 2 instructions (8 bytes) for large immediates
+    % Since we don't know the final CP value yet (it depends on code size),
+    % we must always reserve 2 instructions (8 bytes) to be safe
+    % The final CP value is (final_offset << 2), and final_offset is unknown
+    % Use 0xFFFFFFFF placeholders for flash compatibility (can only flip 1->0)
+    I2 = <<16#FFFFFFFF:32/little>>,
+    I3 = <<16#FFFFFFFF:32/little>>,
+    % The placeholder starts right after I1, whose size depends on whether
+    % slli was emitted in compressed form.
+    MOVOffset = Offset + byte_size(I1),
+    % OR the module index with the offset (loaded in temp register)
+    I4 = jit_riscv32_asm:or_(Reg, TempReg),
+    {BaseReg, Off} = ?CP,
+    I5 = jit_riscv32_asm:sw(BaseReg, Reg, Off),
+    Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State2 = State1#state{stream = Stream1},
+    State3 = free_native_register(State2, Reg),
+    State4 = free_native_register(State3, TempReg),
+    {State4, MOVOffset, TempReg}.
+
+-spec rewrite_cp_offset(state(), non_neg_integer(), riscv32_register()) -> state().
+rewrite_cp_offset(
+    #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0,
+    RewriteOffset,
+    TempReg
+) ->
+    % Patch the 8-byte placeholder at RewriteOffset with a load of the CP
+    % value, which is the current code-relative offset shifted left by 2.
+    NewOffset = StreamModule:offset(Stream0) - CodeOffset,
+    CPValue = NewOffset bsl 2,
+    NewMoveInstr = jit_riscv32_asm:li(TempReg, CPValue),
+    % We reserved 8 bytes (2 instructions) for the CP value
+    % Pad with NOP if needed to maintain alignment
+    % NOTE(review): the padding helpers are chosen by size, assuming nop/0 is
+    % 4 bytes and c_nop/0 is 2 bytes, so every branch totals 8 bytes - confirm.
+    PaddedInstr =
+        case byte_size(NewMoveInstr) of
+            4 -> <<NewMoveInstr/binary, (jit_riscv32_asm:nop())/binary>>;
+            6 -> <<NewMoveInstr/binary, (jit_riscv32_asm:c_nop())/binary>>;
+            8 -> NewMoveInstr
+        end,
+    Stream1 = StreamModule:replace(Stream0, RewriteOffset, PaddedInstr),
+    State0#state{stream = Stream1}.
+
+%% Store TermReg into ?BS and reset ?BS_OFFSET to 0, using the first available
+%% register as scratch (it is not reserved).
+set_bs(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    TermReg
+) ->
+    {BaseReg1, Off1} = ?BS,
+    I1 = jit_riscv32_asm:sw(BaseReg1, TermReg, Off1),
+    I2 = jit_riscv32_asm:li(Temp, 0),
+    {BaseReg2, Off2} = ?BS_OFFSET,
+    I3 = jit_riscv32_asm:sw(BaseReg2, Temp, Off2),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    State0#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param SortedLines line information, sorted by offset
+%% @doc Build labels and line tables and encode a function that returns it.
+%% In this case, the function returns the effective address of what immediately
+%% follows.
+%% @end
+%% @return New state
+%%-----------------------------------------------------------------------------
+return_labels_and_lines(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        labels = Labels
+    } = State,
+    SortedLines
+) ->
+    % Only integer labels are exported; reference labels are internal.
+    SortedLabels = lists:keysort(2, [
+        {Label, LabelOffset}
+     || {Label, LabelOffset} <- Labels, is_integer(Label)
+    ]),
+
+    I2 = jit_riscv32_asm:ret(),
+    % Assume total size is 10 bytes (8-byte I1 + 2-byte c.ret)
+    % If actual is 8 bytes (6-byte I1 + 2-byte c.ret), we'll pad with 2 bytes
+    I1 = pc_relative_address(a0, 10),
+    Prologue = <<I1/binary, I2/binary>>,
+    ProloguePadded =
+        case byte_size(Prologue) of
+            10 -> Prologue;
+            % 2-byte padding
+            % NOTE(review): filler value reconstructed; it sits after the
+            % return instruction, so it looks unexecuted - confirm the value.
+            8 -> <<Prologue/binary, 0:16>>
+        end,
+    % NOTE(review): the bit-syntax segments of the two tables and of the final
+    % binary were lost in this copy of the patch. The 16-bit id / 32-bit offset
+    % entries and the 16-bit counts below are a reconstruction - confirm them
+    % against the table reader before relying on this layout.
+    LabelsTable = <<<<Label:16, Offset:32>> || {Label, Offset} <- SortedLabels>>,
+    LinesTable = <<<<Line:16, Offset:32>> || {Line, Offset} <- SortedLines>>,
+    Stream1 = StreamModule:append(
+        Stream0,
+        <<ProloguePadded/binary, (length(SortedLabels)):16, LabelsTable/binary,
+            (length(SortedLines)):16, LinesTable/binary>>
+    ),
+    State#state{stream = Stream1}.
+
+%% @doc Generate PC-relative address calculation using AUIPC + ADDI
+%% This replaces the ARM-style 'adr' pseudo-instruction with native RISC-V instructions
+-spec pc_relative_address(riscv32_register(), integer()) -> binary().
+pc_relative_address(Rd, 0) ->
+    % Simple case: just get current PC
+    jit_riscv32_asm:auipc(Rd, 0);
+pc_relative_address(Rd, Offset) ->
+    % PC-relative address calculation
+    % Split offset into upper 20 bits and lower 12 bits
+    % AUIPC can represent offsets in range: (-524288 << 12) to (524287 << 12)
+    % Combined with ADDI: (-524288 << 12) - 2048 to (524287 << 12) + 2047
+    Lower = Offset band 16#FFF,
+    % Sign extend lower 12 bits
+    LowerSigned =
+        if
+            Lower >= 16#800 -> Lower - 16#1000;
+            true -> Lower
+        end,
+    % Compute upper 20 bits, adjusting if lower is negative
+    % Use arithmetic right shift (bsr) which preserves sign in Erlang
+    Upper =
+        if
+            LowerSigned < 0 ->
+                (Offset bsr 12) + 1;
+            true ->
+                Offset bsr 12
+        end,
+    % Validate that Upper is in valid range for AUIPC
+    if
+        Upper < -16#80000; Upper > 16#7FFFF ->
+            error({offset_out_of_range, Offset, Upper, -16#80000, 16#7FFFF});
+        true ->
+            ok
+    end,
+    % Offset is non-zero here, so Upper and LowerSigned cannot both be zero.
+    % A zero Upper needs no special case: auipc with 0 just yields the PC.
+    case LowerSigned of
+        0 ->
+            % Only upper bits needed
+            jit_riscv32_asm:auipc(Rd, Upper);
+        _ ->
+            % auipc for the upper part, addi for the signed lower 12 bits
+            AuipcInstr = jit_riscv32_asm:auipc(Rd, Upper),
+            AddiInstr = jit_riscv32_asm:addi(Rd, Rd, LowerSigned),
+            <<AuipcInstr/binary, AddiInstr/binary>>
+    end.
+
+%% Helper function to generate str instruction with y_reg offset, handling large offsets
+%% Returns the instruction bytes that load the Y register base pointer from
+%% ?Y_REGS and store SrcReg at byte offset Y * 4 from it.
+str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 ->
+    % Small offset - use immediate addressing
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:sw(TempReg, SrcReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+str_y_reg(SrcReg, Y, TempReg1, [TempReg2 | _]) ->
+    % Large offset - use register arithmetic with second available register
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off),
+    I2 = jit_riscv32_asm:li(TempReg2, Offset),
+    I3 = jit_riscv32_asm:add(TempReg2, TempReg2, TempReg1),
+    I4 = jit_riscv32_asm:sw(TempReg2, SrcReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary>>;
+str_y_reg(SrcReg, Y, TempReg1, []) ->
+    % Large offset - no additional registers available, use IP_REG as second temp
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg1, BaseReg, Off),
+    I2 = jit_riscv32_asm:mv(?IP_REG, TempReg1),
+    I3 = jit_riscv32_asm:li(TempReg1, Offset),
+    I4 = jit_riscv32_asm:add(TempReg1, TempReg1, ?IP_REG),
+    I5 = jit_riscv32_asm:sw(TempReg1, SrcReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>.
+
+%% Helper function to generate ldr instruction with y_reg offset, handling large offsets
+%% Returns the instruction bytes that load the Y register base pointer from
+%% ?Y_REGS and then load the word at byte offset Y * 4 into DstReg.
+ldr_y_reg(DstReg, Y, [TempReg | _]) when Y * 4 =< 124 ->
+    % Small offset - use immediate addressing
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(DstReg, TempReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+ldr_y_reg(DstReg, Y, [TempReg | _]) ->
+    % Large offset - use DstReg as second temp register for arithmetic
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(TempReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:li(DstReg, Offset),
+    I3 = jit_riscv32_asm:add(DstReg, DstReg, TempReg),
+    I4 = jit_riscv32_asm:lw(DstReg, DstReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary>>;
+ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 ->
+    % Small offset, no registers available - use DstReg as temp
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:lw(DstReg, DstReg, Y * 4),
+    <<I1/binary, I2/binary>>;
+ldr_y_reg(DstReg, Y, []) ->
+    % Large offset, no registers available - use IP_REG as temp register
+    % Note: IP_REG (t3) can only be used with mov, not ldr directly
+    Offset = Y * 4,
+    {BaseReg, Off} = ?Y_REGS,
+    I1 = jit_riscv32_asm:lw(DstReg, BaseReg, Off),
+    I2 = jit_riscv32_asm:mv(?IP_REG, DstReg),
+    I3 = jit_riscv32_asm:li(DstReg, Offset),
+    I4 = jit_riscv32_asm:add(DstReg, DstReg, ?IP_REG),
+    I5 = jit_riscv32_asm:lw(DstReg, DstReg, 0),
+    <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>.
+
+%% Move Reg from the used list back to the available list. Crashes with
+%% badmatch if Reg is not currently marked as used.
+free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) ->
+    AvailableRegs1 = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []),
+    true = lists:member(Reg, UsedRegs0),
+    UsedRegs1 = lists:delete(Reg, UsedRegs0),
+    {AvailableRegs1, UsedRegs1}.
+
+%% Insert Reg into the available list at the position it has in the reference
+%% ordering (first argument), so the available list keeps that ordering.
+free_reg0([Reg | _SortedT], PrevRegs0, Reg, Acc) ->
+    % Reached Reg's slot: put it in front of the not yet visited registers
+    lists:reverse(Acc, [Reg | PrevRegs0]);
+free_reg0([PrevReg | SortedT], [PrevReg | PrevT], Reg, Acc) ->
+    % Register already available and ordered before Reg: keep it
+    free_reg0(SortedT, PrevT, Reg, [PrevReg | Acc]);
+free_reg0([_Other | SortedT], PrevRegs, Reg, Acc) ->
+    % Reference register that is not currently available: skip it
+    free_reg0(SortedT, PrevRegs, Reg, Acc).
+
+%% Map each call argument to the register it reads: an explicit register is
+%% returned as is, context-relative values (x_reg, y_reg, fp_reg) read
+%% ?CTX_REG, and immediates map to the atom imm.
+args_regs(Args) ->
+    lists:map(
+        fun
+            ({free, {ptr, Reg}}) -> Reg;
+            ({free, Reg}) when is_atom(Reg) -> Reg;
+            ({free, Imm}) when is_integer(Imm) -> imm;
+            (offset) -> imm;
+            (ctx) -> ?CTX_REG;
+            (jit_state) -> jit_state;
+            (jit_state_tail_call) -> jit_state;
+            (stack) -> stack;
+            (Reg) when is_atom(Reg) -> Reg;
+            (Imm) when is_integer(Imm) -> imm;
+            ({ptr, Reg}) -> Reg;
+            ({x_reg, _}) -> ?CTX_REG;
+            ({y_reg, _}) -> ?CTX_REG;
+            ({fp_reg, _}) -> ?CTX_REG;
+            ({free, {x_reg, _}}) -> ?CTX_REG;
+            ({free, {y_reg, _}}) -> ?CTX_REG;
+            ({free, {fp_reg, _}}) -> ?CTX_REG;
+            ({avm_int64_t, _}) -> imm
+        end,
+        Args
+    ).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at the current offset.
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference()) -> state().
+add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label) ->
+    Offset0 = StreamModule:offset(Stream0),
+    add_label(State0, Label, Offset0).
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at a specific offset
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @param Offset the explicit offset for this label
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(#state{labels = Labels} = State, Label, Offset) ->
+    % Labels are prepended; an earlier entry for the same label is kept.
+    State#state{labels = [{Label, Offset} | Labels]}.
+
+-ifdef(JIT_DWARF).
+%%-----------------------------------------------------------------------------
+%% @doc Return the DWARF register number for the ctx parameter
+%% @returns The DWARF register number where ctx is passed (a0 in RISC-V)
+%% @end
+%%-----------------------------------------------------------------------------
+-spec dwarf_ctx_register() -> non_neg_integer().
+dwarf_ctx_register() ->
+    ?DWARF_A0_REG_RISCV32.
+-endif.
diff --git a/libs/jit/src/jit_riscv32_asm.erl b/libs/jit/src/jit_riscv32_asm.erl
new file mode 100644
index 0000000000..25bf1ff574
--- /dev/null
+++ b/libs/jit/src/jit_riscv32_asm.erl
@@ -0,0 +1,1802 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32_asm).
+ +-export([ + % R-type arithmetic and logical instructions + add/3, + sub/3, + and_/3, + or_/2, + or_/3, + xor_/3, + sll/3, + srl/3, + sra/3, + slt/3, + sltu/3, + % I-type immediate instructions + addi/3, + andi/3, + ori/3, + xori/3, + slli/3, + srli/3, + srai/3, + slti/3, + sltiu/3, + % Load instructions + lw/2, + lw/3, + lh/2, + lh/3, + lhu/2, + lhu/3, + lb/2, + lb/3, + lbu/2, + lbu/3, + % Store instructions + sw/2, + sw/3, + sh/2, + sh/3, + sb/2, + sb/3, + % Branch instructions + beq/3, + bne/3, + blt/3, + bge/3, + bltu/3, + bgeu/3, + % Jump instructions + jal/2, + jalr/3, + jalr/2, + % Upper immediate instructions + lui/2, + auipc/2, + % Pseudo-instructions + nop/0, + li/2, + mv/2, + not_/2, + neg/2, + j/1, + jr/1, + ret/0, + call/2, + % M extension (multiply/divide) + mul/3, + % C extension (compressed) - arithmetic/logical + c_add/2, + c_sub/2, + c_and/2, + c_or/2, + c_xor/2, + c_mv/2, + % C extension - immediate instructions + c_addi/2, + c_andi/2, + c_li/2, + c_lui/2, + c_addi16sp/1, + c_addi4spn/2, + % C extension - shift instructions + c_slli/2, + c_srli/2, + c_srai/2, + % C extension - load/store + c_lw/2, + c_sw/2, + c_lwsp/2, + c_swsp/2, + % C extension - branches and jumps + c_beqz/2, + c_bnez/2, + c_j/1, + c_jal/1, + c_jr/1, + c_jalr/1, + % C extension - system instructions + c_ebreak/0, + % C extension - pseudo-instructions + c_nop/0 +]). + +-export_type([ + riscv_register/0 +]). + +%% RISC-V 32-bit (RV32I) Assembler +%% +%% This module provides an assembler for the RISC-V 32-bit instruction set. +%% It generates binary machine code for RISC-V instructions following the +%% RV32I base integer instruction set architecture. 
+%% +%% RISC-V Register Set (32 registers): +%% x0 (zero) - Hardwired zero (reads as 0, writes ignored) +%% x1 (ra) - Return address +%% x2 (sp) - Stack pointer +%% x3 (gp) - Global pointer +%% x4 (tp) - Thread pointer +%% x5 (t0) - Temporary register 0 +%% x6 (t1) - Temporary register 1 +%% x7 (t2) - Temporary register 2 +%% x8 (s0/fp)- Saved register 0 / Frame pointer +%% x9 (s1) - Saved register 1 +%% x10 (a0) - Function argument 0 / Return value 0 +%% x11 (a1) - Function argument 1 / Return value 1 +%% x12 (a2) - Function argument 2 +%% x13 (a3) - Function argument 3 +%% x14 (a4) - Function argument 4 +%% x15 (a5) - Function argument 5 +%% x16 (a6) - Function argument 6 +%% x17 (a7) - Function argument 7 +%% x18 (s2) - Saved register 2 +%% x19 (s3) - Saved register 3 +%% x20 (s4) - Saved register 4 +%% x21 (s5) - Saved register 5 +%% x22 (s6) - Saved register 6 +%% x23 (s7) - Saved register 7 +%% x24 (s8) - Saved register 8 +%% x25 (s9) - Saved register 9 +%% x26 (s10) - Saved register 10 +%% x27 (s11) - Saved register 11 +%% x28 (t3) - Temporary register 3 +%% x29 (t4) - Temporary register 4 +%% x30 (t5) - Temporary register 5 +%% x31 (t6) - Temporary register 6 +%% +%% RISC-V Calling Convention (ILP32): +%% - Arguments: a0-a7 (x10-x17) +%% - Return values: a0-a1 (x10-x11) +%% - Caller-saved: t0-t6, a0-a7 +%% - Callee-saved: s0-s11, sp, ra +%% - Stack grows downward +%% - Stack must be 16-byte aligned at function call boundaries +%% +%% Instruction Encoding: +%% All RV32I instructions are 32 bits (4 bytes). +%% Bit ordering is little-endian within each 32-bit word. 
+%%
+%% See: RISC-V Instruction Set Manual, Volume I: User-Level ISA
+%% https://riscv.org/technical/specifications/
+%% https://github.com/riscv/riscv-isa-manual/
+
+%% ABI names of the 32 integer registers; fp is an alias of s0.
+-type riscv_register() ::
+    zero
+    | ra
+    | sp
+    | gp
+    | tp
+    | t0
+    | t1
+    | t2
+    | s0
+    | fp
+    | s1
+    | a0
+    | a1
+    | a2
+    | a3
+    | a4
+    | a5
+    | a6
+    | a7
+    | s2
+    | s3
+    | s4
+    | s5
+    | s6
+    | s7
+    | s8
+    | s9
+    | s10
+    | s11
+    | t3
+    | t4
+    | t5
+    | t6.
+
+%%-----------------------------------------------------------------------------
+%% Helper functions
+%%-----------------------------------------------------------------------------
+
+%% Convert register atoms to register numbers (0-31)
+%% Any other term crashes with function_clause.
+-spec reg_to_num(riscv_register()) -> 0..31.
+% ABI names
+reg_to_num(zero) -> 0;
+reg_to_num(ra) -> 1;
+reg_to_num(sp) -> 2;
+reg_to_num(gp) -> 3;
+reg_to_num(tp) -> 4;
+reg_to_num(t0) -> 5;
+reg_to_num(t1) -> 6;
+reg_to_num(t2) -> 7;
+reg_to_num(s0) -> 8;
+reg_to_num(fp) -> 8;
+reg_to_num(s1) -> 9;
+reg_to_num(a0) -> 10;
+reg_to_num(a1) -> 11;
+reg_to_num(a2) -> 12;
+reg_to_num(a3) -> 13;
+reg_to_num(a4) -> 14;
+reg_to_num(a5) -> 15;
+reg_to_num(a6) -> 16;
+reg_to_num(a7) -> 17;
+reg_to_num(s2) -> 18;
+reg_to_num(s3) -> 19;
+reg_to_num(s4) -> 20;
+reg_to_num(s5) -> 21;
+reg_to_num(s6) -> 22;
+reg_to_num(s7) -> 23;
+reg_to_num(s8) -> 24;
+reg_to_num(s9) -> 25;
+reg_to_num(s10) -> 26;
+reg_to_num(s11) -> 27;
+reg_to_num(t3) -> 28;
+reg_to_num(t4) -> 29;
+reg_to_num(t5) -> 30;
+reg_to_num(t6) -> 31.
+
+%%-----------------------------------------------------------------------------
+%% R-type instruction encoding
+%%-----------------------------------------------------------------------------
+
+%% R-type instruction format:
+%% funct7 (7) | rs2 (5) | rs1 (5) | funct3 (3) | rd (5) | opcode (7)
+%% Bits: 31-25    24-20     19-15     14-12        11-7     6-0
+
+-spec encode_r_type(
+    Opcode :: 0..127,
+    Rd :: riscv_register(),
+    Funct3 :: 0..7,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Funct7 :: 0..127
+) -> binary().
+encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7) ->
+    RdNum = reg_to_num(Rd),
+    Rs1Num = reg_to_num(Rs1),
+    Rs2Num = reg_to_num(Rs2),
+    Instr =
+        (Funct7 bsl 25) bor
+            (Rs2Num bsl 20) bor
+            (Rs1Num bsl 15) bor
+            (Funct3 bsl 12) bor
+            (RdNum bsl 7) bor
+            Opcode,
+    % Instructions are stored as little-endian 32-bit words
+    <<Instr:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% R-type arithmetic and logical instructions
+%%-----------------------------------------------------------------------------
+
+%% ADD - Add
+%% rd = rs1 + rs2
+-spec add(riscv_register(), riscv_register(), riscv_register()) -> binary().
+add(Rd, Rs1, Rs2) when Rd =:= Rs1, Rd =/= zero, Rs2 =/= zero ->
+    % Use c.add when rd == rs1 and neither register is zero
+    c_add(Rd, Rs2);
+add(Rd, Rs1, Rs2) ->
+    % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000000
+    encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#00).
+
+%% SUB - Subtract
+%% rd = rs1 - rs2
+-spec sub(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sub(Rd, Rs1, Rs2) when Rd =:= Rs1 ->
+    % Use c.sub when rd == rs1 and both registers are in the compressed set
+    case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of
+        true -> c_sub(Rd, Rs2);
+        false -> encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20)
+    end;
+sub(Rd, Rs1, Rs2) ->
+    % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0100000
+    encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20).
+
+%% AND - Bitwise AND
+%% rd = rs1 & rs2
+-spec and_(riscv_register(), riscv_register(), riscv_register()) -> binary().
+and_(Rd, Rs1, Rs2) when Rd =:= Rs1 ->
+    % Use c.and when rd == rs1 and both registers are in the compressed set
+    case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of
+        true -> c_and(Rd, Rs2);
+        false -> encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00)
+    end;
+and_(Rd, Rs1, Rs2) ->
+    % Opcode: 0110011 (0x33), Funct3: 111, Funct7: 0000000
+    encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00).
+
+%% OR - Bitwise OR
+%% rd = rs1 | rs2
+-spec or_(riscv_register(), riscv_register(), riscv_register()) -> binary().
+or_(Rd, Rs1, Rs2) when Rd =:= Rs1 ->
+    % Use c.or when rd == rs1 and both registers are in the compressed set
+    case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of
+        true -> c_or(Rd, Rs2);
+        false -> encode_r_type(16#33, Rd, 16#6, Rs1, Rs2, 16#00)
+    end;
+or_(Rd, Rs1, Rs2) ->
+    % Opcode: 0110011 (0x33), Funct3: 110, Funct7: 0000000
+    encode_r_type(16#33, Rd, 16#6, Rs1, Rs2, 16#00).
+
+%% OR - Bitwise OR (in-place)
+%% rd = rd | rs
+-spec or_(riscv_register(), riscv_register()) -> binary().
+or_(Rd, Rs) ->
+    or_(Rd, Rd, Rs).
+
+%% XOR - Bitwise XOR
+%% rd = rs1 ^ rs2
+-spec xor_(riscv_register(), riscv_register(), riscv_register()) -> binary().
+xor_(Rd, Rs1, Rs2) when Rd =:= Rs1 ->
+    % Use c.xor when rd == rs1 and both registers are in the compressed set
+    case is_compressed_reg(Rd) andalso is_compressed_reg(Rs2) of
+        true -> c_xor(Rd, Rs2);
+        false -> encode_r_type(16#33, Rd, 16#4, Rs1, Rs2, 16#00)
+    end;
+xor_(Rd, Rs1, Rs2) ->
+    % Opcode: 0110011 (0x33), Funct3: 100, Funct7: 0000000
+    encode_r_type(16#33, Rd, 16#4, Rs1, Rs2, 16#00).
+
+%% SLL - Shift Left Logical
+%% rd = rs1 << rs2[4:0]
+-spec sll(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sll(Rd, Rs1, Rs2) ->
+    % Opcode: 0110011 (0x33), Funct3: 001, Funct7: 0000000
+    encode_r_type(16#33, Rd, 16#1, Rs1, Rs2, 16#00).
+
+%% SRL - Shift Right Logical
+%% rd = rs1 >> rs2[4:0] (zero-extend)
+-spec srl(riscv_register(), riscv_register(), riscv_register()) -> binary().
+srl(Rd, Rs1, Rs2) ->
+    % Opcode: 0110011 (0x33), Funct3: 101, Funct7: 0000000
+    encode_r_type(16#33, Rd, 16#5, Rs1, Rs2, 16#00).
+
+%% SRA - Shift Right Arithmetic
+%% rd = rs1 >> rs2[4:0] (sign-extend)
+-spec sra(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sra(Rd, Rs1, Rs2) ->
+    % Opcode: 0110011 (0x33), Funct3: 101, Funct7: 0100000
+    encode_r_type(16#33, Rd, 16#5, Rs1, Rs2, 16#20).
+
+%% SLT - Set Less Than
+%% rd = (rs1 < rs2) ? 1 : 0 (signed)
+-spec slt(riscv_register(), riscv_register(), riscv_register()) -> binary().
+slt(Rd, Rs1, Rs2) ->
+    % Opcode: 0110011 (0x33), Funct3: 010, Funct7: 0000000
+    encode_r_type(16#33, Rd, 16#2, Rs1, Rs2, 16#00).
+
+%% SLTU - Set Less Than Unsigned
+%% rd = (rs1 < rs2) ? 1 : 0 (unsigned)
+-spec sltu(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sltu(Rd, Rs1, Rs2) ->
+    % Opcode: 0110011 (0x33), Funct3: 011, Funct7: 0000000
+    encode_r_type(16#33, Rd, 16#3, Rs1, Rs2, 16#00).
+
+%%-----------------------------------------------------------------------------
+%% I-type instruction encoding
+%%-----------------------------------------------------------------------------
+
+%% I-type instruction format:
+%% imm[11:0] (12) | rs1 (5) | funct3 (3) | rd (5) | opcode (7)
+%% Bits: 31-20        19-15     14-12        11-7     6-0
+
+-spec encode_i_type(
+    Opcode :: 0..127,
+    Rd :: riscv_register(),
+    Funct3 :: 0..7,
+    Rs1 :: riscv_register(),
+    Imm :: integer()
+) -> binary().
+encode_i_type(Opcode, Rd, Funct3, Rs1, Imm) ->
+    RdNum = reg_to_num(Rd),
+    Rs1Num = reg_to_num(Rs1),
+    % Keep the low 12 bits: a negative immediate becomes its 12-bit two's
+    % complement form. No range check is done here; callers validate Imm.
+    ImmMasked = Imm band 16#FFF,
+    Instr =
+        (ImmMasked bsl 20) bor
+            (Rs1Num bsl 15) bor
+            (Funct3 bsl 12) bor
+            (RdNum bsl 7) bor
+            Opcode,
+    % Instructions are stored as little-endian 32-bit words
+    <<Instr:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% I-type immediate arithmetic and logical instructions
+%%-----------------------------------------------------------------------------
+
+%% ADDI - Add Immediate
+%% rd = rs1 + imm
+-spec addi(riscv_register(), riscv_register(), integer()) -> binary().
+addi(Rd, Rs1, Imm) when Rd =:= Rs1, Rd =/= zero, Imm >= -32, Imm =< 31 ->
+    % Use c.addi when rd == rs1, rd != zero, and imm fits in 6 bits (signed)
+    c_addi(Rd, Imm);
+addi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
+    % Opcode: 0010011 (0x13), Funct3: 000
+    encode_i_type(16#13, Rd, 16#0, Rs1, Imm);
+addi(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% ANDI - AND Immediate
+%% rd = rs1 & imm
+-spec andi(riscv_register(), riscv_register(), integer()) -> binary().
+andi(Rd, Rs1, Imm) when Rd =:= Rs1, Imm >= -32, Imm =< 31 ->
+    % Use c.andi when rd == rs1, rd is in the compressed set and imm fits in
+    % 6 bits (signed)
+    case is_compressed_reg(Rd) of
+        true -> c_andi(Rd, Imm);
+        false -> encode_i_type(16#13, Rd, 16#7, Rs1, Imm)
+    end;
+andi(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
+    % Opcode: 0010011 (0x13), Funct3: 111
+    encode_i_type(16#13, Rd, 16#7, Rs1, Imm);
+andi(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% ORI - OR Immediate
+%% rd = rs1 | imm
+-spec ori(riscv_register(), riscv_register(), integer()) -> binary().
+ori(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
+    % Opcode: 0010011 (0x13), Funct3: 110
+    encode_i_type(16#13, Rd, 16#6, Rs1, Imm);
+ori(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% XORI - XOR Immediate
+%% rd = rs1 ^ imm
+-spec xori(riscv_register(), riscv_register(), integer()) -> binary().
+xori(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
+    % Opcode: 0010011 (0x13), Funct3: 100
+    encode_i_type(16#13, Rd, 16#4, Rs1, Imm);
+xori(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% SLTI - Set Less Than Immediate
+%% rd = (rs1 < imm) ? 1 : 0 (signed)
+-spec slti(riscv_register(), riscv_register(), integer()) -> binary().
+slti(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
+    % Opcode: 0010011 (0x13), Funct3: 010
+    encode_i_type(16#13, Rd, 16#2, Rs1, Imm);
+slti(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% SLTIU - Set Less Than Immediate Unsigned
+%% rd = (rs1 < imm) ? 1 : 0 (unsigned)
+-spec sltiu(riscv_register(), riscv_register(), integer()) -> binary().
+sltiu(Rd, Rs1, Imm) when Imm >= -2048, Imm =< 2047 ->
+    % Opcode: 0010011 (0x13), Funct3: 011
+    encode_i_type(16#13, Rd, 16#3, Rs1, Imm);
+sltiu(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%%-----------------------------------------------------------------------------
+%% I-type immediate shift instructions
+%%-----------------------------------------------------------------------------
+
+%% SLLI - Shift Left Logical Immediate
+%% rd = rs1 << shamt
+-spec slli(riscv_register(), riscv_register(), 0..31) -> binary().
+slli(Rd, Rs1, Shamt) when Rd =:= Rs1, Rd =/= zero, Shamt >= 1, Shamt =< 31 ->
+    % Use c.slli when rd == rs1, rd != zero, and shamt != 0 (c.slli with shamt=0 is reserved)
+    c_slli(Rd, Shamt);
+slli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
+    % Opcode: 0010011 (0x13), Funct3: 001, Imm[11:5] = 0000000
+    encode_i_type(16#13, Rd, 16#1, Rs1, Shamt);
+slli(_Rd, _Rs1, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 31}).
+
+%% SRLI - Shift Right Logical Immediate
+%% rd = rs1 >> shamt (zero-extend)
+-spec srli(riscv_register(), riscv_register(), 0..31) -> binary().
+srli(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 1, Shamt =< 31 ->
+    % Use c.srli when rd == rs1, rd is in the compressed set and shamt != 0.
+    % As for slli, a zero shift amount is not emitted in compressed form and
+    % falls through to the 32-bit encoding below.
+    case is_compressed_reg(Rd) of
+        true -> c_srli(Rd, Shamt);
+        false -> encode_i_type(16#13, Rd, 16#5, Rs1, Shamt)
+    end;
+srli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
+    % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0000000
+    encode_i_type(16#13, Rd, 16#5, Rs1, Shamt);
+srli(_Rd, _Rs1, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 31}).
+
+%% SRAI - Shift Right Arithmetic Immediate
+%% rd = rs1 >> shamt (sign-extend)
+-spec srai(riscv_register(), riscv_register(), 0..31) -> binary().
+srai(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 1, Shamt =< 31 ->
+    % Use c.srai when rd == rs1, rd is in the compressed set and shamt != 0.
+    % As for slli, a zero shift amount is not emitted in compressed form and
+    % falls through to the 32-bit encoding below.
+    case is_compressed_reg(Rd) of
+        true ->
+            c_srai(Rd, Shamt);
+        false ->
+            ImmWithBit30 = Shamt bor (1 bsl 10),
+            encode_i_type(16#13, Rd, 16#5, Rs1, ImmWithBit30)
+    end;
+srai(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
+    % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0100000
+    % The encoding uses bit 30 (Imm[10]) to distinguish SRAI from SRLI
+    ImmWithBit30 = Shamt bor (1 bsl 10),
+    encode_i_type(16#13, Rd, 16#5, Rs1, ImmWithBit30);
+srai(_Rd, _Rs1, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 31}).
+
+%%-----------------------------------------------------------------------------
+%% Load instructions (I-type)
+%%-----------------------------------------------------------------------------
+
+%% LW - Load Word
+%% rd = mem[rs1 + offset] (32-bit)
+%% The second argument is either a base register (offset 0) or a
+%% {BaseRegister, Offset} tuple.
+-spec lw(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
+    binary().
+lw(Rd, {Rs1, Offset}) ->
+    lw(Rd, Rs1, Offset);
+lw(Rd, Rs1) when is_atom(Rs1) ->
+    lw(Rd, Rs1, 0).
+
+-spec lw(riscv_register(), riscv_register(), integer()) -> binary().
+lw(Rd, sp, Offset) when Rd =/= zero, Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 ->
+    % Use c.lwsp for loads from sp with aligned offset in range
+    c_lwsp(Rd, Offset);
+lw(Rd, Rs1, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 ->
+    % Use c.lw when both registers are in compressed set and offset is aligned
+    case is_compressed_reg(Rd) andalso is_compressed_reg(Rs1) of
+        true -> c_lw(Rd, {Rs1, Offset});
+        false -> encode_i_type(16#03, Rd, 16#2, Rs1, Offset)
+    end;
+lw(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 010
+    encode_i_type(16#03, Rd, 16#2, Rs1, Offset);
+lw(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LH - Load Halfword (sign-extended)
+%% rd = sign_extend(mem[rs1 + offset][15:0])
+%% The second argument is either a base register (offset 0) or a
+%% {BaseRegister, Offset} tuple.
+-spec lh(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
+    binary().
+lh(Rd, {Rs1, Offset}) ->
+    lh(Rd, Rs1, Offset);
+lh(Rd, Rs1) when is_atom(Rs1) ->
+    lh(Rd, Rs1, 0).
+
+-spec lh(riscv_register(), riscv_register(), integer()) -> binary().
+lh(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 001
+    encode_i_type(16#03, Rd, 16#1, Rs1, Offset);
+lh(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LHU - Load Halfword Unsigned (zero-extended)
+%% rd = zero_extend(mem[rs1 + offset][15:0])
+%% The second argument is either a base register (offset 0) or a
+%% {BaseRegister, Offset} tuple.
+-spec lhu(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
+    binary().
+lhu(Rd, {Rs1, Offset}) ->
+    lhu(Rd, Rs1, Offset);
+lhu(Rd, Rs1) when is_atom(Rs1) ->
+    lhu(Rd, Rs1, 0).
+
+-spec lhu(riscv_register(), riscv_register(), integer()) -> binary().
+lhu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 101
+    encode_i_type(16#03, Rd, 16#5, Rs1, Offset);
+lhu(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LB - Load Byte (sign-extended)
+%% rd = sign_extend(mem[rs1 + offset][7:0])
+%% The second argument is either a base register (offset 0) or a
+%% {BaseRegister, Offset} tuple.
+-spec lb(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
+    binary().
+lb(Rd, {Rs1, Offset}) ->
+    lb(Rd, Rs1, Offset);
+lb(Rd, Rs1) when is_atom(Rs1) ->
+    lb(Rd, Rs1, 0).
+
+-spec lb(riscv_register(), riscv_register(), integer()) -> binary().
+lb(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 000
+    encode_i_type(16#03, Rd, 16#0, Rs1, Offset);
+lb(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LBU - Load Byte Unsigned (zero-extended)
+%% rd = zero_extend(mem[rs1 + offset][7:0])
+%% The second argument is either a base register (offset 0) or a
+%% {BaseRegister, Offset} tuple.
+-spec lbu(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
+    binary().
lbu(Rd, {Rs1, Offset}) ->
    lbu(Rd, Rs1, Offset);
lbu(Rd, Rs1) when is_atom(Rs1) ->
    lbu(Rd, Rs1, 0).

%% LBU with an explicit base register and signed 12-bit byte offset.
%% Raises `{offset_out_of_range, Offset, -2048, 2047}' otherwise.
-spec lbu(riscv_register(), riscv_register(), integer()) -> binary().
lbu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0000011 (0x03), Funct3: 100
    encode_i_type(16#03, Rd, 16#4, Rs1, Offset);
lbu(_Rd, _Rs1, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%%-----------------------------------------------------------------------------
%% S-type instruction encoding (for stores)
%%-----------------------------------------------------------------------------

%% S-type instruction format:
%% imm[11:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:0] (5) | opcode (7)
%% Bits:  31-25     24-20     19-15      14-12         11-7          6-0

%% Assemble an S-type instruction word. The immediate is truncated to its low
%% 12 bits (two's complement), so range checking is the caller's job.
-spec encode_s_type(
    Opcode :: 0..127,
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Imm :: integer()
) -> binary().
encode_s_type(Opcode, Funct3, Rs1, Rs2, Imm) ->
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    % Split immediate: imm[11:5] goes to bits 31-25, imm[4:0] goes to bits 11-7
    ImmMasked = Imm band 16#FFF,
    Imm11_5 = (ImmMasked bsr 5) band 16#7F,
    Imm4_0 = ImmMasked band 16#1F,
    Instr =
        (Imm11_5 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (Imm4_0 bsl 7) bor
            Opcode,
    % NOTE(review): the binary construction was lost in extraction; restored as
    % one 32-bit little-endian word - confirm against the upstream file.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Store instructions (S-type)
%%-----------------------------------------------------------------------------

%% SW - Store Word
%% mem[rs1 + offset] = rs2[31:0]
%% The first argument is the source register; the second is either a
%% `{Base, Offset}' pair or a bare base register (offset 0).
-spec sw(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
sw(Rs2, {Rs1, Offset}) ->
    sw(Rs1, Rs2, Offset);
sw(Rs2, Rs1) when is_atom(Rs1) ->
    sw(Rs1, Rs2, 0).

%% SW with explicit operands. Note the order: base register first, then the
%% source register, then the byte offset.
-spec sw(riscv_register(), riscv_register(), integer()) -> binary().
sw(sp, Rs2, Offset) when Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 ->
    % Use c.swsp for stores to sp with aligned offset in range
    c_swsp(Rs2, Offset);
sw(Rs1, Rs2, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 ->
    % Use c.sw when both registers are in compressed set and offset is aligned
    case is_compressed_reg(Rs1) andalso is_compressed_reg(Rs2) of
        true -> c_sw(Rs2, {Rs1, Offset});
        false -> encode_s_type(16#23, 16#2, Rs1, Rs2, Offset)
    end;
sw(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 010
    encode_s_type(16#23, 16#2, Rs1, Rs2, Offset);
sw(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% SH - Store Halfword
%% mem[rs1 + offset][15:0] = rs2[15:0]
%% The first argument is the source register; the second is either a
%% `{Base, Offset}' pair or a bare base register (offset 0).
-spec sh(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
sh(Rs2, {Rs1, Offset}) ->
    sh(Rs1, Rs2, Offset);
sh(Rs2, Rs1) when is_atom(Rs1) ->
    sh(Rs1, Rs2, 0).

%% SH with explicit operands: base register, source register, byte offset.
%% Raises `{offset_out_of_range, Offset, -2048, 2047}' when the offset does not
%% fit in a signed 12-bit immediate.
-spec sh(riscv_register(), riscv_register(), integer()) -> binary().
sh(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 001
    encode_s_type(16#23, 16#1, Rs1, Rs2, Offset);
sh(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% SB - Store Byte
%% mem[rs1 + offset][7:0] = rs2[7:0]
%% The first argument is the source register; the second is either a
%% `{Base, Offset}' pair or a bare base register (offset 0).
-spec sb(riscv_register(), {riscv_register(), integer()} | riscv_register()) ->
    binary().
sb(Rs2, {Rs1, Offset}) ->
    sb(Rs1, Rs2, Offset);
sb(Rs2, Rs1) when is_atom(Rs1) ->
    sb(Rs1, Rs2, 0).

%% SB with explicit operands: base register, source register, byte offset.
%% Raises `{offset_out_of_range, Offset, -2048, 2047}' when the offset does not
%% fit in a signed 12-bit immediate.
-spec sb(riscv_register(), riscv_register(), integer()) -> binary().
sb(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Opcode: 0100011 (0x23), Funct3: 000
    encode_s_type(16#23, 16#0, Rs1, Rs2, Offset);
sb(_Rs1, _Rs2, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).
%%-----------------------------------------------------------------------------
%% B-type instruction encoding (for branches)
%%-----------------------------------------------------------------------------

%% B-type instruction format:
%% imm[12|10:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:1|11] (5) | opcode (7)
%% Bits:  31-25        24-20     19-15      14-12          11-7            6-0
%%
%% The immediate is split across the instruction and represents a signed offset
%% in multiples of 2 bytes (must be 2-byte aligned).
%% Range: -4096 to +4094 bytes

%% Assemble a B-type instruction word. The offset is truncated to 13 bits and
%% bit 0 is dropped; alignment and range are checked by the callers.
-spec encode_b_type(
    Opcode :: 0..127,
    Funct3 :: 0..7,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Offset :: integer()
) -> binary().
encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset) ->
    Rs1Num = reg_to_num(Rs1),
    Rs2Num = reg_to_num(Rs2),
    % Offset must be 2-byte aligned and in range [-4096, 4094]
    % Extract bits: imm[12], imm[10:5], imm[4:1], imm[11]
    OffsetMasked = Offset band 16#1FFF,
    % imm[12] -> bit 31
    Imm12 = (OffsetMasked bsr 12) band 1,
    % imm[10:5] -> bits 30-25
    Imm10_5 = (OffsetMasked bsr 5) band 16#3F,
    % imm[4:1] -> bits 11-8
    Imm4_1 = (OffsetMasked bsr 1) band 16#F,
    % imm[11] -> bit 7
    Imm11 = (OffsetMasked bsr 11) band 1,
    Instr =
        (Imm12 bsl 31) bor
            (Imm10_5 bsl 25) bor
            (Rs2Num bsl 20) bor
            (Rs1Num bsl 15) bor
            (Funct3 bsl 12) bor
            (Imm4_1 bsl 8) bor
            (Imm11 bsl 7) bor
            Opcode,
    % NOTE(review): the binary construction was lost in extraction; restored as
    % one 32-bit little-endian word - confirm against the upstream file.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Branch instructions (B-type)
%%-----------------------------------------------------------------------------

%% BEQ - Branch if Equal
%% if (rs1 == rs2) pc += offset
-spec beq(riscv_register(), riscv_register(), integer()) -> binary().
beq(Reg, zero, Offset) when (Offset band 1) =:= 0, Offset >= -256, Offset =< 254 ->
    % Short even branch against x0: c.beqz is possible, but only for a register
    % of the compressed set; otherwise emit the regular 32-bit BEQ.
    Compressible = is_compressed_reg(Reg),
    if
        Compressible -> c_beqz(Reg, Offset);
        true -> encode_b_type(16#63, 16#0, Reg, zero, Offset)
    end;
beq(Lhs, Rhs, Offset) when (Offset band 1) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % BEQ proper: opcode 0x63 (1100011), funct3 000
    encode_b_type(16#63, 16#0, Lhs, Rhs, Offset);
beq(_Lhs, _Rhs, Offset) when (Offset band 1) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
beq(_Lhs, _Rhs, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BNE - Branch if Not Equal
%% Taken when rs1 and rs2 differ: pc += offset
-spec bne(riscv_register(), riscv_register(), integer()) -> binary().
bne(Reg, zero, Offset) when (Offset band 1) =:= 0, Offset >= -256, Offset =< 254 ->
    % Short even branch against x0: c.bnez is possible, but only for a register
    % of the compressed set; otherwise emit the regular 32-bit BNE.
    Compressible = is_compressed_reg(Reg),
    if
        Compressible -> c_bnez(Reg, Offset);
        true -> encode_b_type(16#63, 16#1, Reg, zero, Offset)
    end;
bne(Lhs, Rhs, Offset) when (Offset band 1) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % BNE proper: opcode 0x63 (1100011), funct3 001
    encode_b_type(16#63, 16#1, Lhs, Rhs, Offset);
bne(_Lhs, _Rhs, Offset) when (Offset band 1) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bne(_Lhs, _Rhs, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BLT - Branch if Less Than (signed)
%% Taken when rs1 < rs2 as signed values: pc += offset
-spec blt(riscv_register(), riscv_register(), integer()) -> binary().
blt(Lhs, Rhs, Offset) when (Offset band 1) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % opcode 0x63 (1100011), funct3 100
    encode_b_type(16#63, 16#4, Lhs, Rhs, Offset);
blt(_Lhs, _Rhs, Offset) when (Offset band 1) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
blt(_Lhs, _Rhs, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).
%% BGE - Branch if Greater or Equal (signed)
%% Taken when rs1 >= rs2 as signed values: pc += offset
-spec bge(riscv_register(), riscv_register(), integer()) -> binary().
bge(Lhs, Rhs, Offset) when (Offset band 1) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % opcode 0x63 (1100011), funct3 101
    encode_b_type(16#63, 16#5, Lhs, Rhs, Offset);
bge(_Lhs, _Rhs, Offset) when (Offset band 1) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bge(_Lhs, _Rhs, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BLTU - Branch if Less Than Unsigned
%% Taken when rs1 < rs2 as unsigned values: pc += offset
-spec bltu(riscv_register(), riscv_register(), integer()) -> binary().
bltu(Lhs, Rhs, Offset) when (Offset band 1) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % opcode 0x63 (1100011), funct3 110
    encode_b_type(16#63, 16#6, Lhs, Rhs, Offset);
bltu(_Lhs, _Rhs, Offset) when (Offset band 1) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bltu(_Lhs, _Rhs, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%% BGEU - Branch if Greater or Equal Unsigned
%% Taken when rs1 >= rs2 as unsigned values: pc += offset
-spec bgeu(riscv_register(), riscv_register(), integer()) -> binary().
bgeu(Lhs, Rhs, Offset) when (Offset band 1) =:= 0, Offset >= -4096, Offset =< 4094 ->
    % opcode 0x63 (1100011), funct3 111
    encode_b_type(16#63, 16#7, Lhs, Rhs, Offset);
bgeu(_Lhs, _Rhs, Offset) when (Offset band 1) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
bgeu(_Lhs, _Rhs, Offset) ->
    error({offset_out_of_range, Offset, -4096, 4094}).

%%-----------------------------------------------------------------------------
%% J-type instruction encoding (for JAL)
%%-----------------------------------------------------------------------------

%% J-type instruction format (JAL):
%% imm[20|10:1|11|19:12] (20) | rd (5) | opcode (7)
%% Bits:        31-12            11-7       6-0
%%
%% The immediate represents a signed offset in multiples of 2 bytes.
%% Range: ±1 MiB (-1048576 to +1048574 bytes)

%% Assemble a J-type instruction word. The offset is truncated to 21 bits and
%% bit 0 is dropped; alignment and range are checked by the callers.
-spec encode_j_type(
    Opcode :: 0..127, Rd :: riscv_register(), Offset :: integer()
) -> binary().
encode_j_type(Opcode, Rd, Offset) ->
    RdNum = reg_to_num(Rd),
    % Extract immediate bits: imm[20], imm[10:1], imm[11], imm[19:12]
    OffsetMasked = Offset band 16#1FFFFF,
    % imm[20] -> bit 31
    Imm20 = (OffsetMasked bsr 20) band 1,
    % imm[10:1] -> bits 30-21
    Imm10_1 = (OffsetMasked bsr 1) band 16#3FF,
    % imm[11] -> bit 20
    Imm11 = (OffsetMasked bsr 11) band 1,
    % imm[19:12] -> bits 19-12
    Imm19_12 = (OffsetMasked bsr 12) band 16#FF,
    Instr =
        (Imm20 bsl 31) bor
            (Imm10_1 bsl 21) bor
            (Imm11 bsl 20) bor
            (Imm19_12 bsl 12) bor
            (RdNum bsl 7) bor
            Opcode,
    % NOTE(review): the binary construction was lost in extraction; restored as
    % one 32-bit little-endian word - confirm against the upstream file.
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% U-type instruction encoding (for LUI, AUIPC)
%%-----------------------------------------------------------------------------

%% U-type instruction format:
%% imm[31:12] (20) | rd (5) | opcode (7)
%% Bits:  31-12       11-7       6-0

%% Assemble a U-type instruction word. `Imm' is the already-shifted value:
%% only its bits 31:12 are encoded, the low 12 bits are discarded.
-spec encode_u_type(
    Opcode :: 0..127, Rd :: riscv_register(), Imm :: integer()
) -> binary().
encode_u_type(Opcode, Rd, Imm) ->
    RdNum = reg_to_num(Rd),
    % Upper 20 bits of immediate
    ImmUpper = (Imm bsr 12) band 16#FFFFF,
    Instr = (ImmUpper bsl 12) bor (RdNum bsl 7) bor Opcode,
    % NOTE(review): restored stripped binary construction (32-bit, little-endian).
    <<Instr:32/little>>.

%%-----------------------------------------------------------------------------
%% Jump and link instructions
%%-----------------------------------------------------------------------------

%% JAL - Jump and Link
%% rd = pc + 4; pc += offset
-spec jal(riscv_register(), integer()) -> binary().
jal(Rd, Offset) when
    (Rd =:= zero orelse Rd =:= ra),
    (Offset band 1) =:= 0,
    Offset >= -2048,
    Offset =< 2046
->
    % Short even jump: x0 as link register maps to c.j, ra maps to c.jal
    % (the latter exists in RV32C only).
    case Rd of
        zero -> c_j(Offset);
        ra -> c_jal(Offset)
    end;
jal(Rd, Offset) when (Offset band 1) =:= 0, Offset >= -1048576, Offset =< 1048574 ->
    % Regular JAL: opcode 0x6F (1101111)
    encode_j_type(16#6F, Rd, Offset);
jal(_Rd, Offset) when (Offset band 1) =/= 0 ->
    error({offset_not_aligned, Offset, 2});
jal(_Rd, Offset) ->
    error({offset_out_of_range, Offset, -1048576, 1048574}).

%% JALR - Jump and Link Register
%% rd = pc + 4; pc = (rs1 + offset) & ~1
-spec jalr(riscv_register(), riscv_register(), integer()) -> binary().
jalr(Link, Base, 0) when Link =:= zero, Base =/= zero ->
    % No link and no offset: c.jr
    c_jr(Base);
jalr(Link, Base, 0) when Link =:= ra, Base =/= zero ->
    % Link in ra and no offset: c.jalr
    c_jalr(Base);
jalr(Link, Base, Offset) when Offset >= -2048, Offset =< 2047 ->
    % Regular JALR: opcode 0x67 (1100111), funct3 000
    encode_i_type(16#67, Link, 16#0, Base, Offset);
jalr(_Link, _Base, Offset) ->
    error({offset_out_of_range, Offset, -2048, 2047}).

%% JALR - Jump and Link Register, two-argument form
%% Same as jalr/3 with an offset of 0: rd = pc + 4; pc = rs1 & ~1
-spec jalr(riscv_register(), riscv_register()) -> binary().
jalr(Link, Base) ->
    jalr(Link, Base, 0).

%%-----------------------------------------------------------------------------
%% Upper immediate instructions
%%-----------------------------------------------------------------------------

%% LUI - Load Upper Immediate
%% rd = imm << 12
-spec lui(riscv_register(), integer()) -> binary().
lui(Rd, Imm) when Rd =/= zero, Rd =/= sp, Imm >= -32, Imm =< 31, Imm =/= 0 ->
    % Use c.lui when imm is a non-zero signed 6-bit value and rd is neither
    % zero nor sp: c_lui/2 rejects both registers, so they must take the
    % 32-bit encoding below instead of raising.
    c_lui(Rd, Imm);
lui(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF ->
    % Opcode: 0110111 (0x37)
    encode_u_type(16#37, Rd, Imm bsl 12);
lui(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).

%% AUIPC - Add Upper Immediate to PC
%% rd = pc + (imm << 12)
%% `Imm' is the signed 20-bit upper immediate (not yet shifted).
%% Raises `{immediate_out_of_range, Imm, -16#80000, 16#7FFFF}' otherwise.
-spec auipc(riscv_register(), integer()) -> binary().
auipc(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF ->
    % Opcode: 0010111 (0x17)
    encode_u_type(16#17, Rd, Imm bsl 12);
auipc(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).

%%-----------------------------------------------------------------------------
%% Pseudo-instructions
%%-----------------------------------------------------------------------------
%% These are convenience instructions that map to actual RV32I instructions

%% NOP - No Operation
%% Expands to: addi x0, x0, 0
-spec nop() -> binary().
nop() ->
    addi(zero, zero, 0).

%% LI - Load Immediate
%% Load a 32-bit immediate value into a register
%% For small immediates (-2048 to 2047): addi rd, x0, imm
%% For larger immediates: lui + addi sequence
-spec li(riscv_register(), integer()) -> binary().
li(Rd, Imm) when Rd =/= zero, Imm >= -32, Imm =< 31 ->
    % Use c.li when rd != zero and imm fits in 6 bits (signed)
    c_li(Rd, Imm);
li(Rd, Imm) when Imm >= -2048, Imm =< 2047 ->
    % Small immediate: addi rd, x0, imm
    addi(Rd, zero, Imm);
li(Rd, Imm) when Imm >= -16#80000000, Imm =< 16#7FFFFFFF ->
    % Large immediate: lui + addi
    % Split into upper 20 bits and lower 12 bits
    % Need to account for sign extension of lower 12 bits
    Lower = Imm band 16#FFF,
    % If lower 12 bits has sign bit set, we need to add 1 to upper
    UpperRaw =
        if
            Lower >= 16#800 ->
                (Imm bsr 12) + 1;
            true ->
                Imm bsr 12
        end,
    % Mask to 20 bits first, then sign extend if needed
    UpperMasked = UpperRaw band 16#FFFFF,
    Upper =
        if
            UpperMasked band 16#80000 =/= 0 ->
                % Bit 19 is set, so this is negative in 20-bit representation
                % Sign extend from 20 bits
                UpperMasked - 16#100000;
            true ->
                % Positive value
                UpperMasked
        end,
    % Sign extend lower 12 bits
    LowerSigned =
        if
            Lower >= 16#800 -> Lower - 16#1000;
            true -> Lower
        end,
    LuiInstr = lui(Rd, Upper),
    AddiInstr = addi(Rd, Rd, LowerSigned),
    % NOTE(review): the binary construction was lost in extraction; restored as
    % the two instructions in execution order - confirm against upstream.
    <<LuiInstr/binary, AddiInstr/binary>>;
li(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -16#80000000, 16#7FFFFFFF}).

%% MV - Move (copy register)
%% Expands to: addi rd, rs, 0 or c.mv rd, rs
-spec mv(riscv_register(), riscv_register()) -> binary().
mv(Rd, Rs) when Rd =/= zero, Rs =/= zero ->
    % Use c.mv when both rd and rs are not zero
    c_mv(Rd, Rs);
mv(Rd, Rs) ->
    addi(Rd, Rs, 0).

%% NOT - Bitwise NOT
%% Expands to: xori rd, rs, -1
-spec not_(riscv_register(), riscv_register()) -> binary().
not_(Rd, Rs) ->
    xori(Rd, Rs, -1).

%% NEG - Negate (two's complement)
%% Expands to: sub rd, x0, rs
-spec neg(riscv_register(), riscv_register()) -> binary().
neg(Rd, Rs) ->
    sub(Rd, zero, Rs).

%% J - Unconditional Jump
%% Expands to: jal x0, offset
-spec j(integer()) -> binary().
j(Offset) ->
    jal(zero, Offset).
%% JR - Jump Register
%% Expands to: jalr x0, rs, 0
-spec jr(riscv_register()) -> binary().
jr(Rs) ->
    jalr(zero, Rs, 0).

%% RET - Return from subroutine
%% Expands to: jalr x0, ra, 0
-spec ret() -> binary().
ret() ->
    jalr(zero, ra, 0).

%% CALL - Call function (far call using AUIPC + JALR)
%% This is a two-instruction sequence for calling functions beyond JAL range
%% Expands to: auipc rd, upper20; jalr ra, rd, lower12
%% where `Rd' is the scratch register holding the intermediate address and
%% upper20/lower12 are the signed split of the PC-relative offset.
%% Raises `{offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}' when the
%% offset does not fit in 32 bits.
-spec call(riscv_register(), integer()) -> binary().
call(Rd, Offset) when Offset >= -16#80000000, Offset =< 16#7FFFFFFF ->
    % Split offset into upper 20 bits and lower 12 bits
    Lower = Offset band 16#FFF,
    % If lower 12 bits has sign bit set, we need to add 1 to upper
    UpperMasked =
        if
            Lower >= 16#800 ->
                ((Offset bsr 12) + 1) band 16#FFFFF;
            true ->
                (Offset bsr 12) band 16#FFFFF
        end,
    % auipc/2 takes a signed 20-bit immediate, so the masked value has to be
    % sign-extended; without this every negative offset is rejected with
    % immediate_out_of_range.
    Upper =
        if
            UpperMasked band 16#80000 =/= 0 -> UpperMasked - 16#100000;
            true -> UpperMasked
        end,
    % Sign extend lower 12 bits
    LowerSigned =
        if
            Lower >= 16#800 -> Lower - 16#1000;
            true -> Lower
        end,
    AuipcInstr = auipc(Rd, Upper),
    JalrInstr = jalr(ra, Rd, LowerSigned),
    % NOTE(review): the binary construction was lost in extraction; restored as
    % the two instructions in execution order - confirm against upstream.
    <<AuipcInstr/binary, JalrInstr/binary>>;
call(_Rd, Offset) ->
    error({offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}).

%% MUL - Multiply (RV32M extension)
%% Multiplies rs1 by rs2 and places the lower 32 bits in rd
%% Format: mul rd, rs1, rs2
%% Encoding: R-type with opcode=0x33, funct3=0x0, funct7=0x01
-spec mul(riscv_register(), riscv_register(), riscv_register()) -> binary().
mul(Rd, Rs1, Rs2) ->
    % Opcode: 0110011 (0x33), Funct3: 000, Funct7: 0000001
    encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#01).

%%-----------------------------------------------------------------------------
%% C Extension (RV32C) - Compressed Instructions
%%-----------------------------------------------------------------------------
%% The C extension adds 16-bit compressed instructions to reduce code size.
%% All compressed instructions are 16 bits (2 bytes) and use a different
%% encoding format from the base 32-bit instructions.
%%
%% Register encoding for compressed instructions:
%% - Some instructions use the full 5-bit register encoding (x0-x31)
%% - Others use 3-bit encoding for registers x8-x15 (s0, s1, a0-a5)
%%   This is called the "compressed register set" or "C register set"
%%
%% Instruction formats:
%% - CR (Register): funct4 | rd/rs1 | rs2 | op
%% - CI (Immediate): funct3 | imm | rd/rs1 | imm | op
%% - CSS (Stack Store): funct3 | imm | rs2 | op
%% - CIW (Wide Immediate): funct3 | imm | rd' | op
%% - CL (Load): funct3 | imm | rs1' | imm | rd' | op
%% - CS (Store): funct3 | imm | rs1' | imm | rs2' | op
%% - CA (Arithmetic): funct6 | rd'/rs1' | funct2 | rs2' | op
%% - CB (Branch): funct3 | offset | rs1' | offset | op
%% - CJ (Jump): funct3 | jump target | op
%%
%% See: RISC-V Instruction Set Manual, Volume I, Chapter 16
%%-----------------------------------------------------------------------------

%% Map a register of the compressed set onto its 3-bit field value:
%% s0/fp -> 0, s1 -> 1, a0..a5 -> 2..7.
%% Any other register raises `{register_not_in_compressed_set, Reg, _}'.
-spec reg_to_c_num(riscv_register()) -> 0..7.
reg_to_c_num(Reg) when Reg =:= s0; Reg =:= fp -> 0;
reg_to_c_num(s1) -> 1;
reg_to_c_num(a0) -> 2;
reg_to_c_num(a1) -> 3;
reg_to_c_num(a2) -> 4;
reg_to_c_num(a3) -> 5;
reg_to_c_num(a4) -> 6;
reg_to_c_num(a5) -> 7;
reg_to_c_num(Other) -> error({register_not_in_compressed_set, Other, 's0/fp, s1, a0-a5'}).

%% Tell whether a register can be named by a 3-bit compressed register field,
%% i.e. whether it is one of s0/fp, s1 or a0-a5.
-spec is_compressed_reg(riscv_register()) -> boolean().
is_compressed_reg(Reg) when
    Reg =:= s0;
    Reg =:= fp;
    Reg =:= s1;
    Reg =:= a0;
    Reg =:= a1;
    Reg =:= a2;
    Reg =:= a3;
    Reg =:= a4;
    Reg =:= a5
->
    true;
is_compressed_reg(_Other) ->
    false.
%%-----------------------------------------------------------------------------
%% CR-type instruction encoding (Compressed Register format)
%%-----------------------------------------------------------------------------
%% CR format: funct4 (4) | rd/rs1 (5) | rs2 (5) | op (2)
%% Bits:        15-12        11-7        6-2      1-0

%% Assemble a CR-type halfword; both registers use the full 5-bit numbering.
-spec encode_cr_type(
    Opcode :: 0..3,
    Rd :: riscv_register(),
    Rs2 :: riscv_register(),
    Funct4 :: 0..15
) -> binary().
encode_cr_type(Opcode, Rd, Rs2, Funct4) ->
    RdNum = reg_to_num(Rd),
    Rs2Num = reg_to_num(Rs2),
    Instr =
        (Funct4 bsl 12) bor
            (RdNum bsl 7) bor
            (Rs2Num bsl 2) bor
            Opcode,
    % NOTE(review): the binary construction was lost in extraction; restored as
    % one 16-bit little-endian halfword - confirm against the upstream file.
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% CI-type instruction encoding (Compressed Immediate format)
%%-----------------------------------------------------------------------------
%% CI format: funct3 (3) | imm[5] (1) | rd/rs1 (5) | imm[4:0] (5) | op (2)
%% Bits:        15-13        12          11-7          6-2          1-0

%% Assemble a CI-type halfword. Only the low 6 bits of `Imm' are encoded
%% (two's complement); range checks belong to the callers.
-spec encode_ci_type(
    Opcode :: 0..3,
    Rd :: riscv_register(),
    Imm :: integer(),
    Funct3 :: 0..7
) -> binary().
encode_ci_type(Opcode, Rd, Imm, Funct3) ->
    RdNum = reg_to_num(Rd),
    % Extract immediate bits
    ImmMasked = Imm band 16#3F,
    Imm5 = (ImmMasked bsr 5) band 1,
    Imm4_0 = ImmMasked band 16#1F,
    Instr =
        (Funct3 bsl 13) bor
            (Imm5 bsl 12) bor
            (RdNum bsl 7) bor
            (Imm4_0 bsl 2) bor
            Opcode,
    % NOTE(review): restored stripped binary construction (16-bit, little-endian).
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% CSS-type instruction encoding (Compressed Stack Store format)
%%-----------------------------------------------------------------------------
%% CSS format: funct3 (3) | imm[5:0] (6) | rs2 (5) | op (2)
%% Bits:         15-13        12-7          6-2      1-0

-spec encode_css_type(
    Opcode :: 0..3,
    Rs2 :: riscv_register(),
    Imm :: integer(),
    Funct3 :: 0..7
) -> binary().
encode_css_type(Opcode, Rs2, Imm, Funct3) ->
    Rs2Num = reg_to_num(Rs2),
    % Extract immediate bits (typically scaled for word access)
    % The 6 bits are placed verbatim in bits 12-7; any permutation of offset
    % bits must already have been applied by the caller.
    ImmMasked = Imm band 16#3F,
    Instr =
        (Funct3 bsl 13) bor
            (ImmMasked bsl 7) bor
            (Rs2Num bsl 2) bor
            Opcode,
    % NOTE(review): the binary construction was lost in extraction; restored as
    % one 16-bit little-endian halfword - confirm against the upstream file.
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% CIW-type instruction encoding (Compressed Wide Immediate format)
%%-----------------------------------------------------------------------------
%% CIW format: funct3 (3) | imm[7:0] (8) | rd' (3) | op (2)
%% Bits:         15-13        12-5          4-2      1-0

%% Assemble a CIW-type halfword. `Rd' must belong to the compressed register
%% set (reg_to_c_num/1 raises otherwise); the 8 immediate bits are placed
%% verbatim in bits 12-5.
-spec encode_ciw_type(
    Opcode :: 0..3,
    Rd :: riscv_register(),
    Imm :: integer(),
    Funct3 :: 0..7
) -> binary().
encode_ciw_type(Opcode, Rd, Imm, Funct3) ->
    RdNum = reg_to_c_num(Rd),
    ImmMasked = Imm band 16#FF,
    Instr =
        (Funct3 bsl 13) bor
            (ImmMasked bsl 5) bor
            (RdNum bsl 2) bor
            Opcode,
    % NOTE(review): restored stripped binary construction (16-bit, little-endian).
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% CL-type instruction encoding (Compressed Load format)
%%-----------------------------------------------------------------------------
%% CL format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rd' (3) | op (2)
%% Bits:        15-13      12-10      9-7        6-5       4-2      1-0

%% Assemble a CL-type halfword using the word-load immediate layout.
%% Both registers must belong to the compressed register set.
-spec encode_cl_type(
    Opcode :: 0..3,
    Rd :: riscv_register(),
    Rs1 :: riscv_register(),
    Imm :: integer(),
    Funct3 :: 0..7
) -> binary().
encode_cl_type(Opcode, Rd, Rs1, Imm, Funct3) ->
    RdNum = reg_to_c_num(Rd),
    Rs1Num = reg_to_c_num(Rs1),
    % For LW: imm[5:3] goes to bits 12-10, imm[2] goes to bit 6, imm[6] goes to bit 5
    ImmMasked = Imm band 16#7F,
    Imm5_3 = (ImmMasked bsr 3) band 7,
    Imm2 = (ImmMasked bsr 2) band 1,
    Imm6 = (ImmMasked bsr 6) band 1,
    Instr =
        (Funct3 bsl 13) bor
            (Imm5_3 bsl 10) bor
            (Rs1Num bsl 7) bor
            (Imm2 bsl 6) bor
            (Imm6 bsl 5) bor
            (RdNum bsl 2) bor
            Opcode,
    % NOTE(review): restored stripped binary construction (16-bit, little-endian).
    <<Instr:16/little>>.
%%-----------------------------------------------------------------------------
%% CS-type instruction encoding (Compressed Store format)
%%-----------------------------------------------------------------------------
%% CS format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rs2' (3) | op (2)
%% Bits:        15-13      12-10      9-7        6-5       4-2       1-0

%% Assemble a CS-type halfword using the word-store immediate layout.
%% Both registers must belong to the compressed register set
%% (reg_to_c_num/1 raises otherwise).
-spec encode_cs_type(
    Opcode :: 0..3,
    Rs1 :: riscv_register(),
    Rs2 :: riscv_register(),
    Imm :: integer(),
    Funct3 :: 0..7
) -> binary().
encode_cs_type(Opcode, Rs1, Rs2, Imm, Funct3) ->
    Rs1Num = reg_to_c_num(Rs1),
    Rs2Num = reg_to_c_num(Rs2),
    % For SW: imm[5:3] goes to bits 12-10, imm[2] goes to bit 6, imm[6] goes to bit 5
    ImmMasked = Imm band 16#7F,
    Imm5_3 = (ImmMasked bsr 3) band 7,
    Imm2 = (ImmMasked bsr 2) band 1,
    Imm6 = (ImmMasked bsr 6) band 1,
    Instr =
        (Funct3 bsl 13) bor
            (Imm5_3 bsl 10) bor
            (Rs1Num bsl 7) bor
            (Imm2 bsl 6) bor
            (Imm6 bsl 5) bor
            (Rs2Num bsl 2) bor
            Opcode,
    % NOTE(review): the binary construction was lost in extraction; restored as
    % one 16-bit little-endian halfword - confirm against the upstream file.
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% CA-type instruction encoding (Compressed Arithmetic format)
%%-----------------------------------------------------------------------------
%% CA format: funct6 (6) | rd'/rs1' (3) | funct2 (2) | rs2' (3) | op (2)
%% Bits:        15-10        9-7           6-5          4-2       1-0

%% Assemble a CA-type halfword. Both registers must belong to the compressed
%% register set (reg_to_c_num/1 raises otherwise).
-spec encode_ca_type(
    Opcode :: 0..3,
    Rd :: riscv_register(),
    Rs2 :: riscv_register(),
    Funct2 :: 0..3,
    Funct6 :: 0..63
) -> binary().
encode_ca_type(Opcode, Rd, Rs2, Funct2, Funct6) ->
    RdNum = reg_to_c_num(Rd),
    Rs2Num = reg_to_c_num(Rs2),
    Instr =
        (Funct6 bsl 10) bor
            (RdNum bsl 7) bor
            (Funct2 bsl 5) bor
            (Rs2Num bsl 2) bor
            Opcode,
    % NOTE(review): restored stripped binary construction (16-bit, little-endian).
    <<Instr:16/little>>.
%%-----------------------------------------------------------------------------
%% CB-type instruction encoding (Compressed Branch format)
%%-----------------------------------------------------------------------------
%% CB format: funct3 (3) | offset (3) | rs1' (3) | offset (5) | op (2)
%% Bits:        15-13       12-10        9-7         6-2         1-0
%% Offset encoding: offset[8|4:3] -> bits [12|11:10],
%%                  offset[7:6|2:1|5] -> bits [6:5|4:3|2]

%% Assemble a CB-type halfword. `Rs1' must belong to the compressed register
%% set; the offset is truncated to 9 bits and bit 0 is dropped, so alignment
%% and range are checked by the callers.
-spec encode_cb_type(
    Opcode :: 0..3,
    Rs1 :: riscv_register(),
    Offset :: integer(),
    Funct3 :: 0..7
) -> binary().
encode_cb_type(Opcode, Rs1, Offset, Funct3) ->
    Rs1Num = reg_to_c_num(Rs1),
    % Extract offset bits: offset[8|4:3|7:6|2:1|5] -> bits [12|11:10|6:5|4:3|2]
    OffsetMasked = Offset band 16#1FF,
    Offset8 = (OffsetMasked bsr 8) band 1,
    Offset4_3 = (OffsetMasked bsr 3) band 3,
    Offset7_6 = (OffsetMasked bsr 6) band 3,
    Offset2_1 = (OffsetMasked bsr 1) band 3,
    Offset5 = (OffsetMasked bsr 5) band 1,
    Instr =
        (Funct3 bsl 13) bor
            (Offset8 bsl 12) bor
            (Offset4_3 bsl 10) bor
            (Rs1Num bsl 7) bor
            (Offset7_6 bsl 5) bor
            (Offset2_1 bsl 3) bor
            (Offset5 bsl 2) bor
            Opcode,
    % NOTE(review): the binary construction was lost in extraction; restored as
    % one 16-bit little-endian halfword - confirm against the upstream file.
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% CJ-type instruction encoding (Compressed Jump format)
%%-----------------------------------------------------------------------------
%% CJ format: funct3 (3) | jump target (11) | op (2)
%% Bits:        15-13          12-2           1-0
%% Target encoding: target[11|4|9:8|10|6|7|3:1|5] -> bits [12|11|10:9|8|7|6|5:3|2]

-spec encode_cj_type(Opcode :: 0..3, Offset :: integer(), Funct3 :: 0..7) -> binary().
encode_cj_type(Opcode, Offset, Funct3) ->
    % Extract offset bits: offset[11|4|9:8|10|6|7|3:1|5]
    % The offset is truncated to 12 bits and bit 0 is dropped; alignment and
    % range are checked by the callers.
    OffsetMasked = Offset band 16#FFF,
    Offset11 = (OffsetMasked bsr 11) band 1,
    Offset4 = (OffsetMasked bsr 4) band 1,
    Offset9_8 = (OffsetMasked bsr 8) band 3,
    Offset10 = (OffsetMasked bsr 10) band 1,
    Offset6 = (OffsetMasked bsr 6) band 1,
    Offset7 = (OffsetMasked bsr 7) band 1,
    Offset3_1 = (OffsetMasked bsr 1) band 7,
    Offset5 = (OffsetMasked bsr 5) band 1,
    % 11-bit jump-target field, later shifted into instruction bits 12-2
    OffsetBits =
        (Offset11 bsl 10) bor
            (Offset4 bsl 9) bor
            (Offset9_8 bsl 7) bor
            (Offset10 bsl 6) bor
            (Offset6 bsl 5) bor
            (Offset7 bsl 4) bor
            (Offset3_1 bsl 1) bor
            Offset5,
    Instr =
        (Funct3 bsl 13) bor
            (OffsetBits bsl 2) bor
            Opcode,
    % NOTE(review): the binary construction was lost in extraction; restored as
    % one 16-bit little-endian halfword - confirm against the upstream file.
    <<Instr:16/little>>.

%%-----------------------------------------------------------------------------
%% C Extension - Arithmetic and Logical Instructions
%%-----------------------------------------------------------------------------

%% C.ADD - Compressed Add
%% rd = rd + rs2 (both rd and rs2 are full 5-bit registers)
%% Format: CR-type
%% Encoding: funct4=1001 (0x9), op=10 (0x2)
-spec c_add(riscv_register(), riscv_register()) -> binary().
c_add(Rd, Rs2) ->
    encode_cr_type(16#2, Rd, Rs2, 16#9).

%% C.MV - Compressed Move (copy register)
%% rd = rs2 (both are full 5-bit registers)
%% Format: CR-type
%% Encoding: funct4=1000 (0x8), op=10 (0x2)
-spec c_mv(riscv_register(), riscv_register()) -> binary().
c_mv(Rd, Rs2) ->
    encode_cr_type(16#2, Rd, Rs2, 16#8).

%% C.SUB - Compressed Subtract
%% rd' = rd' - rs2' (both use 3-bit compressed register encoding)
%% Format: CA-type
%% Encoding: funct6=100011 (0x23), funct2=00, op=01 (0x1)
-spec c_sub(riscv_register(), riscv_register()) -> binary().
c_sub(Rd, Rs2) ->
    encode_ca_type(16#1, Rd, Rs2, 16#0, 16#23).
%% C.AND - Compressed Bitwise AND
%% rd' = rd' & rs2' (3-bit compressed register fields)
%% Format: CA-type
%% Encoding: funct6=100011 (0x23), funct2=11, op=01 (0x1)
-spec c_and(riscv_register(), riscv_register()) -> binary().
c_and(Rd, Rs2) ->
    Quadrant = 2#01,
    Funct2 = 2#11,
    Funct6 = 2#100011,
    encode_ca_type(Quadrant, Rd, Rs2, Funct2, Funct6).

%% C.OR - Compressed Bitwise OR
%% rd' = rd' | rs2' (3-bit compressed register fields)
%% Format: CA-type
%% Encoding: funct6=100011 (0x23), funct2=10, op=01 (0x1)
-spec c_or(riscv_register(), riscv_register()) -> binary().
c_or(Rd, Rs2) ->
    Quadrant = 2#01,
    Funct2 = 2#10,
    Funct6 = 2#100011,
    encode_ca_type(Quadrant, Rd, Rs2, Funct2, Funct6).

%% C.XOR - Compressed Bitwise XOR
%% rd' = rd' ^ rs2' (3-bit compressed register fields)
%% Format: CA-type
%% Encoding: funct6=100011 (0x23), funct2=01, op=01 (0x1)
-spec c_xor(riscv_register(), riscv_register()) -> binary().
c_xor(Rd, Rs2) ->
    Quadrant = 2#01,
    Funct2 = 2#01,
    Funct6 = 2#100011,
    encode_ca_type(Quadrant, Rd, Rs2, Funct2, Funct6).

%%-----------------------------------------------------------------------------
%% C Extension - Immediate Instructions
%%-----------------------------------------------------------------------------

%% C.ADDI - Compressed Add Immediate
%% rd = rd + imm, with rd a full 5-bit register other than zero and imm a
%% signed 6-bit value
%% Format: CI-type
%% Encoding: funct3=000, op=01 (0x1)
-spec c_addi(riscv_register(), integer()) -> binary().
c_addi(zero, _Imm) ->
    % x0 is rejected whatever the immediate is
    error({invalid_compressed_instruction, c_addi, 'rd cannot be zero'});
c_addi(Rd, Imm) when Imm >= -32, Imm =< 31 ->
    encode_ci_type(16#1, Rd, Imm, 16#0);
c_addi(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -32, 31}).

%% C.ANDI - Compressed AND Immediate
%% rd' = rd' & imm (rd' uses 3-bit encoding, imm is 6-bit signed)
%% Format: CB-type (with special encoding)
%% Encoding: funct3=100, imm[5]=bit12, funct2=10, imm[4:0]=bits 6:2, op=01
-spec c_andi(riscv_register(), integer()) -> binary().
c_andi(Rd, Imm) when Imm >= -32, Imm =< 31 ->
    % Rd must belong to the compressed register set (reg_to_c_num/1 raises
    % otherwise).
    RdNum = reg_to_c_num(Rd),
    ImmMasked = Imm band 16#3F,
    Imm5 = (ImmMasked bsr 5) band 1,
    Imm4_0 = ImmMasked band 16#1F,
    % funct3=100 | imm[5] | funct2=10 | rd' | imm[4:0] | op=01
    Instr =
        (16#4 bsl 13) bor
            (Imm5 bsl 12) bor
            (16#2 bsl 10) bor
            (RdNum bsl 7) bor
            (Imm4_0 bsl 2) bor
            16#1,
    % NOTE(review): the binary construction was lost in extraction; restored as
    % one 16-bit little-endian halfword - confirm against the upstream file.
    <<Instr:16/little>>;
c_andi(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -32, 31}).

%% C.LI - Compressed Load Immediate
%% rd = imm (rd is full 5-bit register, imm is 6-bit signed)
%% Format: CI-type
%% Encoding: funct3=010, op=01 (0x1)
%% NOTE(review): unlike c_addi/2 this does not reject rd = zero; li/2 only
%% reaches it with rd =/= zero - confirm no other caller passes zero.
-spec c_li(riscv_register(), integer()) -> binary().
c_li(Rd, Imm) when Imm >= -32, Imm =< 31 ->
    encode_ci_type(16#1, Rd, Imm, 16#2);
c_li(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -32, 31}).

%% C.LUI - Compressed Load Upper Immediate
%% rd = imm << 12 (rd is full 5-bit register, imm is 6-bit signed non-zero)
%% rd may be neither zero nor sp: with rd = sp the same encoding denotes
%% C.ADDI16SP (see c_addi16sp/1).
%% Format: CI-type
%% Encoding: funct3=011, op=01 (0x1)
-spec c_lui(riscv_register(), integer()) -> binary().
c_lui(Rd, Imm) when Imm >= -32, Imm =< 31, Imm =/= 0, Rd =/= zero, Rd =/= sp ->
    encode_ci_type(16#1, Rd, Imm, 16#3);
c_lui(Rd, _Imm) when Rd =:= zero; Rd =:= sp ->
    error({invalid_compressed_instruction, c_lui, 'rd cannot be zero or sp'});
c_lui(_Rd, 0) ->
    error({invalid_compressed_instruction, c_lui, 'immediate cannot be zero'});
c_lui(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, -32, 31}).

%% C.ADDI16SP - Compressed Add Immediate to SP (scaled by 16)
%% sp = sp + imm (imm is 10-bit signed, must be multiple of 16, non-zero)
%% Format: CI-type (special encoding)
%% Encoding: funct3=011, rd/rs1=sp (x2), op=01
-spec c_addi16sp(integer()) -> binary().
c_addi16sp(Imm) when (Imm rem 16) =:= 0, Imm =/= 0, Imm >= -512, Imm =< 496 ->
    % The CI immediate field carries nzimm[9|4|6|8:7|5]: nzimm[9] ends up in
    % instruction bit 12 and the remaining five bits in bits 6:2.
    Bit9 = (Imm bsr 9) band 1,
    Bit4 = (Imm bsr 4) band 1,
    Bit6 = (Imm bsr 6) band 1,
    Bits8_7 = (Imm bsr 7) band 3,
    Bit5 = (Imm bsr 5) band 1,
    Field =
        (Bit9 bsl 5) bor
            (Bit4 bsl 4) bor
            (Bit6 bsl 3) bor
            (Bits8_7 bsl 1) bor
            Bit5,
    encode_ci_type(16#1, sp, Field, 16#3);
c_addi16sp(0) ->
    error({invalid_compressed_instruction, c_addi16sp, 'immediate cannot be zero'});
c_addi16sp(Imm) when (Imm rem 16) =/= 0 ->
    error({immediate_not_aligned, Imm, 16});
c_addi16sp(Imm) ->
    error({immediate_out_of_range, Imm, -512, 496}).

%% C.ADDI4SPN - Compressed Add Immediate (scaled by 4) to SP, result in rd'
%% rd' = sp + imm, where imm is a non-zero unsigned multiple of 4 up to 1020
%% and rd' is a register of the compressed set
%% Format: CIW-type
%% Encoding: funct3=000, op=00 (0x0)
-spec c_addi4spn(riscv_register(), integer()) -> binary().
c_addi4spn(Rd, Imm) when (Imm rem 4) =:= 0, Imm >= 4, Imm =< 1020 ->
    % The CIW immediate field carries nzuimm[5:4|9:6|2|3], which lands in
    % instruction bits [12:11|10:7|6|5].
    Bits5_4 = (Imm bsr 4) band 3,
    Bits9_6 = (Imm bsr 6) band 15,
    Bit2 = (Imm bsr 2) band 1,
    Bit3 = (Imm bsr 3) band 1,
    Field =
        (Bits5_4 bsl 6) bor
            (Bits9_6 bsl 2) bor
            (Bit2 bsl 1) bor
            Bit3,
    encode_ciw_type(16#0, Rd, Field, 16#0);
c_addi4spn(_Rd, Imm) when Imm =:= 0 ->
    error({invalid_compressed_instruction, c_addi4spn, 'immediate cannot be zero'});
c_addi4spn(_Rd, Imm) when (Imm rem 4) =/= 0 ->
    error({immediate_not_aligned, Imm, 4});
c_addi4spn(_Rd, Imm) ->
    error({immediate_out_of_range, Imm, 4, 1020}).

%%-----------------------------------------------------------------------------
%% C Extension - Shift Instructions
%%-----------------------------------------------------------------------------

%% C.SLLI - Compressed Shift Left Logical Immediate
%% rd = rd << shamt (rd is full 5-bit register, shamt is 6-bit unsigned)
%% Format: CI-type
%% Encoding: funct3=000, op=10 (0x2)
%% rd = zero is rejected; a shift amount outside 0..63 raises
%% {shift_amount_out_of_range, Shamt, 0, 63}.
%% NOTE(review): the RISC-V C extension requires shamt[5] = 0 on RV32C; if this
%% module only targets RV32, confirm whether 32..63 should be accepted here.
-spec c_slli(riscv_register(), 0..63) -> binary().
c_slli(Rd, Shamt) when Shamt >= 0, Shamt =< 63, Rd =/= zero ->
    encode_ci_type(16#2, Rd, Shamt, 16#0);
c_slli(zero, _Shamt) ->
    error({invalid_compressed_instruction, c_slli, 'rd cannot be zero'});
c_slli(_Rd, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 63}).

%% C.SRLI - Compressed Shift Right Logical Immediate
%% rd' = rd' >> shamt (rd' uses 3-bit encoding, shamt is 6-bit unsigned)
%% Format: CB-type (with special encoding)
%% Encoding: funct3=100, shamt[5]=bit12, funct2=00, shamt[4:0]=bits 6:2, op=01
%% A shift amount outside 0..63 raises {shift_amount_out_of_range, Shamt, 0, 63}.
%% NOTE(review): same RV32C shamt[5] = 0 caveat as c_slli/2 - confirm.
-spec c_srli(riscv_register(), 0..63) -> binary().
c_srli(Rd, Shamt) when Shamt >= 0, Shamt =< 63 ->
    % The register number lands in bits 9:7, i.e. the 3-bit rd' field.
    RdNum = reg_to_c_num(Rd),
    Shamt5 = (Shamt bsr 5) band 1,
    Shamt4_0 = Shamt band 16#1F,
    % Layout: funct3=100 | shamt[5] | funct2=00 | rd' | shamt[4:0] | op=01
    Instr =
        (16#4 bsl 13) bor
            (Shamt5 bsl 12) bor
            (16#0 bsl 10) bor
            (RdNum bsl 7) bor
            (Shamt4_0 bsl 2) bor
            16#1,
    % Emit the 16-bit instruction word, least significant byte first.
    <<Instr:16/little>>;
c_srli(_Rd, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 63}).

%% C.SRAI - Compressed Shift Right Arithmetic Immediate
%% rd' = rd' >> shamt (sign-extend, rd' uses 3-bit encoding, shamt is 6-bit unsigned)
%% Format: CB-type (with special encoding)
%% Encoding: funct3=100, shamt[5]=bit12, funct2=01, shamt[4:0]=bits 6:2, op=01
-spec c_srai(riscv_register(), 0..63) -> binary().
c_srai(Rd, Shamt) when Shamt >= 0, Shamt =< 63 ->
    % The register number lands in bits 9:7, i.e. the 3-bit rd' field.
    RdNum = reg_to_c_num(Rd),
    Shamt5 = (Shamt bsr 5) band 1,
    Shamt4_0 = Shamt band 16#1F,
    % Layout: funct3=100 | shamt[5] | funct2=01 | rd' | shamt[4:0] | op=01
    Instr =
        (16#4 bsl 13) bor
            (Shamt5 bsl 12) bor
            (16#1 bsl 10) bor
            (RdNum bsl 7) bor
            (Shamt4_0 bsl 2) bor
            16#1,
    % Emit the 16-bit instruction word, least significant byte first.
    <<Instr:16/little>>;
c_srai(_Rd, Shamt) ->
    error({shift_amount_out_of_range, Shamt, 0, 63}).

%%-----------------------------------------------------------------------------
%% C Extension - Load/Store Instructions
%%-----------------------------------------------------------------------------

%% C.LW - Compressed Load Word
%% rd' = mem[rs1' + offset] (both use 3-bit encoding, offset is 7-bit unsigned, multiple of 4)
%% Format: CL-type
%% Encoding: funct3=010, op=00 (0x0)
%% A misaligned offset raises {offset_not_aligned, Offset, 4}; an aligned
%% offset outside 0..124 raises {offset_out_of_range, Offset, 0, 124}.
-spec c_lw(riscv_register(), {riscv_register(), integer()}) -> binary().
c_lw(Rd, {Rs1, Offset}) when
    Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0
->
    encode_cl_type(16#0, Rd, Rs1, Offset, 16#2);
c_lw(_Rd, {_Rs1, Offset}) when (Offset rem 4) =/= 0 ->
    error({offset_not_aligned, Offset, 4});
c_lw(_Rd, {_Rs1, Offset}) ->
    error({offset_out_of_range, Offset, 0, 124}).

%% C.SW - Compressed Store Word
%% mem[rs1' + offset] = rs2' (both use 3-bit encoding, offset is 7-bit unsigned, multiple of 4)
%% Format: CS-type
%% Encoding: funct3=110, op=00 (0x0)
%% Same offset validation and error terms as c_lw/2.
-spec c_sw(riscv_register(), {riscv_register(), integer()}) -> binary().
c_sw(Rs2, {Rs1, Offset}) when
    Offset >= 0, Offset =< 124, (Offset rem 4) =:= 0
->
    encode_cs_type(16#0, Rs1, Rs2, Offset, 16#6);
c_sw(_Rs2, {_Rs1, Offset}) when (Offset rem 4) =/= 0 ->
    error({offset_not_aligned, Offset, 4});
c_sw(_Rs2, {_Rs1, Offset}) ->
    error({offset_out_of_range, Offset, 0, 124}).

%% C.LWSP - Compressed Load Word from Stack Pointer
%% rd = mem[sp + offset] (rd is full 5-bit register, offset is 8-bit unsigned, multiple of 4)
%% Format: CI-type (special encoding)
%% Encoding: funct3=010, op=10 (0x2)
-spec c_lwsp(riscv_register(), integer()) -> binary().
c_lwsp(Rd, Offset) when
    Rd =/= zero, (Offset rem 4) =:= 0, Offset >= 0, Offset =< 252
->
    % Bit scramble: offset[5|4:2|7:6] -> bits [12|6:4|3:2], packed here into
    % the 6-bit immediate handed to encode_ci_type/4.
    Uimm = Offset band 16#FF,
    Packed =
        (((Uimm bsr 5) band 1) bsl 5) bor
            (((Uimm bsr 2) band 7) bsl 2) bor
            ((Uimm bsr 6) band 3),
    encode_ci_type(16#2, Rd, Packed, 16#2);
c_lwsp(zero, _Offset) ->
    error({invalid_compressed_instruction, c_lwsp, 'rd cannot be zero'});
c_lwsp(_Rd, Offset) when (Offset rem 4) =/= 0 ->
    error({offset_not_aligned, Offset, 4});
c_lwsp(_Rd, Offset) ->
    error({offset_out_of_range, Offset, 0, 252}).

%% C.SWSP - store a word relative to the stack pointer.
%% mem[sp + offset] = rs2, where rs2 is any register and offset is an unsigned
%% multiple of 4 in 0..252.
%% Format: CSS-type
%% Encoding: funct3=110, op=10 (0x2)
%% Errors: {offset_not_aligned, Offset, 4} or {offset_out_of_range, Offset, 0, 252}.
-spec c_swsp(riscv_register(), integer()) -> binary().
c_swsp(Rs2, Offset) when
    (Offset rem 4) =:= 0, Offset >= 0, Offset =< 252
->
    % Bit scramble: offset[5:2|7:6] -> bits [12:9|8:7], packed here into the
    % 6-bit immediate handed to encode_css_type/4.
    Uimm = Offset band 16#FF,
    Packed = (((Uimm bsr 2) band 15) bsl 2) bor ((Uimm bsr 6) band 3),
    encode_css_type(16#2, Rs2, Packed, 16#6);
c_swsp(_Rs2, Offset) when (Offset rem 4) =/= 0 ->
    error({offset_not_aligned, Offset, 4});
c_swsp(_Rs2, Offset) ->
    error({offset_out_of_range, Offset, 0, 252}).

%%-----------------------------------------------------------------------------
%% C Extension - Branch and Jump Instructions
%%-----------------------------------------------------------------------------

%% C.BEQZ - branch when rs1' is zero.
%% if (rs1' == 0) pc += offset; rs1' uses the 3-bit register encoding and
%% offset is an even signed value in -256..254.
%% Format: CB-type
%% Encoding: funct3=110, op=01 (0x1)
-spec c_beqz(riscv_register(), integer()) -> binary().
c_beqz(Rs1, Disp) when
    (Disp rem 2) =:= 0 andalso Disp >= -256 andalso Disp =< 254
->
    encode_cb_type(16#1, Rs1, Disp, 16#6);
c_beqz(_Rs1, Disp) when (Disp rem 2) =/= 0 ->
    error({offset_not_aligned, Disp, 2});
c_beqz(_Rs1, Disp) ->
    error({offset_out_of_range, Disp, -256, 254}).

%% C.BNEZ - branch when rs1' is non-zero.
%% if (rs1' != 0) pc += offset; rs1' uses the 3-bit register encoding and
%% offset is an even signed value in -256..254.
%% Format: CB-type
%% Encoding: funct3=111, op=01 (0x1)
-spec c_bnez(riscv_register(), integer()) -> binary().
c_bnez(Rs1, Disp) when
    (Disp rem 2) =:= 0 andalso Disp >= -256 andalso Disp =< 254
->
    encode_cb_type(16#1, Rs1, Disp, 16#7);
c_bnez(_Rs1, Disp) when (Disp rem 2) =/= 0 ->
    error({offset_not_aligned, Disp, 2});
c_bnez(_Rs1, Disp) ->
    error({offset_out_of_range, Disp, -256, 254}).

%% C.J - unconditional pc-relative jump.
%% pc += offset, where offset is an even signed value in -2048..2046.
%% Format: CJ-type
%% Encoding: funct3=101, op=01 (0x1)
-spec c_j(integer()) -> binary().
c_j(Disp) when
    (Disp rem 2) =:= 0 andalso Disp >= -2048 andalso Disp =< 2046
->
    encode_cj_type(16#1, Disp, 16#5);
c_j(Disp) when (Disp rem 2) =/= 0 ->
    error({offset_not_aligned, Disp, 2});
c_j(Disp) ->
    error({offset_out_of_range, Disp, -2048, 2046}).

%% C.JAL - pc-relative jump and link (RV32C only, rd is implicitly ra).
%% ra = pc + 2; pc += offset, where offset is an even signed value in
%% -2048..2046.
%% Format: CJ-type
%% Encoding: funct3=001 (0x1), op=01 (0x1)
-spec c_jal(integer()) -> binary().
c_jal(Disp) when
    (Disp rem 2) =:= 0 andalso Disp >= -2048 andalso Disp =< 2046
->
    encode_cj_type(16#1, Disp, 16#1);
c_jal(Disp) when (Disp rem 2) =/= 0 ->
    error({offset_not_aligned, Disp, 2});
c_jal(Disp) ->
    error({offset_out_of_range, Disp, -2048, 2046}).
+ +%% C.JR - Compressed Jump Register +%% pc = rs1 (rs1 is full 5-bit register, must not be zero) +%% Format: CR-type +%% Encoding: funct4=1000 (0x8), rs2=x0, op=10 (0x2) +-spec c_jr(riscv_register()) -> binary(). +c_jr(Rs1) when Rs1 =/= zero -> + encode_cr_type(16#2, Rs1, zero, 16#8); +c_jr(zero) -> + error({invalid_compressed_instruction, c_jr, 'rs1 cannot be zero'}). + +%% C.JALR - Compressed Jump and Link Register +%% ra = pc + 2; pc = rs1 (rs1 is full 5-bit register, must not be zero) +%% Format: CR-type +%% Encoding: funct4=1001 (0x9), rs2=x0, op=10 (0x2) +-spec c_jalr(riscv_register()) -> binary(). +c_jalr(Rs1) when Rs1 =/= zero -> + encode_cr_type(16#2, Rs1, zero, 16#9); +c_jalr(zero) -> + error({invalid_compressed_instruction, c_jalr, 'rs1 cannot be zero'}). + +%% C.EBREAK - Compressed Environment Breakpoint +%% Causes a breakpoint exception to be raised +%% Format: CR-type +%% Encoding: funct4=1001 (0x9), rs1/rd=x0, rs2=x0, op=10 (0x2) +-spec c_ebreak() -> binary(). +c_ebreak() -> + encode_cr_type(16#2, zero, zero, 16#9). + +%%----------------------------------------------------------------------------- +%% C Extension - Pseudo-instructions +%%----------------------------------------------------------------------------- + +%% C.NOP - Compressed No Operation +%% Expands to: c.addi x0, 0 +%% Format: CI-type +%% Encoding: funct3=000, rd/rs1=x0, imm=0, op=01 (0x1) +-spec c_nop() -> binary(). +c_nop() -> + encode_ci_type(16#1, zero, 0, 16#0). diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl index 5f54e6e512..9ef6c6441f 100644 --- a/libs/jit/src/jit_x86_64.erl +++ b/libs/jit/src/jit_x86_64.erl @@ -37,6 +37,8 @@ call_primitive_with_cp/3, return_if_not_equal_to_ctx/2, jump_to_label/2, + jump_to_continuation/2, + jump_to_offset/2, if_block/3, if_else_block/4, shift_right/3, @@ -70,10 +72,26 @@ add_label/3 ]). +-ifdef(JIT_DWARF). +-export([ + dwarf_opcode/2, + dwarf_label/2, + dwarf_function/3, + dwarf_line/2, + dwarf_ctx_register/0 +]). 
+-endif. + +-compile([warnings_as_errors]). + -include_lib("jit.hrl"). -include("primitives.hrl"). +-ifdef(JIT_DWARF). +-include("jit_dwarf.hrl"). +-endif. + -define(ASSERT(Expr), true = Expr). %% System V X86_64 calling conventions which we apply here. @@ -114,7 +132,8 @@ branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}], available_regs :: [x86_64_register()], used_regs :: [x86_64_register()], - labels :: [{integer() | reference(), integer()}] + labels :: [{integer() | reference(), integer()}], + variant :: non_neg_integer() }). -type state() :: #state{}. @@ -138,7 +157,8 @@ | {'(int)', maybe_free_x86_64_register(), '!=', x86_64_register() | integer()} | {'(bool)', maybe_free_x86_64_register(), '==', false} | {'(bool)', maybe_free_x86_64_register(), '!=', false} - | {maybe_free_x86_64_register(), '&', non_neg_integer(), '!=', integer()}. + | {maybe_free_x86_64_register(), '&', non_neg_integer(), '!=', integer()} + | {{free, x86_64_register()}, '==', {free, x86_64_register()}}. -define(WORD_SIZE, 8). @@ -156,6 +176,13 @@ -define(X_REG(N), {16#30 + (N * ?WORD_SIZE), ?CTX_REG}). -define(CP, {16#B8, ?CTX_REG}). -define(FP_REGS, {16#C0, ?CTX_REG}). +-define(FP_REG_OFFSET(State, F), + (F * + case (State)#state.variant band ?JIT_VARIANT_FLOAT32 of + 0 -> 8; + _ -> 4 + end) +). -define(BS, {16#C8, ?CTX_REG}). -define(BS_OFFSET, {16#D0, ?CTX_REG}). -define(JITSTATE_MODULE, {0, ?JITSTATE_REG}). @@ -173,6 +200,8 @@ -define(PARAMETER_REGS, [rdi, rsi, rdx, rcx, r8, r9]). -define(SCRATCH_REGS, [rdi, rsi, rdx, rcx, r8, r9, r10, r11]). +-include("jit_backend_dwarf_impl.hrl"). + %%----------------------------------------------------------------------------- %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e. %% sizeof(uintptr_t) @@ -201,7 +230,7 @@ word_size() -> ?WORD_SIZE. %% @return New backend state %%----------------------------------------------------------------------------- -spec new(any(), module(), stream()) -> state(). 
-new(_Variant, StreamModule, Stream) -> +new(Variant, StreamModule, Stream) -> #state{ stream_module = StreamModule, stream = Stream, @@ -209,7 +238,8 @@ new(_Variant, StreamModule, Stream) -> offset = StreamModule:offset(Stream), available_regs = ?AVAILABLE_REGS, used_regs = [], - labels = [] + labels = [], + variant = Variant }. %%----------------------------------------------------------------------------- @@ -513,6 +543,49 @@ jump_to_label( State#state{stream = Stream1, branches = [Reloc | AccBranches]} end. +jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) -> + Offset = StreamModule:offset(Stream0), + RelOffset = TargetOffset - Offset, + I1 = jit_x86_64_asm:jmp(RelOffset), + Stream1 = StreamModule:append(Stream0, I1), + State#state{stream = Stream1}. + +%%----------------------------------------------------------------------------- +%% @doc Jump to a continuation address stored in a register. +%% This is used for optimized intra-module returns. 
+%% @end +%% @param State current backend state +%% @param OffsetReg register containing the continuation offset +%% @return Updated backend state +%%----------------------------------------------------------------------------- +jump_to_continuation( + #state{ + stream_module = StreamModule, + stream = Stream0, + offset = BaseOffset, + available_regs = [TempReg | _] + } = State, + {free, OffsetReg} +) -> + % Calculate absolute address: native_code_base + target_offset + % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset) + % Similar to aarch64 approach but using leaq for PC-relative addressing + CurrentStreamOffset = StreamModule:offset(Stream0), + NetOffset = BaseOffset - CurrentStreamOffset - 7, + + % Get native code base address using PC-relative lea: leaq NetOffset(%rip), TempReg + I1 = jit_x86_64_asm:leaq({rip, NetOffset}, TempReg), + 7 = byte_size(I1), + % Add target offset to get final absolute address: addq OffsetReg, TempReg + I2 = jit_x86_64_asm:addq(OffsetReg, TempReg), + % Indirect jump to the calculated absolute address: jmpq *TempReg + I3 = jit_x86_64_asm:jmpq({TempReg}), + + Code = <>, + Stream1 = StreamModule:append(Stream0, Code), + % Free all registers since this is a tail jump + State#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}. + %%----------------------------------------------------------------------------- %% @doc Emit an if block, i.e. emit a test of a condition and conditionnally %% execute a block. 
@@ -722,6 +795,14 @@ if_block_cond0( {RelocJZOffset, I3} = jit_x86_64_asm:jnz_rel8(1), State1 = if_block_free_reg(RegOrTuple, State0), {State1, <>, byte_size(I1) + byte_size(I2) + RelocJZOffset}; +if_block_cond0(State0, {{free, Reg1}, '==', {free, Reg2}}) -> + % Compare two free registers + I1 = jit_x86_64_asm:cmpq(Reg2, Reg1), + {RelocJNZOffset, I2} = jit_x86_64_asm:jnz_rel8(1), + % Free both registers + State1 = if_block_free_reg({free, Reg1}, State0), + State2 = if_block_free_reg({free, Reg2}, State1), + {State2, <>, byte_size(I1) + RelocJNZOffset}; if_block_cond0( State0, {'(int)', RegOrTuple, '==', Val} @@ -822,12 +903,30 @@ merge_used_regs(State, []) -> %% @param Shift number of bits to shift %% @return new state %%----------------------------------------------------------------------------- -shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when +-spec shift_right(#state{}, maybe_free_x86_64_register(), non_neg_integer()) -> + {#state{}, x86_64_register()}. +shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when ?IS_GPR(Reg) andalso is_integer(Shift) -> I = jit_x86_64_asm:shrq(Shift, Reg), Stream1 = StreamModule:append(Stream0, I), - State#state{stream = Stream1}. + {State#state{stream = Stream1}, Reg}; +shift_right( + #state{ + stream_module = StreamModule, + available_regs = [ResultReg | T], + used_regs = UR, + stream = Stream0 + } = State, + Reg, + Shift +) when + ?IS_GPR(Reg) andalso is_integer(Shift) +-> + I1 = jit_x86_64_asm:movq(Reg, ResultReg), + I2 = jit_x86_64_asm:shrq(Shift, ResultReg), + Stream1 = StreamModule:append(Stream0, <>), + {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}. 
%%----------------------------------------------------------------------------- %% @doc Emit a shift register left by a fixed number of bits, effectively @@ -1248,7 +1347,7 @@ move_to_vm_register( ) when is_atom(Reg) -> I1 = jit_x86_64_asm:movq({8, Reg}, Reg), I2 = jit_x86_64_asm:movq(?FP_REGS, Temp), - I3 = jit_x86_64_asm:movq(Reg, {F * 8, Temp}), + I3 = jit_x86_64_asm:movq(Reg, {?FP_REG_OFFSET(State0, F), Temp}), Code = <>, Stream1 = StreamModule:append(Stream0, Code), State1 = free_native_register(State0, Reg), @@ -1562,7 +1661,19 @@ move_to_array_element( Stream1 = StreamModule:append(Stream0, I1), State#state{stream = Stream1}. --spec move_to_native_register(state(), value()) -> {state(), x86_64_register()}. +-spec move_to_native_register(state(), value() | cp) -> {state(), x86_64_register()}. +move_to_native_register( + #state{ + stream_module = StreamModule, + stream = Stream0, + available_regs = [Reg | AvailT], + used_regs = Used + } = State, + cp +) -> + I1 = jit_x86_64_asm:movq(?CP, Reg), + Stream1 = StreamModule:append(Stream0, I1), + {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg}; move_to_native_register(State, Reg) when is_atom(Reg) -> {State, Reg}; move_to_native_register( @@ -1961,3 +2072,14 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) -spec add_label(state(), integer() | reference(), integer()) -> state(). add_label(#state{labels = Labels} = State, Label, Offset) -> State#state{labels = [{Label, Offset} | Labels]}. + +-ifdef(JIT_DWARF). +%%----------------------------------------------------------------------------- +%% @doc Return the DWARF register number for the ctx parameter +%% @returns The DWARF register number where ctx is passed (rdi in x86_64) +%% @end +%%----------------------------------------------------------------------------- +-spec dwarf_ctx_register() -> non_neg_integer(). +dwarf_ctx_register() -> + ?DWARF_RDI_REG_X86_64. +-endif. 
diff --git a/src/libAtomVM/CMakeLists.txt b/src/libAtomVM/CMakeLists.txt index 2a5c58bd05..fec49b6bdf 100644 --- a/src/libAtomVM/CMakeLists.txt +++ b/src/libAtomVM/CMakeLists.txt @@ -179,6 +179,9 @@ endif() if (NOT AVM_DISABLE_JIT AND NOT AVM_ENABLE_PRECOMPILED) target_compile_definitions(libAtomVM PUBLIC AVM_NO_EMU) endif() +if (AVM_DISABLE_JIT_DWARF OR AVM_DISABLE_JIT) + target_compile_definitions(libAtomVM PUBLIC AVM_NO_JIT_DWARF) +endif() if(HAVE_PLATFORM_SMP_H) target_compile_definitions(libAtomVM PUBLIC HAVE_PLATFORM_SMP_H) diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def index 04aff1f840..db7914438b 100644 --- a/src/libAtomVM/defaultatoms.def +++ b/src/libAtomVM/defaultatoms.def @@ -208,3 +208,5 @@ X(CODE_SERVER_ATOM, "\xB", "code_server") X(LOAD_ATOM, "\x4", "load") X(JIT_X86_64_ATOM, "\xA", "jit_x86_64") X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64") +X(JIT_ARMV6M_ATOM, "\xA", "jit_armv6m") +X(JIT_RISCV32_ATOM, "\xB", "jit_riscv32") diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c index 1d63f4b836..fb5014df87 100644 --- a/src/libAtomVM/jit.c +++ b/src/libAtomVM/jit.c @@ -38,8 +38,139 @@ #include #include +#ifndef AVM_NO_JIT_DWARF +#include +#include + +#if TERM_BYTES == 4 +// ELF32 structures +typedef struct +{ + unsigned char e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint32_t e_entry; + uint32_t e_phoff; + uint32_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Elf_Ehdr; + +typedef struct +{ + uint32_t sh_name; + uint32_t sh_type; + uint32_t sh_flags; + uint32_t sh_addr; + uint32_t sh_offset; + uint32_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint32_t sh_addralign; + uint32_t sh_entsize; +} Elf_Shdr; + +typedef struct +{ + uint32_t st_name; + uint32_t st_value; + uint32_t st_size; + unsigned char st_info; + unsigned char st_other; + uint16_t st_shndx; +} Elf_Sym; + 
+typedef struct +{ + uint32_t p_type; + uint32_t p_offset; + uint32_t p_vaddr; + uint32_t p_paddr; + uint32_t p_filesz; + uint32_t p_memsz; + uint32_t p_flags; + uint32_t p_align; +} Elf_Phdr; +#elif TERM_BYTES == 8 +// ELF64 structures +typedef struct +{ + unsigned char e_ident[16]; + uint16_t e_type; + uint16_t e_machine; + uint32_t e_version; + uint64_t e_entry; + uint64_t e_phoff; + uint64_t e_shoff; + uint32_t e_flags; + uint16_t e_ehsize; + uint16_t e_phentsize; + uint16_t e_phnum; + uint16_t e_shentsize; + uint16_t e_shnum; + uint16_t e_shstrndx; +} Elf_Ehdr; + +typedef struct +{ + uint32_t sh_name; + uint32_t sh_type; + uint64_t sh_flags; + uint64_t sh_addr; + uint64_t sh_offset; + uint64_t sh_size; + uint32_t sh_link; + uint32_t sh_info; + uint64_t sh_addralign; + uint64_t sh_entsize; +} Elf_Shdr; + +typedef struct +{ + uint32_t st_name; + unsigned char st_info; + unsigned char st_other; + uint16_t st_shndx; + uint64_t st_value; + uint64_t st_size; +} Elf_Sym; + +typedef struct +{ + uint32_t p_type; + uint32_t p_flags; + uint64_t p_offset; + uint64_t p_vaddr; + uint64_t p_paddr; + uint64_t p_filesz; + uint64_t p_memsz; + uint64_t p_align; +} Elf_Phdr; +#else +#error TERM_BYTES should be 4 or 8 +#endif + +// ELF constants +#define SHT_SYMTAB 2 +#define SHT_STRTAB 3 +#define STT_FUNC 2 +#define STB_GLOBAL 1 +#define PT_LOAD 1 +#define PF_X 1 // Execute +#define PF_R 4 // Read + +// ELF symbol type extraction +#define ELF_ST_TYPE(info) ((info) & 0xf) + +#endif -// #define ENABLE_TRACE +#define ENABLE_TRACE #include "trace.h" // Verify matching atom index in default_atoms.hrl @@ -83,10 +214,44 @@ _Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_aarch64.erl"); _Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_aarch64.erl"); _Static_assert(offsetof(JITState, remaining_reductions) == 0x10, 
"jit_state->remaining_reductions is 0x10 in jit/src/jit_aarch64.erl"); +#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M +_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in jit/src/jit_armv6m.erl"); + +_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_armv6m.erl"); +_Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_armv6m.erl"); + +_Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); + +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 +_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in jit/src/jit_riscv32.erl"); + +_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_riscv32.erl"); +_Static_assert(offsetof(JITState, 
remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_riscv32.erl"); + +_Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits"); + #else #error Unknown jit target #endif +#ifdef AVM_USE_SINGLE_PRECISION +_Static_assert(sizeof(avm_float_t) == 0x4, "sizeof(avm_float_t) is 0x4 for single precision"); +#else +_Static_assert(sizeof(avm_float_t) == 0x8, "sizeof(avm_float_t) is 0x8 for double precision"); +#endif + #define PROCESS_MAYBE_TRAP_RETURN_VALUE(return_value, offset) \ if (term_is_invalid_term(return_value)) { \ if (UNLIKELY(!context_get_flags(ctx, Trap))) { \ @@ -124,7 +289,7 @@ static void destroy_extended_registers(Context *ctx, unsigned int live) static void jit_trim_live_regs(Context *ctx, uint32_t live) { - TRACE("jit_trim_live_regs: ctx->process_id = %d, live = %d\n", ctx->process_id, live); + TRACE("jit_trim_live_regs: ctx->process_id = %" PRId32 ", live = %" PRIu32 "\n", ctx->process_id, live); if (UNLIKELY(!list_is_empty(&ctx->extended_x_regs))) { destroy_extended_registers(ctx, live); } @@ -164,8 +329,8 @@ static Context *jit_return(Context *ctx, JITState *jit_state) static Context *jit_terminate_context(Context *ctx, JITState *jit_state) { - TRACE("jit_terminate_context: ctx->process_id = %d\n", ctx->process_id); - TRACE("-- Code execution finished for %i--\n", ctx->process_id); + TRACE("jit_terminate_context: ctx->process_id = %" PRId32 "\n", ctx->process_id); + TRACE("-- Code execution finished for %" PRId32 "--\n", ctx->process_id); GlobalContext *global = ctx->global; if (ctx->leader) { scheduler_stop_all(global); @@ -177,7 +342,7 @@ static Context *jit_terminate_context(Context *ctx, JITState *jit_state) static Context *jit_handle_error(Context *ctx, JITState *jit_state, int offset) { - TRACE("jit_terminate_context: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_terminate_context: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); if (offset || 
term_is_invalid_term(ctx->x[2])) { ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, ctx->x[0]); } @@ -244,14 +409,14 @@ static void set_error(Context *ctx, JITState *jit_state, int offset, term error_ static Context *jit_raise_error(Context *ctx, JITState *jit_state, int offset, term error_type_atom) { - TRACE("jit_raise_error: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise_error: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); set_error(ctx, jit_state, offset, error_type_atom); return jit_handle_error(ctx, jit_state, 0); } static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int offset, term error_atom, term arg1) { - TRACE("jit_raise_error_tuple: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise_error_tuple: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); // We can gc as we are raising if (UNLIKELY(memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, &arg1, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { set_error(ctx, jit_state, offset, OUT_OF_MEMORY_ATOM); @@ -268,7 +433,7 @@ static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int off static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term stacktrace, term exc_value) { - TRACE("jit_raise: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset); + TRACE("jit_raise: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset); ctx->x[0] = stacktrace_exception_class(stacktrace); ctx->x[1] = exc_value; ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, stacktrace); @@ -277,7 +442,7 @@ static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term st static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state) { - TRACE("jit_schedule_next_cp: ctx->process_id = %d\n", ctx->process_id); + TRACE("jit_schedule_next_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id); 
ctx->saved_function_ptr = jit_state->continuation; ctx->saved_module = jit_state->module; jit_state->remaining_reductions = 0; @@ -286,7 +451,7 @@ static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state) static Context *jit_schedule_wait_cp(Context *ctx, JITState *jit_state) { - TRACE("jit_schedule_wait_cp: ctx->process_id = %d\n", ctx->process_id); + TRACE("jit_schedule_wait_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id); ctx->saved_function_ptr = jit_state->continuation; ctx->saved_module = jit_state->module; jit_state->remaining_reductions = 0; @@ -424,7 +589,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int return_value = bif->bif2_ptr(ctx, 0, ctx->x[0], ctx->x[1]); break; default: - fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity); AVM_ABORT(); } PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset); @@ -454,7 +619,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int return_value = gcbif->gcbif2_ptr(ctx, 0, 0, ctx->x[0], ctx->x[1]); break; default: - fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity); + fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity); AVM_ABORT(); } PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset); @@ -477,7 +642,7 @@ static term jit_module_get_atom_term_by_id(JITState *jit_state, int atom_index) static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, uint32_t heap_need, uint32_t live) { - TRACE("jit_allocate: stack_need=%u heap_need=%u live=%u\n", stack_need, heap_need, live); + TRACE("jit_allocate: ENTRY ctx=%p jit_state=%p stack_need=%" PRIu32 " heap_need=%" PRIu32 " live=%" PRIu32 "\n", (void*)ctx, (void*)jit_state, stack_need, heap_need, live); if (ctx->heap.root->next || ((ctx->heap.heap_ptr + heap_need > ctx->e - (stack_need + 1)))) { TRIM_LIVE_REGS(live); if 
(UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need + stack_need + 1, live, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { @@ -492,7 +657,7 @@ static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif) { - TRACE("jit_get_imported_bif: bif=%u\n", bif); + TRACE("jit_get_imported_bif: bif=%" PRIu32 "\n", bif); const struct ExportedFunction *exported_bif = jit_state->module->imported_funcs[bif]; const BifImpl0 result = EXPORTED_FUNCTION_TO_BIF(exported_bif)->bif0_ptr; return result; @@ -500,7 +665,7 @@ static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif) static bool jit_deallocate(Context *ctx, JITState *jit_state, uint32_t n_words) { - TRACE("jit_deallocate: n_words=%u\n", n_words); + TRACE("jit_deallocate: n_words=%" PRIu32 "\n", n_words); ctx->cp = ctx->e[n_words]; ctx->e += n_words + 1; // Hopefully, we only need x[0] @@ -525,7 +690,7 @@ static TermCompareResult jit_term_compare(Context *ctx, JITState *jit_state, ter static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, uint32_t live_registers) { - TRACE("jit_test_heap: heap_need=%u live_registers=%u\n", heap_need, live_registers); + TRACE("jit_test_heap: heap_need=%" PRIu32 " live_registers=%" PRIu32 "\n", heap_need, live_registers); size_t heap_free = context_avail_free_memory(ctx); // if we need more heap space than is currently free, then try to GC the needed space if (heap_free < heap_need) { @@ -539,7 +704,7 @@ static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, } else if (heap_free > heap_need * HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF) { TRIM_LIVE_REGS(live_registers); if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need * (HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF / 2), live_registers, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) { - TRACE("Unable to ensure free memory. heap_need=%i\n", heap_need); + TRACE("Unable to ensure free memory. 
heap_need=%" PRIu32 "\n", heap_need); set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM); return false; } @@ -620,13 +785,13 @@ static term maybe_alloc_boxed_integer_fragment(Context *ctx, avm_int64_t value) static term jit_term_alloc_tuple(Context *ctx, uint32_t size) { - TRACE("jit_term_alloc_tuple: size=%u\n", size); + TRACE("jit_term_alloc_tuple: size=%" PRIu32 "\n", size); return term_alloc_tuple(size, &ctx->heap); } static term jit_term_alloc_fun(Context *ctx, JITState *jit_state, uint32_t fun_index, uint32_t numfree) { - TRACE("jit_term_alloc_fun: fun_index=%u numfree=%u\n", fun_index, numfree); + TRACE("jit_term_alloc_fun: fun_index=%" PRIu32 " numfree=%" PRIu32 "\n", fun_index, numfree); size_t size = numfree + BOXED_FUN_SIZE; term *boxed_func = memory_heap_alloc(&ctx->heap, size); @@ -832,7 +997,7 @@ static Context *jit_process_signal_messages(Context *ctx, JITState *jit_state) static term jit_mailbox_peek(Context *ctx) { - TRACE("jit_mailbox_peek: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_peek: ctx->process_id=%" PRId32 "\n", ctx->process_id); term out = term_invalid_term(); mailbox_peek(ctx, &out); return out; @@ -840,26 +1005,26 @@ static term jit_mailbox_peek(Context *ctx) static void jit_mailbox_remove_message(Context *ctx) { - TRACE("jit_mailbox_remove_message: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_remove_message: ctx->process_id=%" PRId32 "\n", ctx->process_id); mailbox_remove_message(&ctx->mailbox, &ctx->heap); } static void jit_timeout(Context *ctx) { - TRACE("jit_timeout: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id); context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags); mailbox_reset(&ctx->mailbox); } static void jit_mailbox_next(Context *ctx) { - TRACE("jit_mailbox_next: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_mailbox_next: ctx->process_id=%" PRId32 "\n", ctx->process_id); mailbox_next(&ctx->mailbox); } static void 
jit_cancel_timeout(Context *ctx) { - TRACE("jit_cancel_timeout: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_cancel_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id); if (context_get_flags(ctx, WaitingTimeout | WaitingTimeoutExpired)) { scheduler_cancel_timeout(ctx); } @@ -867,7 +1032,7 @@ static void jit_cancel_timeout(Context *ctx) static void jit_clear_timeout_flag(Context *ctx) { - TRACE("jit_clear_timeout_flag: ctx->process_id=%d\n", ctx->process_id); + TRACE("jit_clear_timeout_flag: ctx->process_id=%" PRId32 "\n", ctx->process_id); context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags); } @@ -1729,3 +1894,806 @@ const ModuleNativeInterface module_native_interface = { }; #endif + +#ifndef AVM_NO_JIT_DWARF + +// GDB JIT interface structures and constants +typedef enum +{ + JIT_NOACTION = 0, + JIT_REGISTER_FN, + JIT_UNREGISTER_FN +} jit_actions_t; + +struct jit_code_entry +{ + struct jit_code_entry *next_entry; + struct jit_code_entry *prev_entry; + const char *symfile_addr; + uint64_t symfile_size; +}; + +struct jit_descriptor +{ + uint32_t version; + uint32_t action_flag; + struct jit_code_entry *relevant_entry; + struct jit_code_entry *first_entry; +}; + +// Global GDB JIT interface descriptor +// This must have C linkage and specific symbol names for GDB to find it +struct jit_descriptor __jit_debug_descriptor = { 1, 0, NULL, NULL }; + +// GDB sets breakpoint on this function to be notified of new JIT code +void __attribute__((noinline)) __jit_debug_register_code(void) +{ + // GDB will set a breakpoint here +} + +// DWARF parsing helpers for address patching + +// Read unsigned LEB128 (used in DWARF for variable-length integers) +static size_t read_uleb128(const uint8_t *data, size_t *offset, uint64_t *value) +{ + *value = 0; + int shift = 0; + size_t start = *offset; + + while (1) { + uint8_t byte = data[(*offset)++]; + *value |= ((uint64_t)(byte & 0x7f)) << shift; + if ((byte & 0x80) == 0) { + break; + } + shift += 7; + } + + return 
*offset - start; +} + +// Structure to hold parsed abbreviation entry +typedef struct { + uint64_t code; + uint64_t tag; + uint8_t has_children; + // Attributes stored as pairs of (name, form) + uint64_t *attrs; // Dynamic array of attribute name/form pairs + size_t attr_count; +} dwarf_abbrev_t; + +// Parse a single abbreviation from .debug_abbrev +static bool parse_abbrev(const uint8_t *abbrev_data, size_t abbrev_size, size_t *offset, dwarf_abbrev_t *abbrev) +{ + if (*offset >= abbrev_size) { + return false; + } + + // Read abbreviation code + read_uleb128(abbrev_data, offset, &abbrev->code); + if (abbrev->code == 0) { + return false; // End of abbreviation table + } + + // Read tag + read_uleb128(abbrev_data, offset, &abbrev->tag); + + // Read has_children flag + abbrev->has_children = abbrev_data[(*offset)++]; + + // Count attributes first + size_t temp_offset = *offset; + size_t count = 0; + while (temp_offset < abbrev_size) { + uint64_t name, form; + read_uleb128(abbrev_data, &temp_offset, &name); + read_uleb128(abbrev_data, &temp_offset, &form); + if (name == 0 && form == 0) { + break; + } + count++; + } + + // Allocate and read attributes + abbrev->attr_count = count; + if (count > 0) { + abbrev->attrs = malloc(count * 2 * sizeof(uint64_t)); + for (size_t i = 0; i < count; i++) { + read_uleb128(abbrev_data, offset, &abbrev->attrs[i * 2]); // name + read_uleb128(abbrev_data, offset, &abbrev->attrs[i * 2 + 1]); // form + } + } else { + abbrev->attrs = NULL; + } + + // Skip terminator (0, 0) + (*offset) += 2; + + return true; +} + +// Parse all abbreviations from .debug_abbrev +static dwarf_abbrev_t *parse_abbrev_table(const uint8_t *abbrev_data, size_t abbrev_size, size_t *count) +{ + // First pass: count abbreviations + size_t offset = 0; + size_t abbrev_count = 0; + + while (offset < abbrev_size) { + uint64_t code; + read_uleb128(abbrev_data, &offset, &code); + if (code == 0) { + break; + } + + // Skip tag + uint64_t tag; + read_uleb128(abbrev_data, 
&offset, &tag); + offset++; // has_children + + // Skip attributes + while (offset < abbrev_size) { + uint64_t name, form; + read_uleb128(abbrev_data, &offset, &name); + read_uleb128(abbrev_data, &offset, &form); + if (name == 0 && form == 0) { + break; + } + } + + abbrev_count++; + } + + if (abbrev_count == 0) { + *count = 0; + return NULL; + } + + // Second pass: parse abbreviations + dwarf_abbrev_t *abbrevs = calloc(abbrev_count, sizeof(dwarf_abbrev_t)); + offset = 0; + size_t i = 0; + + while (offset < abbrev_size && i < abbrev_count) { + if (!parse_abbrev(abbrev_data, abbrev_size, &offset, &abbrevs[i])) { + break; + } + i++; + } + + *count = i; + return abbrevs; +} + +// Free abbreviation table +static void free_abbrev_table(dwarf_abbrev_t *abbrevs, size_t count) +{ + for (size_t i = 0; i < count; i++) { + free(abbrevs[i].attrs); + } + free(abbrevs); +} + +// Find abbreviation by code +static const dwarf_abbrev_t *find_abbrev(const dwarf_abbrev_t *abbrevs, size_t count, uint64_t code) +{ + for (size_t i = 0; i < count; i++) { + if (abbrevs[i].code == code) { + return &abbrevs[i]; + } + } + return NULL; +} + +// Get size of a DWARF form value +static size_t get_form_size(uint64_t form, uint8_t addr_size, const uint8_t *data, size_t offset) +{ + switch (form) { + case 0x01: // DW_FORM_addr + return addr_size; + case 0x03: // DW_FORM_block2 + return 2 + (data[offset] | (data[offset + 1] << 8)); + case 0x04: // DW_FORM_block4 + return 4 + (data[offset] | (data[offset + 1] << 8) | (data[offset + 2] << 16) | (data[offset + 3] << 24)); + case 0x05: // DW_FORM_data2 + return 2; + case 0x06: // DW_FORM_data4 + return 4; + case 0x07: // DW_FORM_data8 + return 8; + case 0x08: // DW_FORM_string + return strlen((const char *)&data[offset]) + 1; + case 0x09: // DW_FORM_block + case 0x18: // DW_FORM_exprloc + // Variable length - LEB128 size followed by data + { + uint64_t block_len; + size_t temp = offset; + size_t leb_size = read_uleb128(data, &temp, &block_len); + return 
leb_size + block_len; // LEB128 size + block data + } + case 0x0f: // DW_FORM_udata + // Just a LEB128 value + { + uint64_t val; + size_t temp = offset; + return read_uleb128(data, &temp, &val); + } + case 0x13: // DW_FORM_ref4 + return 4; + case 0x0b: // DW_FORM_data1 + return 1; + case 0x0e: // DW_FORM_strp + return 4; + case 0x10: // DW_FORM_ref_addr + return addr_size; + case 0x11: // DW_FORM_ref1 + return 1; + case 0x12: // DW_FORM_ref2 + return 2; + case 0x14: // DW_FORM_ref8 + return 8; + case 0x17: // DW_FORM_sec_offset + return 4; + case 0x19: // DW_FORM_flag_present + return 0; + default: + TRACE("Unknown DWARF form: 0x%llx\n", (unsigned long long)form); + return 0; + } +} + +// Patch addresses in .debug_info using parsed abbreviations +static void patch_debug_info_addresses(uint8_t *debug_info, size_t debug_info_size, + const dwarf_abbrev_t *abbrevs, size_t abbrev_count, + uintptr_t load_address) +{ + if (debug_info_size < 11) { + return; + } + + // Parse compile unit header + uint8_t addr_size = debug_info[10]; + TRACE("Patching .debug_info with addr_size=%d\n", addr_size); + + // Skip: length(4) + version(2) + abbrev_offset(4) + addr_size(1) = 11 bytes + size_t offset = 11; + int patch_count = 0; + + // Parse DIEs + while (offset < debug_info_size) { + uint64_t abbrev_code; + size_t code_size = read_uleb128(debug_info, &offset, &abbrev_code); + + if (abbrev_code == 0) { + // Null DIE - end of siblings + continue; + } + + const dwarf_abbrev_t *abbrev = find_abbrev(abbrevs, abbrev_count, abbrev_code); + if (!abbrev) { + TRACE("Warning: Unknown abbreviation code %llu at offset %zu\n", + (unsigned long long)abbrev_code, offset - code_size); + break; + } + + // Process attributes + for (size_t i = 0; i < abbrev->attr_count; i++) { + uint64_t attr_name = abbrev->attrs[i * 2]; + uint64_t attr_form = abbrev->attrs[i * 2 + 1]; + + // Check if this is an address attribute (DW_FORM_addr) + if (attr_form == 0x01) { // DW_FORM_addr + // This is an address - patch 
it + if (addr_size == 8) { + uint64_t *addr = (uint64_t *)&debug_info[offset]; + uint64_t old_val = *addr; + *addr += load_address; + TRACE("Patched .debug_info[%zu] (attr 0x%llx): 0x%llx -> 0x%llx\n", + offset, (unsigned long long)attr_name, (unsigned long long)old_val, (unsigned long long)*addr); + patch_count++; + } else if (addr_size == 4) { + uint32_t *addr = (uint32_t *)&debug_info[offset]; + uint32_t old_val = *addr; + *addr += (uint32_t)load_address; + TRACE("Patched .debug_info[%zu] (attr 0x%llx): 0x%x -> 0x%x\n", + offset, (unsigned long long)attr_name, old_val, *addr); + patch_count++; + } + } + + // Skip to next attribute + size_t form_size = get_form_size(attr_form, addr_size, debug_info, offset); + if (form_size == 0) { + TRACE("Failed to get form size for form 0x%llx at offset %zu\n", + (unsigned long long)attr_form, offset); + return; + } + offset += form_size; + + if (offset > debug_info_size) { + TRACE("Offset exceeded debug_info size\n"); + return; + } + } + } + + TRACE("Total .debug_info patches: %d\n", patch_count); +} + +// Create a minimal ELF file for debugging with proper PIE support +static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_data, size_t original_elf_size, + uintptr_t load_address, size_t *new_elf_size) +{ + TRACE("create_minimal_elf_for_debugging: original_elf_size=%zu, load_address=0x%lx\n", + original_elf_size, load_address); + + // Extract symbol table, string table, and DWARF sections from original ELF + const char *symtab_data = NULL; + size_t symtab_size = 0; + const char *strtab_data = NULL; + size_t strtab_size = 0; + const char *debug_info_data = NULL; + size_t debug_info_size = 0; + const char *debug_line_data = NULL; + size_t debug_line_size = 0; + const char *debug_abbrev_data = NULL; + size_t debug_abbrev_size = 0; + const char *debug_str_data = NULL; + size_t debug_str_size = 0; + const char *debug_aranges_data = NULL; + size_t debug_aranges_size = 0; + + // Parse original ELF to extract 
symbol, string, and DWARF tables + if (original_elf_size < sizeof(Elf_Ehdr)) { + fprintf(stderr, "ERROR: Original ELF too small for header\n"); + return NULL; + } + + const Elf_Ehdr *ehdr = (const Elf_Ehdr *) original_elf_data; + const Elf_Shdr *shdrs = (const Elf_Shdr *) (original_elf_data + ehdr->e_shoff); + const char *shstrtab = (const char *) (original_elf_data + shdrs[ehdr->e_shstrndx].sh_offset); + + // Find .symtab, .strtab, and .debug_* sections + for (int i = 0; i < ehdr->e_shnum; i++) { + const char *section_name = shstrtab + shdrs[i].sh_name; + + if (shdrs[i].sh_type == SHT_SYMTAB) { + symtab_data = (const char *) original_elf_data + shdrs[i].sh_offset; + symtab_size = shdrs[i].sh_size; + } else if (shdrs[i].sh_type == SHT_STRTAB && i != ehdr->e_shstrndx) { + strtab_data = (const char *) original_elf_data + shdrs[i].sh_offset; + strtab_size = shdrs[i].sh_size; + } else if (strcmp(section_name, ".debug_info") == 0) { + debug_info_data = (const char *) original_elf_data + shdrs[i].sh_offset; + debug_info_size = shdrs[i].sh_size; + } else if (strcmp(section_name, ".debug_line") == 0) { + debug_line_data = (const char *) original_elf_data + shdrs[i].sh_offset; + debug_line_size = shdrs[i].sh_size; + } else if (strcmp(section_name, ".debug_abbrev") == 0) { + debug_abbrev_data = (const char *) original_elf_data + shdrs[i].sh_offset; + debug_abbrev_size = shdrs[i].sh_size; + } else if (strcmp(section_name, ".debug_str") == 0) { + debug_str_data = (const char *) original_elf_data + shdrs[i].sh_offset; + debug_str_size = shdrs[i].sh_size; + } else if (strcmp(section_name, ".debug_aranges") == 0) { + debug_aranges_data = (const char *) original_elf_data + shdrs[i].sh_offset; + debug_aranges_size = shdrs[i].sh_size; + } + } + + if (!symtab_data || !strtab_data) { + fprintf(stderr, "ERROR: Could not find symbol or string table in original ELF\n"); + return NULL; + } + + TRACE("Found DWARF sections: .debug_info=%zu .debug_line=%zu .debug_abbrev=%zu .debug_str=%zu 
.debug_aranges=%zu\n", + debug_info_size, debug_line_size, debug_abbrev_size, debug_str_size, debug_aranges_size); + + // Section name strings: "\0.text\0.symtab\0.strtab\0.shstrtab\0.debug_info\0.debug_line\0.debug_abbrev\0.debug_str\0.debug_aranges\0" + static const char section_names[] = "\0.text\0.symtab\0.strtab\0.shstrtab\0.debug_info\0.debug_line\0.debug_abbrev\0.debug_str\0.debug_aranges\0"; + size_t shstrtab_size = sizeof(section_names) - 1; // 97 bytes: every name plus its NUL, derived from the array so the later memcpy cannot read past the literal (the old hard-coded 103 did) + + // Count how many sections we have (null + .text + .symtab + .strtab + .shstrtab + debug sections) + int section_count = 5; // Base sections + if (debug_info_data) section_count++; + if (debug_line_data) section_count++; + if (debug_abbrev_data) section_count++; + if (debug_str_data) section_count++; + if (debug_aranges_data) section_count++; + + // Find the actual .text section size from the original ELF + const Elf_Ehdr *orig_ehdr = (const Elf_Ehdr *) original_elf_data; + const Elf_Shdr *orig_shdrs = (const Elf_Shdr *) (original_elf_data + orig_ehdr->e_shoff); + + size_t code_size = 0; + + // Look for .text section in original ELF + for (int i = 0; i < orig_ehdr->e_shnum; i++) { + const Elf_Shdr *shdr = &orig_shdrs[i]; + if (shdr->sh_type == 1 && (shdr->sh_flags & 6) == 6) { // SHT_PROGBITS + SHF_ALLOC + SHF_EXECINSTR + code_size = shdr->sh_size; + break; + } + } + + if (code_size == 0) { + fprintf(stderr, "ERROR: Could not find .text section in original ELF\n"); + return NULL; + } + + // Calculate size of new minimal ELF (ELF header + 1 program header + section headers + data) + // IMPORTANT: We now include code_size so we can copy the actual JIT code into the file + size_t elf_size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (section_count * sizeof(Elf_Shdr)) + + code_size + symtab_size + strtab_size + shstrtab_size + + debug_info_size + debug_line_size + debug_abbrev_size + debug_str_size + debug_aranges_size; + + uint8_t *new_elf = (uint8_t *) malloc(elf_size); + if (!new_elf) { + fprintf(stderr, "ERROR: Failed to
allocate memory for new ELF\n"); + return NULL; + } + memset(new_elf, 0, elf_size); + + // Create ELF header + Elf_Ehdr *new_ehdr = (Elf_Ehdr *) new_elf; + memcpy(new_ehdr->e_ident, orig_ehdr->e_ident, 16); + // Use ET_EXEC for JIT debugging - code is loaded at fixed address + // ET_EXEC is the correct type for executables with PT_LOAD at specific addresses + new_ehdr->e_type = 2; // ET_EXEC + new_ehdr->e_machine = orig_ehdr->e_machine; + new_ehdr->e_version = orig_ehdr->e_version; + new_ehdr->e_entry = 0; + new_ehdr->e_phoff = sizeof(Elf_Ehdr); + new_ehdr->e_shoff = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr); + new_ehdr->e_flags = orig_ehdr->e_flags; + new_ehdr->e_ehsize = sizeof(Elf_Ehdr); + new_ehdr->e_phentsize = sizeof(Elf_Phdr); + new_ehdr->e_phnum = 1; + new_ehdr->e_shentsize = sizeof(Elf_Shdr); + new_ehdr->e_shnum = section_count; + new_ehdr->e_shstrndx = 4; // .shstrtab is the section name string table (always section 4) + + // Create program header (PT_LOAD segment) + Elf_Phdr *new_phdr = (Elf_Phdr *) (new_elf + sizeof(Elf_Ehdr)); + new_phdr->p_type = PT_LOAD; + new_phdr->p_flags = PF_R | PF_X; + + // PT_LOAD will start where code is in the file and map to load_address in memory + // p_offset will be set after we know where code is + new_phdr->p_offset = 0; // Will be set after we copy code + new_phdr->p_vaddr = load_address; + new_phdr->p_paddr = load_address; + new_phdr->p_filesz = 0; // Will be set after we copy data + new_phdr->p_memsz = 0; // Will be set later after we know total size + new_phdr->p_align = 1; + + // Create section headers + Elf_Shdr *new_shdrs = (Elf_Shdr *) (new_elf + sizeof(Elf_Ehdr) + sizeof(Elf_Phdr)); + size_t current_offset = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (section_count * sizeof(Elf_Shdr)); + + // Copy the actual JIT code into the file right after section headers + // This allows GDB's BFD to recognize it as a valid object file + uint8_t *code_dest = new_elf + current_offset; + memcpy(code_dest, (void*)load_address, 
code_size); + size_t code_file_offset = current_offset; + current_offset += code_size; + + // Section 0: null section (required) + new_shdrs[0] = (Elf_Shdr){ 0 }; + + // Section 1: .text section + new_shdrs[1].sh_name = 1; // ".text\0" at offset 1 in section names + new_shdrs[1].sh_type = 1; // SHT_PROGBITS + new_shdrs[1].sh_flags = 6; // SHF_ALLOC | SHF_EXECINSTR + new_shdrs[1].sh_addr = load_address; + new_shdrs[1].sh_offset = code_file_offset; // Point to code we copied into the file + new_shdrs[1].sh_size = code_size; + new_shdrs[1].sh_addralign = 1; + + // Section 2: .symtab + new_shdrs[2].sh_name = 7; // ".symtab\0" at offset 7 in section names + new_shdrs[2].sh_type = SHT_SYMTAB; + new_shdrs[2].sh_offset = current_offset; + new_shdrs[2].sh_size = symtab_size; + new_shdrs[2].sh_link = 3; // Points to .strtab + +#if TERM_BYTES == 8 + new_shdrs[2].sh_addralign = 8; +#else + new_shdrs[2].sh_addralign = 4; +#endif + + new_shdrs[2].sh_entsize = sizeof(Elf_Sym); + current_offset += symtab_size; + + // Section 3: .strtab + new_shdrs[3].sh_name = 15; // ".strtab\0" at offset 15 in section names + new_shdrs[3].sh_type = SHT_STRTAB; + new_shdrs[3].sh_offset = current_offset; + new_shdrs[3].sh_size = strtab_size; + new_shdrs[3].sh_addralign = 1; + current_offset += strtab_size; + + // Section 4: .shstrtab (section name string table) + new_shdrs[4].sh_name = 23; // ".shstrtab\0" at offset 23 in section names + new_shdrs[4].sh_type = SHT_STRTAB; + new_shdrs[4].sh_offset = current_offset; + new_shdrs[4].sh_size = shstrtab_size; + new_shdrs[4].sh_addralign = 1; + current_offset += shstrtab_size; + + // Add DWARF sections if present + // DWARF sections don't need SHF_ALLOC - they're debug info only, not loaded at runtime + int next_section = 5; + + // Section 5: .debug_info (if present) + if (debug_info_data) { + new_shdrs[next_section].sh_name = 33; // ".debug_info\0" at offset 33 in section names + new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS + 
new_shdrs[next_section].sh_flags = 0; // No ALLOC - debug info only + new_shdrs[next_section].sh_addr = 0; + new_shdrs[next_section].sh_offset = current_offset; + new_shdrs[next_section].sh_size = debug_info_size; + new_shdrs[next_section].sh_addralign = 1; + current_offset += debug_info_size; + next_section++; + } + + // Section 6: .debug_line (if present) + if (debug_line_data) { + new_shdrs[next_section].sh_name = 45; // ".debug_line\0" at offset 45 in section names + new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS + new_shdrs[next_section].sh_flags = 0; + new_shdrs[next_section].sh_addr = 0; + new_shdrs[next_section].sh_offset = current_offset; + new_shdrs[next_section].sh_size = debug_line_size; + new_shdrs[next_section].sh_addralign = 1; + current_offset += debug_line_size; + next_section++; + } + + // Section 7: .debug_abbrev (if present) + if (debug_abbrev_data) { + new_shdrs[next_section].sh_name = 57; // ".debug_abbrev\0" at offset 57 in section names + new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS + new_shdrs[next_section].sh_flags = 0; + new_shdrs[next_section].sh_addr = 0; + new_shdrs[next_section].sh_offset = current_offset; + new_shdrs[next_section].sh_size = debug_abbrev_size; + new_shdrs[next_section].sh_addralign = 1; + current_offset += debug_abbrev_size; + next_section++; + } + + // Section 8: .debug_str (if present) + if (debug_str_data) { + new_shdrs[next_section].sh_name = 71; // ".debug_str\0" at offset 71 in section names + new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS + new_shdrs[next_section].sh_flags = 0; + new_shdrs[next_section].sh_addr = 0; + new_shdrs[next_section].sh_offset = current_offset; + new_shdrs[next_section].sh_size = debug_str_size; + new_shdrs[next_section].sh_addralign = 1; + current_offset += debug_str_size; + next_section++; + } + + // PT_LOAD covers only the .text section (code) + // DWARF sections are not loadable - they're debug info only + new_phdr->p_offset = code_file_offset; + 
new_phdr->p_memsz = code_size; + new_phdr->p_filesz = code_size; + TRACE("PT_LOAD covers 0x%lx to 0x%lx (size=0x%lx), filesz=0x%lx\n", + (unsigned long)load_address, (unsigned long)(load_address + code_size), + (unsigned long)new_phdr->p_memsz, (unsigned long)new_phdr->p_filesz); + + // Section 9: .debug_aranges (if present) + // DISABLED: LLDB uses symbols for breakpoints, not .debug_aranges + // Keeping this corrupted actually made breakpoints work better! + if (false && debug_aranges_data) { + new_shdrs[next_section].sh_name = 82; // ".debug_aranges\0" at offset 82 in section names + new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS + new_shdrs[next_section].sh_offset = current_offset; + new_shdrs[next_section].sh_size = debug_aranges_size; + new_shdrs[next_section].sh_addralign = 1; + current_offset += debug_aranges_size; + next_section++; + } + + // Copy symbol table data + uint8_t *new_symtab = new_elf + new_shdrs[2].sh_offset; + memcpy(new_symtab, symtab_data, symtab_size); + + // Copy string table data + uint8_t *new_strtab = new_elf + new_shdrs[3].sh_offset; + memcpy(new_strtab, strtab_data, strtab_size); + + TRACE("Copied symbol table: %zu bytes, %zu symbols\n", symtab_size, symtab_size / sizeof(Elf_Sym)); + + // Debug: print first few function symbols + Elf_Sym *syms = (Elf_Sym *)new_symtab; + size_t num_syms = symtab_size / sizeof(Elf_Sym); + for (size_t i = 0; i < num_syms && i < 10; i++) { + if (ELF_ST_TYPE(syms[i].st_info) == STT_FUNC) { + const char *sym_name = (const char *)(new_strtab + syms[i].st_name); + TRACE(" Symbol[%zu]: %s @ 0x%lx (size=%zu)\n", i, sym_name, + (unsigned long)syms[i].st_value, (size_t)syms[i].st_size); + } + } + + // With PT_LOAD program header, the debugger should automatically apply the base address + + // Copy section name string table data + uint8_t *new_shstrtab = new_elf + new_shdrs[4].sh_offset; + memcpy(new_shstrtab, section_names, shstrtab_size); + + // Copy DWARF section data + next_section = 5; + + if 
(debug_info_data) { + uint8_t *new_debug_info = new_elf + new_shdrs[next_section].sh_offset; + memcpy(new_debug_info, debug_info_data, debug_info_size); + + // DWARF addresses are copied unpatched. The debug sections are written with sh_flags = 0 + // (no SHF_ALLOC) and PT_LOAD covers only .text, so the TRACE wording below is stale + TRACE("DWARF sections in PT_LOAD - debugger will apply base address\n"); + + next_section++; + } + + if (debug_line_data) { + uint8_t *new_debug_line = new_elf + new_shdrs[next_section].sh_offset; + memcpy(new_debug_line, debug_line_data, debug_line_size); + next_section++; + } + + if (debug_abbrev_data) { + uint8_t *new_debug_abbrev = new_elf + new_shdrs[next_section].sh_offset; + memcpy(new_debug_abbrev, debug_abbrev_data, debug_abbrev_size); + next_section++; + } + + if (debug_str_data) { + uint8_t *new_debug_str = new_elf + new_shdrs[next_section].sh_offset; + memcpy(new_debug_str, debug_str_data, debug_str_size); + next_section++; + } + + // DISABLED: .debug_aranges not needed for LLDB breakpoints + if (false && debug_aranges_data) { + uint8_t *new_debug_aranges = new_elf + new_shdrs[next_section].sh_offset; + memcpy(new_debug_aranges, debug_aranges_data, debug_aranges_size); + + // Patch .debug_aranges addresses to absolute addresses + // Structure: [length:4][version:2][debug_info_offset:4][addr_size:1][seg_size:1][padding:variable] + // [address:addr_size][length:addr_size][terminator:addr_size*2] + // Header is 4+2+4+1+1 = 12 bytes, then padding to align to 2*addr_size + if (debug_aranges_size >= 12) { + uint8_t addr_size = new_debug_aranges[10]; // Address size field at offset 4+2+4 = 10 + TRACE(".debug_aranges addr_size=%d\n", addr_size); + + // Calculate padding: header is 8 bytes (after the length field), align to 2*addr_size. NOTE(review): this pads relative to the end of the length field and takes a modulo by 2*addr_size without rejecting addr_size == 0 - check against the DWARF .debug_aranges layout before re-enabling + size_t header_size = 8; // version(2) + debug_info_offset(4) + addr_size(1) + seg_size(1) + size_t tuple_alignment = 2 * addr_size; + size_t padding_size = (tuple_alignment - (header_size % tuple_alignment)) % tuple_alignment; +
size_t descriptor_offset = 4 + header_size + padding_size; // Skip length field + header + padding + + TRACE(".debug_aranges descriptor at offset %zu (header=%zu, padding=%zu)\n", + descriptor_offset, header_size, padding_size); + + if (debug_aranges_size >= descriptor_offset + addr_size * 2) { + if (addr_size == 8) { + // Patch the address range start address (64-bit) + uint64_t *range_start = (uint64_t *)(new_debug_aranges + descriptor_offset); + uint64_t old_addr = *range_start; + *range_start += load_address; + TRACE("Patched .debug_aranges: 0x%llx -> 0x%llx\n", (unsigned long long)old_addr, (unsigned long long)*range_start); + } else if (addr_size == 4) { + // Patch the address range start address (32-bit) + uint32_t *range_start = (uint32_t *)(new_debug_aranges + descriptor_offset); + uint32_t old_addr = *range_start; + *range_start += (uint32_t)load_address; + TRACE("Patched .debug_aranges: 0x%x -> 0x%x\n", old_addr, *range_start); + } + } + } + + next_section++; + } + + *new_elf_size = elf_size; + return new_elf; +} + +void jit_debug_register_code(Module *mod, const void *native_code, size_t native_size, ModuleNativeEntryPoint entry_point) +{ + UNUSED(mod); + + if (!native_code || native_size < 8) { + fprintf(stderr, "jit_debug_register_code: no native code or too small\n"); + return; + } + + // Parse the NativeCodeChunk header to find where the ELF starts: a 32-bit info_size, then info_size bytes, then the ELF image + const uint8_t *data = (const uint8_t *) native_code; + uint32_t info_size = READ_32_UNALIGNED(data); + + if (info_size > native_size - 4) { // native_size >= 8 here; comparing this way avoids the 32-bit wrap-around of info_size + 4 + fprintf(stderr, "jit_debug_register_code: invalid info_size\n"); + return; + } + + // Check if there's an ELF header after the NativeCodeChunk header + const uint8_t *elf_start = data + 4 + info_size; + size_t elf_size = native_size - 4 - info_size; + + if (elf_size < 16) { + fprintf(stderr, "jit_debug_register_code: no space for ELF header\n"); + return; + } + + // Check for ELF magic: 0x7f, 'E', 'L', 'F' + if (elf_start[0] != 0x7f || elf_start[1] != 'E' ||
elf_start[2] != 'L' || elf_start[3] != 'F') { + fprintf(stderr, "jit_debug_register_code: no ELF header found, not registering debug info\n"); + return; + } + + // Allocate memory for the JIT code entry (but not for the ELF data itself) + struct jit_code_entry *entry = malloc(sizeof(struct jit_code_entry)); + if (!entry) { + return; + } + + // Use the actual mapped entry point address as the load address + uintptr_t load_address = (uintptr_t) entry_point; + + // Create a minimal ELF file with proper symbols for debugging + size_t new_elf_size; + const uint8_t *new_elf = create_minimal_elf_for_debugging(elf_start, elf_size, load_address, &new_elf_size); + if (!new_elf) { + free(entry); // entry has not been linked into the descriptor list yet, so release it instead of leaking + fprintf(stderr, "ERROR: Failed to create minimal ELF for debugging\n"); + return; + } + + // Debug: dump ELF to file for inspection. NOTE(review): this runs on every registration and overwrites a fixed /tmp path - consider gating it behind ENABLE_TRACE + FILE *f = fopen("/tmp/jit_debug.elf", "wb"); + if (f) { + fwrite(new_elf, 1, new_elf_size, f); + fclose(f); + TRACE("Wrote JIT ELF to /tmp/jit_debug.elf (%zu bytes)\n", new_elf_size); + } + + // Initialize the entry with the new ELF + entry->next_entry = NULL; + entry->prev_entry = NULL; + entry->symfile_addr = (const char *) new_elf; + entry->symfile_size = new_elf_size; + + // Add to GDB's linked list + if (__jit_debug_descriptor.first_entry) { + __jit_debug_descriptor.first_entry->prev_entry = entry; + entry->next_entry = __jit_debug_descriptor.first_entry; + } + __jit_debug_descriptor.first_entry = entry; + + // TODO: Store entry pointer in module for later unregistration + + // Notify GDB that new code has been registered + __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; + __jit_debug_descriptor.relevant_entry = entry; + __jit_debug_register_code(); +} + +void jit_debug_unregister_code(Context *ctx, Module *mod) +{ + UNUSED(ctx); + UNUSED(mod); + + // TODO: Implement unregistration + // Need to store the jit_code_entry pointer in the module structure + // and retrieve it here to properly unregister +} + +#endif // AVM_NO_JIT_DWARF diff --git
a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h index 77caa9d578..0bfbb7aad8 100644 --- a/src/libAtomVM/jit.h +++ b/src/libAtomVM/jit.h @@ -172,8 +172,11 @@ enum TrapAndLoadResult #define JIT_ARCH_X86_64 1 #define JIT_ARCH_AARCH64 2 +#define JIT_ARCH_ARMV6M 3 +#define JIT_ARCH_RISCV32 4 #define JIT_VARIANT_PIC 1 +#define JIT_VARIANT_FLOAT32 2 #ifndef AVM_NO_JIT @@ -187,6 +190,16 @@ enum TrapAndLoadResult #define JIT_JUMPTABLE_ENTRY_SIZE 4 #endif +#ifdef __arm__ +#define JIT_ARCH_TARGET JIT_ARCH_ARMV6M +#define JIT_JUMPTABLE_ENTRY_SIZE 12 +#endif + +#if defined(__riscv) && (__riscv_xlen == 32) +#define JIT_ARCH_TARGET JIT_ARCH_RISCV32 +#define JIT_JUMPTABLE_ENTRY_SIZE 8 +#endif + #ifndef JIT_ARCH_TARGET #error Unknown JIT target #endif @@ -212,6 +225,34 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream); */ enum TrapAndLoadResult jit_trap_and_load(Context *ctx, Module *mod, uint32_t label); +#ifndef AVM_NO_JIT_DWARF +/** + * @brief Register JIT-compiled code with debug info with GDB/LLDB + * + * @details This function registers native code and associated DWARF debug + * information with the debugger using the GDB JIT interface. This allows + * debuggers to show function names and source line information for JIT code. + * + * @param mod The module containing the JIT code + * @param native_code Pointer to the native machine code + * @param native_size Size of the native code in bytes + * @param entry_point The actual mapped entry point address + */ +void jit_debug_register_code(Module *mod, const void *native_code, size_t native_size, ModuleNativeEntryPoint entry_point); + +/** + * @brief Unregister JIT-compiled code from debugger + * + * @details This function unregisters previously registered JIT code from + * the debugger. Should be called when a module is unloaded. 
+ * + * @param ctx The context + * @param mod The module being unloaded + */ +void jit_debug_unregister_code(Context *ctx, Module *mod); + +#endif + #ifdef __cplusplus } #endif diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c index 12961f121e..1b5528b522 100644 --- a/src/libAtomVM/module.c +++ b/src/libAtomVM/module.c @@ -38,6 +38,9 @@ #include #include +// #define ENABLE_TRACE +#include "trace.h" + #ifdef WITH_ZLIB #include #endif @@ -336,10 +339,23 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary fprintf(stderr, "Unknown native code chunk version (%d)\n", ENDIAN_SWAP_16(native_code->version)); } else { for (int arch_index = 0; arch_index < ENDIAN_SWAP_16(native_code->architectures_count); arch_index++) { - if (ENDIAN_SWAP_16(native_code->architectures[arch_index].architecture) == JIT_ARCH_TARGET && ENDIAN_SWAP_16(native_code->architectures[arch_index].variant) == JIT_VARIANT_PIC) { + uint16_t runtime_variant; +#ifdef AVM_USE_SINGLE_PRECISION + runtime_variant = JIT_VARIANT_FLOAT32 | JIT_VARIANT_PIC; +#else + runtime_variant = JIT_VARIANT_PIC; +#endif + if (ENDIAN_SWAP_16(native_code->architectures[arch_index].architecture) == JIT_ARCH_TARGET && ENDIAN_SWAP_16(native_code->architectures[arch_index].variant) == runtime_variant) { size_t offset = ENDIAN_SWAP_32(native_code->info_size) + ENDIAN_SWAP_32(native_code->architectures[arch_index].offset) + sizeof(native_code->info_size); ModuleNativeEntryPoint module_entry_point = sys_map_native_code((const uint8_t *) &native_code->info_size, ENDIAN_SWAP_32(native_code->size), offset); module_set_native_code(mod, ENDIAN_SWAP_32(native_code->labels), module_entry_point); + +#ifndef AVM_NO_JIT_DWARF + // Register debug info with debugger (will check for embedded ELF) + const void *chunk_start = (const uint8_t *) &native_code->info_size; + size_t chunk_size = ENDIAN_SWAP_32(native_code->size); + jit_debug_register_code(mod, chunk_start, chunk_size, module_entry_point); +#endif 
break; } } @@ -457,6 +473,11 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary COLD_FUNC void module_destroy(Module *module) { +#ifndef AVM_NO_JIT_DWARF + // Unregister DWARF debug info from debugger if it was registered + jit_debug_unregister_code(NULL, module); +#endif + free(module->labels); free(module->imported_funcs); free(module->literals_table); diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c index 2fe0b12948..17fdce2a20 100644 --- a/src/libAtomVM/nifs.c +++ b/src/libAtomVM/nifs.c @@ -209,6 +209,7 @@ static term nif_erlang_module_loaded(Context *ctx, int argc, term argv[]); static term nif_erlang_nif_error(Context *ctx, int argc, term argv[]); #ifndef AVM_NO_JIT static term nif_jit_backend_module(Context *ctx, int argc, term argv[]); +static term nif_jit_variant(Context *ctx, int argc, term argv[]); #endif static term nif_lists_reverse(Context *ctx, int argc, term argv[]); static term nif_lists_keyfind(Context *ctx, int argc, term argv[]); @@ -794,6 +795,11 @@ static const struct Nif jit_backend_module_nif = { .base.type = NIFFunctionType, .nif_ptr = nif_jit_backend_module }; + +static const struct Nif jit_variant_nif = { + .base.type = NIFFunctionType, + .nif_ptr = nif_jit_variant +}; #endif static const struct Nif lists_reverse_nif = { @@ -5680,10 +5686,27 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[]) return JIT_X86_64_ATOM; #elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64 return JIT_AARCH64_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M + return JIT_ARMV6M_ATOM; +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 + return JIT_RISCV32_ATOM; #else #error Unknown JIT target #endif } + +static term nif_jit_variant(Context *ctx, int argc, term argv[]) +{ + UNUSED(ctx); + UNUSED(argc); + UNUSED(argv); + +#ifdef AVM_USE_SINGLE_PRECISION + return term_from_int(JIT_VARIANT_FLOAT32 | JIT_VARIANT_PIC); +#else + return term_from_int(JIT_VARIANT_PIC); +#endif +} #endif static term nif_lists_reverse(Context 
*ctx, int argc, term argv[]) diff --git a/src/libAtomVM/nifs.gperf b/src/libAtomVM/nifs.gperf index a647c1de04..a4a2591fa0 100644 --- a/src/libAtomVM/nifs.gperf +++ b/src/libAtomVM/nifs.gperf @@ -193,6 +193,7 @@ lists:keyfind/3, &lists_keyfind_nif lists:keymember/3, &lists_keymember_nif lists:member/2, &lists_member_nif jit:backend_module/0, IF_HAVE_JIT(&jit_backend_module_nif) +jit:variant/0, IF_HAVE_JIT(&jit_variant_nif) lists:reverse/1, &lists_reverse_nif lists:reverse/2, &lists_reverse_nif maps:from_keys/2, &maps_from_keys_nif diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h index d8fc4106b0..e9d49698a0 100644 --- a/src/libAtomVM/opcodesswitch.h +++ b/src/libAtomVM/opcodesswitch.h @@ -7444,7 +7444,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb) } terminate_context: - TRACE("-- Code execution finished for %i--\n", ctx->process_id); + TRACE("-- Code execution finished for %i--\n", (int) ctx->process_id); GlobalContext *global = ctx->global; if (ctx->leader) { scheduler_stop_all(global); diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt index 9dec6ec5f3..4ddc362924 100644 --- a/src/platforms/esp32/CMakeLists.txt +++ b/src/platforms/esp32/CMakeLists.txt @@ -51,8 +51,21 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON) +# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +# Configuration comes from idf.py menuconfig (KConfig), not CMake options +if(CONFIG_JIT_ENABLED) + if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") + else() + message(WARNING "JIT is not supported on ${IDF_TARGET} (Xtensa architecture)") + set(AVM_DISABLE_JIT ON) + endif() +else() + set(AVM_DISABLE_JIT ON) + message(STATUS "JIT 
compilation disabled") +endif() project(atomvm-esp32) diff --git a/src/platforms/esp32/components/avm_sys/CMakeLists.txt b/src/platforms/esp32/components/avm_sys/CMakeLists.txt index ebcedd3b57..8156bb2ac8 100644 --- a/src/platforms/esp32/components/avm_sys/CMakeLists.txt +++ b/src/platforms/esp32/components/avm_sys/CMakeLists.txt @@ -25,6 +25,7 @@ set(AVM_SYS_COMPONENT_SRCS "sys.c" "platform_nifs.c" "platform_defaultatoms.c" + "jit_stream_flash.c" "../../../../libAtomVM/inet.c" "../../../../libAtomVM/otp_crypto.c" "../../../../libAtomVM/otp_net.c" diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash.c new file mode 100644 index 0000000000..77dfcca908 --- /dev/null +++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash.c @@ -0,0 +1,34 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "context.h" +#include "jit.h" +#include "term.h" + +ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) +{ + UNUSED(ctx); + UNUSED(jit_stream); + return NULL; +} + +#endif diff --git a/src/platforms/esp32/components/avm_sys/sys.c b/src/platforms/esp32/components/avm_sys/sys.c index 8318ae759a..ec229d70b2 100644 --- a/src/platforms/esp32/components/avm_sys/sys.c +++ b/src/platforms/esp32/components/avm_sys/sys.c @@ -807,3 +807,28 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) UNUSED(global); #endif } + +#ifndef AVM_NO_JIT +#include + +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ + UNUSED(size); + uintptr_t addr = (uintptr_t) (native_code + offset); + +#if defined(CONFIG_IDF_TARGET_ARCH_RISCV) + // On RISC-V ESP32 targets, native code in flash needs to be accessed + // through the instruction cache (IROM) not data cache (DROM) +#if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32C2) + // ESP32-C3 and C2 have separate DROM and IROM regions + if (addr >= SOC_DROM_LOW && addr < SOC_DROM_HIGH) { + // Convert from data cache address to instruction cache address + addr = addr - SOC_DROM_LOW + SOC_IROM_LOW; + } +#endif + // ESP32-C6, H2, and P4 have unified DROM/IROM, no conversion needed +#endif + + return (ModuleNativeEntryPoint) addr; +} +#endif diff --git a/src/platforms/esp32/components/libatomvm/CMakeLists.txt b/src/platforms/esp32/components/libatomvm/CMakeLists.txt index 97580dbfea..00595afeef 100644 --- a/src/platforms/esp32/components/libatomvm/CMakeLists.txt +++ b/src/platforms/esp32/components/libatomvm/CMakeLists.txt @@ -31,7 +31,12 @@ if (HAVE_PLATFORM_ATOMIC_H) target_include_directories(libAtomVM PUBLIC ../avm_sys/) endif() -target_link_libraries(${COMPONENT_LIB} - INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u 
platform_defaultatoms_init") +if (AVM_DISABLE_JIT) + target_link_libraries(${COMPONENT_LIB} + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init") +else() + target_link_libraries(${COMPONENT_LIB} + INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init" "-u jit_stream_entry_point" "-u sys_map_native_code") +endif() target_compile_features(${COMPONENT_LIB} INTERFACE c_std_11) diff --git a/src/platforms/esp32/main/Kconfig.projbuild b/src/platforms/esp32/main/Kconfig.projbuild index 88bf92aa1a..1eba944ed7 100755 --- a/src/platforms/esp32/main/Kconfig.projbuild +++ b/src/platforms/esp32/main/Kconfig.projbuild @@ -39,5 +39,11 @@ menu "AtomVM configuration" depends on USE_USB_SERIAL help Enable TinyUSB CDC functionality if USE_USB_SERIAL is enabled. + + config JIT_ENABLED + bool "Enable just in time compilation" + default n + help + Enable Just in time compilation, or just execution of precompiled native code endmenu diff --git a/src/platforms/esp32/test/CMakeLists.txt b/src/platforms/esp32/test/CMakeLists.txt index 2d97d91345..cee138d34c 100644 --- a/src/platforms/esp32/test/CMakeLists.txt +++ b/src/platforms/esp32/test/CMakeLists.txt @@ -57,8 +57,16 @@ endif() # On Esp32, select is run in a loop in a dedicated task set(AVM_SELECT_IN_TASK ON) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON) +# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4) +# This must be set before project() so libAtomVM is configured correctly +if (${IDF_TARGET} MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4") + set(AVM_DISABLE_JIT OFF) + set(AVM_JIT_TARGET_ARCH riscv32) + message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)") +else() + message(STATUS "JIT is not supported on ${IDF_TARGET} (Xtensa architecture) - using interpreter") + set(AVM_DISABLE_JIT ON) +endif() project(atomvm-esp32-test) diff --git a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt 
b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt index e2d67269e8..dc4789f374 100644 --- a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt +++ b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt @@ -20,11 +20,31 @@ add_library(esp32_test_modules) +include(ExternalProject) +if(NOT AVM_DISABLE_JIT) +set(host_atomvm_jit_target "--target=jit") +else() +set(host_atomvm_jit_target "") +endif() ExternalProject_Add(HostAtomVM SOURCE_DIR ../../../../../../../../ INSTALL_COMMAND cmake -E echo "Skipping install step." + BUILD_COMMAND cmake --build . --target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM ) +macro(jit_precompile module_name) + if(NOT AVM_DISABLE_JIT) + add_custom_command( + OUTPUT ${AVM_JIT_TARGET_ARCH}/${module_name}.beam + COMMAND mkdir -p ${AVM_JIT_TARGET_ARCH} + && erl -pa HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/ ${module_name}.beam + DEPENDS ${module_name}.beam HostAtomVM + COMMENT "Compiling ${module_name}.beam to ${AVM_JIT_TARGET_ARCH}" + VERBATIM + ) + endif() +endmacro() + function(compile_erlang module_name) add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam" @@ -33,6 +53,7 @@ function(compile_erlang module_name) COMMENT "Compiling ${module_name}.erl" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) + jit_precompile(${module_name}) set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam") endfunction() @@ -55,46 +76,44 @@ compile_erlang(test_time_and_processes) compile_erlang(test_twdt) compile_erlang(test_tz) +set(erlang_test_beams + test_esp_partition.beam + test_file.beam + test_wifi_example.beam + test_list_to_atom.beam + test_list_to_binary.beam + test_md5.beam + test_crypto.beam + test_monotonic_time.beam + test_mount.beam + test_net.beam + test_rtc_slow.beam + test_select.beam + 
test_socket.beam + test_ssl.beam + test_time_and_processes.beam + test_twdt.beam + test_tz.beam +) + +if(NOT AVM_DISABLE_JIT) + set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams}) + list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/) + set(erlang_test_beams_to_package ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) + set(erlang_test_beams_depends ${erlang_test_beams} ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) +else() + set(erlang_test_beams_to_package ${erlang_test_beams}) + set(erlang_test_beams_depends ${erlang_test_beams}) +endif() + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/esp32_test_modules.avm" COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i esp32_test_modules.avm HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm - test_esp_partition.beam - test_file.beam - test_wifi_example.beam - test_list_to_atom.beam - test_list_to_binary.beam - test_md5.beam - test_crypto.beam - test_monotonic_time.beam - test_mount.beam - test_net.beam - test_rtc_slow.beam - test_select.beam - test_socket.beam - test_ssl.beam - test_time_and_processes.beam - test_twdt.beam - test_tz.beam + ${erlang_test_beams_to_package} DEPENDS HostAtomVM - "${CMAKE_CURRENT_BINARY_DIR}/test_esp_partition.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_wifi_example.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_file.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_atom.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_binary.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_md5.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_crypto.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_monotonic_time.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_mount.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_net.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_rtc_slow.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_select.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_socket.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_ssl.beam" - 
"${CMAKE_CURRENT_BINARY_DIR}/test_time_and_processes.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_twdt.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_tz.beam" + ${erlang_test_beams_depends} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} VERBATIM ) diff --git a/src/platforms/generic_unix/lib/jit_stream_mmap.c b/src/platforms/generic_unix/lib/jit_stream_mmap.c index 376f7384d0..f246a9791d 100644 --- a/src/platforms/generic_unix/lib/jit_stream_mmap.c +++ b/src/platforms/generic_unix/lib/jit_stream_mmap.c @@ -244,7 +244,13 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) #elif defined(__GNUC__) __builtin___clear_cache(js_obj->stream_base, js_obj->stream_base + js_obj->stream_size); #endif +#if JIT_ARCH_TARGET == JIT_ARCH_ARMV6M + // Set thumb bit for armv6m: add 1 to the data pointer *before* casting, so no arithmetic is done on a function pointer + ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) (js_obj->stream_base + 1); +#else ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) js_obj->stream_base; +#endif + js_obj->stream_base = NULL; return result; } diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c index 086b39de79..099164dd89 100644 --- a/src/platforms/generic_unix/lib/sys.c +++ b/src/platforms/generic_unix/lib/sys.c @@ -842,7 +842,12 @@ ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t si } __builtin___clear_cache((char *) native_code_mmap, (char *) (native_code_mmap + size)); #endif +#if JIT_ARCH_TARGET == JIT_ARCH_ARMV6M + // Set thumb bit for armv6m + return (ModuleNativeEntryPoint) (native_code_mmap + offset + 1); +#else return (ModuleNativeEntryPoint) (native_code_mmap + offset); +#endif #else UNUSED(size); return (ModuleNativeEntryPoint) (native_code + offset); diff --git a/src/platforms/rp2/CMakeLists.txt b/src/platforms/rp2/CMakeLists.txt index 9b9eb582df..86e5e6683a 100644 --- a/src/platforms/rp2/CMakeLists.txt +++ b/src/platforms/rp2/CMakeLists.txt @@ -54,6 +54,8 @@ set(HAVE_MKFIFO "" CACHE INTERNAL "Have symbol mkfifo" FORCE) set(HAVE_UNLINK
"" CACHE INTERNAL "Have symbol unlink" FORCE) # Likewise with EXECVE set(HAVE_EXECVE "" CACHE INTERNAL "Have symbol execve" FORCE) +# getcwd is defined in newlib header but not implemented +set(HAVE_GETCWD "" CACHE INTERNAL "Have symbol getcwd" FORCE) # Options that make sense for this platform option(AVM_DISABLE_SMP "Disable SMP support." OFF) @@ -63,9 +65,25 @@ option(AVM_WAIT_FOR_USB_CONNECT "Wait for USB connection before starting" OFF) option(AVM_WAIT_BOOTSEL_ON_EXIT "Wait in BOOTSEL rather than shutdown on exit" ON) option(AVM_REBOOT_ON_NOT_OK "Reboot Pico if result is not ok" OFF) option(AVM_CREATE_STACKTRACES "Create stacktraces" ON) - -# JIT is not available yet on rp2 -set(AVM_DISABLE_JIT ON FORCE) +option(AVM_DISABLE_JIT "Disable just in time compilation." ON) +if(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$") + # We only have armv6m for now, which all cortex-m should support + if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "armv6m") + endif() +elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^hazard3$") + # Pico2 RISC-V processor (Hazard3) + if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "riscv32") + endif() +else() + # Other processors not supported yet + if (NOT AVM_DISABLE_JIT) + message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}") + set(AVM_DISABLE_JIT ON CACHE BOOL "Disable just in time compilation." FORCE) + set(AVM_ENABLE_PRECOMPILED OFF CACHE BOOL "Enable execution of precompiled code, even if JIT is disabled." 
FORCE) + endif() +endif() set(AVM_DISABLE_TASK_DRIVER ON FORCE) diff --git a/src/platforms/rp2/src/lib/CMakeLists.txt b/src/platforms/rp2/src/lib/CMakeLists.txt index 957e346539..3cc69b56a3 100644 --- a/src/platforms/rp2/src/lib/CMakeLists.txt +++ b/src/platforms/rp2/src/lib/CMakeLists.txt @@ -31,6 +31,7 @@ set(HEADER_FILES set(SOURCE_FILES gpiodriver.c + jit_stream_flash.c networkdriver.c otp_crypto_platform.c platform_defaultatoms.c diff --git a/src/platforms/rp2/src/lib/jit_stream_flash.c b/src/platforms/rp2/src/lib/jit_stream_flash.c new file mode 100644 index 0000000000..77dfcca908 --- /dev/null +++ b/src/platforms/rp2/src/lib/jit_stream_flash.c @@ -0,0 +1,34 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "context.h" +#include "jit.h" +#include "term.h" + +ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) +{ + UNUSED(ctx); + UNUSED(jit_stream); + return NULL; +} + +#endif diff --git a/src/platforms/rp2/src/lib/sys.c b/src/platforms/rp2/src/lib/sys.c index aeffd72870..ac4c9c717c 100644 --- a/src/platforms/rp2/src/lib/sys.c +++ b/src/platforms/rp2/src/lib/sys.c @@ -474,3 +474,18 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global) struct RP2PlatformData *platform = global->platform_data; SMP_MUTEX_UNLOCK(platform->random_mutex); } + +#ifndef AVM_NO_JIT +// Returns the entry point of precompiled native code located at native_code + offset; the code is used in place (size is unused) +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ + UNUSED(size); +#if JIT_ARCH_TARGET == JIT_ARCH_ARMV6M + // armv6m target: we need to set the Thumb bit (bit 0) of the entry address + return (ModuleNativeEntryPoint) ((uintptr_t) (native_code + offset) | 1); +#else + // Other JIT targets (rp2 also selects riscv32 for Hazard3) have no Thumb bit: return the address unchanged + return (ModuleNativeEntryPoint) (native_code + offset); +#endif +} +#endif diff --git a/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt b/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt index cbdf581eef..b203d168b2 100644 --- a/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt +++ b/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt @@ -19,12 +19,30 @@ # include(ExternalProject) +if(NOT AVM_DISABLE_JIT) +set(host_atomvm_jit_target "--target=jit") +else() +set(host_atomvm_jit_target "") +endif() ExternalProject_Add(HostAtomVM SOURCE_DIR ../../../../../../ INSTALL_COMMAND cmake -E echo "Skipping install step." - BUILD_COMMAND cmake --build . --target=atomvmlib --target=PackBEAM --target=UF2Tool + BUILD_COMMAND cmake --build .
--target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM --target=UF2Tool ) +macro(jit_precompile module_name) + if(NOT AVM_DISABLE_JIT) + add_custom_command( + OUTPUT ${AVM_JIT_TARGET_ARCH}/${module_name}.beam + COMMAND mkdir -p ${AVM_JIT_TARGET_ARCH} + && erl -pa HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/ ${module_name}.beam + DEPENDS ${module_name}.beam HostAtomVM + COMMENT "Compiling ${module_name}.beam to ${AVM_JIT_TARGET_ARCH}" + VERBATIM + ) + endif() +endmacro() + function(compile_erlang module_name module_src_dir) add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam" @@ -33,6 +51,7 @@ function(compile_erlang module_name module_src_dir) COMMENT "Compiling ${module_name}.erl" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) + jit_precompile(${module_name}) set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam") endfunction() @@ -41,18 +60,26 @@ compile_erlang(test_clocks "") compile_erlang(test_smp "") compile_erlang(test_crypto ../../../esp32/test/main/test_erl_sources/) +set(erlang_test_beams + test_clocks.beam + test_smp.beam + test_crypto.beam +) + +if(NOT AVM_DISABLE_JIT) + set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams}) + list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/) + list(APPEND erlang_test_beams ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}}) +endif() + add_custom_command( OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/rp2_test_modules.avm" COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i rp2_test_modules.avm HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm - test_clocks.beam - test_smp.beam - test_crypto.beam + ${erlang_test_beams} DEPENDS HostAtomVM - "${CMAKE_CURRENT_BINARY_DIR}/test_clocks.beam" - "${CMAKE_CURRENT_BINARY_DIR}/test_smp.beam" - 
"${CMAKE_CURRENT_BINARY_DIR}/test_crypto.beam" + ${erlang_test_beams} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} VERBATIM ) diff --git a/src/platforms/stm32/CMakeLists.txt b/src/platforms/stm32/CMakeLists.txt index 569b0a5a17..51489ca5bf 100644 --- a/src/platforms/stm32/CMakeLists.txt +++ b/src/platforms/stm32/CMakeLists.txt @@ -36,9 +36,6 @@ option(AVM_CONFIG_REBOOT_ON_NOT_OK "Reboot when application exits with non 'ok' option(AVM_DISABLE_GPIO_NIFS "Disable GPIO nifs (input and output)" OFF) option(AVM_DISABLE_GPIO_PORT_DRIVER "Disable GPIO 'port' driver (input, output, and interrupts)" OFF) -# JIT is not available yet on esp32 -set(AVM_DISABLE_JIT ON FORCE) - set(AVM_DISABLE_SMP ON FORCE) set(AVM_DISABLE_TASK_DRIVER ON FORCE) @@ -85,6 +82,11 @@ if (NOT CMAKE_TOOLCHAIN_FILE) endif () mark_as_advanced(CMAKE_TOOLCHAIN_FILE) +option(AVM_DISABLE_JIT "Disable just in time compilation." ON) +if (NOT AVM_DISABLE_JIT) + set(AVM_JIT_TARGET_ARCH "armv6m") +endif() + if ((NOT ${CMAKE_C_COMPILER_ID} STREQUAL "GNU") OR (NOT ${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") OR (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 7.2.1)) diff --git a/src/platforms/stm32/src/lib/CMakeLists.txt b/src/platforms/stm32/src/lib/CMakeLists.txt index f1846c070d..536d21cc88 100644 --- a/src/platforms/stm32/src/lib/CMakeLists.txt +++ b/src/platforms/stm32/src/lib/CMakeLists.txt @@ -33,6 +33,7 @@ set(HEADER_FILES set(SOURCE_FILES gpio_driver.c + jit_stream_flash.c platform_nifs.c sys.c ../../../../libAtomVM/portnifloader.c diff --git a/src/platforms/stm32/src/lib/jit_stream_flash.c b/src/platforms/stm32/src/lib/jit_stream_flash.c new file mode 100644 index 0000000000..77dfcca908 --- /dev/null +++ b/src/platforms/stm32/src/lib/jit_stream_flash.c @@ -0,0 +1,34 @@ +/* + * This file is part of AtomVM. + * + * Copyright 2025 by Paul Guyot + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later + */ + +#ifndef AVM_NO_JIT + +#include "context.h" +#include "jit.h" +#include "term.h" + +ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream) +{ + UNUSED(ctx); + UNUSED(jit_stream); + return NULL; +} + +#endif diff --git a/src/platforms/stm32/src/lib/sys.c b/src/platforms/stm32/src/lib/sys.c index c65a39cab3..262a2be8e1 100644 --- a/src/platforms/stm32/src/lib/sys.c +++ b/src/platforms/stm32/src/lib/sys.c @@ -296,3 +296,12 @@ void sys_init_icache() __dsb; __isb; } + +#ifndef AVM_NO_JIT +ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset) +{ + UNUSED(size); + // We need to set the Thumb bit + return (ModuleNativeEntryPoint) ((uintptr_t) (native_code + offset) | 1); +} +#endif diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt index 267a4b3d29..6d6285886f 100644 --- a/tests/erlang_tests/CMakeLists.txt +++ b/tests/erlang_tests/CMakeLists.txt @@ -27,6 +27,7 @@ macro(jit_precompile module_name) ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_precompile.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_stream_binary.beam + ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${AVM_JIT_TARGET_ARCH}.beam ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${AVM_JIT_TARGET_ARCH}_asm.beam ) diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt index 70f46ccc09..6aa216b5dc 100644 --- 
a/tests/libs/jit/CMakeLists.txt +++ b/tests/libs/jit/CMakeLists.txt @@ -26,11 +26,20 @@ set(ERLANG_MODULES tests jit_tests jit_tests_common + jit_dwarf_tests jit_aarch64_tests jit_aarch64_asm_tests + jit_armv6m_tests + jit_armv6m_asm_tests + jit_riscv32_tests + jit_riscv32_asm_tests jit_x86_64_tests jit_x86_64_asm_tests ) -pack_archive(test_jit_lib ERLC_FLAGS -DTEST MODULES ${ERLANG_MODULES}) +if (NOT AVM_DISABLE_JIT_DWARF) + pack_archive(test_jit_lib ERLC_FLAGS -DTEST -DJIT_DWARF MODULES ${ERLANG_MODULES}) +else() + pack_archive(test_jit_lib ERLC_FLAGS -DTEST MODULES ${ERLANG_MODULES}) +endif() pack_eunit(test_jit estdlib eavmlib etest jit) diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl index cf053da995..11c7fe392f 100644 --- a/tests/libs/jit/jit_aarch64_asm_tests.erl +++ b/tests/libs/jit/jit_aarch64_asm_tests.erl @@ -20,9 +20,7 @@ -module(jit_aarch64_asm_tests). --ifdef(TEST). -include_lib("eunit/include/eunit.hrl"). --endif. -export([ list_to_integer/1, diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl index 18bdcf88cb..247728dd29 100644 --- a/tests/libs/jit/jit_aarch64_tests.erl +++ b/tests/libs/jit/jit_aarch64_tests.erl @@ -88,6 +88,51 @@ call_primitive_2_args_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). +call_primitive_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: f9401447 ldr x7, [x2, #40]\n" + " 4: d2800202 mov x2, #0x10 // #16\n" + " 8: d2800403 mov x3, #0x20 // #32\n" + " c: d2800044 mov x4, #0x2 // #2\n" + " 10: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_primitive_6_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get bin_ptr from x_reg 0 (similar to get_list_test pattern) + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK), + % Get another register for the last parameter to test {free, Reg} handling + {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}), + % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments + {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [ + ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg} + ]), + Stream = ?BACKEND:stream(State4), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: 927ef4e7 and x7, x7, #0xfffffffffffffffc\n" + " 8: f9401c08 ldr x8, [x0, #56]\n" + " c: f940b850 ldr x16, [x2, #368]\n" + " 10: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 14: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 18: aa0703e2 mov x2, x7\n" + " 1c: d2800803 mov x3, #0x40 // #64\n" + " 20: d2800104 mov x4, #0x8 // #8\n" + " 24: aa0803e5 mov x5, x8\n" + " 28: d63f0200 blr x16\n" + " 2c: aa0003e7 mov x7, x0\n" + " 30: a8c10be1 ldp x1, x2, [sp], #16\n" + " 34: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_primitive_extended_regs_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), @@ -146,6 +191,44 @@ call_primitive_extended_regs_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). 
+call_primitive_few_free_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg1} = ?BACKEND:move_to_native_register(State0, 1), + {State2, Reg2} = ?BACKEND:move_to_native_register(State1, 2), + {State3, Reg3} = ?BACKEND:move_to_native_register(State2, 3), + {State4, Reg4} = ?BACKEND:move_to_native_register(State3, 4), + {State5, Reg5} = ?BACKEND:move_to_native_register(State4, 5), + {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [ + Reg2, Reg1, {free, Reg4}, Reg3, {free, Reg5} + ]), + State7 = ?BACKEND:free_native_registers(State6, [ResultReg, Reg2, Reg1, Reg3]), + ?BACKEND:assert_all_native_free(State7), + Stream = ?BACKEND:stream(State7), + Dump = << + " 0: d2800027 mov x7, #0x1 // #1\n" + " 4: d2800048 mov x8, #0x2 // #2\n" + " 8: d2800069 mov x9, #0x3 // #3\n" + " c: d280008a mov x10, #0x4 // #4\n" + " 10: d28000ab mov x11, #0x5 // #5\n" + " 14: f940e450 ldr x16, [x2, #456]\n" + " 18: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 1c: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 20: a9bf23e9 stp x9, x8, [sp, #-16]!\n" + " 24: f81f0fe7 str x7, [sp, #-16]!\n" + " 28: aa0803e0 mov x0, x8\n" + " 2c: aa0703e1 mov x1, x7\n" + " 30: aa0a03e2 mov x2, x10\n" + " 34: aa0903e3 mov x3, x9\n" + " 38: aa0b03e4 mov x4, x11\n" + " 3c: d63f0200 blr x16\n" + " 40: aa0003ea mov x10, x0\n" + " 44: f84107e7 ldr x7, [sp], #16\n" + " 48: a8c123e9 ldp x9, x8, [sp], #16\n" + " 4c: a8c10be1 ldp x1, x2, [sp], #16\n" + " 50: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_only_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -168,6 +251,23 @@ call_ext_only_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). 
+call_primitive_last_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA} + ]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: f9404c48 ldr x8, [x2, #152]\n" + " 8: d2800102 mov x2, #0x8 // #8\n" + " c: d2805963 mov x3, #0x2cb // #715\n" + " 10: aa0703e4 mov x4, x7\n" + " 14: d61f0100 br x8" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_last_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -760,17 +860,34 @@ if_else_block_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -shift_right_test() -> - State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:shift_right(State1, Reg, 3), - Stream = ?BACKEND:stream(State2), - Dump = - << - " 0: f9401807 ldr x7, [x0, #48]\n" - " 4: d343fce7 lsr x7, x7, #3" - >>, - ?assertEqual(dump_to_bin(Dump), Stream). 
+shift_right_test_() -> + [ + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg} = ?BACKEND:shift_right(State1, {free, Reg}, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: d343fce7 lsr x7, x7, #3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, OtherReg} = ?BACKEND:shift_right(State1, Reg, 3), + ?assertNotEqual(OtherReg, Reg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: f9401807 ldr x7, [x0, #48]\n" + " 4: d343fce8 lsr x8, x7, #3" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. shift_left_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -1021,6 +1138,179 @@ is_boolean_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). 
+%% Test OP_WAIT_TIMEOUT pattern +wait_timeout_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + Label = 42, + {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0), + {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000), + State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [ + ctx, jit_state, {free, TimeoutReg}, Label + ]), + State4 = ?BACKEND:add_label(State3, OffsetRef0), + State5 = ?BACKEND:continuation_entry_point(State4), + {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ + ctx, jit_state + ]), + State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}), + % ?WAITING_TIMEOUT_EXPIRED + {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]), + State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) -> + ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [ + ctx, jit_state, Label + ]) + end), + State10 = ?BACKEND:update_branches(State9), + + Stream = ?BACKEND:stream(State10), + Dump = << + " 0: 100000e7 adr x7, 0x1c\n" + " 4: f9000427 str x7, [x1, #8]\n" + " 8: d2827107 mov x7, #0x1388 // #5000\n" + " c: f9407848 ldr x8, [x2, #240]\n" + " 10: aa0703e2 mov x2, x7\n" + " 14: d2800543 mov x3, #0x2a // #42\n" + " 18: d61f0100 br x8\n" + " 1c: f9405450 ldr x16, [x2, #168]\n" + " 20: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 24: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 28: d63f0200 blr x16\n" + " 2c: aa0003e7 mov x7, x0\n" + " 30: a8c10be1 ldp x1, x2, [sp], #16\n" + " 34: a8c103fe ldp x30, x0, [sp], #16\n" + " 38: eb0000ff cmp x7, x0\n" + " 3c: 54000060 b.eq 0x48 // b.none\n" + " 40: aa0703e0 mov x0, x7\n" + " 44: d65f03c0 ret\n" + " 48: f9408450 ldr x16, [x2, #264]\n" + " 4c: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 50: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 54: d2800041 mov x1, #0x2 // #2\n" + " 58: d63f0200 blr x16\n" + " 5c: aa0003e7 mov 
x7, x0\n" + " 60: a8c10be1 ldp x1, x2, [sp], #16\n" + " 64: a8c103fe ldp x30, x0, [sp], #16\n" + " 68: b5000087 cbnz x7, 0x78\n" + " 6c: f9407c47 ldr x7, [x2, #248]\n" + " 70: d2800542 mov x2, #0x2a // #42\n" + " 74: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test OP_WAIT pattern that uses set_continuation_to_label +wait_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + State1 = ?BACKEND:jump_table(State0, 5), + State2 = ?BACKEND:add_label(State1, 1), + Label = 2, + State3 = ?BACKEND:set_continuation_to_label(State2, Label), + State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]), + + Stream = ?BACKEND:stream(State4), + Dump = << + " 0: 14000000 b 0x0\n" + " 4: 14000000 b 0x4\n" + " 8: 14000000 b 0x8\n" + " c: 14000000 b 0xc\n" + " 10: 14000000 b 0x10\n" + " 14: 14000000 b 0x14\n" + " 18: 10000007 adr x7, 0x18\n" + " 1c: f9000427 str x7, [x1, #8]\n" + " 20: f9407447 ldr x7, [x2, #232]\n" + " 24: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +return_labels_and_lines_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Test return_labels_and_lines with some sample labels and lines + State1 = ?BACKEND:add_label(State0, 2, 32), + State2 = ?BACKEND:add_label(State1, 1, 16), + + % {Line, Offset} pairs + SortedLines = [{10, 16}, {20, 32}], + + State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines), + Stream = ?BACKEND:stream(State3), + + % Should have generated adr + ret + labels table + lines table + % adr = 4 bytes, ret = 4 bytes, labels table = 6*2 = 12 bytes, lines table = 6*2 = 12 bytes + % Total minimum: 36 bytes + ?assert(byte_size(Stream) >= 36), + + % Expected: adr x0, #8 + ret + labels table + lines table + % The data tables start at offset 0x8, so we load PC + 8 into x0 + Dump = << + " 0: 10000040 adr x0, 0x8\n" + " 4: d65f03c0 ret\n" + " 8: 01000200 .word 0x01000200\n" + " c: 10000000 adr x0, 0xc\n" + " 10: 00000200 .word 0x00000200\n" + " 14: 02002000 .word 0x02002000\n" + " 18: 00000a00 .word 0x00000a00\n" + " 1c: 14001000 .word 0x14001000\n" + " 20: 20000000 .word 0x20000000" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test call_primitive with {free, {x_reg, X}} +gc_bif2_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]), + {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [ + ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}} + ]), + + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: f9402050 ldr x16, [x2, #64]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: aa0103e0 mov x0, x1\n" + " 10: d2800541 mov x1, #0x2a // #42\n" + " 14: d63f0200 blr x16\n" + " 18: aa0003e7 mov x7, x0\n" + " 1c: a8c10be1 ldp x1, x2, [sp], #16\n" + " 20: a8c103fe ldp x30, x0, [sp], #16\n" + " 24: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 28: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " 2c: d2800001 mov x1, #0x0 // #0\n" + " 30: d2800062 mov x2, #0x3 // #3\n" + " 34: f9401403 ldr x3, [x0, #40]\n" + " 38: f9400063 ldr x3, [x3]\n" + " 3c: f9401804 ldr x4, [x0, #48]\n" + " 40: d63f00e0 blr x7\n" + " 44: aa0003e7 mov x7, x0\n" + " 48: a8c10be1 ldp x1, x2, [sp], #16\n" + " 4c: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test case where parameter value is in r1 +memory_ensure_free_with_roots_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [ + ctx, jit_state, {free, r1}, 4, 1 + ]), + + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: f940b050 ldr x16, [x2, #352]\n" + " 4: a9bf03fe stp x30, x0, [sp, #-16]!\n" + " 8: a9bf0be1 stp x1, x2, [sp, #-16]!\n" + " c: aa0103e2 mov x2, x1\n" + " 10: d2800083 mov x3, #0x4 // #4\n" + " 14: d2800024 mov x4, #0x1 // #1\n" + " 18: d63f0200 blr x16\n" + " 1c: aa0003e7 mov x7, x0\n" + " 20: a8c10be1 ldp x1, x2, [sp], #16\n" + " 24: a8c103fe ldp x30, x0, [sp], #16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + call_ext_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), @@ -1645,6 +1935,66 @@ move_to_native_register_test_() -> ] end}. +add_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:add(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). + +add_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + add_test0(State0, r2, 2, << + " 0: 91000842 add x2, x2, #0x2" + >>) + end), + ?_test(begin + add_test0(State0, r2, 256, << + " 0: 91040042 add x2, x2, #0x100" + >>) + end), + ?_test(begin + add_test0(State0, r2, r3, << + " 0: 8b030042 add x2, x2, x3" + >>) + end) + ] + end}. + +sub_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:sub(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +sub_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + sub_test0(State0, r2, 2, << + " 0: d1000842 sub x2, x2, #0x2" + >>) + end), + ?_test(begin + sub_test0(State0, r2, 256, << + " 0: d1040042 sub x2, x2, #0x100" + >>) + end), + ?_test(begin + sub_test0(State0, r2, r3, << + " 0: cb030042 sub x2, x2, x3" + >>) + end) + ] + end}. + mul_test0(State0, Reg, Imm, Dump) -> State1 = ?BACKEND:mul(State0, Reg, Imm), Stream = ?BACKEND:stream(State1), @@ -1719,6 +2069,21 @@ mul_test_() -> ] end}. +%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, r0}), + Stream = ?BACKEND:stream(State1), + % Expected: adr x7, NetOffset; add x7, x7, x0; br x7 + % With default offset 0, NetOffset = 0 - 0 = 0, temp register is r7 + Dump = + << + " 0: 10000007 adr x7, 0x0\n" + " 4: 8b0000e7 add x7, x7, x0\n" + " 8: d61f00e0 br x7" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). diff --git a/tests/libs/jit/jit_armv6m_asm_tests.erl b/tests/libs/jit/jit_armv6m_asm_tests.erl new file mode 100644 index 0000000000..eefe6781ef --- /dev/null +++ b/tests/libs/jit/jit_armv6m_asm_tests.erl @@ -0,0 +1,343 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_armv6m_asm_tests). + +-include_lib("eunit/include/eunit.hrl"). + +-define(_assertAsmEqual(Bin, Str, Value), + ?_assertEqual(jit_tests_common:asm(arm, Bin, Str), Value) +). + +adds_test_() -> + [ + ?_assertAsmEqual(<<16#3038:16/little>>, "adds r0, #56", jit_armv6m_asm:adds(r0, 56)), + ?_assertAsmEqual( + <<16#3038:16/little>>, "adds r0, r0, #56", jit_armv6m_asm:adds(r0, r0, 56) + ), + ?_assertAsmEqual(<<16#3000:16/little>>, "adds r0, #0", jit_armv6m_asm:adds(r0, 0)), + ?_assertAsmEqual(<<16#3101:16/little>>, "adds r1, #1", jit_armv6m_asm:adds(r1, 1)), + ?_assertAsmEqual(<<16#1C42:16/little>>, "adds r2, r0, #1", jit_armv6m_asm:adds(r2, r0, 1)), + ?_assertAsmEqual(<<16#18c9:16/little>>, "adds r1, r1, r3", jit_armv6m_asm:adds(r1, r1, r3)), + ?_assertAsmEqual(<<16#1850:16/little>>, "adds r0, r2, r1", jit_armv6m_asm:adds(r0, r2, r1)) + ]. + +add_test_() -> + [ + %% ARMv6-M Thumb ADD instructions (register, high registers supported) + %% ADD Rd, Rm - adds register value to register (supports PC) + ?_assertAsmEqual(<<16#449f:16/little>>, "add pc, r3", jit_armv6m_asm:add(pc, r3)), + ?_assertAsmEqual(<<16#4440:16/little>>, "add r0, r8", jit_armv6m_asm:add(r0, r8)), + ?_assertAsmEqual(<<16#4488:16/little>>, "add r8, r1", jit_armv6m_asm:add(r8, r1)), + ?_assertAsmEqual(<<16#44c9:16/little>>, "add r9, r9", jit_armv6m_asm:add(r9, r9)), + ?_assertAsmEqual(<<16#4419:16/little>>, "add r1, r3", jit_armv6m_asm:add(r1, r3)) + ]. 
+ +subs_test_() -> + [ + ?_assertAsmEqual(<<16#3f38:16/little>>, "subs r7, #56", jit_armv6m_asm:subs(r7, 56)), + ?_assertAsmEqual( + <<16#3f38:16/little>>, "subs r7, r7, #56", jit_armv6m_asm:subs(r7, r7, 56) + ), + ?_assertAsmEqual(<<16#3800:16/little>>, "subs r0, #0", jit_armv6m_asm:subs(r0, 0)), + ?_assertAsmEqual(<<16#1e42:16/little>>, "subs r2, r0, #1", jit_armv6m_asm:subs(r2, r0, 1)), + ?_assertAsmEqual(<<16#1ad1:16/little>>, "subs r1, r2, r3", jit_armv6m_asm:subs(r1, r2, r3)) + ]. + +sub_test_() -> + [ + ?_assertAsmEqual(<<16#B082:16/little>>, "sub sp, #8", jit_armv6m_asm:sub(sp, 8)), + ?_assertAsmEqual(<<16#B082:16/little>>, "sub sp, sp, #8", jit_armv6m_asm:sub(sp, sp, 8)), + ?_assertAsmEqual(<<16#B080:16/little>>, "sub sp, #0", jit_armv6m_asm:sub(sp, 0)), + ?_assertAsmEqual(<<16#B084:16/little>>, "sub sp, #16", jit_armv6m_asm:sub(sp, 16)), + ?_assertAsmEqual(<<16#B0FF:16/little>>, "sub sp, #508", jit_armv6m_asm:sub(sp, 508)) + ]. + +muls_test_() -> + [ + ?_assertAsmEqual(<<16#4359:16/little>>, "muls r1, r3", jit_armv6m_asm:muls(r1, r3)), + ?_assertAsmEqual(<<16#4348:16/little>>, "muls r0, r1", jit_armv6m_asm:muls(r0, r1)) + ]. + +b_test_() -> + [ + %% Thumb B (unconditional) encoding tests - ARMv6-M 16-bit only + ?_assertAsmEqual(<<16#E7FE:16/little>>, "b .+0", jit_armv6m_asm:b(0)), + ?_assertAsmEqual(<<16#E006:16/little>>, "b .+16", jit_armv6m_asm:b(16)), + ?_assertAsmEqual(<<16#E7DE:16/little>>, "b .-64", jit_armv6m_asm:b(-64)), + ?_assertAsmEqual(<<16#E000:16/little>>, "b .+4", jit_armv6m_asm:b(4)), + ?_assertAsmEqual(<<16#E3FF:16/little>>, "b .+2050", jit_armv6m_asm:b(2050)), + ?_assertAsmEqual(<<16#E400:16/little>>, "b .-2044", jit_armv6m_asm:b(-2044)), + %% Test error cases for offsets too large for ARMv6-M + ?_assertError({unencodable_offset, 2052}, jit_armv6m_asm:b(2052)), + ?_assertError({unencodable_offset, -2046}, jit_armv6m_asm:b(-2046)) + ]. 
+ +blx_test_() -> + [ + %% Thumb BLX (register) encoding tests + ?_assertAsmEqual(<<16#4780:16/little>>, "blx r0", jit_armv6m_asm:blx(r0)), + ?_assertAsmEqual(<<16#4788:16/little>>, "blx r1", jit_armv6m_asm:blx(r1)), + ?_assertAsmEqual(<<16#47E8:16/little>>, "blx r13", jit_armv6m_asm:blx(r13)) + ]. + +bx_test_() -> + [ + %% Thumb BX (branch exchange) encoding tests + ?_assertAsmEqual(<<16#4700:16/little>>, "bx r0", jit_armv6m_asm:bx(r0)), + ?_assertAsmEqual(<<16#4708:16/little>>, "bx r1", jit_armv6m_asm:bx(r1)), + ?_assertAsmEqual(<<16#4768:16/little>>, "bx r13", jit_armv6m_asm:bx(r13)) + ]. + +ldr_test_() -> + [ + %% ARMv6-M Thumb LDR immediate offset (0-124, multiple of 4) + ?_assertAsmEqual( + <<16#6889:16/little>>, "ldr r1, [r1, #8]", jit_armv6m_asm:ldr(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#6982:16/little>>, "ldr r2, [r0, #24]", jit_armv6m_asm:ldr(r2, {r0, 24}) + ), + %% SP-relative load (0-1020, multiple of 4) + ?_assertAsmEqual( + <<16#9f00:16/little>>, "ldr r7, [sp, #0]", jit_armv6m_asm:ldr(r7, {sp, 0}) + ), + ?_assertAsmEqual( + <<16#9801:16/little>>, "ldr r0, [sp, #4]", jit_armv6m_asm:ldr(r0, {sp, 4}) + ), + %% PC-relative load (0-1020, multiple of 4) + ?_assertAsmEqual( + <<16#4a18:16/little>>, "ldr r2, [pc, #96]", jit_armv6m_asm:ldr(r2, {pc, 96}) + ), + %% Register offset + ?_assertAsmEqual( + <<16#58d1:16/little>>, "ldr r1, [r2, r3]", jit_armv6m_asm:ldr(r1, {r2, r3}) + ) + ]. 
+ +movs_test_() -> + [ + %% ARMv6-M Thumb MOVS instructions (sets flags) + %% MOVS immediate (8-bit only, 0-255) + ?_assertAsmEqual(<<16#2000:16/little>>, "movs r0, #0", jit_armv6m_asm:movs(r0, 0)), + ?_assertAsmEqual(<<16#2101:16/little>>, "movs r1, #1", jit_armv6m_asm:movs(r1, 1)), + ?_assertAsmEqual(<<16#22ff:16/little>>, "movs r2, #255", jit_armv6m_asm:movs(r2, 255)), + %% MOVS register - low registers only (r0-r7) + ?_assertAsmEqual(<<16#0008:16/little>>, "movs r0, r1", jit_armv6m_asm:movs(r0, r1)), + ?_assertAsmEqual(<<16#001a:16/little>>, "movs r2, r3", jit_armv6m_asm:movs(r2, r3)) + ]. + +mov_test_() -> + [ + %% ARMv6-M Thumb MOV instructions (no flags, for high registers) + %% MOV register - requires at least one high register (r8-r15) + ?_assertAsmEqual(<<16#4680:16/little>>, "mov r8, r0", jit_armv6m_asm:mov(r8, r0)), + ?_assertAsmEqual(<<16#4640:16/little>>, "mov r0, r8", jit_armv6m_asm:mov(r0, r8)), + ?_assertAsmEqual(<<16#46c8:16/little>>, "mov r8, r9", jit_armv6m_asm:mov(r8, r9)), + ?_assertAsmEqual(<<16#46c0:16/little>>, "mov r8, r8", jit_armv6m_asm:mov(r8, r8)), + ?_assertAsmEqual(<<16#4619:16/little>>, "mov r1, r3", jit_armv6m_asm:mov(r1, r3)), + ?_assertAsmEqual(<<16#46c0:16/little>>, "nop", jit_armv6m_asm:nop()) + ]. + +str_test_() -> + [ + %% ARMv6-M Thumb STR immediate offset (0-124, multiple of 4) + ?_assertAsmEqual( + <<16#6089:16/little>>, "str r1, [r1, #8]", jit_armv6m_asm:str(r1, {r1, 8}) + ), + ?_assertAsmEqual( + <<16#6182:16/little>>, "str r2, [r0, #24]", jit_armv6m_asm:str(r2, {r0, 24}) + ), + %% SP-relative store (0-1020, multiple of 4) + ?_assertAsmEqual( + <<16#9700:16/little>>, "str r7, [sp, #0]", jit_armv6m_asm:str(r7, {sp, 0}) + ), + ?_assertAsmEqual( + <<16#9001:16/little>>, "str r0, [sp, #4]", jit_armv6m_asm:str(r0, {sp, 4}) + ), + %% Register offset + ?_assertAsmEqual( + <<16#50d1:16/little>>, "str r1, [r2, r3]", jit_armv6m_asm:str(r1, {r2, r3}) + ) + ]. 
+ +cmp_test_() -> + [ + %% ARMv6-M Thumb CMP register (low registers only) + ?_assertAsmEqual(<<16#4288:16/little>>, "cmp r0, r1", jit_armv6m_asm:cmp(r0, r1)), + ?_assertAsmEqual(<<16#42bb:16/little>>, "cmp r3, r7", jit_armv6m_asm:cmp(r3, r7)), + %% ARMv6-M Thumb CMP immediate (8-bit, 0-255, low registers only) + ?_assertAsmEqual(<<16#2800:16/little>>, "cmp r0, #0", jit_armv6m_asm:cmp(r0, 0)), + ?_assertAsmEqual(<<16#2805:16/little>>, "cmp r0, #5", jit_armv6m_asm:cmp(r0, 5)), + ?_assertAsmEqual(<<16#2fff:16/little>>, "cmp r7, #255", jit_armv6m_asm:cmp(r7, 255)) + ]. + +ands_test_() -> + [ + %% ARMv6-M Thumb ANDS register (2-operand: Rd = Rd AND Rm) + ?_assertAsmEqual(<<16#4008:16/little>>, "ands r0, r1", jit_armv6m_asm:ands(r0, r1)), + ?_assertAsmEqual(<<16#4011:16/little>>, "ands r1, r2", jit_armv6m_asm:ands(r1, r2)), + ?_assertAsmEqual(<<16#401a:16/little>>, "ands r2, r3", jit_armv6m_asm:ands(r2, r3)) + ]. + +orrs_test_() -> + [ + %% ARMv6-M Thumb ORRS register (2-operand: Rd = Rd OR Rm, sets flags) + ?_assertAsmEqual(<<16#4308:16/little>>, "orrs r0, r1", jit_armv6m_asm:orrs(r0, r1)), + ?_assertAsmEqual(<<16#4311:16/little>>, "orrs r1, r2", jit_armv6m_asm:orrs(r1, r2)), + ?_assertAsmEqual(<<16#431a:16/little>>, "orrs r2, r3", jit_armv6m_asm:orrs(r2, r3)) + ]. + +bics_test_() -> + [ + ?_assertAsmEqual(<<16#4391:16/little>>, "bics r1, r2", jit_armv6m_asm:bics(r1, r2)), + ?_assertAsmEqual(<<16#43a3:16/little>>, "bics r3, r4", jit_armv6m_asm:bics(r3, r4)) + ]. + +negs_test_() -> + [ + ?_assertAsmEqual(<<16#4251:16/little>>, "negs r1, r2", jit_armv6m_asm:negs(r1, r2)), + ?_assertAsmEqual(<<16#4263:16/little>>, "negs r3, r4", jit_armv6m_asm:negs(r3, r4)) + ]. + +rsbs_test_() -> + [ + ?_assertAsmEqual(<<16#4251:16/little>>, "rsbs r1, r2, 0", jit_armv6m_asm:rsbs(r1, r2, 0)), + ?_assertAsmEqual(<<16#4263:16/little>>, "rsbs r3, r4, 0", jit_armv6m_asm:rsbs(r3, r4, 0)) + ]. 
+ +lsls_test_() -> + [ + %% ARMv6-M Thumb LSLS immediate shift (1-31) + ?_assertAsmEqual(<<16#0148:16/little>>, "lsls r0, r1, #5", jit_armv6m_asm:lsls(r0, r1, 5)), + ?_assertAsmEqual(<<16#0212:16/little>>, "lsls r2, r2, #8", jit_armv6m_asm:lsls(r2, r2, 8)), + %% LSLS register shift + ?_assertAsmEqual(<<16#409a:16/little>>, "lsls r2, r3", jit_armv6m_asm:lsls(r2, r3)) + ]. + +lsrs_test_() -> + [ + %% ARMv6-M Thumb LSRS immediate shift (1-32) + ?_assertAsmEqual(<<16#0948:16/little>>, "lsrs r0, r1, #5", jit_armv6m_asm:lsrs(r0, r1, 5)), + ?_assertAsmEqual(<<16#0a12:16/little>>, "lsrs r2, r2, #8", jit_armv6m_asm:lsrs(r2, r2, 8)), + %% LSRS register shift + ?_assertAsmEqual(<<16#40da:16/little>>, "lsrs r2, r3", jit_armv6m_asm:lsrs(r2, r3)) + ]. + +tst_test_() -> + [ + %% ARMv6-M Thumb TST instructions (register only, low registers) + %% TST Rn, Rm - test bits (performs Rn & Rm, updates flags) + ?_assertAsmEqual(<<16#4208:16/little>>, "tst r0, r1", jit_armv6m_asm:tst(r0, r1)), + ?_assertAsmEqual(<<16#421a:16/little>>, "tst r2, r3", jit_armv6m_asm:tst(r2, r3)), + ?_assertAsmEqual(<<16#4239:16/little>>, "tst r1, r7", jit_armv6m_asm:tst(r1, r7)) + ]. 
+ +bcc_test_() -> + [ + %% Thumb conditional branch encoding tests - ARMv6-M 16-bit only + ?_assertAsmEqual(<<16#D0FE:16/little>>, "beq .+0", jit_armv6m_asm:bcc(eq, 0)), + ?_assertAsmEqual(<<16#D1FE:16/little>>, "bne .+0", jit_armv6m_asm:bcc(ne, 0)), + ?_assertAsmEqual(<<16#D1DE:16/little>>, "bne .-64", jit_armv6m_asm:bcc(ne, -64)), + ?_assertAsmEqual(<<16#D03E:16/little>>, "beq .+128", jit_armv6m_asm:bcc(eq, 128)), + ?_assertAsmEqual(<<16#D23E:16/little>>, "bcs .+128", jit_armv6m_asm:bcc(cs, 128)), + ?_assertAsmEqual(<<16#D33E:16/little>>, "bcc .+128", jit_armv6m_asm:bcc(cc, 128)), + ?_assertAsmEqual(<<16#D43E:16/little>>, "bmi .+128", jit_armv6m_asm:bcc(mi, 128)), + ?_assertAsmEqual(<<16#D53E:16/little>>, "bpl .+128", jit_armv6m_asm:bcc(pl, 128)), + ?_assertAsmEqual(<<16#D63E:16/little>>, "bvs .+128", jit_armv6m_asm:bcc(vs, 128)), + ?_assertAsmEqual(<<16#D83E:16/little>>, "bhi .+128", jit_armv6m_asm:bcc(hi, 128)), + ?_assertAsmEqual(<<16#D93E:16/little>>, "bls .+128", jit_armv6m_asm:bcc(ls, 128)), + ?_assertAsmEqual(<<16#DA3E:16/little>>, "bge .+128", jit_armv6m_asm:bcc(ge, 128)), + ?_assertAsmEqual(<<16#DB3E:16/little>>, "blt .+128", jit_armv6m_asm:bcc(lt, 128)), + ?_assertAsmEqual(<<16#DC3E:16/little>>, "bgt .+128", jit_armv6m_asm:bcc(gt, 128)), + ?_assertAsmEqual(<<16#DD3E:16/little>>, "ble .+128", jit_armv6m_asm:bcc(le, 128)), + ?_assertAsmEqual(<<16#E03E:16/little>>, "bal .+128", jit_armv6m_asm:bcc(al, 128)), + ?_assertAsmEqual(<<16#D07F:16/little>>, "beq .+258", jit_armv6m_asm:bcc(eq, 258)), + ?_assertAsmEqual(<<16#D180:16/little>>, "bne .-252", jit_armv6m_asm:bcc(ne, -252)), + %% Test error cases for offsets too large for ARMv6-M + ?_assertError({unencodable_offset, 260}, jit_armv6m_asm:bcc(eq, 260)), + ?_assertError({unencodable_offset, -254}, jit_armv6m_asm:bcc(ne, -254)) + ]. 
+ +adr_test_() -> + [ + %% ARMv6-M Thumb ADR (PC-relative address) - implemented as ADD Rd, PC, #imm + %% adr(Rd, N) means "Rd = current_PC + N" where PC is instruction address + %% Range: 4-1024, must be multiple of 4 + ?_assertAsmEqual(<<16#a000:16/little>>, "adr r0, .+4", jit_armv6m_asm:adr(r0, 4)), + ?_assertAsmEqual(<<16#a101:16/little>>, "adr r1, .+8", jit_armv6m_asm:adr(r1, 8)), + ?_assertAsmEqual(<<16#a202:16/little>>, "adr r2, .+12", jit_armv6m_asm:adr(r2, 12)), + ?_assertAsmEqual(<<16#a708:16/little>>, "adr r7, .+36", jit_armv6m_asm:adr(r7, 36)), + %% Test maximum offset value (1024 bytes) + ?_assertAsmEqual(<<16#a0ff:16/little>>, "adr r0, .+1024", jit_armv6m_asm:adr(r0, 1024)) + ]. + +push_test_() -> + [ + %% ARMv6-M Thumb PUSH instruction (low registers + optional LR) + %% Single register push + ?_assertAsmEqual(<<16#b401:16/little>>, "push {r0}", jit_armv6m_asm:push([r0])), + %% Multiple register push + ?_assertAsmEqual( + <<16#b407:16/little>>, "push {r0, r1, r2}", jit_armv6m_asm:push([r0, r1, r2]) + ), + %% Push with LR + ?_assertAsmEqual(<<16#b500:16/little>>, "push {lr}", jit_armv6m_asm:push([lr])), + %% Push registers + LR + ?_assertAsmEqual( + <<16#b507:16/little>>, "push {r0, r1, r2, lr}", jit_armv6m_asm:push([r0, r1, r2, lr]) + ) + ]. + +pop_test_() -> + [ + %% ARMv6-M Thumb POP instruction (low registers + optional PC) + %% Single register pop + ?_assertAsmEqual(<<16#bc01:16/little>>, "pop {r0}", jit_armv6m_asm:pop([r0])), + %% Multiple register pop + ?_assertAsmEqual( + <<16#bc07:16/little>>, "pop {r0, r1, r2}", jit_armv6m_asm:pop([r0, r1, r2]) + ), + %% Pop with PC + ?_assertAsmEqual(<<16#bd00:16/little>>, "pop {pc}", jit_armv6m_asm:pop([pc])), + %% Pop registers + PC + ?_assertAsmEqual( + <<16#bd07:16/little>>, "pop {r0, r1, r2, pc}", jit_armv6m_asm:pop([r0, r1, r2, pc]) + ) + ]. 
+ +bkpt_test_() -> + [ + %% BKPT #0 + ?_assertAsmEqual(<<16#be00:16/little>>, "bkpt #0", jit_armv6m_asm:bkpt(0)), + %% BKPT #1 + ?_assertAsmEqual(<<16#be01:16/little>>, "bkpt #1", jit_armv6m_asm:bkpt(1)), + %% BKPT #255 + ?_assertAsmEqual(<<16#beff:16/little>>, "bkpt #255", jit_armv6m_asm:bkpt(255)) + ]. + +mvns_test_() -> + [ + %% ARMv6-M Thumb MVNS instructions (register only, low registers) + %% MVNS Rd, Rm - bitwise NOT (performs ~Rm -> Rd, sets flags) + ?_assertAsmEqual(<<16#43e3:16/little>>, "mvns r3, r4", jit_armv6m_asm:mvns(r3, r4)), + ?_assertAsmEqual(<<16#43f3:16/little>>, "mvns r3, r6", jit_armv6m_asm:mvns(r3, r6)), + ?_assertAsmEqual(<<16#43c8:16/little>>, "mvns r0, r1", jit_armv6m_asm:mvns(r0, r1)) + ]. diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl new file mode 100644 index 0000000000..c7cf14ae75 --- /dev/null +++ b/tests/libs/jit/jit_armv6m_tests.erl @@ -0,0 +1,3749 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_armv6m_tests). + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). +-endif. + +-include("jit/include/jit.hrl"). +-include("jit/src/term.hrl"). +-include("jit/src/default_atoms.hrl"). +-include("jit/src/primitives.hrl"). + +-define(BACKEND, jit_armv6m). 
+ +% disassembly obtained with: +% arm-elf-objdump -b binary -D dump.bin -M arm + +call_primitive_0_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 0, [ctx, jit_state]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 6817 ldr r7, [r2, #0]\n" + " 2: b405 push {r0, r2}\n" + " 4: 9902 ldr r1, [sp, #8]\n" + " 6: 47b8 blx r7\n" + " 8: 4607 mov r7, r0\n" + " a: bc05 pop {r0, r2}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_1_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 1, [ctx, jit_state]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 6857 ldr r7, [r2, #4]\n" + " 2: b405 push {r0, r2}\n" + " 4: 9902 ldr r1, [sp, #8]\n" + " 6: 47b8 blx r7\n" + " 8: 4607 mov r7, r0\n" + " a: bc05 pop {r0, r2}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_2_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, 2, [ctx, 42, 43, 44]), + ?assertEqual(r7, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 6897 ldr r7, [r2, #8]\n" + " 2: b405 push {r0, r2}\n" + " 4: 212a movs r1, #42 ; 0x2a\n" + " 6: 222b movs r2, #43 ; 0x2b\n" + " 8: 232c movs r3, #44 ; 0x2c\n" + " a: 47b8 blx r7\n" + " c: 4607 mov r7, r0\n" + " e: bc05 pop {r0, r2}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_primitive_5_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 6957 ldr r7, [r2, #20]\n" + " 2: b082 sub sp, #8\n" + " 4: 2602 movs r6, #2\n" + " 6: 9600 str r6, [sp, #0]\n" + " 8: 9902 ldr r1, [sp, #8]\n" + " a: 2210 movs r2, #16\n" + " c: 2320 movs r3, #32\n" + " e: 47b8 blx r7\n" + " 10: b002 add sp, #8\n" + " 12: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_primitive_6_args_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get bin_ptr from x_reg 0 (similar to get_list_test pattern) + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK), + % Get another register for the last parameter to test {free, Reg} handling + {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}), + % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments + {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [ + ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg} + ]), + Stream = ?BACKEND:stream(State4), + Dump = + << + " 0: 6987 ldr r7, [r0, #24]\n" + " 2: 2603 movs r6, #3\n" + " 4: 43b7 bics r7, r6\n" + " 6: 69c6 ldr r6, [r0, #28]\n" + " 8: 25b8 movs r5, #184 ; 0xb8\n" + " a: 5955 ldr r5, [r2, r5]\n" + " c: b405 push {r0, r2}\n" + " e: b082 sub sp, #8\n" + " 10: 9601 str r6, [sp, #4]\n" + " 12: 2608 movs r6, #8\n" + " 14: 9600 str r6, [sp, #0]\n" + " 16: 9904 ldr r1, [sp, #16]\n" + " 18: 463a mov r2, r7\n" + " 1a: 2340 movs r3, #64 ; 0x40\n" + " 1c: 47a8 blx r5\n" + " 1e: 4605 mov r5, r0\n" + " 20: b002 add sp, #8\n" + " 22: bc05 pop {r0, r2}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_primitive_extended_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), + {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]), + {State3, RegC} = ?BACKEND:call_primitive(State2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]), + {State4, ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_PUT_LIST, [ + ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}} + ]), + State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {ptr, RegC}), + State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]), + ?BACKEND:assert_all_native_free(State6), + Stream = ?BACKEND:stream(State6), + Dump = << + " 0: 6c97 ldr r7, [r2, #72] ; 0x48\n" + " 2: b405 push {r0, r2}\n" + " 4: 2113 movs r1, #19\n" + " 6: 47b8 blx r7\n" + " 8: 4607 mov r7, r0\n" + " a: bc05 pop {r0, r2}\n" + " c: 6c96 ldr r6, [r2, #72] ; 0x48\n" + " e: b4c5 push {r0, r2, r6, r7}\n" + " 10: 2114 movs r1, #20\n" + " 12: 47b0 blx r6\n" + " 14: 4605 mov r5, r0\n" + " 16: bcc5 pop {r0, r2, r6, r7}\n" + " 18: 6c96 ldr r6, [r2, #72] ; 0x48\n" + " 1a: b4a5 push {r0, r2, r5, r7}\n" + " 1c: 2113 movs r1, #19\n" + " 1e: 47b0 blx r6\n" + " 20: 4606 mov r6, r0\n" + " 22: bca5 pop {r0, r2, r5, r7}\n" + " 24: 6b54 ldr r4, [r2, #52] ; 0x34\n" + " 26: b455 push {r0, r2, r4, r6}\n" + " 28: 6839 ldr r1, [r7, #0]\n" + " 2a: 682a ldr r2, [r5, #0]\n" + " 2c: 47a0 blx r4\n" + " 2e: 4607 mov r7, r0\n" + " 30: bc55 pop {r0, r2, r4, r6}\n" + " 32: 6037 str r7, [r6, #0]" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_primitive_few_free_regs_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, r7} = ?BACKEND:move_to_native_register(State0, 1), + {State2, r6} = ?BACKEND:move_to_native_register(State1, 2), + {State3, r5} = ?BACKEND:move_to_native_register(State2, 3), + {State4, r4} = ?BACKEND:move_to_native_register(State3, 4), + {State5, r3} = ?BACKEND:move_to_native_register(State4, 5), + {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [ + r6, r7, {free, r4}, r5, {free, r3} + ]), + State7 = ?BACKEND:free_native_registers(State6, [ResultReg, r6, r7, r5]), + ?BACKEND:assert_all_native_free(State7), + Stream = ?BACKEND:stream(State7), + Dump = << + " 0: 2701 movs r7, #1\n" + " 2: 2602 movs r6, #2\n" + " 4: 2503 movs r5, #3\n" + " 6: 2404 movs r4, #4\n" + " 8: 2305 movs r3, #5\n" + " a: 21e4 movs r1, #228 @ 0xe4\n" + " c: 5851 ldr r1, [r2, r1]\n" + " e: b4e7 push {r0, r1, r2, r5, r6, r7}\n" + " 10: b082 sub sp, #8\n" + " 12: 9300 str r3, [sp, #0]\n" + " 14: 4633 mov r3, r6\n" + " 16: 460e mov r6, r1\n" + " 18: 4618 mov r0, r3\n" + " 1a: 4639 mov r1, r7\n" + " 1c: 4622 mov r2, r4\n" + " 1e: 462b mov r3, r5\n" + " 20: 47b0 blx r6\n" + " 22: 4604 mov r4, r0\n" + " 24: b002 add sp, #8\n" + " 26: bce7 pop {r0, r1, r2, r5, r6, r7}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_ext_only_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 9e00 ldr r6, [sp, #0]\n" + " 2: 68b7 ldr r7, [r6, #8]\n" + " 4: 3f01 subs r7, #1\n" + " 6: 60b7 str r7, [r6, #8]\n" + " 8: d109 bne.n 0x1e\n" + " a: a704 add r7, pc, #16 ; (adr r7, 0x1c)\n" + " c: 3701 adds r7, #1\n" + " e: 6077 str r7, [r6, #4]\n" + " 10: 6897 ldr r7, [r2, #8]\n" + " 12: 9e05 ldr r6, [sp, #20]\n" + " 14: 9705 str r7, [sp, #20]\n" + " 16: 46b6 mov lr, r6\n" + " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 1a: 46c0 nop ; (mov r8, r8)\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1e: 6917 ldr r7, [r2, #16]\n" + " 20: b082 sub sp, #8\n" + " 22: 2601 movs r6, #1\n" + " 24: 4276 negs r6, r6\n" + " 26: 9601 str r6, [sp, #4]\n" + " 28: 2602 movs r6, #2\n" + " 2a: 9600 str r6, [sp, #0]\n" + " 2c: 9902 ldr r1, [sp, #8]\n" + " 2e: 2220 movs r2, #32\n" + " 30: 2302 movs r3, #2\n" + " 32: 47b8 blx r7\n" + " 34: b002 add sp, #8\n" + " 36: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_ext_only_unaligned_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + %% First do a 2-byte instruction to create unaligned start + State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}), + State2 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State1), + State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]), + Stream = ?BACKEND:stream(State3), + Dump = << + % State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}), + " 0: 6019 str r1, [r3, #0]\n" + % State2 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State1), + " 2: 9e00 ldr r6, [sp, #0]\n" + " 4: 68b7 ldr r7, [r6, #8]\n" + " 6: 3f01 subs r7, #1\n" + " 8: 60b7 str r7, [r6, #8]\n" + " a: d108 bne.n 0x1e\n" + " c: a703 add r7, pc, #12 ; (adr r7, 0x1c)\n" + " e: 3701 adds r7, #1\n" + " 10: 6077 str r7, [r6, #4]\n" + " 12: 6897 ldr r7, [r2, #8]\n" + " 14: 9e05 ldr r6, [sp, #20]\n" + " 16: 9705 str r7, [sp, #20]\n" + " 18: 46b6 mov lr, r6\n" + " 1a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + % State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]), + " 1e: 6917 ldr r7, [r2, #16]\n" + " 20: b082 sub sp, #8\n" + " 22: 2601 movs r6, #1\n" + " 24: 4276 negs r6, r6\n" + " 26: 9601 str r6, [sp, #4]\n" + " 28: 2602 movs r6, #2\n" + " 2a: 9600 str r6, [sp, #0]\n" + " 2c: 9902 ldr r1, [sp, #8]\n" + " 2e: 2220 movs r2, #32\n" + " 30: 2302 movs r3, #2\n" + " 32: 47b8 blx r7\n" + " 34: b002 add sp, #8\n" + " 36: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+
+%% Tail call to ?PRIM_RAISE_ERROR_TUPLE with five arguments, the last one
+%% being a freed native register loaded from x_reg 0.
+call_primitive_last_5_args_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, ArgReg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    PrimArgs = [ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, ArgReg}],
+    Called = ?BACKEND:call_primitive_last(Loaded, ?PRIM_RAISE_ERROR_TUPLE, PrimArgs),
+    Expected = <<
+        % move_to_native_register({x_reg, 0})
+        " 0: 6987 ldr r7, [r0, #24]\n"
+        % call_primitive_last(?PRIM_RAISE_ERROR_TUPLE, ...)
+        " 2: 6cd6 ldr r6, [r2, #76] ; 0x4c\n"
+        " 4: b082 sub sp, #8\n"
+        " 6: 9700 str r7, [sp, #0]\n"
+        " 8: 9902 ldr r1, [sp, #8]\n"
+        " a: 2204 movs r2, #4\n"
+        " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n"
+        " e: 47b0 blx r6\n"
+        " 10: b002 add sp, #8\n"
+        " 12: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
+        % Literal pool (disassembled as instructions by the dump)
+        " 14: 02cb lsls r3, r1, #11\n"
+        " 16: 0000 movs r0, r0"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Called)).
+
+%% Reduction check followed by a tail call to ?PRIM_CALL_EXT whose last
+%% argument is the small positive immediate 10 (a single `movs r6, #10`).
+call_ext_last_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Scheduled = ?BACKEND:decrement_reductions_and_maybe_schedule_next(Fresh),
+    Called = ?BACKEND:call_primitive_last(
+        Scheduled, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]
+    ),
+    Expected = <<
+        % decrement_reductions_and_maybe_schedule_next
+        " 0: 9e00 ldr r6, [sp, #0]\n"
+        " 2: 68b7 ldr r7, [r6, #8]\n"
+        " 4: 3f01 subs r7, #1\n"
+        " 6: 60b7 str r7, [r6, #8]\n"
+        " 8: d109 bne.n 0x1e\n"
+        " a: a704 add r7, pc, #16 ; (adr r7, 0x1c)\n"
+        " c: 3701 adds r7, #1\n"
+        " e: 6077 str r7, [r6, #4]\n"
+        " 10: 6897 ldr r7, [r2, #8]\n"
+        " 12: 9e05 ldr r6, [sp, #20]\n"
+        " 14: 9705 str r7, [sp, #20]\n"
+        " 16: 46b6 mov lr, r6\n"
+        " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
+        " 1a: 46c0 nop ; (mov r8, r8)\n"
+        " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
+        % call_primitive_last(?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10])
+        "1e: 6917 ldr r7, [r2, #16]\n"
+        " 20: b082 sub sp, #8\n"
+        " 22: 260a movs r6, #10\n"
+        " 24: 9601 str r6, [sp, #4]\n"
+        " 26: 2602 movs r6, #2\n"
+        " 28: 9600 str r6, [sp, #0]\n"
+        " 2a: 9902 ldr r1, [sp, #8]\n"
+        " 2c: 2220 movs r2, #32\n"
+        " 2e: 2302 movs r3, #2\n"
+        " 30: 47b8 blx r7\n"
+        " 32: b002 add sp, #8\n"
+        " 34: bdf2 pop {r1, r4, r5, r6, r7, pc}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Called)).
+
+%% Tail call to primitive index 0 with three arguments, the third being the
+%% immediate 42.
+call_primitive_last_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Called = ?BACKEND:call_primitive_last(Fresh, 0, [ctx, jit_state, 42]),
+    Expected = <<
+        " 0: 6817 ldr r7, [r2, #0]\n"
+        " 2: 222a movs r2, #42 ; 0x2a\n"
+        " 4: 9e05 ldr r6, [sp, #20]\n"
+        " 6: 9705 str r7, [sp, #20]\n"
+        " 8: 46b6 mov lr, r6\n"
+        " a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Called)).
+
+%% return_if_not_equal_to_ctx/2 applied to the result of
+%% ?PRIM_PROCESS_SIGNAL_MESSAGES: once on the result register itself (r7) and
+%% once on a copy of it (r6).
+return_if_not_equal_to_ctx_test_() ->
+    MakeState = fun() ->
+        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+    end,
+    {setup, MakeState, fun(Base) ->
+        [
+            ?_test(begin
+                {AfterCall, Result} = ?BACKEND:call_primitive(
+                    Base, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ctx, jit_state]
+                ),
+                ?assertEqual(r7, Result),
+                Checked = ?BACKEND:return_if_not_equal_to_ctx(AfterCall, {free, Result}),
+                Expected = <<
+                    " 0: 6d57 ldr r7, [r2, #84] ; 0x54\n"
+                    " 2: b405 push {r0, r2}\n"
+                    " 4: 9902 ldr r1, [sp, #8]\n"
+                    " 6: 47b8 blx r7\n"
+                    " 8: 4607 mov r7, r0\n"
+                    " a: bc05 pop {r0, r2}\n"
+                    " c: 4287 cmp r7, r0\n"
+                    " e: d001 beq.n 0x14\n"
+                    " 10: 4638 mov r0, r7\n"
+                    " 12: bdf2 pop {r1, r4, r5, r6, r7, pc}"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Checked))
+            end),
+            ?_test(begin
+                {AfterCall, Result} = ?BACKEND:call_primitive(
+                    Base, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ctx, jit_state]
+                ),
+                ?assertEqual(r7, Result),
+                {AfterCopy, Copy} = ?BACKEND:copy_to_native_register(AfterCall, Result),
+                ?assertEqual(r6, Copy),
+                Checked = ?BACKEND:return_if_not_equal_to_ctx(AfterCopy, {free, Copy}),
+                Expected = <<
+                    " 0: 6d57 ldr r7, [r2, #84] ; 0x54\n"
+                    " 2: b405 push {r0, r2}\n"
+                    " 4: 9902 ldr r1, [sp, #8]\n"
+                    " 6: 47b8 blx r7\n"
+                    " 8: 4607 mov r7, r0\n"
+                    " a: bc05 pop {r0, r2}\n"
+                    " c: 463e mov r6, r7\n"
+                    " e: 4286 cmp r6, r0\n"
+                    " 10: d001 beq.n 0x16\n"
+                    " 12: 4630 mov r0, r6\n"
+                    " 14: bdf2 pop {r1, r4, r5, r6, r7, pc}"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Checked))
+            end)
+        ]
+    end}.
+
+%% move_to_cp/2 from y_reg 0: two loads followed by a store at offset 92.
+move_to_cp_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Moved = ?BACKEND:move_to_cp(Fresh, {y_reg, 0}),
+    Expected = <<
+        " 0: 6946 ldr r6, [r0, #20]\n"
+        " 2: 6837 ldr r7, [r6, #0]\n"
+        " 4: 65c7 str r7, [r0, #92] ; 0x5c"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Moved)).
+
+%% increment_sp/2 by 7: the expected code adds 28 to the word at offset 20.
+increment_sp_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Bumped = ?BACKEND:increment_sp(Fresh, 7),
+    Expected = <<
+        " 0: 6947 ldr r7, [r0, #20]\n"
+        " 2: 371c adds r7, #28\n"
+        " 4: 6147 str r7, [r0, #20]"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Bumped)).
+
+%% if_block/3 across the supported condition shapes. Every case starts from
+%% the same state, in which x_reg 0 and x_reg 1 have been loaded into RegA and
+%% RegB, and the conditional body adds a constant to RegB. Cases that wrap
+%% RegA in {free, _} additionally expect RegA to be gone from used_regs/1.
+if_block_test_() ->
+    Prepare = fun() ->
+        Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+        {WithA, A} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+        {WithB, B} = ?BACKEND:move_to_native_register(WithA, {x_reg, 1}),
+        {WithB, A, B}
+    end,
+    {setup, Prepare, fun({Base, RegA, RegB}) ->
+        %% Conditional body shared by all but the 1995 case
+        AddTwo = fun(Inner) -> ?BACKEND:add(Inner, RegB, 2) end,
+        [
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {RegA, '<', 0}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f00 cmp r7, #0\n"
+                    " 6: d500 bpl.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {RegA, '<', RegB}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 42b7 cmp r7, r6\n"
+                    " 6: da00 bge.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {RegA, '<', 42}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f2a cmp r7, #42 ; 0x2a\n"
+                    " 6: da00 bge.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                %% 1024 is loaded from a literal pool placed after the jump.
+                %% NOTE(review): unlike the 1995 case below, the expected code
+                %% has no `cmp r7, r5` after the literal load and the `bge.n`
+                %% at 0x8 targets itself - confirm this is the intended output.
+                Guarded = ?BACKEND:if_block(Base, {RegA, '<', 1024}, AddTwo),
+                Jumped = ?BACKEND:jump_to_offset(Guarded, 16#100),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n"
+                    " 6: da01 bge.n 0xc\n"
+                    " 8: dafe bge.n 0x8\n"
+                    " a: 3602 adds r6, #2\n"
+                    " c: e078 b.n 0x100\n"
+                    " e: 0000 movs r0, r0\n"
+                    " 10: 0400 lsls r0, r0, #16\n"
+                    " 12: 0000 movs r0, r0"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Jumped)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {RegA, '==', 0}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f00 cmp r7, #0\n"
+                    " 6: d100 bne.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {{free, RegA}, '==', 0}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f00 cmp r7, #0\n"
+                    " 6: d100 bne.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                %% -1 is materialised in a scratch register before comparing
+                Guarded = ?BACKEND:if_block(Base, {RegA, '==', -1}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2501 movs r5, #1\n"
+                    " 6: 426d negs r5, r5\n"
+                    " 8: 42af cmp r7, r5\n"
+                    " a: d100 bne.n 0xe\n"
+                    " c: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {'(int)', RegA, '==', 0}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f00 cmp r7, #0\n"
+                    " 6: d100 bne.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {'(int)', {free, RegA}, '==', 0}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f00 cmp r7, #0\n"
+                    " 6: d100 bne.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {RegA, '!=', ?TERM_NIL}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f3b cmp r7, #59 ; 0x3b\n"
+                    " 6: d000 beq.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {{free, RegA}, '!=', ?TERM_NIL}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f3b cmp r7, #59 ; 0x3b\n"
+                    " 6: d000 beq.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {'(int)', RegA, '!=', 42}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f2a cmp r7, #42 ; 0x2a\n"
+                    " 6: d000 beq.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                %% 1995 does not fit a cmp immediate: it is loaded from a
+                %% literal pool into a scratch register. Body adds 1 here.
+                Guarded = ?BACKEND:if_block(
+                    Base,
+                    {RegA, '!=', 1995},
+                    fun(Inner) -> ?BACKEND:add(Inner, RegB, 1) end
+                ),
+                Jumped = ?BACKEND:jump_to_offset(Guarded, 16#100),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 4d02 ldr r5, [pc, #8] ; (0x10)\n"
+                    " 6: 42af cmp r7, r5\n"
+                    " 8: d000 beq.n 0xc\n"
+                    " a: 3601 adds r6, #1\n"
+                    " c: e078 b.n 0x100\n"
+                    " e: 0000 movs r0, r0\n"
+                    " 10: 07cb lsls r3, r1, #31\n"
+                    " 12: 0000 movs r0, r0"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Jumped))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {'(int)', {free, RegA}, '!=', 42}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f2a cmp r7, #42 ; 0x2a\n"
+                    " 6: d000 beq.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {RegA, '==', ?TERM_NIL}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f3b cmp r7, #59 ; 0x3b\n"
+                    " 6: d100 bne.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {{free, RegA}, '==', ?TERM_NIL}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f3b cmp r7, #59 ; 0x3b\n"
+                    " 6: d100 bne.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {'(int)', RegA, '==', 42}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f2a cmp r7, #42 ; 0x2a\n"
+                    " 6: d100 bne.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {'(int)', {free, RegA}, '==', 42}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2f2a cmp r7, #42 ; 0x2a\n"
+                    " 6: d100 bne.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                %% Boolean tests shift bit 0 into the sign flag (lsls #31)
+                Guarded = ?BACKEND:if_block(Base, {'(bool)', RegA, '==', false}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 07fd lsls r5, r7, #31\n"
+                    " 6: d400 bmi.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(
+                    Base, {'(bool)', {free, RegA}, '==', false}, AddTwo
+                ),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 07fd lsls r5, r7, #31\n"
+                    " 6: d400 bmi.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {'(bool)', RegA, '!=', false}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 07fd lsls r5, r7, #31\n"
+                    " 6: d500 bpl.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(
+                    Base, {'(bool)', {free, RegA}, '!=', false}, AddTwo
+                ),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 07fd lsls r5, r7, #31\n"
+                    " 6: d500 bpl.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {RegA, '&', 16#7, '!=', 0}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 077d lsls r5, r7, #29\n"
+                    " 6: d000 beq.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {RegA, '&', 16#5, '!=', 0}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 2505 movs r5, #5\n"
+                    " 6: 422f tst r7, r5\n"
+                    " 8: d000 beq.n 0xc\n"
+                    " a: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {{free, RegA}, '&', 16#7, '!=', 0}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 077d lsls r5, r7, #29\n"
+                    " 6: d000 beq.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(
+                    Base,
+                    {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                    AddTwo
+                ),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 43fd mvns r5, r7\n"
+                    " 6: 072d lsls r5, r5, #28\n"
+                    " 8: d000 beq.n 0xc\n"
+                    " a: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                %% With a freed RegA the inversion is done in place on r7
+                Guarded = ?BACKEND:if_block(
+                    Base,
+                    {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                    AddTwo
+                ),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 43ff mvns r7, r7\n"
+                    " 6: 073f lsls r7, r7, #28\n"
+                    " 8: d000 beq.n 0xc\n"
+                    " a: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(
+                    Base,
+                    {RegA, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                    AddTwo
+                ),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 463d mov r5, r7\n"
+                    " 6: 243f movs r4, #63 ; 0x3f\n"
+                    " 8: 4025 ands r5, r4\n"
+                    " a: 2d08 cmp r5, #8\n"
+                    " c: d000 beq.n 0x10\n"
+                    " e: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                Guarded = ?BACKEND:if_block(Base, {{free, RegA}, '<', RegB}, AddTwo),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 42b7 cmp r7, r6\n"
+                    " 6: da00 bge.n 0xa\n"
+                    " 8: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end),
+            ?_test(begin
+                %% With a freed RegA the mask is applied in place on r7
+                Guarded = ?BACKEND:if_block(
+                    Base,
+                    {{free, RegA}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                    AddTwo
+                ),
+                Expected = <<
+                    " 0: 6987 ldr r7, [r0, #24]\n"
+                    " 2: 69c6 ldr r6, [r0, #28]\n"
+                    " 4: 253f movs r5, #63 ; 0x3f\n"
+                    " 6: 402f ands r7, r5\n"
+                    " 8: 2f08 cmp r7, #8\n"
+                    " a: d000 beq.n 0xe\n"
+                    " c: 3602 adds r6, #2"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+                ?assertEqual([RegB], ?BACKEND:used_regs(Guarded))
+            end)
+        ]
+    end}.
+
+%% Code paths for `Reg & Mask != 0` conditions: three masks whose expected
+%% code is a single left shift, and one mask (5) that expects movs + tst.
+bitwise_and_optimization_test_() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {WithA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 6}),
+    {Base, RegB} = ?BACKEND:move_to_native_register(WithA, {x_reg, 7}),
+    AddTwo = fun(Inner) -> ?BACKEND:add(Inner, RegB, 2) end,
+    [
+        %% Mask 16#3: expected test is `lsls r5, r7, #30`
+        ?_test(begin
+            Guarded = ?BACKEND:if_block(Base, {RegA, '&', 16#3, '!=', 0}, AddTwo),
+            Expected = <<
+                " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n"
+                " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n"
+                " 4: 07bd lsls r5, r7, #30\n"
+                " 6: d000 beq.n 0xa\n"
+                " 8: 3602 adds r6, #2"
+            >>,
+            ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+            ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+        end),
+        %% Mask 16#F: expected test is `lsls r5, r7, #28`
+        ?_test(begin
+            Guarded = ?BACKEND:if_block(Base, {RegA, '&', 16#F, '!=', 0}, AddTwo),
+            Expected = <<
+                " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n"
+                " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n"
+                " 4: 073d lsls r5, r7, #28\n"
+                " 6: d000 beq.n 0xa\n"
+                " 8: 3602 adds r6, #2"
+            >>,
+            ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+            ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+        end),
+        %% Mask 16#3F: expected test is `lsls r5, r7, #26`
+        ?_test(begin
+            Guarded = ?BACKEND:if_block(Base, {RegA, '&', 16#3F, '!=', 0}, AddTwo),
+            Expected = <<
+                " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n"
+                " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n"
+                " 4: 06bd lsls r5, r7, #26\n"
+                " 6: d000 beq.n 0xa\n"
+                " 8: 3602 adds r6, #2"
+            >>,
+            ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+            ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+        end),
+        %% Mask 5: expected code loads the mask and uses tst
+        ?_test(begin
+            Guarded = ?BACKEND:if_block(Base, {RegA, '&', 5, '!=', 0}, AddTwo),
+            Expected = <<
+                " 0: 6b07 ldr r7, [r0, #48] ; 0x30\n"
+                " 2: 6b46 ldr r6, [r0, #52] ; 0x34\n"
+                " 4: 2505 movs r5, #5\n"
+                " 6: 422f tst r7, r5\n"
+                " 8: d000 beq.n 0xc\n"
+                " a: 3602 adds r6, #2"
+            >>,
+            ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Guarded)),
+            ?assertEqual([RegB, RegA], ?BACKEND:used_regs(Guarded))
+        end)
+    ].
+
+%% if_else_block/4: the true branch adds 2 and jumps over the false branch,
+%% which adds 4.
+if_else_block_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {WithCond, CondReg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {WithAcc, AccReg} = ?BACKEND:move_to_native_register(WithCond, {x_reg, 1}),
+    WhenNil = fun(Inner) -> ?BACKEND:add(Inner, AccReg, 2) end,
+    Otherwise = fun(Inner) -> ?BACKEND:add(Inner, AccReg, 4) end,
+    Branched = ?BACKEND:if_else_block(WithAcc, {CondReg, '==', ?TERM_NIL}, WhenNil, Otherwise),
+    Expected = <<
+        " 0: 6987 ldr r7, [r0, #24]\n"
+        " 2: 69c6 ldr r6, [r0, #28]\n"
+        " 4: 2f3b cmp r7, #59 ; 0x3b\n"
+        " 6: d101 bne.n 0xc\n"
+        " 8: 3602 adds r6, #2\n"
+        " a: e000 b.n 0xe\n"
+        " c: 3604 adds r6, #4"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Branched)).
+
+%% shift_right/3 by 3 bits: in place when the source register is passed as
+%% {free, Reg}, into a different register when it is not.
+shift_right_test_() ->
+    NewState = fun() ->
+        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+    end,
+    [
+        ?_test(begin
+            {Loaded, Reg} = ?BACKEND:move_to_native_register(NewState(), {x_reg, 0}),
+            %% Matching on the bound Reg asserts the same register is returned
+            {Shifted, Reg} = ?BACKEND:shift_right(Loaded, {free, Reg}, 3),
+            Expected = <<
+                " 0: 6987 ldr r7, [r0, #24]\n"
+                " 2: 08ff lsrs r7, r7, #3"
+            >>,
+            ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Shifted))
+        end),
+        ?_test(begin
+            {Loaded, Reg} = ?BACKEND:move_to_native_register(NewState(), {x_reg, 0}),
+            {Shifted, ResultReg} = ?BACKEND:shift_right(Loaded, Reg, 3),
+            ?assertNotEqual(ResultReg, Reg),
+            Expected = <<
+                " 0: 6987 ldr r7, [r0, #24]\n"
+                " 2: 08fe lsrs r6, r7, #3"
+            >>,
+            ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Shifted))
+        end)
+    ].
+
+%% shift_left/3 by 3 bits; the expected code shifts r7 in place.
+shift_left_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Shifted = ?BACKEND:shift_left(Loaded, Reg, 3),
+    Expected = <<
+        " 0: 6987 ldr r7, [r0, #24]\n"
+        " 2: 00ff lsls r7, r7, #3"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Shifted)).
+
+%% Emits a jump table, then labels 1, 2 and 0 with their bodies, and finally
+%% runs update_branches/1. The expected dump starts with three jump-table
+%% entries (at 0x0, 0xc and 0x18) followed by the three label bodies.
+call_only_or_schedule_next_and_label_relocation_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Steps = [
+        fun(S) -> ?BACKEND:jump_table(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 1) end,
+        fun(S) -> ?BACKEND:call_only_or_schedule_next(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 2) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 0, [ctx, jit_state]) end,
+        % OP_INT_CALL_END
+        fun(S) -> ?BACKEND:add_label(S, 0) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 1, [ctx, jit_state]) end,
+        fun(S) -> ?BACKEND:update_branches(S) end
+    ],
+    Final = lists:foldl(fun(Step, S) -> Step(S) end, Fresh, Steps),
+    Expected = <<
+        " 0: 4b01 ldr r3, [pc, #4] ; (0x8)\n"
+        " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
+        " 4: 449f add pc, r3\n"
+        " 6: 46c0 nop ; (mov r8, r8)\n"
+        " 8: 0054 lsls r4, r2, #1\n"
+        " a: 0000 movs r0, r0\n"
+        " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n"
+        " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
+        " 10: 449f add pc, r3\n"
+        " 12: 46c0 nop ; (mov r8, r8)\n"
+        " 14: 0010 movs r0, r2\n"
+        " 16: 0000 movs r0, r0\n"
+        " 18: 4b01 ldr r3, [pc, #4] ; (0x20)\n"
+        " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
+        " 1c: 449f add pc, r3\n"
+        " 1e: 46c0 nop ; (mov r8, r8)\n"
+        " 20: 0030 movs r0, r6\n"
+        " 22: 0000 movs r0, r0\n"
+        " 24: 9e00 ldr r6, [sp, #0]\n"
+        " 26: 68b7 ldr r7, [r6, #8]\n"
+        " 28: 3f01 subs r7, #1\n"
+        " 2a: 60b7 str r7, [r6, #8]\n"
+        " 2c: d004 beq.n 0x38\n"
+        " 2e: e00f b.n 0x50\n"
+        " 30: 46c0 nop ; (mov r8, r8)\n"
+        " 32: 46c0 nop ; (mov r8, r8)\n"
+        " 34: 46c0 nop ; (mov r8, r8)\n"
+        " 36: 46c0 nop ; (mov r8, r8)\n"
+        " 38: a700 add r7, pc, #0 ; (adr r7, 0x3c)\n"
+        " 3a: 2623 movs r6, #35 ; 0x23\n"
+        " 3c: 4276 negs r6, r6\n"
+        " 3e: 19f6 adds r6, r6, r7\n"
+        " 40: 9f00 ldr r7, [sp, #0]\n"
+        " 42: 607e str r6, [r7, #4]\n"
+        " 44: 6897 ldr r7, [r2, #8]\n"
+        " 46: 9e05 ldr r6, [sp, #20]\n"
+        " 48: 9705 str r7, [sp, #20]\n"
+        " 4a: 46b6 mov lr, r6\n"
+        " 4c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
+        " 4e: 46c0 nop ; (mov r8, r8)\n"
+        " 50: 6817 ldr r7, [r2, #0]\n"
+        " 52: 9e05 ldr r6, [sp, #20]\n"
+        " 54: 9705 str r7, [sp, #20]\n"
+        " 56: 46b6 mov lr, r6\n"
+        " 58: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
+        " 5a: 46c0 nop ; (mov r8, r8)\n"
+        " 5c: 6857 ldr r7, [r2, #4]\n"
+        " 5e: 9e05 ldr r6, [sp, #20]\n"
+        " 60: 9705 str r7, [sp, #20]\n"
+        " 62: 46b6 mov lr, r6\n"
+        " 64: bdf2 pop {r1, r4, r5, r6, r7, pc}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Final)).
+
+%% Same sequence as the aligned relocation test, preceded by one 2-byte store
+%% so that the jump table starts at offset 2.
+call_only_or_schedule_next_and_label_relocation_unaligned_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Steps = [
+        %% One 2-byte instruction up front shifts everything that follows
+        fun(S) -> ?BACKEND:move_to_vm_register(S, r1, {ptr, r3}) end,
+        fun(S) -> ?BACKEND:jump_table(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 1) end,
+        fun(S) -> ?BACKEND:call_only_or_schedule_next(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 2) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 0, [ctx, jit_state]) end,
+        % OP_INT_CALL_END
+        fun(S) -> ?BACKEND:add_label(S, 0) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 1, [ctx, jit_state]) end,
+        fun(S) -> ?BACKEND:update_branches(S) end
+    ],
+    Final = lists:foldl(fun(Step, S) -> Step(S) end, Fresh, Steps),
+    Expected = <<
+        " 0: 6019 str r1, [r3, #0]\n"
+        " 2: 4b01 ldr r3, [pc, #4] ; (0x8)\n"
+        " 4: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
+        " 6: 449f add pc, r3\n"
+        " 8: 46c0 nop ; (mov r8, r8)\n"
+        " a: 0056 lsls r6, r2, #1\n"
+        " c: 0000 movs r0, r0\n"
+        " e: 4b01 ldr r3, [pc, #4] ; (0x14)\n"
+        " 10: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
+        " 12: 449f add pc, r3\n"
+        " 14: 46c0 nop ; (mov r8, r8)\n"
+        " 16: 0012 movs r2, r2\n"
+        " 18: 0000 movs r0, r0\n"
+        " 1a: 4b01 ldr r3, [pc, #4] ; (0x20)\n"
+        " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
+        " 1e: 449f add pc, r3\n"
+        " 20: 46c0 nop ; (mov r8, r8)\n"
+        " 22: 0032 movs r2, r6\n"
+        " 24: 0000 movs r0, r0\n"
+        " 26: 46c0 nop ; (mov r8, r8)\n"
+        " 28: 9e00 ldr r6, [sp, #0]\n"
+        " 2a: 68b7 ldr r7, [r6, #8]\n"
+        " 2c: 3f01 subs r7, #1\n"
+        " 2e: 60b7 str r7, [r6, #8]\n"
+        " 30: d004 beq.n 0x3c\n"
+        " 32: e00f b.n 0x54\n"
+        " 34: 46c0 nop ; (mov r8, r8)\n"
+        " 36: 46c0 nop ; (mov r8, r8)\n"
+        " 38: 46c0 nop ; (mov r8, r8)\n"
+        " 3a: 46c0 nop ; (mov r8, r8)\n"
+        " 3c: a700 add r7, pc, #0 ; (adr r7, 0x40)\n"
+        " 3e: 2627 movs r6, #39 ; 0x27\n"
+        " 40: 4276 negs r6, r6\n"
+        " 42: 19f6 adds r6, r6, r7\n"
+        " 44: 9f00 ldr r7, [sp, #0]\n"
+        " 46: 607e str r6, [r7, #4]\n"
+        " 48: 6897 ldr r7, [r2, #8]\n"
+        " 4a: 9e05 ldr r6, [sp, #20]\n"
+        " 4c: 9705 str r7, [sp, #20]\n"
+        " 4e: 46b6 mov lr, r6\n"
+        " 50: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
+        " 52: 46c0 nop ; (mov r8, r8)\n"
+        " 54: 6817 ldr r7, [r2, #0]\n"
+        " 56: 9e05 ldr r6, [sp, #20]\n"
+        " 58: 9705 str r7, [sp, #20]\n"
+        " 5a: 46b6 mov lr, r6\n"
+        " 5c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
+        " 5e: 46c0 nop ; (mov r8, r8)\n"
+        " 60: 6857 ldr r7, [r2, #4]\n"
+        " 62: 9e05 ldr r6, [sp, #20]\n"
+        " 64: 9705 str r7, [sp, #20]\n"
+        " 66: 46b6 mov lr, r6\n"
+        " 68: bdf2 pop {r1, r4, r5, r6, r7, pc}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Final)).
%% Large-gap variant of the call_only_or_schedule_next relocation test.
%% 128 padding moves are emitted between the jump table and label 1, and only
%% the bytes from offset 16#124 onwards are compared. In the expected dump the
%% value stored at [r7, #4] is built from `adr r7` plus a 32-bit word loaded
%% with `ldr r6, [pc, #16]` from 0x14c.
call_only_or_schedule_next_and_label_relocation_large_gap_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:jump_table(St0, 2),
    % Padding between the jump table and the function bodies: the same
    % x_reg -> r3 load repeated 128 times.
    EmitPadding = fun(_N, Acc) ->
        ?BACKEND:move_to_native_register(Acc, {x_reg, 2}, r3)
    end,
    StPadded = lists:foldl(EmitPadding, St1, lists:seq(1, 128)),
    St2 = ?BACKEND:add_label(StPadded, 1),
    St3 = ?BACKEND:call_only_or_schedule_next(St2, 2),
    St4 = ?BACKEND:add_label(St3, 2),
    St5 = ?BACKEND:call_primitive_last(St4, 0, [ctx, jit_state]),
    % OP_INT_CALL_END
    St6 = ?BACKEND:add_label(St5, 0),
    St7 = ?BACKEND:call_primitive_last(St6, 1, [ctx, jit_state]),
    St8 = ?BACKEND:update_branches(St7),
    Emitted = ?BACKEND:stream(St8),
    % Only the section starting at 0x124 is checked.
    Expected = <<
        " 124: 9e00 ldr r6, [sp, #0]\n"
        " 126: 68b7 ldr r7, [r6, #8]\n"
        " 128: 3f01 subs r7, #1\n"
        " 12a: 60b7 str r7, [r6, #8]\n"
        " 12c: d004 beq.n 0x138\n"
        " 12e: e00f b.n 0x150\n"
        " 130: 46c0 nop ; (mov r8, r8)\n"
        " 132: 46c0 nop ; (mov r8, r8)\n"
        " 134: 46c0 nop ; (mov r8, r8)\n"
        " 136: 46c0 nop ; (mov r8, r8)\n"
        " 138: a700 add r7, pc, #0 ; (adr r7, 0x13c)\n"
        " 13a: 4e04 ldr r6, [pc, #16] ; (0x14c)\n"
        " 13c: 19f6 adds r6, r6, r7\n"
        " 13e: 9f00 ldr r7, [sp, #0]\n"
        " 140: 607e str r6, [r7, #4]\n"
        " 142: 6897 ldr r7, [r2, #8]\n"
        " 144: 9e05 ldr r6, [sp, #20]\n"
        " 146: 9705 str r7, [sp, #20]\n"
        " 148: 46b6 mov lr, r6\n"
        " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 14c: fedd ffff mrc2 15, 6, pc, cr13, cr15, {7}\n"
        " 150: 6817 ldr r7, [r2, #0]\n"
        " 152: 9e05 ldr r6, [sp, #20]\n"
        " 154: 9705 str r7, [sp, #20]\n"
        " 156: 46b6 mov lr, r6\n"
        " 158: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 15a: 46c0 nop ; (mov r8, r8)\n"
        " 15c: 6857 ldr r7, [r2, #4]\n"
        " 15e: 9e05 ldr r6, [sp, #20]\n"
        " 160: 9705 str r7, [sp, #20]\n"
        " 162: 46b6 mov lr, r6\n"
        " 164: bdf2 pop {r1, r4, r5, r6, r7, pc}"
    >>,
    {_, Tail} = split_binary(Emitted, 16#124),
    ?assertEqual(dump_to_bin(Expected), Tail).

%% Same scenario with 127 padding moves instead of 128, so the code after the
%% padding starts two bytes earlier. The compared section starts at 16#122:
%% the expected dump has a single nop there and then the same instructions at
%% the same addresses as the 128-move case.
call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:jump_table(St0, 2),
    % Padding between the jump table and the rest of the code: 127 loads
    % (one fewer than above) to shift the alignment.
    EmitPadding = fun(_N, Acc) ->
        ?BACKEND:move_to_native_register(Acc, {x_reg, 2}, r3)
    end,
    StPadded = lists:foldl(EmitPadding, St1, lists:seq(1, 127)),
    St2 = ?BACKEND:add_label(StPadded, 1),
    St3 = ?BACKEND:call_only_or_schedule_next(St2, 2),
    St4 = ?BACKEND:add_label(St3, 2),
    St5 = ?BACKEND:call_primitive_last(St4, 0, [ctx, jit_state]),
    % OP_INT_CALL_END
    St6 = ?BACKEND:add_label(St5, 0),
    St7 = ?BACKEND:call_primitive_last(St6, 1, [ctx, jit_state]),
    St8 = ?BACKEND:update_branches(St7),
    Emitted = ?BACKEND:stream(St8),
    % Only the section starting at 0x122 is checked.
    Expected = <<
        " 122: 46c0 nop ; (mov r8, r8)\n"
        " 124: 9e00 ldr r6, [sp, #0]\n"
        " 126: 68b7 ldr r7, [r6, #8]\n"
        " 128: 3f01 subs r7, #1\n"
        " 12a: 60b7 str r7, [r6, #8]\n"
        " 12c: d004 beq.n 0x138\n"
        " 12e: e00f b.n 0x150\n"
        " 130: 46c0 nop ; (mov r8, r8)\n"
        " 132: 46c0 nop ; (mov r8, r8)\n"
        " 134: 46c0 nop ; (mov r8, r8)\n"
        " 136: 46c0 nop ; (mov r8, r8)\n"
        " 138: a700 add r7, pc, #0 ; (adr r7, 0x13c)\n"
        " 13a: 4e04 ldr r6, [pc, #16] ; (0x14c)\n"
        " 13c: 19f6 adds r6, r6, r7\n"
        " 13e: 9f00 ldr r7, [sp, #0]\n"
        " 140: 607e str r6, [r7, #4]\n"
        " 142: 6897 ldr r7, [r2, #8]\n"
        " 144: 9e05 ldr r6, [sp, #20]\n"
        " 146: 9705 str r7, [sp, #20]\n"
        " 148: 46b6 mov lr, r6\n"
        " 14a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 14c: fedd ffff mrc2 15, 6, pc, cr13, cr15, {7}\n"
        " 150: 6817 ldr r7, [r2, #0]\n"
        " 152: 9e05 ldr r6, [sp, #20]\n"
        " 154: 9705 str r7, [sp, #20]\n"
        " 156: 46b6 mov lr, r6\n"
        " 158: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 15a: 46c0 nop ; (mov r8, r8)\n"
        " 15c: 6857 ldr r7, [r2, #4]\n"
        " 15e: 9e05 ldr r6, [sp, #20]\n"
        " 160: 9705 str r7, [sp, #20]\n"
        " 162: 46b6 mov lr, r6\n"
        " 164: bdf2 pop {r1, r4, r5, r6, r7, pc}"
    >>,
    {_, Tail} = split_binary(Emitted, 16#122),
    ?assertEqual(dump_to_bin(Expected), Tail).

%% Calls a function pointer with an argument produced from the integer
%% 998238357 (16#3B7FE895). In the expected dump that constant is loaded with
%% `ldr r1, [pc, #44]` from the data word at 0x40 (halfwords e895 3b7f).
call_bif_with_large_literal_integer_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, FuncPtr} = ?BACKEND:call_primitive(St0, 8, [jit_state, 2]),
    {St2, ArgReg} = ?BACKEND:call_primitive(St1, 15, [ctx, 998238357]),
    CallArgs = [ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg}],
    {St3, ResultReg} = ?BACKEND:call_func_ptr(St2, {free, FuncPtr}, CallArgs),
    % A zero result is handed to the error primitive.
    OnZero = fun(Blk) ->
        ?BACKEND:call_primitive_last(Blk, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
    end,
    St4 = ?BACKEND:if_block(St3, {ResultReg, '==', 0}, OnZero),
    St5 = ?BACKEND:move_to_vm_register(St4, ResultReg, {x_reg, 0}),
    St6 = ?BACKEND:free_native_registers(St5, [ResultReg]),
    ?BACKEND:assert_all_native_free(St6),
    Emitted = ?BACKEND:stream(St6),
    Expected = <<
        " 0: 6a17 ldr r7, [r2, #32]\n"
        " 2: b405 push {r0, r2}\n"
        " 4: 9802 ldr r0, [sp, #8]\n"
        " 6: 2102 movs r1, #2\n"
        " 8: 47b8 blx r7\n"
        " a: 4607 mov r7, r0\n"
        " c: bc05 pop {r0, r2}\n"
        " e: 6bd6 ldr r6, [r2, #60] ; 0x3c\n"
        " 10: b4c5 push {r0, r2, r6, r7}\n"
        " 12: 490b ldr r1, [pc, #44] ; (0x40)\n"
        " 14: 47b0 blx r6\n"
        " 16: 4605 mov r5, r0\n"
        " 18: bcc5 pop {r0, r2, r6, r7}\n"
        " 1a: b405 push {r0, r2}\n"
        " 1c: b082 sub sp, #8\n"
        " 1e: 9500 str r5, [sp, #0]\n"
        " 20: 2100 movs r1, #0\n"
        " 22: 2201 movs r2, #1\n"
        " 24: 6983 ldr r3, [r0, #24]\n"
        " 26: 47b8 blx r7\n"
        " 28: 4607 mov r7, r0\n"
        " 2a: b002 add sp, #8\n"
        " 2c: bc05 pop {r0, r2}\n"
        " 2e: 2f00 cmp r7, #0\n"
        " 30: d108 bne.n 0x44\n"
        " 32: 6997 ldr r7, [r2, #24]\n"
        " 34: 2234 movs r2, #52 ; 0x34\n"
        " 36: 9e05 ldr r6, [sp, #20]\n"
        " 38: 9705 str r7, [sp, #20]\n"
        " 3a: 46b6 mov lr, r6\n"
        " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 3e: 0000 movs r0, r0\n"
        " 40: e895 3b7f ldmia.w r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n"
        " 44: 6187 str r7, [r0, #24]"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Loads x_reg 0, clears the primary tag bits, then copies element 1 to
%% y_reg 1 and element 0 to y_reg 0.
get_list_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, ListReg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    St2 = ?BACKEND:and_(St1, ListReg, ?TERM_PRIMARY_CLEAR_MASK),
    St3 = ?BACKEND:move_array_element(St2, ListReg, 1, {y_reg, 1}),
    St4 = ?BACKEND:move_array_element(St3, ListReg, 0, {y_reg, 0}),
    St5 = ?BACKEND:free_native_registers(St4, [ListReg]),
    ?BACKEND:assert_all_native_free(St5),
    Emitted = ?BACKEND:stream(St5),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 2603 movs r6, #3\n"
        " 4: 43b7 bics r7, r6\n"
        " 6: 687d ldr r5, [r7, #4]\n"
        " 8: 6946 ldr r6, [r0, #20]\n"
        " a: 6075 str r5, [r6, #4]\n"
        " c: 683d ldr r5, [r7, #0]\n"
        " e: 6946 ldr r6, [r0, #20]\n"
        " 10: 6035 str r5, [r6, #0]"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).
%% Integer type check on x_reg 0: when the immediate tag is not the integer
%% tag, jump to Label unless the term is boxed and its header carries the
%% positive-integer boxed tag. Label is placed at 16#100 after the code.
is_integer_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Label = 1,
    Arg1 = {x_reg, 0},
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, Arg1),
    % Both failure branches emit the same jump.
    JumpToLabel = fun(Blk) -> ?BACKEND:jump_to_label(Blk, Label) end,
    NotSmallInt = {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
    NotBoxed = {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED},
    NotBoxedInt = {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
    St2 = ?BACKEND:if_block(St1, NotSmallInt, fun(In0) ->
        In1 = ?BACKEND:if_block(In0, NotBoxed, JumpToLabel),
        In2 = ?BACKEND:and_(In1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
        In3 = ?BACKEND:move_array_element(In2, Reg, 0, Reg),
        ?BACKEND:if_block(In3, NotBoxedInt, JumpToLabel)
    end),
    St3 = ?BACKEND:free_native_registers(St2, [Reg]),
    ?BACKEND:assert_all_native_free(St3),
    St4 = ?BACKEND:add_label(St3, Label, 16#100),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 43fe mvns r6, r7\n"
        " 4: 0736 lsls r6, r6, #28\n"
        " 6: d015 beq.n 0x34\n"
        " 8: 463e mov r6, r7\n"
        " a: 2503 movs r5, #3\n"
        " c: 402e ands r6, r5\n"
        " e: 2e02 cmp r6, #2\n"
        " 10: d004 beq.n 0x1c\n"
        " 12: e075 b.n 0x100\n"
        " 14: 46c0 nop ; (mov r8, r8)\n"
        " 16: 46c0 nop ; (mov r8, r8)\n"
        " 18: 46c0 nop ; (mov r8, r8)\n"
        " 1a: 46c0 nop ; (mov r8, r8)\n"
        " 1c: 2603 movs r6, #3\n"
        " 1e: 43b7 bics r7, r6\n"
        " 20: 683f ldr r7, [r7, #0]\n"
        " 22: 263f movs r6, #63 ; 0x3f\n"
        " 24: 4037 ands r7, r6\n"
        " 26: 2f08 cmp r7, #8\n"
        " 28: d004 beq.n 0x34\n"
        " 2a: e069 b.n 0x100\n"
        " 2c: 46c0 nop ; (mov r8, r8)\n"
        " 2e: 46c0 nop ; (mov r8, r8)\n"
        " 30: 46c0 nop ; (mov r8, r8)\n"
        " 32: 46c0 nop ; (mov r8, r8)"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Emits, through backend module MMod, an if_block on Cond whose body is a
%% jump to Label. Returns whatever MMod:if_block/3 returns.
cond_jump_to_label(Cond, Label, MMod, MSt0) ->
    JumpToLabel = fun(BSt0) -> MMod:jump_to_label(BSt0, Label) end,
    MMod:if_block(MSt0, Cond, JumpToLabel).
%% Number type check on x_reg 0, deliberately kept in its unoptimized form so
%% that the {'and', [...]} condition is exercised. Label is placed at 16#100.
is_number_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Label = 1,
    Arg1 = {x_reg, 0},
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, Arg1),
    NotSmallInt = {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
    NotBoxed = {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED},
    % Neither a boxed positive integer nor a boxed float; the second operand
    % also frees Reg.
    NotBoxedNumber =
        {'and', [
            {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
            {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT}
        ]},
    St2 = ?BACKEND:if_block(St1, NotSmallInt, fun(In0) ->
        In1 = cond_jump_to_label(NotBoxed, Label, ?BACKEND, In0),
        In2 = ?BACKEND:and_(In1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
        In3 = ?BACKEND:move_array_element(In2, Reg, 0, Reg),
        cond_jump_to_label(NotBoxedNumber, Label, ?BACKEND, In3)
    end),
    St3 = ?BACKEND:free_native_registers(St2, [Reg]),
    ?BACKEND:assert_all_native_free(St3),
    St4 = ?BACKEND:add_label(St3, Label, 16#100),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 43fe mvns r6, r7\n"
        " 4: 0736 lsls r6, r6, #28\n"
        " 6: d01b beq.n 0x40\n"
        " 8: 463e mov r6, r7\n"
        " a: 2503 movs r5, #3\n"
        " c: 402e ands r6, r5\n"
        " e: 2e02 cmp r6, #2\n"
        " 10: d004 beq.n 0x1c\n"
        " 12: e075 b.n 0x100\n"
        " 14: 46c0 nop ; (mov r8, r8)\n"
        " 16: 46c0 nop ; (mov r8, r8)\n"
        " 18: 46c0 nop ; (mov r8, r8)\n"
        " 1a: 46c0 nop ; (mov r8, r8)\n"
        " 1c: 2603 movs r6, #3\n"
        " 1e: 43b7 bics r7, r6\n"
        " 20: 683f ldr r7, [r7, #0]\n"
        " 22: 463e mov r6, r7\n"
        " 24: 253f movs r5, #63 ; 0x3f\n"
        " 26: 402e ands r6, r5\n"
        " 28: 2e08 cmp r6, #8\n"
        " 2a: d009 beq.n 0x40\n"
        " 2c: 263f movs r6, #63 ; 0x3f\n"
        " 2e: 4037 ands r7, r6\n"
        " 30: 2f18 cmp r7, #24\n"
        " 32: d005 beq.n 0x40\n"
        " 34: e064 b.n 0x100\n"
        " 36: 46c0 nop ; (mov r8, r8)\n"
        " 38: 46c0 nop ; (mov r8, r8)\n"
        " 3a: 46c0 nop ; (mov r8, r8)\n"
        " 3c: 46c0 nop ; (mov r8, r8)\n"
        " 3e: 46c0 nop ; (mov r8, r8)"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Boolean check on x_reg 0: jump to Label when the value is neither
%% ?TRUE_ATOM nor ?FALSE_ATOM. Label is placed at 16#100; the expected dump
%% reaches it with a single `b.n`.
is_boolean_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Label = 1,
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    St2 = ?BACKEND:if_block(St1, {Reg, '!=', ?TRUE_ATOM}, fun(NotTrue) ->
        ?BACKEND:if_block(NotTrue, {Reg, '!=', ?FALSE_ATOM}, fun(NotBool) ->
            ?BACKEND:jump_to_label(NotBool, Label)
        end)
    end),
    St3 = ?BACKEND:free_native_registers(St2, [Reg]),
    ?BACKEND:assert_all_native_free(St3),
    St4 = ?BACKEND:add_label(St3, Label, 16#100),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 2f4b cmp r7, #75 ; 0x4b\n"
        " 4: d006 beq.n 0x14\n"
        " 6: 2f0b cmp r7, #11\n"
        " 8: d004 beq.n 0x14\n"
        " a: e079 b.n 0x100\n"
        " c: 46c0 nop ; (mov r8, r8)\n"
        " e: 46c0 nop ; (mov r8, r8)\n"
        " 10: 46c0 nop ; (mov r8, r8)\n"
        " 12: 46c0 nop ; (mov r8, r8)"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).
%% Boolean check with the label placed at 16#1000, after the code has been
%% emitted. The expected dump reaches the label with a
%% `ldr r6, [pc, #4]` / `add r6, pc` / `bx r6` sequence followed by a data
%% word (0x00000ff1 at 0x10).
is_boolean_far_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Label = 1,
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    St2 = ?BACKEND:if_block(St1, {Reg, '!=', ?TRUE_ATOM}, fun(NotTrue) ->
        ?BACKEND:if_block(NotTrue, {Reg, '!=', ?FALSE_ATOM}, fun(NotBool) ->
            ?BACKEND:jump_to_label(NotBool, Label)
        end)
    end),
    St3 = ?BACKEND:free_native_registers(St2, [Reg]),
    ?BACKEND:assert_all_native_free(St3),
    St4 = ?BACKEND:add_label(St3, Label, 16#1000),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 2f4b cmp r7, #75 ; 0x4b\n"
        " 4: d006 beq.n 0x14\n"
        " 6: 2f0b cmp r7, #11\n"
        " 8: d004 beq.n 0x14\n"
        " a: 4e01 ldr r6, [pc, #4] ; (0x10)\n"
        " c: 447e add r6, pc\n"
        " e: 4730 bx r6\n"
        " 10: 0ff1 lsrs r0, r6, #31\n"
        " 12: 0000 movs r0, r0"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Same as is_boolean_far_test/0, but the stream already holds one 2-byte
%% `bx lr`, so the generated code starts at offset 2 rather than 0. The
%% expected dump has a nop at 0x12 ahead of the data word at 0x14.
is_boolean_far_unaligned_test() ->
    % Pre-fill a fresh stream with a single 2-byte instruction and build the
    % state under test on top of it.
    Pad = jit_armv6m_asm:bx(lr),
    Blank = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Prefilled = jit_stream_binary:append(?BACKEND:stream(Blank), Pad),
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, Prefilled),

    Label = 1,
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    St2 = ?BACKEND:if_block(St1, {Reg, '!=', ?TRUE_ATOM}, fun(NotTrue) ->
        ?BACKEND:if_block(NotTrue, {Reg, '!=', ?FALSE_ATOM}, fun(NotBool) ->
            ?BACKEND:jump_to_label(NotBool, Label)
        end)
    end),
    St3 = ?BACKEND:free_native_registers(St2, [Reg]),
    ?BACKEND:assert_all_native_free(St3),
    St4 = ?BACKEND:add_label(St3, Label, 16#1000),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Expected = <<
        " 0: 4770 bx lr\n"
        " 2: 6987 ldr r7, [r0, #24]\n"
        " 4: 2f4b cmp r7, #75 @ 0x4b\n"
        " 6: d007 beq.n 0x18\n"
        " 8: 2f0b cmp r7, #11\n"
        " a: d005 beq.n 0x18\n"
        " c: 4e01 ldr r6, [pc, #4] @ (0x14)\n"
        " e: 447e add r6, pc\n"
        " 10: 4730 bx r6\n"
        " 12: 46c0 nop @ (mov r8, r8)\n"
        " 14: 0fef lsrs r7, r5, #31\n"
        " 16: 0000 movs r0, r0"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Variant where the label (16#1000) is registered before the code is
%% emitted. The expected bytes match the ones asserted in
%% is_boolean_far_test/0.
is_boolean_far_known_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Label = 1,
    St1 = ?BACKEND:add_label(St0, Label, 16#1000),
    {St2, Reg} = ?BACKEND:move_to_native_register(St1, {x_reg, 0}),
    St3 = ?BACKEND:if_block(St2, {Reg, '!=', ?TRUE_ATOM}, fun(NotTrue) ->
        ?BACKEND:if_block(NotTrue, {Reg, '!=', ?FALSE_ATOM}, fun(NotBool) ->
            ?BACKEND:jump_to_label(NotBool, Label)
        end)
    end),
    St4 = ?BACKEND:free_native_registers(St3, [Reg]),
    ?BACKEND:assert_all_native_free(St4),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 2f4b cmp r7, #75 ; 0x4b\n"
        " 4: d006 beq.n 0x14\n"
        " 6: 2f0b cmp r7, #11\n"
        " 8: d004 beq.n 0x14\n"
        " a: 4e01 ldr r6, [pc, #4] ; (0x10)\n"
        " c: 447e add r6, pc\n"
        " e: 4730 bx r6\n"
        " 10: 0ff1 lsrs r1, r6, #31\n"
        " 12: 0000 movs r0, r0"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).
%% Boolean check against a label registered up front at 16#1000, generated
%% into a stream that already holds one 2-byte `bx lr`, so the code starts at
%% offset 2 rather than 0. The expected dump has a nop at 0x12 ahead of the
%% data word at 0x14.
is_boolean_far_known_unaligned_test() ->
    % Pre-fill a fresh stream with a single 2-byte instruction and build the
    % state under test on top of it.
    Pad = jit_armv6m_asm:bx(lr),
    Blank = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Prefilled = jit_stream_binary:append(?BACKEND:stream(Blank), Pad),
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, Prefilled),

    Label = 1,
    St1 = ?BACKEND:add_label(St0, Label, 16#1000),
    {St2, Reg} = ?BACKEND:move_to_native_register(St1, {x_reg, 0}),
    St3 = ?BACKEND:if_block(St2, {Reg, '!=', ?TRUE_ATOM}, fun(NotTrue) ->
        ?BACKEND:if_block(NotTrue, {Reg, '!=', ?FALSE_ATOM}, fun(NotBool) ->
            ?BACKEND:jump_to_label(NotBool, Label)
        end)
    end),
    St4 = ?BACKEND:free_native_registers(St3, [Reg]),
    ?BACKEND:assert_all_native_free(St4),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Expected = <<
        " 0: 4770 bx lr\n"
        " 2: 6987 ldr r7, [r0, #24]\n"
        " 4: 2f4b cmp r7, #75 ; 0x4b\n"
        " 6: d007 beq.n 0x18\n"
        " 8: 2f0b cmp r7, #11\n"
        " a: d005 beq.n 0x18\n"
        " c: 4e01 ldr r6, [pc, #4] ; (0x14)\n"
        " e: 447e add r6, pc\n"
        " 10: 4730 bx r6\n"
        " 12: 46c0 nop ; (mov r8, r8)\n"
        " 14: 0fef lsrs r7, r5, #31\n"
        " 16: 0000 movs r0, r0"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).
%% OP_WAIT_TIMEOUT pattern: set_continuation_to_offset/1 followed later by
%% continuation_entry_point/1 at the referenced offset. The timeout 5000
%% shows up in the expected dump as the data word 0x1388 at 0x18, and
%% Label (42) as the immediate #42.
wait_timeout_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),

    Label = 42,
    {St1, ContinuationRef} = ?BACKEND:set_continuation_to_offset(St0),
    {St2, TimeoutReg} = ?BACKEND:move_to_native_register(St1, 5000),
    WaitArgs = [ctx, jit_state, {free, TimeoutReg}, Label],
    St3 = ?BACKEND:call_primitive_last(St2, ?PRIM_WAIT_TIMEOUT, WaitArgs),
    St4 = ?BACKEND:add_label(St3, ContinuationRef),
    St5 = ?BACKEND:continuation_entry_point(St4),
    {St6, SignalResult} = ?BACKEND:call_primitive(
        St5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ctx, jit_state]
    ),
    St7 = ?BACKEND:return_if_not_equal_to_ctx(St6, {free, SignalResult}),
    % ?WAITING_TIMEOUT_EXPIRED
    {St8, FlagsResult} = ?BACKEND:call_primitive(St7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
    OnNoFlag = fun(Blk) ->
        ?BACKEND:call_primitive_last(Blk, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
            ctx, jit_state, Label
        ])
    end,
    St9 = ?BACKEND:if_block(St8, {{free, FlagsResult}, '==', 0}, OnNoFlag),
    St10 = ?BACKEND:update_branches(St9),

    Emitted = ?BACKEND:stream(St10),
    Expected = <<
        " 0: a706 add r7, pc, #24 ; (adr r7, 0x1c)\n"
        " 2: 3701 adds r7, #1\n"
        " 4: 9e00 ldr r6, [sp, #0]\n"
        " 6: 6077 str r7, [r6, #4]\n"
        " 8: 4f03 ldr r7, [pc, #12] ; (0x18)\n"
        " a: 6f96 ldr r6, [r2, #120] ; 0x78\n"
        " c: 463a mov r2, r7\n"
        " e: 232a movs r3, #42 ; 0x2a\n"
        " 10: 9f05 ldr r7, [sp, #20]\n"
        " 12: 9605 str r6, [sp, #20]\n"
        " 14: 46be mov lr, r7\n"
        " 16: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 18: 1388 asrs r0, r1, #14\n"
        " 1a: 0000 movs r0, r0\n"
        " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
        " 1e: 6d57 ldr r7, [r2, #84] ; 0x54\n"
        " 20: b405 push {r0, r2}\n"
        " 22: 9902 ldr r1, [sp, #8]\n"
        " 24: 47b8 blx r7\n"
        " 26: 4607 mov r7, r0\n"
        " 28: bc05 pop {r0, r2}\n"
        " 2a: 4287 cmp r7, r0\n"
        " 2c: d001 beq.n 0x32\n"
        " 2e: 4638 mov r0, r7\n"
        " 30: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 32: 2784 movs r7, #132 ; 0x84\n"
        " 34: 59d7 ldr r7, [r2, r7]\n"
        " 36: b405 push {r0, r2}\n"
        " 38: 2102 movs r1, #2\n"
        " 3a: 47b8 blx r7\n"
        " 3c: 4607 mov r7, r0\n"
        " 3e: bc05 pop {r0, r2}\n"
        " 40: 2f00 cmp r7, #0\n"
        " 42: d105 bne.n 0x50\n"
        " 44: 6fd7 ldr r7, [r2, #124] ; 0x7c\n"
        " 46: 222a movs r2, #42 ; 0x2a\n"
        " 48: 9e05 ldr r6, [sp, #20]\n"
        " 4a: 9705 str r7, [sp, #20]\n"
        " 4c: 46b6 mov lr, r6\n"
        " 4e: bdf2 pop {r1, r4, r5, r6, r7, pc}"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% OP_WAIT pattern built on set_continuation_to_label/2. update_branches/1 is
%% not called here; the six 12-byte jump table entries in the expected dump
%% all carry a zero data word.
wait_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),

    St1 = ?BACKEND:jump_table(St0, 5),
    St2 = ?BACKEND:add_label(St1, 1),
    Label = 2,
    St3 = ?BACKEND:set_continuation_to_label(St2, Label),
    St4 = ?BACKEND:call_primitive_last(St3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),

    Emitted = ?BACKEND:stream(St4),
    Expected = <<
        " 0: 4b01 ldr r3, [pc, #4] ; (0x8)\n"
        " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
        " 4: 449f add pc, r3\n"
        " 6: 46c0 nop ; (mov r8, r8)\n"
        " 8: 0000 movs r0, r0\n"
        " a: 0000 movs r0, r0\n"
        " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n"
        " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
        " 10: 449f add pc, r3\n"
        " 12: 46c0 nop ; (mov r8, r8)\n"
        " 14: 0000 movs r0, r0\n"
        " 16: 0000 movs r0, r0\n"
        " 18: 4b01 ldr r3, [pc, #4] ; (0x20)\n"
        " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
        " 1c: 449f add pc, r3\n"
        " 1e: 46c0 nop ; (mov r8, r8)\n"
        " 20: 0000 movs r0, r0\n"
        " 22: 0000 movs r0, r0\n"
        " 24: 4b01 ldr r3, [pc, #4] ; (0x2c)\n"
        " 26: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
        " 28: 449f add pc, r3\n"
        " 2a: 46c0 nop ; (mov r8, r8)\n"
        " 2c: 0000 movs r0, r0\n"
        " 2e: 0000 movs r0, r0\n"
        " 30: 4b01 ldr r3, [pc, #4] ; (0x38)\n"
        " 32: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
        " 34: 449f add pc, r3\n"
        " 36: 46c0 nop ; (mov r8, r8)\n"
        " 38: 0000 movs r0, r0\n"
        " 3a: 0000 movs r0, r0\n"
        " 3c: 4b01 ldr r3, [pc, #4] ; (0x44)\n"
        " 3e: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
        " 40: 449f add pc, r3\n"
        " 42: 46c0 nop ; (mov r8, r8)\n"
        " 44: 0000 movs r0, r0\n"
        " 46: 0000 movs r0, r0\n"
        " 48: a700 add r7, pc, #0 ; (adr r7, 0x4c)\n"
        " 4a: 2633 movs r6, #51 ; 0x33\n"
        " 4c: 4276 negs r6, r6\n"
        " 4e: 19f6 adds r6, r6, r7\n"
        " 50: 9f00 ldr r7, [sp, #0]\n"
        " 52: 607e str r6, [r7, #4]\n"
        " 54: 6f57 ldr r7, [r2, #116] ; 0x74\n"
        " 56: 9e05 ldr r6, [sp, #20]\n"
        " 58: 9705 str r7, [sp, #20]\n"
        " 5a: 46b6 mov lr, r6\n"
        " 5c: bdf2 pop {r1, r4, r5, r6, r7, pc}"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Exercises return_labels_and_lines/2 with two labels (1 at offset 16,
%% 2 at offset 32) and two {Line, Offset} pairs.
return_labels_and_lines_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),

    % Labels are registered out of order on purpose (2 first, then 1).
    St1 = ?BACKEND:add_label(St0, 2, 32),
    St2 = ?BACKEND:add_label(St1, 1, 16),

    % {Line, Offset} pairs
    SortedLines = [{10, 16}, {20, 32}],

    St3 = ?BACKEND:return_labels_and_lines(St2, SortedLines),
    Emitted = ?BACKEND:stream(St3),

    % Loose sanity bound only; the exact bytes are pinned by the dump
    % comparison below. In that dump `adr` and `pop` are one 16-bit
    % instruction each (offsets 0 and 2) and the rest of the stream is the
    % table data, ending at offset 0x20.
    ?assert(byte_size(Emitted) >= 30),

    % The table data starts at offset 4, which is what `adr r0` resolves to.
    Expected = <<
        " 0: a000 add r0, pc, #0 ; (adr r0, 0x4)\n"
        " 2: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 4: 0200 lsls r0, r0, #8\n"
        " 6: 0100 lsls r0, r0, #4\n"
        " 8: 0000 movs r0, r0\n"
        " a: 1000 asrs r0, r0, #32\n"
        " c: 0200 lsls r0, r0, #8\n"
        " e: 0000 movs r0, r0\n"
        " 10: 2000 movs r0, #0\n"
        " 12: 0200 lsls r0, r0, #8\n"
        " 14: 0a00 lsrs r0, r0, #8\n"
        " 16: 0000 movs r0, r0\n"
        " 18: 1000 asrs r0, r0, #32\n"
        " 1a: 1400 asrs r0, r0, #16\n"
        " 1c: 0000 movs r0, r0\n"
        " 1e: 2000 movs r0, #0"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% return_labels_and_lines/2 on a stream that already holds one 2-byte
%% `bx lr`, so the code starts at offset 2 rather than 0. In the expected
%% dump a zero halfword at offset 6 precedes the table data, which starts at
%% offset 8 (the address `adr r0` resolves to).
return_labels_and_lines_unaligned_test() ->
    % Pre-fill a fresh stream with a single 2-byte instruction and build the
    % state under test on top of it.
    Pad = jit_armv6m_asm:bx(lr),
    Blank = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Prefilled = jit_stream_binary:append(?BACKEND:stream(Blank), Pad),
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, Prefilled),

    % Same labels and lines as in return_labels_and_lines_test/0.
    St1 = ?BACKEND:add_label(St0, 2, 32),
    St2 = ?BACKEND:add_label(St1, 1, 16),

    % {Line, Offset} pairs
    SortedLines = [{10, 16}, {20, 32}],

    St3 = ?BACKEND:return_labels_and_lines(St2, SortedLines),
    Emitted = ?BACKEND:stream(St3),

    Expected = <<
        " 0: 4770 bx lr\n"
        "2: a001 add r0, pc, #4 ; (adr r0, 0x8)\n"
        "4: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        "6: 0000 movs r0, r0\n"
        "8: 0200 lsls r0, r0, #8\n"
        "a: 0100 lsls r0, r0, #4\n"
        "c: 0000 movs r0, r0\n"
        "e: 1000 asrs r0, r0, #32\n"
        "10: 0200 lsls r0, r0, #8\n"
        "12: 0000 movs r0, r0\n"
        "14: 2000 movs r0, #0\n"
        "16: 0200 lsls r0, r0, #8\n"
        "18: 0a00 lsrs r0, r0, #8\n"
        "1a: 0000 movs r0, r0\n"
        "1c: 1000 asrs r0, r0, #32\n"
        "1e: 1400 asrs r0, r0, #16\n"
        "20: 0000 movs r0, r0\n"
        "22: 2000 movs r0, #0"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).
%% Fetches an imported BIF pointer with call_primitive/3, then calls it via
%% call_func_ptr/3 with a {free, {x_reg, 0}} argument in last position. In
%% the expected dump that argument is passed on the stack (`str r6, [sp, #0]`).
gc_bif2_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, BifPtr} = ?BACKEND:call_primitive(St0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]),
    CallArgs = [ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}],
    {St2, _ResultReg} = ?BACKEND:call_func_ptr(St1, {free, BifPtr}, CallArgs),

    Emitted = ?BACKEND:stream(St2),
    Expected = <<
        " 0: 6a17 ldr r7, [r2, #32]\n"
        " 2: b405 push {r0, r2}\n"
        " 4: 9802 ldr r0, [sp, #8]\n"
        " 6: 212a movs r1, #42 ; 0x2a\n"
        " 8: 47b8 blx r7\n"
        " a: 4607 mov r7, r0\n"
        " c: bc05 pop {r0, r2}\n"
        " e: b405 push {r0, r2}\n"
        " 10: b082 sub sp, #8\n"
        " 12: 6986 ldr r6, [r0, #24]\n"
        " 14: 9600 str r6, [sp, #0]\n"
        " 16: 2100 movs r1, #0\n"
        " 18: 2203 movs r2, #3\n"
        " 1a: 6946 ldr r6, [r0, #20]\n"
        " 1c: 6833 ldr r3, [r6, #0]\n"
        " 1e: 47b8 blx r7\n"
        " 20: 4607 mov r7, r0\n"
        " 22: b002 add sp, #8\n"
        " 24: bc05 pop {r0, r2}"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% call_primitive/3 with a parameter whose value is already in r1
%% ({free, r1}). In the expected dump r1 is first copied to r6 and only then
%% overwritten with the second call argument.
memory_ensure_free_with_roots_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    PrimArgs = [ctx, jit_state, {free, r1}, 4, 1],
    {St1, _FuncPtr} = ?BACKEND:call_primitive(
        St0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, PrimArgs
    ),

    Emitted = ?BACKEND:stream(St1),
    Expected = <<
        " 0: 27b0 movs r7, #176 ; 0xb0\n"
        " 2: 59d7 ldr r7, [r2, r7]\n"
        " 4: b405 push {r0, r2}\n"
        " 6: b082 sub sp, #8\n"
        " 8: 2601 movs r6, #1\n"
        " a: 9600 str r6, [sp, #0]\n"
        " c: 460e mov r6, r1\n"
        " e: 9904 ldr r1, [sp, #16]\n"
        " 10: 4632 mov r2, r6\n"
        " 12: 2304 movs r3, #4\n"
        " 14: 47b8 blx r7\n"
        " 16: 4607 mov r7, r0\n"
        " 18: b002 add sp, #8\n"
        " 1a: bc05 pop {r0, r2}"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).
%% Reduction decrement followed by call_primitive_with_cp/3 for primitive 4
%% with arguments 2, 5 and -1. In the expected dump the -1 is produced with
%% `movs r6, #1` / `negs r6, r6` and passed on the stack.
call_ext_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(St0),
    PrimArgs = [ctx, jit_state, 2, 5, -1],
    St2 = ?BACKEND:call_primitive_with_cp(St1, 4, PrimArgs),
    ?BACKEND:assert_all_native_free(St2),
    Emitted = ?BACKEND:stream(St2),
    Expected = <<
        " 0: 9e00 ldr r6, [sp, #0]\n"
        " 2: 68b7 ldr r7, [r6, #8]\n"
        " 4: 3f01 subs r7, #1\n"
        " 6: 60b7 str r7, [r6, #8]\n"
        " 8: d109 bne.n 0x1e\n"
        " a: a704 add r7, pc, #16 ; (adr r7, 0x1c)\n"
        " c: 3701 adds r7, #1\n"
        " e: 6077 str r7, [r6, #4]\n"
        " 10: 6897 ldr r7, [r2, #8]\n"
        " 12: 9e05 ldr r6, [sp, #20]\n"
        " 14: 9705 str r7, [sp, #20]\n"
        " 16: 46b6 mov lr, r6\n"
        " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 1a: 46c0 nop ; (mov r8, r8)\n"
        " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
        " 1e: 9e00 ldr r6, [sp, #0]\n"
        " 20: 6837 ldr r7, [r6, #0]\n"
        " 22: 683f ldr r7, [r7, #0]\n"
        " 24: 063f lsls r7, r7, #24\n"
        " 26: 4e07 ldr r6, [pc, #28] ; (0x44)\n"
        " 28: 4337 orrs r7, r6\n"
        " 2a: 65c7 str r7, [r0, #92] ; 0x5c\n"
        " 2c: 6917 ldr r7, [r2, #16]\n"
        " 2e: b082 sub sp, #8\n"
        " 30: 2601 movs r6, #1\n"
        " 32: 4276 negs r6, r6\n"
        " 34: 9600 str r6, [sp, #0]\n"
        " 36: 9902 ldr r1, [sp, #8]\n"
        " 38: 2202 movs r2, #2\n"
        " 3a: 2305 movs r3, #5\n"
        " 3c: 47b8 blx r7\n"
        " 3e: b002 add sp, #8\n"
        " 40: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 42: 0000 movs r0, r0\n"
        " 44: 0120 lsls r0, r4, #4\n"
        " 46: 0000 movs r0, r0\n"
        " 48: b5f2 push {r1, r4, r5, r6, r7, lr}"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).
%% Fun call sequence: after the reduction decrement, a copy of x_reg 0 is
%% checked twice (boxed term, then fun header tag). Each failed check raises
%% a ?BADFUN_ATOM error tuple through ?PRIM_RAISE_ERROR_TUPLE. The original
%% register is finally handed to ?PRIM_CALL_FUN with zero arguments.
call_fun_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(St0),
    FuncReg = {x_reg, 0},
    ArgsCount = 0,
    {St2, Reg} = ?BACKEND:move_to_native_register(St1, FuncReg),
    {St3, RegCopy} = ?BACKEND:copy_to_native_register(St2, Reg),
    % Both failed checks emit the same error call.
    RaiseBadfun = fun(Blk) ->
        ?BACKEND:call_primitive_last(Blk, ?PRIM_RAISE_ERROR_TUPLE, [
            ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
        ])
    end,
    NotBoxed = {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED},
    St4 = ?BACKEND:if_block(St3, NotBoxed, RaiseBadfun),
    St5 = ?BACKEND:and_(St4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK),
    St6 = ?BACKEND:move_array_element(St5, RegCopy, 0, RegCopy),
    NotFun = {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN},
    St7 = ?BACKEND:if_block(St6, NotFun, RaiseBadfun),
    St8 = ?BACKEND:free_native_registers(St7, [RegCopy]),
    CallArgs = [ctx, jit_state, Reg, ArgsCount],
    St9 = ?BACKEND:call_primitive_with_cp(St8, ?PRIM_CALL_FUN, CallArgs),
    ?BACKEND:assert_all_native_free(St9),
    Emitted = ?BACKEND:stream(St9),
    Expected = <<
        " 0: 9e00 ldr r6, [sp, #0]\n"
        " 2: 68b7 ldr r7, [r6, #8]\n"
        " 4: 3f01 subs r7, #1\n"
        " 6: 60b7 str r7, [r6, #8]\n"
        " 8: d109 bne.n 0x1e\n"
        " a: a704 add r7, pc, #16 ; (adr r7, 0x1c)\n"
        " c: 3701 adds r7, #1\n"
        " e: 6077 str r7, [r6, #4]\n"
        " 10: 6897 ldr r7, [r2, #8]\n"
        " 12: 9e05 ldr r6, [sp, #20]\n"
        " 14: 9705 str r7, [sp, #20]\n"
        " 16: 46b6 mov lr, r6\n"
        " 18: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 1a: 46c0 nop ; (mov r8, r8)\n"
        " 1c: b5f2 push {r1, r4, r5, r6, r7, lr}\n"
        " 1e: 6987 ldr r7, [r0, #24]\n"
        " 20: 463e mov r6, r7\n"
        " 22: 4635 mov r5, r6\n"
        " 24: 2403 movs r4, #3\n"
        " 26: 4025 ands r5, r4\n"
        " 28: 2d02 cmp r5, #2\n"
        " 2a: d00b beq.n 0x44\n"
        " 2c: 6cd7 ldr r7, [r2, #76] ; 0x4c\n"
        " 2e: b082 sub sp, #8\n"
        " 30: 9600 str r6, [sp, #0]\n"
        " 32: 9902 ldr r1, [sp, #8]\n"
        " 34: 222e movs r2, #46 ; 0x2e\n"
        " 36: 4b02 ldr r3, [pc, #8] ; (0x40)\n"
        " 38: 47b8 blx r7\n"
        " 3a: b002 add sp, #8\n"
        " 3c: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 3e: 0000 movs r0, r0\n"
        " 40: 018b lsls r3, r1, #6\n"
        " 42: 0000 movs r0, r0\n"
        " 44: 2503 movs r5, #3\n"
        " 46: 43ae bics r6, r5\n"
        " 48: 6836 ldr r6, [r6, #0]\n"
        " 4a: 4635 mov r5, r6\n"
        " 4c: 243f movs r4, #63 ; 0x3f\n"
        " 4e: 4025 ands r5, r4\n"
        " 50: 2d14 cmp r5, #20\n"
        " 52: d00b beq.n 0x6c\n"
        " 54: 6cd7 ldr r7, [r2, #76] ; 0x4c\n"
        " 56: b082 sub sp, #8\n"
        " 58: 9600 str r6, [sp, #0]\n"
        " 5a: 9902 ldr r1, [sp, #8]\n"
        " 5c: 2256 movs r2, #86 ; 0x56\n"
        " 5e: 4b02 ldr r3, [pc, #8] ; (0x68)\n"
        " 60: 47b8 blx r7\n"
        " 62: b002 add sp, #8\n"
        " 64: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 66: 0000 movs r0, r0\n"
        " 68: 018b lsls r3, r1, #6\n"
        " 6a: 0000 movs r0, r0\n"
        " 6c: 9d00 ldr r5, [sp, #0]\n"
        " 6e: 682e ldr r6, [r5, #0]\n"
        " 70: 6836 ldr r6, [r6, #0]\n"
        " 72: 0636 lsls r6, r6, #24\n"
        " 74: 4d05 ldr r5, [pc, #20] ; (0x8c)\n"
        " 76: 432e orrs r6, r5\n"
        " 78: 65c6 str r6, [r0, #92] ; 0x5c\n"
        " 7a: 2680 movs r6, #128 ; 0x80\n"
        " 7c: 5996 ldr r6, [r2, r6]\n"
        " 7e: 463a mov r2, r7\n"
        " 80: 2300 movs r3, #0\n"
        " 82: 9f05 ldr r7, [sp, #20]\n"
        " 84: 9605 str r6, [sp, #20]\n"
        " 86: 46be mov lr, r7\n"
        " 88: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 8a: 0000 movs r0, r0\n"
        " 8c: 0240 lsls r0, r0, #9\n"
        " 8e: 0000 movs r0, r0\n"
        " 90: b5f2 push {r1, r4, r5, r6, r7, lr}"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Shared body for the move_to_vm_register cases: emits the move from Source
%% to Dest followed by a jump to offset 16#100, then compares the resulting
%% stream with the bytes described by Dump.
move_to_vm_register_test0(State, Source, Dest, Dump) ->
    Moved = ?BACKEND:move_to_vm_register(State, Source, Dest),
    Jumped = ?BACKEND:jump_to_offset(Moved, 16#100),
    Emitted = ?BACKEND:stream(Jumped),
    ?assertEqual(dump_to_bin(Dump), Emitted).
%% Test generator for move_to_vm_register/3: every case starts from the same
%% fresh backend state and goes through move_to_vm_register_test0/4 unless it
%% needs a custom instruction sequence.
move_to_vm_register_test_() ->
    NewState = fun() ->
        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
    end,
    Cases = fun(St0) ->
        [
            %% Zero immediate to x_reg / ptr / y_reg
            ?_test(
                move_to_vm_register_test0(St0, 0, {x_reg, 0}, <<
                    " 0: 2700 movs r7, #0\n"
                    " 2: 6187 str r7, [r0, #24]\n"
                    " 4: e07c b.n 0x100"
                >>)
            ),
            ?_test(
                move_to_vm_register_test0(St0, 0, {x_reg, extra}, <<
                    " 0: 2700 movs r7, #0\n"
                    " 2: 6587 str r7, [r0, #88] ; 0x58\n"
                    " 4: e07c b.n 0x100"
                >>)
            ),
            ?_test(
                move_to_vm_register_test0(St0, 0, {ptr, r6}, <<
                    " 0: 2700 movs r7, #0\n"
                    " 2: 6037 str r7, [r6, #0]\n"
                    " 4: e07c b.n 0x100"
                >>)
            ),
            ?_test(
                move_to_vm_register_test0(St0, 0, {y_reg, 2}, <<
                    " 0: 2600 movs r6, #0\n"
                    " 2: 6947 ldr r7, [r0, #20]\n"
                    " 4: 60be str r6, [r7, #8]\n"
                    " 6: e07b b.n 0x100"
                >>)
            ),
            ?_test(
                move_to_vm_register_test0(St0, 0, {y_reg, 20}, <<
                    " 0: 2600 movs r6, #0\n"
                    " 2: 6947 ldr r7, [r0, #20]\n"
                    " 4: 653e str r6, [r7, #80] ; 0x50\n"
                    " 6: e07b b.n 0x100"
                >>)
            ),
            %% Small immediate to x_reg / y_reg
            ?_test(
                move_to_vm_register_test0(St0, 42, {x_reg, 0}, <<
                    " 0: 272a movs r7, #42 ; 0x2a\n"
                    " 2: 6187 str r7, [r0, #24]\n"
                    " 4: e07c b.n 0x100"
                >>)
            ),
            ?_test(
                move_to_vm_register_test0(St0, 42, {x_reg, extra}, <<
                    " 0: 272a movs r7, #42 ; 0x2a\n"
                    " 2: 6587 str r7, [r0, #88] ; 0x58\n"
                    " 4: e07c b.n 0x100"
                >>)
            ),
            ?_test(
                move_to_vm_register_test0(St0, 42, {y_reg, 2}, <<
                    " 0: 262a movs r6, #42 ; 0x2a\n"
                    " 2: 6947 ldr r7, [r0, #20]\n"
                    " 4: 60be str r6, [r7, #8]\n"
                    " 6: e07b b.n 0x100"
                >>)
            ),
            ?_test(
                move_to_vm_register_test0(St0, 42, {y_reg, 20}, <<
                    " 0: 262a movs r6, #42 ; 0x2a\n"
                    " 2: 6947 ldr r7, [r0, #20]\n"
                    " 4: 653e str r6, [r7, #80] ; 0x50\n"
                    " 6: e07b b.n 0x100"
                >>)
            ),
            %% Small immediate to ptr
            ?_test(
                move_to_vm_register_test0(St0, 99, {ptr, r3}, <<
                    " 0: 2763 movs r7, #99 ; 0x63\n"
                    " 2: 601f str r7, [r3, #0]\n"
                    " 4: e07c b.n 0x100"
                >>)
            ),
            %% x_reg to x_reg
            ?_test(
                move_to_vm_register_test0(St0, {x_reg, 1}, {x_reg, 2}, <<
                    " 0: 69c7 ldr r7, [r0, #28]\n"
                    " 2: 6207 str r7, [r0, #32]\n"
                    " 4: e07c b.n 0x100"
                >>)
            ),
            %% x_reg to ptr
            ?_test(
                move_to_vm_register_test0(St0, {x_reg, 1}, {ptr, r1}, <<
                    " 0: 69c7 ldr r7, [r0, #28]\n"
                    " 2: 600f str r7, [r1, #0]\n"
                    " 4: e07c b.n 0x100"
                >>)
            ),
            %% ptr to x_reg
            ?_test(
                move_to_vm_register_test0(St0, {ptr, r4}, {x_reg, 3}, <<
                    " 0: 6827 ldr r7, [r4, #0]\n"
                    " 2: 6247 str r7, [r0, #36] ; 0x24\n"
                    " 4: e07c b.n 0x100"
                >>)
            ),
            %% x_reg to y_reg
            ?_test(
                move_to_vm_register_test0(St0, {x_reg, 0}, {y_reg, 1}, <<
                    " 0: 6987 ldr r7, [r0, #24]\n"
                    " 2: 6946 ldr r6, [r0, #20]\n"
                    " 4: 6077 str r7, [r6, #4]\n"
                    " 6: e07b b.n 0x100"
                >>)
            ),
            %% y_reg to x_reg (first y slot)
            ?_test(
                move_to_vm_register_test0(St0, {y_reg, 0}, {x_reg, 3}, <<
                    " 0: 6946 ldr r6, [r0, #20]\n"
                    " 2: 6837 ldr r7, [r6, #0]\n"
                    " 4: 6247 str r7, [r0, #36] ; 0x24\n"
                    " 6: e07b b.n 0x100"
                >>)
            ),
            %% y_reg to x_reg (non-zero y slot); the destination here is an
            %% x register, not a y register
            ?_test(
                move_to_vm_register_test0(St0, {y_reg, 1}, {x_reg, 3}, <<
                    " 0: 6946 ldr r6, [r0, #20]\n"
                    " 2: 6877 ldr r7, [r6, #4]\n"
                    " 4: 6247 str r7, [r0, #36] ; 0x24\n"
                    " 6: e07b b.n 0x100"
                >>)
            ),
            %% Native register to x_reg
            ?_test(
                move_to_vm_register_test0(St0, r5, {x_reg, 0}, <<
                    " 0: 6185 str r5, [r0, #24]\n"
                    " 2: e07d b.n 0x100"
                >>)
            ),
            ?_test(
                move_to_vm_register_test0(St0, r6, {x_reg, extra}, <<
                    " 0: 6586 str r6, [r0, #88] ; 0x58\n"
                    " 2: e07d b.n 0x100"
                >>)
            ),
            %% Native register to ptr
            ?_test(
                move_to_vm_register_test0(St0, r4, {ptr, r3}, <<
                    " 0: 601c str r4, [r3, #0]\n"
                    " 2: e07d b.n 0x100"
                >>)
            ),
            %% Native register to y_reg
            ?_test(
                move_to_vm_register_test0(St0, r1, {y_reg, 0}, <<
                    " 0: 6947 ldr r7, [r0, #20]\n"
                    " 2: 6039 str r1, [r7, #0]\n"
                    " 4: e07c b.n 0x100"
                >>)
            ),
            %% Large immediate to x_reg (32-bit literal pool, aligned case)
            ?_test(
                move_to_vm_register_test0(St0, 16#12345678, {x_reg, 0}, <<
                    " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n"
                    " 2: 6187 str r7, [r0, #24]\n"
                    " 4: e07c b.n 0x100\n"
                    " 6: 0000 movs r0, r0\n"
                    " 8: 5678 ldrsb r0, [r7, r1]\n"
                    " a: 1234 asrs r4, r6, #8"
                >>)
            ),
            %% Large immediate to x_reg (32-bit literal pool, unaligned case)
            ?_test(begin
                %% A single 2-byte instruction first, so the load that follows
                %% starts at an offset that is not a multiple of four.
                Shifted = ?BACKEND:move_to_vm_register(St0, r1, {ptr, r3}),
                Loaded = ?BACKEND:move_to_vm_register(Shifted, 16#12345678, {x_reg, 0}),
                Jumped = ?BACKEND:jump_to_offset(Loaded, 16#100),
                Emitted = ?BACKEND:stream(Jumped),
                Wanted = dump_to_bin(<<
                    " 0: 6019 str r1, [r3, #0]\n"
                    " 2: 4f01 ldr r7, [pc, #4] ; (0x8)\n"
                    " 4: 6187 str r7, [r0, #24]\n"
                    " 6: e07b b.n 0x100\n"
                    " 8: 5678 ldrsb r0, [r7, r1]\n"
                    " a: 1234 asrs r4, r6, #8"
                >>),
                ?assertEqual(Wanted, Emitted)
            end),
            ?_test(
                move_to_vm_register_test0(St0, 16#12345678, {x_reg, extra}, <<
                    " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n"
                    " 2: 6587 str r7, [r0, #88] ; 0x58\n"
                    " 4: e07c b.n 0x100\n"
                    " 6: 0000 movs r0, r0\n"
                    " 8: 5678 ldrsb r0, [r7, r1]\n"
                    " a: 1234 asrs r4, r6, #8"
                >>)
            ),
            ?_test(
                move_to_vm_register_test0(St0, 16#12345678, {y_reg, 2}, <<
                    " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n"
                    " 2: 6946 ldr r6, [r0, #20]\n"
                    " 4: 60b7 str r7, [r6, #8]\n"
                    " 6: e07b b.n 0x100\n"
                    " 8: 5678 ldrsb r0, [r7, r1]\n"
                    " a: 1234 asrs r4, r6, #8"
                >>)
            ),
            ?_test(
                move_to_vm_register_test0(St0, 16#12345678, {y_reg, 20}, <<
                    " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n"
                    " 2: 6946 ldr r6, [r0, #20]\n"
                    " 4: 6537 str r7, [r6, #80] ; 0x50\n"
                    " 6: e07b b.n 0x100\n"
                    " 8: 5678 ldrsb r0, [r7, r1]\n"
                    " a: 1234 asrs r4, r6, #8"
                >>)
            ),
            %% Large immediate to ptr
            ?_test(
                move_to_vm_register_test0(St0, 16#12345678, {ptr, r3}, <<
                    " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n"
                    " 2: 601f str r7, [r3, #0]\n"
                    " 4: e07c b.n 0x100\n"
                    " 6: 0000 movs r0, r0\n"
                    " 8: 5678 ldrsb r0, [r7, r1]\n"
                    " a: 1234 asrs r4, r6, #8"
                >>)
            ),
            %% x_reg to y_reg (high index)
            ?_test(
                move_to_vm_register_test0(St0, {x_reg, 15}, {y_reg, 31}, <<
                    " 0: 6d47 ldr r7, [r0, #84] ; 0x54\n"
                    " 2: 6946 ldr r6, [r0, #20]\n"
                    " 4: 67f7 str r7, [r6, #124] ; 0x7c\n"
                    " 6: e07b b.n 0x100"
                >>)
            ),
            %% y_reg to x_reg (high index)
            ?_test(
                move_to_vm_register_test0(St0, {y_reg, 31}, {x_reg, 15}, <<
                    " 0: 6946 ldr r6, [r0, #20]\n"
                    " 2: 6ff7 ldr r7, [r6, #124] ; 0x7c\n"
                    " 4: 6547 str r7, [r0, #84] ; 0x54\n"
                    " 6: e07b b.n 0x100"
                >>)
            ),
            %% y_reg index 32: the 128-byte offset is built in a register
            %% instead of being encoded in the str immediate
            ?_test(
                move_to_vm_register_test0(St0, 42, {y_reg, 32}, <<
                    " 0: 262a movs r6, #42 ; 0x2a\n"
                    " 2: 6947 ldr r7, [r0, #20]\n"
                    " 4: 2580 movs r5, #128 ; 0x80\n"
                    " 6: 443d add r5, r7\n"
                    " 8: 602e str r6, [r5, #0]\n"
                    " a: e079 b.n 0x100"
                >>)
            ),
            %% Negative immediate to x_reg
            ?_test(
                move_to_vm_register_test0(St0, -1, {x_reg, 0}, <<
                    " 0: 2701 movs r7, #1\n"
                    " 2: 427f negs r7, r7\n"
                    " 4: 6187 str r7, [r0, #24]\n"
                    " 6: e07b b.n 0x100"
                >>)
            )
        ]
    end,
    {setup, NewState, Cases}.

%% Shared driver for the move_array_element cases: emits the move and compares
%% the resulting stream with Dump (no trailing jump is appended here).
move_array_element_test0(State, Reg, Index, Dest, Dump) ->
    Moved = ?BACKEND:move_array_element(State, Reg, Index, Dest),
    Emitted = ?BACKEND:stream(Moved),
    ?assertEqual(dump_to_bin(Dump), Emitted).
%% Test generator for move_array_element/4 with integer and register indices
%% and with x_reg, y_reg, ptr and native-register destinations.
move_array_element_test_() ->
    NewState = fun() ->
        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
    end,
    Cases = fun(St0) ->
        [
            %% reg[2] to x_reg
            ?_test(
                move_array_element_test0(St0, r3, 2, {x_reg, 0}, <<
                    " 0: 689f ldr r7, [r3, #8]\n"
                    " 2: 6187 str r7, [r0, #24]"
                >>)
            ),
            %% reg[3] to ptr
            ?_test(
                move_array_element_test0(St0, r3, 3, {ptr, r5}, <<
                    " 0: 68df ldr r7, [r3, #12]\n"
                    " 2: 602f str r7, [r5, #0]"
                >>)
            ),
            %% reg[1] to y_reg
            ?_test(
                move_array_element_test0(St0, r3, 1, {y_reg, 2}, <<
                    " 0: 685e ldr r6, [r3, #4]\n"
                    " 2: 6947 ldr r7, [r0, #20]\n"
                    " 4: 60be str r6, [r7, #8]"
                >>)
            ),
            %% reg[1] straight into native register r5
            ?_test(
                move_array_element_test0(St0, r3, 1, r5, <<
                    " 0: 685d ldr r5, [r3, #4]"
                >>)
            ),
            %% reg[7] to y_reg (high y index)
            ?_test(
                move_array_element_test0(St0, r3, 7, {y_reg, 31}, <<
                    " 0: 69de ldr r6, [r3, #28]\n"
                    " 2: 6947 ldr r7, [r0, #20]\n"
                    " 4: 67fe str r6, [r7, #124] ; 0x7c"
                >>)
            ),
            %% reg[7] to x_reg (high x index)
            ?_test(
                move_array_element_test0(St0, r3, 7, {x_reg, 15}, <<
                    " 0: 69df ldr r7, [r3, #28]\n"
                    " 2: 6547 str r7, [r0, #84] ; 0x54"
                >>)
            ),
            %% register index (freed afterwards) to x_reg
            ?_test(begin
                {St1, IdxReg} = ?BACKEND:get_array_element(St0, r3, 4),
                move_array_element_test0(St1, r3, {free, IdxReg}, {x_reg, 2}, <<
                    " 0: 691f ldr r7, [r3, #16]\n"
                    " 2: 00bf lsls r7, r7, #2\n"
                    " 4: 59df ldr r7, [r3, r7]\n"
                    " 6: 6207 str r7, [r0, #32]"
                >>)
            end),
            %% register index (freed afterwards) to ptr
            ?_test(begin
                {St1, IdxReg} = ?BACKEND:get_array_element(St0, r3, 4),
                move_array_element_test0(St1, r3, {free, IdxReg}, {ptr, r5}, <<
                    " 0: 691f ldr r7, [r3, #16]\n"
                    " 2: 00bf lsls r7, r7, #2\n"
                    " 4: 59df ldr r7, [r3, r7]\n"
                    " 6: 602f str r7, [r5, #0]"
                >>)
            end),
            %% register index (freed afterwards) to y_reg
            ?_test(begin
                {St1, IdxReg} = ?BACKEND:get_array_element(St0, r3, 4),
                move_array_element_test0(St1, r3, {free, IdxReg}, {y_reg, 31}, <<
                    " 0: 691f ldr r7, [r3, #16]\n"
                    " 2: 00bf lsls r7, r7, #2\n"
                    " 4: 59df ldr r7, [r3, r7]\n"
                    " 6: 6946 ldr r6, [r0, #20]\n"
                    " 8: 67f7 str r7, [r6, #124] ; 0x7c"
                >>)
            end),
            %% integer index on an allocated base register, x_reg destination
            ?_test(begin
                {St1, Base} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
                move_array_element_test0(St1, Base, 2, {x_reg, 5}, <<
                    " 0: 6987 ldr r7, [r0, #24]\n"
                    " 2: 68be ldr r6, [r7, #8]\n"
                    " 4: 62c6 str r6, [r0, #44] ; 0x2c"
                >>)
            end)
        ]
    end,
    {setup, NewState, Cases}.

%% Test generator for get_array_element/3, which loads reg[index] into a newly
%% allocated native register and returns that register.
get_array_element_test_() ->
    NewState = fun() ->
        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
    end,
    Cases = fun(St0) ->
        [
            ?_test(begin
                {St1, Loaded} = ?BACKEND:get_array_element(St0, r4, 4),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 6927 ldr r7, [r4, #16]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted),
                ?assertEqual(r7, Loaded)
            end)
        ]
    end,
    {setup, NewState, Cases}.
%% Test generator for move_to_array_element/4 and /5 (store a value into
%% base[index], optionally with an extra offset argument).
move_to_array_element_test_() ->
    NewState = fun() ->
        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
    end,
    Cases = fun(St0) ->
        [
            %% /4: x_reg to reg[integer]
            ?_test(begin
                St1 = ?BACKEND:move_to_array_element(St0, {x_reg, 0}, r3, 2),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 6987 ldr r7, [r0, #24]\n"
                    " 2: 609f str r7, [r3, #8]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /4: x_reg to reg[reg]
            ?_test(begin
                St1 = ?BACKEND:move_to_array_element(St0, {x_reg, 0}, r3, r4),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 6987 ldr r7, [r0, #24]\n"
                    " 2: 4626 mov r6, r4\n"
                    " 4: 00b6 lsls r6, r6, #2\n"
                    " 6: 519f str r7, [r3, r6]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /4: ptr to reg[reg]
            ?_test(begin
                St1 = ?BACKEND:move_to_array_element(St0, {ptr, r7}, r3, r4),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 683f ldr r7, [r7, #0]\n"
                    " 2: 4626 mov r6, r4\n"
                    " 4: 00b6 lsls r6, r6, #2\n"
                    " 6: 519f str r7, [r3, r6]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /4: y_reg to reg[reg]
            ?_test(begin
                St1 = ?BACKEND:move_to_array_element(St0, {y_reg, 2}, r3, r4),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 6946 ldr r6, [r0, #20]\n"
                    " 2: 68b7 ldr r7, [r6, #8]\n"
                    " 4: 4626 mov r6, r4\n"
                    " 6: 00b6 lsls r6, r6, #2\n"
                    " 8: 519f str r7, [r3, r6]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /5: x_reg to reg[integer], extra offset argument 1
            ?_test(begin
                St1 = ?BACKEND:move_to_array_element(St0, {x_reg, 0}, r3, 2, 1),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 6987 ldr r7, [r0, #24]\n"
                    " 2: 609f str r7, [r3, #8]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /5: x_reg to reg[reg + 1], with r3 and r4 marked as in use
            ?_test(begin
                Busy = [r3, r4],
                %% Elements 6 and 7 of the state are overwritten directly; the
                %% used_regs/1 match right below checks the edit took effect.
                St1 = setelement(6, St0, ?BACKEND:available_regs(St0) -- Busy),
                St2 = setelement(7, St1, Busy),
                [r3, r4] = ?BACKEND:used_regs(St2),
                St3 = ?BACKEND:move_to_array_element(St2, {x_reg, 0}, r3, r4, 1),
                Emitted = ?BACKEND:stream(St3),
                Expected = <<
                    " 0: 6987 ldr r7, [r0, #24]\n"
                    " 2: 1c66 adds r6, r4, #1\n"
                    " 4: 00b6 lsls r6, r6, #2\n"
                    " 6: 519f str r7, [r3, r6]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /5: immediate to reg[reg + 1], with r3 and r4 marked as in use
            ?_test(begin
                Busy = [r3, r4],
                St1 = setelement(6, St0, ?BACKEND:available_regs(St0) -- Busy),
                St2 = setelement(7, St1, Busy),
                [r3, r4] = ?BACKEND:used_regs(St2),
                St3 = ?BACKEND:move_to_array_element(St2, 42, r3, r4, 1),
                Emitted = ?BACKEND:stream(St3),
                Expected = <<
                    " 0: 272a movs r7, #42 ; 0x2a\n"
                    " 2: 1c66 adds r6, r4, #1\n"
                    " 4: 00b6 lsls r6, r6, #2\n"
                    " 6: 519f str r7, [r3, r6]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end)
        ]
    end,
    {setup, NewState, Cases}.

%% Test generator for move_to_native_register/2 (the backend picks and returns
%% the register) and move_to_native_register/3 (the caller names it).
move_to_native_register_test_() ->
    NewState = fun() ->
        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
    end,
    Cases = fun(St0) ->
        [
            %% /2: small positive immediate
            ?_test(begin
                {St1, Picked} = ?BACKEND:move_to_native_register(St0, 42),
                Emitted = ?BACKEND:stream(St1),
                ?assertEqual(r7, Picked),
                Expected = <<
                    " 0: 272a movs r7, #42 ; 0x2a"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /2: negative immediate
            ?_test(begin
                {St1, Picked} = ?BACKEND:move_to_native_register(St0, -42),
                Emitted = ?BACKEND:stream(St1),
                ?assertEqual(r7, Picked),
                Expected = <<
                    " 0: 272a movs r7, #42 ; 0x2a\n"
                    " 2: 427f negs r7, r7"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /2: -255, the last value still handled with movs + negs
            ?_test(begin
                {St1, Picked} = ?BACKEND:move_to_native_register(St0, -255),
                Emitted = ?BACKEND:stream(St1),
                ?assertEqual(r7, Picked),
                Expected = <<
                    " 0: 27ff movs r7, #255 ; 0xff\n"
                    " 2: 427f negs r7, r7"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /2: -256, loaded from the literal pool instead
            ?_test(begin
                {St1, Picked} = ?BACKEND:move_to_native_register(St0, -256),
                St2 = ?BACKEND:jump_to_offset(St1, 16#100),
                Emitted = ?BACKEND:stream(St2),
                ?assertEqual(r7, Picked),
                Expected = <<
                    " 0: 4f00 ldr r7, [pc, #0] ; (0x4)\n"
                    " 2: e07d b.n 0x100\n"
                    " 4: ff00 ffff vmaxnm.f32 , q8, "
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /2: {ptr, Reg} is dereferenced in place and Reg is returned
            ?_test(begin
                {St1, Picked} = ?BACKEND:move_to_native_register(St0, {ptr, r6}),
                Emitted = ?BACKEND:stream(St1),
                ?assertEqual(r6, Picked),
                Expected = <<
                    " 0: 6836 ldr r6, [r6, #0]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /2: {x_reg, N}
            ?_test(begin
                {St1, Picked} = ?BACKEND:move_to_native_register(St0, {x_reg, 3}),
                Emitted = ?BACKEND:stream(St1),
                ?assertEqual(r7, Picked),
                Expected = <<
                    " 0: 6a47 ldr r7, [r0, #36] ; 0x24"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /2: {y_reg, N}
            ?_test(begin
                {St1, Picked} = ?BACKEND:move_to_native_register(St0, {y_reg, 3}),
                Emitted = ?BACKEND:stream(St1),
                ?assertEqual(r7, Picked),
                Expected = <<
                    " 0: 6946 ldr r6, [r0, #20]\n"
                    " 2: 68f7 ldr r7, [r6, #12]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /3: immediate to a named register
            ?_test(begin
                St1 = ?BACKEND:move_to_native_register(St0, 42, r6),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 262a movs r6, #42 ; 0x2a"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /3: register to register
            ?_test(begin
                St1 = ?BACKEND:move_to_native_register(St0, r7, r5),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 463d mov r5, r7"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /3: {ptr, Reg} to a named register
            ?_test(begin
                St1 = ?BACKEND:move_to_native_register(St0, {ptr, r7}, r4),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 683c ldr r4, [r7, #0]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /3: {x_reg, N} to a named register
            ?_test(begin
                St1 = ?BACKEND:move_to_native_register(St0, {x_reg, 2}, r3),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 6a03 ldr r3, [r0, #32]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% /3: {y_reg, N} to a named register
            ?_test(begin
                St1 = ?BACKEND:move_to_native_register(St0, {y_reg, 2}, r1),
                Emitted = ?BACKEND:stream(St1),
                Expected = <<
                    " 0: 6947 ldr r7, [r0, #20]\n"
                    " 2: 68b9 ldr r1, [r7, #8]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end),
            %% move_to_vm_register with a {ptr, Reg, 1} source and an fp_reg
            %% destination: two words are copied (term_to_float)
            ?_test(begin
                {St1, Boxed} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
                St2 = ?BACKEND:move_to_vm_register(
                    St1, {free, {ptr, Boxed, 1}}, {fp_reg, 3}
                ),
                Emitted = ?BACKEND:stream(St2),
                Expected = <<
                    " 0: 6987 ldr r7, [r0, #24]\n"
                    " 2: 6e06 ldr r6, [r0, #96] ; 0x60\n"
                    " 4: 687d ldr r5, [r7, #4]\n"
                    " 6: 61b5 str r5, [r6, #24]\n"
                    " 8: 68bd ldr r5, [r7, #8]\n"
                    " a: 61f5 str r5, [r6, #28]"
                >>,
                ?assertEqual(dump_to_bin(Expected), Emitted)
            end)
        ]
    end,
    {setup, NewState, Cases}.

%% Shared driver for the add/3 cases. The jump to offset 16#100 is appended so
%% that a pending literal pool gets emitted before the stream is compared.
add_test0(State0, Reg, Imm, Dump) ->
    Added = ?BACKEND:add(State0, Reg, Imm),
    Jumped = ?BACKEND:jump_to_offset(Added, 16#100),
    Emitted = ?BACKEND:stream(Jumped),
    ?assertEqual(dump_to_bin(Dump), Emitted).
%% Test generator for add/3: small immediate, immediate that needs the literal
%% pool, and register operand.
add_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                ?_test(begin
                    add_test0(State0, r2, 2, <<
                        " 0: 3202 adds r2, #2\n"
                        " 2: e07d b.n 0x100"
                    >>)
                end),
                ?_test(begin
                    add_test0(State0, r2, 256, <<
                        " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n"
                        " 2: 19d2 adds r2, r2, r7\n"
                        " 4: e07c b.n 0x100\n"
                        " 6: 0000 movs r0, r0\n"
                        " 8: 0100 lsls r0, r0, #4\n"
                        " a: 0000 movs r0, r0"
                    >>)
                end),
                ?_test(begin
                    add_test0(State0, r2, r3, <<
                        " 0: 18d2 adds r2, r2, r3\n"
                        " 2: e07d b.n 0x100"
                    >>)
                end)
            ]
        end}.

%% Shared driver for the sub/3 cases; mirrors add_test0/4.
sub_test0(State0, Reg, Imm, Dump) ->
    State1 = ?BACKEND:sub(State0, Reg, Imm),
    % Force emission of literal pool
    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
    Stream = ?BACKEND:stream(State2),
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Test generator for sub/3: small immediate, immediate that needs the literal
%% pool, and register operand.
%%
%% The disassembly annotations are kept in line with the encodings next to
%% them: `4f01` at offset 0 loads the literal stored at 0x8, and `e07d` at
%% offset 2 branches to 0x100, the target passed to jump_to_offset/2.
sub_test_() ->
    {setup,
        fun() ->
            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
        end,
        fun(State0) ->
            [
                ?_test(begin
                    sub_test0(State0, r2, 2, <<
                        " 0: 3a02 subs r2, #2\n"
                        " 2: e07d b.n 0x100"
                    >>)
                end),
                ?_test(begin
                    sub_test0(State0, r2, 256, <<
                        " 0: 4f01 ldr r7, [pc, #4] ; (0x8)\n"
                        " 2: 1bd2 subs r2, r2, r7\n"
                        " 4: e07c b.n 0x100\n"
                        " 6: 0000 movs r0, r0\n"
                        " 8: 0100 lsls r0, r0, #4\n"
                        " a: 0000 movs r0, r0"
                    >>)
                end),
                ?_test(begin
                    sub_test0(State0, r2, r3, <<
                        " 0: 1ad2 subs r2, r2, r3\n"
                        " 2: e07d b.n 0x100"
                    >>)
                end)
            ]
        end}.

%% Shared driver for the mul/3 cases. No jump is appended: none of the mul_test_
%% expectations contains a literal pool.
mul_test0(State0, Reg, Imm, Dump) ->
    State1 = ?BACKEND:mul(State0, Reg, Imm),
    Stream = ?BACKEND:stream(State1),
    ?assertEqual(dump_to_bin(Dump), Stream).
%% Test generator for mul/3 by the constants 2..11. The expected code uses
%% shifts and adds/subs for 2..10 and a real muls only for 11.
mul_test_() ->
    NewState = fun() ->
        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
    end,
    Cases = fun(St0) ->
        [
            ?_test(
                mul_test0(St0, r2, 2, <<
                    " 0: 0052 lsls r2, r2, #1"
                >>)
            ),
            ?_test(
                mul_test0(St0, r2, 3, <<
                    " 0: 0057 lsls r7, r2, #1\n"
                    " 2: 18ba adds r2, r7, r2"
                >>)
            ),
            ?_test(
                mul_test0(St0, r2, 4, <<
                    " 0: 0092 lsls r2, r2, #2"
                >>)
            ),
            ?_test(
                mul_test0(St0, r2, 5, <<
                    " 0: 0097 lsls r7, r2, #2\n"
                    " 2: 18ba adds r2, r7, r2"
                >>)
            ),
            ?_test(
                mul_test0(St0, r2, 6, <<
                    " 0: 0057 lsls r7, r2, #1\n"
                    " 2: 18ba adds r2, r7, r2\n"
                    " 4: 0052 lsls r2, r2, #1"
                >>)
            ),
            ?_test(
                mul_test0(St0, r2, 7, <<
                    " 0: 00d7 lsls r7, r2, #3\n"
                    " 2: 1aba subs r2, r7, r2"
                >>)
            ),
            ?_test(
                mul_test0(St0, r2, 8, <<
                    " 0: 00d2 lsls r2, r2, #3"
                >>)
            ),
            ?_test(
                mul_test0(St0, r2, 9, <<
                    " 0: 00d7 lsls r7, r2, #3\n"
                    " 2: 18ba adds r2, r7, r2"
                >>)
            ),
            ?_test(
                mul_test0(St0, r2, 10, <<
                    " 0: 0097 lsls r7, r2, #2\n"
                    " 2: 18ba adds r2, r7, r2\n"
                    " 4: 0052 lsls r2, r2, #1"
                >>)
            ),
            ?_test(
                mul_test0(St0, r2, 11, <<
                    " 0: 270b movs r7, #11\n"
                    " 2: 437a muls r2, r7"
                >>)
            )
        ]
    end,
    {setup, NewState, Cases}.
%% call_primitive/3 with a {y_reg, N} argument, so the argument has to be
%% loaded from the y register area before the call. Same shape as
%% MMod:call_primitive(MSt1, ?PRIM_BITSTRING_GET_UTF8, [{free, Src}]) with
%% {y_reg, 5} in place of {free, Src}.
set_args1_y_reg_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Args = [{y_reg, 5}],
    {St1, _Result} = ?BACKEND:call_primitive(St0, ?PRIM_BITSTRING_GET_UTF8, Args),
    Emitted = ?BACKEND:stream(St1),
    Expected = <<
        " 0: 2743 movs r7, #67 ; 0x43\n"
        " 2: 00bf lsls r7, r7, #2\n"
        " 4: 59d7 ldr r7, [r2, r7]\n"
        " 6: b405 push {r0, r2}\n"
        " 8: 6946 ldr r6, [r0, #20]\n"
        " a: 6970 ldr r0, [r6, #20]\n"
        " c: 47b8 blx r7\n"
        " e: 4607 mov r7, r0\n"
        " 10: bc05 pop {r0, r2}"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Reading y register 32: its byte offset (32 * 4 = 128) is past the 124-byte
%% limit of the ldr immediate form, so the address is computed in a register.
large_y_reg_read_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Picked} = ?BACKEND:move_to_native_register(St0, {y_reg, 32}),
    Emitted = ?BACKEND:stream(St1),
    Expected = <<
        " 0: 6946 ldr r6, [r0, #20]\n"
        " 2: 2780 movs r7, #128 ; 0x80\n"
        " 4: 4437 add r7, r6\n"
        " 6: 683f ldr r7, [r7, #0]"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted),
    ?assertEqual(r7, Picked).
%% Writing y register 40 (byte offset 160) while plenty of native registers
%% are free: the address is built in two temporaries.
large_y_reg_write_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Value} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    St2 = ?BACKEND:move_to_vm_register(St1, Value, {y_reg, 40}),
    Emitted = ?BACKEND:stream(St2),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 6946 ldr r6, [r0, #20]\n"
        " 4: 25a0 movs r5, #160 ; 0xa0\n"
        " 6: 4435 add r5, r6\n"
        " 8: 602f str r7, [r5, #0]"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Reading y register 35 (byte offset 140) after five native registers have
%% been handed out: only one is left, so the expected code parks the y base
%% pointer in ip (r12) while the offset is materialised.
large_y_reg_read_register_exhaustion_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, _} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    {St2, _} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
    {St3, _} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
    {St4, _} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
    {St5, _} = ?BACKEND:move_to_native_register(St4, {x_reg, 4}),
    {StLast, Picked} = ?BACKEND:move_to_native_register(St5, {y_reg, 35}),
    Emitted = ?BACKEND:stream(StLast),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 69c6 ldr r6, [r0, #28]\n"
        " 4: 6a05 ldr r5, [r0, #32]\n"
        " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n"
        " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n"
        " a: 6941 ldr r1, [r0, #20]\n"
        " c: 468c mov ip, r1\n"
        " e: 218c movs r1, #140 ; 0x8c\n"
        " 10: 4461 add r1, ip\n"
        " 12: 6809 ldr r1, [r1, #0]"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted),
    ?assertEqual(r1, Picked).

%% Writing y register 50 (byte offset 200) with a single temporary left: same
%% ip (r12) fallback as in the read case. The register patterns below also
%% pin the order in which the backend hands registers out.
large_y_reg_write_register_exhaustion_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Value} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    {St2, r6} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
    {St3, r5} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
    {St4, r4} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
    {St5, r3} = ?BACKEND:move_to_native_register(St4, {x_reg, 4}),
    StLast = ?BACKEND:move_to_vm_register(St5, Value, {y_reg, 50}),
    Emitted = ?BACKEND:stream(StLast),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 69c6 ldr r6, [r0, #28]\n"
        " 4: 6a05 ldr r5, [r0, #32]\n"
        " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n"
        " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n"
        " a: 6941 ldr r1, [r0, #20]\n"
        " c: 468c mov ip, r1\n"
        " e: 21c8 movs r1, #200 ; 0xc8\n"
        " 10: 4461 add r1, ip\n"
        " 12: 600f str r7, [r1, #0]"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Boundary case: y register 31 sits at byte offset 124, which still fits the
%% ldr immediate form, so plain direct addressing is expected.
y_reg_boundary_direct_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Picked} = ?BACKEND:move_to_native_register(St0, {y_reg, 31}),
    Emitted = ?BACKEND:stream(St1),
    Expected = <<
        " 0: 6946 ldr r6, [r0, #20]\n"
        " 2: 6ff7 ldr r7, [r6, #124] ; 0x7c"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted),
    ?assertEqual(r7, Picked).
%% debugger/1 emits a single breakpoint instruction.
debugger_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:debugger(St0),
    Emitted = ?BACKEND:stream(St1),
    Expected = <<
        " 0: be00 bkpt 0x0000"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% and_/3 with a negative immediate (-4) once every allocatable register is
%% taken: the expected code saves r0 in ip, uses r0 as the temporary for a
%% bics with the complemented mask, then restores r0.
and_register_exhaustion_negative_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    %% The register patterns also pin the allocation order.
    {St1, r7} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    {St2, r6} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
    {St3, r5} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
    {St4, r4} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
    {St5, r3} = ?BACKEND:move_to_native_register(St4, {x_reg, 4}),
    {StFull, r1} = ?BACKEND:move_to_native_register(St5, {x_reg, 5}),
    StMasked = ?BACKEND:and_(StFull, r7, -4),
    Emitted = ?BACKEND:stream(StMasked),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 69c6 ldr r6, [r0, #28]\n"
        " 4: 6a05 ldr r5, [r0, #32]\n"
        " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n"
        " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n"
        " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n"
        " c: 4684 mov ip, r0\n"
        " e: 2003 movs r0, #3\n"
        " 10: 4387 bics r7, r0\n"
        " 12: 4660 mov r0, ip"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).
%% and_/3 with a positive immediate (16#3F) once every allocatable register is
%% taken: r0 is saved in ip, used as the temporary for ands, then restored.
and_register_exhaustion_positive_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    %% The register patterns also pin the allocation order.
    {St1, r7} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    {St2, r6} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
    {St3, r5} = ?BACKEND:move_to_native_register(St2, {x_reg, 2}),
    {St4, r4} = ?BACKEND:move_to_native_register(St3, {x_reg, 3}),
    {St5, r3} = ?BACKEND:move_to_native_register(St4, {x_reg, 4}),
    {StFull, r1} = ?BACKEND:move_to_native_register(St5, {x_reg, 5}),
    StMasked = ?BACKEND:and_(StFull, r7, 16#3F),
    Emitted = ?BACKEND:stream(StMasked),
    Expected = <<
        " 0: 6987 ldr r7, [r0, #24]\n"
        " 2: 69c6 ldr r6, [r0, #28]\n"
        " 4: 6a05 ldr r5, [r0, #32]\n"
        " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n"
        " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n"
        " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n"
        " c: 4684 mov ip, r0\n"
        " e: 203f movs r0, #63 ; 0x3f\n"
        " 10: 4007 ands r7, r0\n"
        " 12: 4660 mov r0, ip"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% jump_table/2 for 512 labels: only the size of the emitted table is checked,
%% 12 bytes for each of the 512 + 1 entries.
jump_table_large_labels_test() ->
    LabelCount = 512,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:jump_table(St0, LabelCount),
    Emitted = ?BACKEND:stream(St1),
    ?assertEqual((LabelCount + 1) * 12, byte_size(Emitted)).
%% 64-bit argument whose two halves both fit a movs immediate: the value is
%% passed in r2 (low word) and r3 (high word).
alloc_boxed_integer_fragment_small_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Args = [ctx, {avm_int64_t, 42}],
    {St1, Result} = ?BACKEND:call_primitive(St0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, Args),
    ?assertEqual(r7, Result),
    Emitted = ?BACKEND:stream(St1),
    Expected = <<
        " 0: 6bd7 ldr r7, [r2, #60] ; 0x3c\n"
        " 2: b405 push {r0, r2}\n"
        " 4: 222a movs r2, #42 ; 0x2a\n"
        " 6: 2300 movs r3, #0\n"
        " 8: 47b8 blx r7\n"
        " a: 4607 mov r7, r0\n"
        " c: bc05 pop {r0, r2}"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% 64-bit argument that needs the literal pool for both halves. A trailing
%% call_primitive_last is added so that the pool is emitted and can be checked.
alloc_boxed_integer_fragment_large_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Args = [ctx, {avm_int64_t, 16#123456789ABCDEF0}],
    {St1, Result} = ?BACKEND:call_primitive(St0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, Args),
    St2 = ?BACKEND:call_primitive_last(St1, ?PRIM_RAISE_ERROR_TUPLE, [
        ctx, jit_state, offset, ?BADMATCH_ATOM, {free, Result}
    ]),
    ?assertEqual(r7, Result),
    Emitted = ?BACKEND:stream(St2),
    Expected = <<
        " 0: 6bd7 ldr r7, [r2, #60] @ 0x3c\n"
        " 2: b405 push {r0, r2}\n"
        " 4: 4a06 ldr r2, [pc, #24] @ (0x20)\n"
        " 6: 4b07 ldr r3, [pc, #28] @ (0x24)\n"
        " 8: 47b8 blx r7\n"
        " a: 4607 mov r7, r0\n"
        " c: bc05 pop {r0, r2}\n"
        " e: 6cd6 ldr r6, [r2, #76] @ 0x4c\n"
        " 10: b082 sub sp, #8\n"
        " 12: 9700 str r7, [sp, #0]\n"
        " 14: 9902 ldr r1, [sp, #8]\n"
        " 16: 2210 movs r2, #16\n"
        " 18: 4b03 ldr r3, [pc, #12] @ (0x28)\n"
        " 1a: 47b0 blx r6\n"
        " 1c: b002 add sp, #8\n"
        " 1e: bdf2 pop {r1, r4, r5, r6, r7, pc}\n"
        " 20: def0 udf #240 @ 0xf0\n"
        " 22: 9abc ldr r2, [sp, #752] @ 0x2f0\n"
        " 24: 5678 ldrsb r0, [r7, r1]\n"
        " 26: 1234 asrs r4, r6, #8\n"
        " 28: 028b lsls r3, r1, #10\n"
        " 2a: 0000 movs r0, r0"
    >>,
    ?assertEqual(dump_to_bin(Expected), Emitted).

%% Test for stack alignment issue in call_func_ptr
%% When we have an odd number of saved registers, the stack becomes misaligned
%% before the function call, violating ARM AAPCS which requires 8-byte alignment
%%
%% Three x registers are loaded into r7, r6 and r5, then a function pointer in
%% r3 is called with a single immediate argument. The expected listing pins
%% the exact push/pop register sets emitted around the call.
call_func_ptr_stack_alignment_test() ->
    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
    {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
    {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
    {State4, r4} = ?BACKEND:call_func_ptr(State3, {free, r3}, [42]),
    Stream = ?BACKEND:stream(State4),
    Dump =
        <<
            " 0: 6987 ldr r7, [r0, #24]\n"
            " 2: 69c6 ldr r6, [r0, #28]\n"
            " 4: 6a05 ldr r5, [r0, #32]\n"
            " 6: b4ed push {r0, r2, r3, r5, r6, r7}\n"
            " 8: 202a movs r0, #42 ; 0x2a\n"
            " a: 4798 blx r3\n"
            " c: 4604 mov r4, r0\n"
            " e: bced pop {r0, r2, r3, r5, r6, r7}"
        >>,
    ?assertEqual(dump_to_bin(Dump), Stream).

%% Test for register exhaustion issue in call_func_ptr with 5+ arguments
%% When all registers are used and we call a function with 5+ args,
%% set_args needs temporary registers but none are available
%%
%% The setup fun loads six x registers into r7, r6, r5, r4, r3 and r1 and hands
%% the resulting state to every sub-test. Each sub-test then issues one
%% call_func_ptr with a different mix of register and immediate arguments and
%% compares the whole emitted stream (setup loads included) with a listing.
call_func_ptr_register_exhaustion_test_() ->
    {setup,
        fun() ->
            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),

            % Allocate all available registers to simulate register pressure
            {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
            {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
            {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
            {State4, r4} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
            {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
            {State6, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}),
            State6
        end,
        fun(State6) ->
            [
                % Five arguments, the last two being immediates
                ?_test(begin
                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
                        State6,
                        {free, r6},
                        [ctx, jit_state, {free, r3}, 3, 1]
                    ),
                    Stream = ?BACKEND:stream(State7),
                    Dump =
                        <<
                            " 0: 6987 ldr r7, [r0, #24]\n"
                            " 2: 69c6 ldr r6, [r0, #28]\n"
                            " 4: 6a05 ldr r5, [r0, #32]\n"
                            " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n"
                            " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n"
                            " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n"
                            " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n"
                            " e: b082 sub sp, #8\n"
                            " 10: 2101 movs r1, #1\n"
                            " 12: 9100 str r1, [sp, #0]\n"
                            " 14: 9908 ldr r1, [sp, #32]\n"
                            " 16: 461a mov r2, r3\n"
                            " 18: 2303 movs r3, #3\n"
                            " 1a: 47b0 blx r6\n"
                            " 1c: 4606 mov r6, r0\n"
                            " 1e: b002 add sp, #8\n"
                            " 20: bcb7 pop {r0, r1, r2, r4, r5, r7}"
                        >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                % Fifth argument is a live register (r1), fourth an immediate
                ?_test(begin
                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
                        State6,
                        {free, r6},
                        [ctx, jit_state, {free, r3}, 1, r1]
                    ),
                    Stream = ?BACKEND:stream(State7),
                    Dump =
                        <<
                            " 0: 6987 ldr r7, [r0, #24]\n"
                            " 2: 69c6 ldr r6, [r0, #28]\n"
                            " 4: 6a05 ldr r5, [r0, #32]\n"
                            " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n"
                            " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n"
                            " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n"
                            " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n"
                            " e: b082 sub sp, #8\n"
                            " 10: 9100 str r1, [sp, #0]\n"
                            " 12: 9908 ldr r1, [sp, #32]\n"
                            " 14: 461a mov r2, r3\n"
                            " 16: 2301 movs r3, #1\n"
                            " 18: 47b0 blx r6\n"
                            " 1a: 4606 mov r6, r0\n"
                            " 1c: b002 add sp, #8\n"
                            " 1e: bcb7 pop {r0, r1, r2, r4, r5, r7}"
                        >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                % Fourth argument is a live register (r1), fifth an immediate
                ?_test(begin
                    {State7, ResultReg} = ?BACKEND:call_func_ptr(
                        State6,
                        {free, r6},
                        [ctx, jit_state, {free, r3}, r1, 1]
                    ),
                    Stream = ?BACKEND:stream(State7),
                    Dump =
                        <<
                            " 0: 6987 ldr r7, [r0, #24]\n"
                            " 2: 69c6 ldr r6, [r0, #28]\n"
                            " 4: 6a05 ldr r5, [r0, #32]\n"
                            " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n"
                            " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n"
                            " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n"
                            " c: b4b7 push {r0, r1, r2, r4, r5, r7}\n"
                            " e: b082 sub sp, #8\n"
                            " 10: 2401 movs r4, #1\n"
                            " 12: 9400 str r4, [sp, #0]\n"
                            " 14: 460f mov r7, r1\n"
                            " 16: 9908 ldr r1, [sp, #32]\n"
                            " 18: 461a mov r2, r3\n"
                            " 1a: 463b mov r3, r7\n"
                            " 1c: 47b0 blx r6\n"
                            " 1e: 4606 mov r6, r0\n"
                            " 20: b002 add sp, #8\n"
                            " 22: bcb7 pop {r0, r1, r2, r4, r5, r7}"
                        >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual(r6, ResultReg)
                end),
                % Function pointer held in r1, two live registers as arguments
                ?_test(begin
                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
                        State6,
                        {free, r1},
                        [r6, r3]
                    ),
                    Stream = ?BACKEND:stream(State7),
                    Dump =
                        <<
                            " 0: 6987 ldr r7, [r0, #24]\n"
                            " 2: 69c6 ldr r6, [r0, #28]\n"
                            " 4: 6a05 ldr r5, [r0, #32]\n"
                            " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n"
                            " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n"
                            " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n"
                            " c: b4ff push {r0, r1, r2, r3, r4, r5, r6, r7}\n"
                            " e: 460c mov r4, r1\n"
                            " 10: 4630 mov r0, r6\n"
                            " 12: 4619 mov r1, r3\n"
                            " 14: 47a0 blx r4\n"
                            " 16: 9001 str r0, [sp, #4]\n"
                            " 18: bcff pop {r0, r1, r2, r3, r4, r5, r6, r7}"
                        >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                % Callee given as a primitive index instead of a register
                ?_test(begin
                    {State7, ResultReg} = ?BACKEND:call_func_ptr(
                        State6,
                        {primitive, 2},
                        [{free, r6}, r3]
                    ),
                    % Expected value first, as in the sub-test above, so that a
                    % failure report labels expected and actual correctly
                    ?assertEqual(r6, ResultReg),
                    Stream = ?BACKEND:stream(State7),
                    Dump =
                        <<
                            " 0: 6987 ldr r7, [r0, #24]\n"
                            " 2: 69c6 ldr r6, [r0, #28]\n"
                            " 4: 6a05 ldr r5, [r0, #32]\n"
                            " 6: 6a44 ldr r4, [r0, #36] ; 0x24\n"
                            " 8: 6a83 ldr r3, [r0, #40] ; 0x28\n"
                            " a: 6ac1 ldr r1, [r0, #44] ; 0x2c\n"
                            " c: b4ff push {r0, r1, r2, r3, r4, r5, r6, r7}\n"
                            " e: 6894 ldr r4, [r2, #8]\n"
                            " 10: 4630 mov r0, r6\n"
                            " 12: 4619 mov r1, r3\n"
                            " 14: 47a0 blx r4\n"
                            " 16: 9006 str r0, [sp, #24]\n"
                            " 18: bcff pop {r0, r1, r2, r3, r4, r5, r6, r7}"
                        >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end)
            ]
        end}.
+ +%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, r0}), + Stream = ?BACKEND:stream(State1), + % Expected: armv6m PIC sequence with function epilogue pattern + % Based on actual generated output + Dump = + << + " 0: a700 add r7, pc, #0 ; (adr r7, 0x4)\n" + " 2: 19c0 adds r0, r0, r7\n" + " 4: 2703 movs r7, #3\n" + " 6: 427f negs r7, r7\n" + " 8: 19c0 adds r0, r0, r7\n" + " a: 9f05 ldr r7, [sp, #20]\n" + " c: 9005 str r0, [sp, #20]\n" + " e: 46be mov lr, r7\n" + " 10: bdf2 pop {r1, r4, r5, r6, r7, pc}" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Mimic part of add.beam +add_beam_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:move_to_vm_register(State2, 16#9f, {x_reg, 1}), + State4 = ?BACKEND:move_to_vm_register(State3, 16#8f, {x_reg, 0}), + State5 = ?BACKEND:call_only_or_schedule_next(State4, 2), + State6 = ?BACKEND:add_label(State5, 2), + {State7, ResultReg} = ?BACKEND:call_primitive(State6, ?PRIM_ALLOCATE, [ + ctx, jit_state, 1, 0, 1 + ]), + State8 = ?BACKEND:if_block(State7, {'(bool)', {free, ResultReg}, '==', false}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State9 = ?BACKEND:move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + State10 = ?BACKEND:call_or_schedule_next(State9, 3), + State11 = ?BACKEND:add_label(State10, 3), + State12 = ?BACKEND:call_primitive_last(State11, ?PRIM_RETURN, [ + ctx, jit_state + ]), + % OP_INT_CALL_END + State13 = ?BACKEND:add_label(State12, 0), + State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]), + State15 = ?BACKEND:update_branches(State14), + Stream = ?BACKEND:stream(State15), + Dump = + << + % 
jump table + " 0: 4b01 ldr r3, [pc, #4] ; (0x8)\n" + " 2: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 4: 449f add pc, r3\n" + " 6: 46c0 nop ; (mov r8, r8)\n" + " 8: 00d8 lsls r0, r3, #3\n" + " a: 0000 movs r0, r0\n" + " c: 4b01 ldr r3, [pc, #4] ; (0x14)\n" + " e: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 10: 449f add pc, r3\n" + " 12: 46c0 nop ; (mov r8, r8)\n" + " 14: 001c movs r4, r3\n" + " 16: 0000 movs r0, r0\n" + " 18: 4b01 ldr r3, [pc, #4] ; (0x20)\n" + " 1a: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 1c: 449f add pc, r3\n" + " 1e: 46c0 nop ; (mov r8, r8)\n" + " 20: 0044 lsls r4, r0, #1\n" + " 22: 0000 movs r0, r0\n" + " 24: 4b01 ldr r3, [pc, #4] ; (0x2c)\n" + " 26: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " 28: 449f add pc, r3\n" + " 2a: 46c0 nop ; (mov r8, r8)\n" + " 2c: 00a8 lsls r0, r5, #2\n" + " 2e: 0000 movs r0, r0\n" + % label 1 + % {move,{integer,9},{x,1}}. + " 30: 279f movs r7, #159 ; 0x9f\n" + " 32: 61c7 str r7, [r0, #28]\n" + % {move,{integer,8},{x,0}} + " 34: 278f movs r7, #143 ; 0x8f\n" + " 36: 6187 str r7, [r0, #24]\n" + % {call_only,2,{f,2}}. + " 38: 9e00 ldr r6, [sp, #0]\n" + " 3a: 68b7 ldr r7, [r6, #8]\n" + " 3c: 3f01 subs r7, #1\n" + " 3e: 60b7 str r7, [r6, #8]\n" + " 40: d004 beq.n 0x4c\n" + " 42: e00f b.n 0x64\n" + " 44: 46c0 nop ; (mov r8, r8)\n" + " 46: 46c0 nop ; (mov r8, r8)\n" + " 48: 46c0 nop ; (mov r8, r8)\n" + " 4a: 46c0 nop ; (mov r8, r8)\n" + " 4c: a700 add r7, pc, #0 ; (adr r7, 0x50)\n" + " 4e: 2637 movs r6, #55 ; 0x37\n" + " 50: 4276 negs r6, r6\n" + " 52: 19f6 adds r6, r6, r7\n" + " 54: 9f00 ldr r7, [sp, #0]\n" + " 56: 607e str r6, [r7, #4]\n" + " 58: 6897 ldr r7, [r2, #8]\n" + " 5a: 9e05 ldr r6, [sp, #20]\n" + " 5c: 9705 str r7, [sp, #20]\n" + " 5e: 46b6 mov lr, r6\n" + " 60: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " 62: 46c0 nop ; (mov r8, r8)\n" + % label 2 + % {allocate,1,1}. 
+ " 64: 6957 ldr r7, [r2, #20]\n" + " 66: b405 push {r0, r2}\n" + " 68: b082 sub sp, #8\n" + " 6a: 2601 movs r6, #1\n" + " 6c: 9600 str r6, [sp, #0]\n" + " 6e: 9904 ldr r1, [sp, #16]\n" + " 70: 2201 movs r2, #1\n" + " 72: 2300 movs r3, #0\n" + " 74: 47b8 blx r7\n" + " 76: 4607 mov r7, r0\n" + " 78: b002 add sp, #8\n" + " 7a: bc05 pop {r0, r2}\n" + " 7c: 07fe lsls r6, r7, #31\n" + " 7e: d405 bmi.n 0x8c\n" + " 80: 6997 ldr r7, [r2, #24]\n" + " 82: 2282 movs r2, #130 ; 0x82\n" + " 84: 9e05 ldr r6, [sp, #20]\n" + " 86: 9705 str r7, [sp, #20]\n" + " 88: 46b6 mov lr, r6\n" + " 8a: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + % {init_yregs,{list,[{y,0}]}}. + %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + " 8c: 263b movs r6, #59 ; 0x3b\n" + " 8e: 6947 ldr r7, [r0, #20]\n" + " 90: 603e str r6, [r7, #0]\n" + % {call,1,{f,3}} + %% call_or_schedule_next(State9, 3), + " 92: 9e00 ldr r6, [sp, #0]\n" + " 94: 6837 ldr r7, [r6, #0]\n" + " 96: 683f ldr r7, [r7, #0]\n" + " 98: 063f lsls r7, r7, #24\n" + " 9a: 4e0c ldr r6, [pc, #48] ; (0xcc)\n" + " 9c: 4337 orrs r7, r6\n" + " 9e: 65c7 str r7, [r0, #92] ; 0x5c\n" + " a0: 9e00 ldr r6, [sp, #0]\n" + " a2: 68b7 ldr r7, [r6, #8]\n" + " a4: 3f01 subs r7, #1\n" + " a6: 60b7 str r7, [r6, #8]\n" + " a8: d004 beq.n 0xb4\n" + " aa: e013 b.n 0xd4\n" + " ac: 46c0 nop ; (mov r8, r8)\n" + " ae: 46c0 nop ; (mov r8, r8)\n" + " b0: 46c0 nop ; (mov r8, r8)\n" + " b2: 46c0 nop ; (mov r8, r8)\n" + " b4: a700 add r7, pc, #0 ; (adr r7, 0xb8)\n" + " b6: 2693 movs r6, #147 ; 0x93\n" + " b8: 4276 negs r6, r6\n" + " ba: 19f6 adds r6, r6, r7\n" + " bc: 9f00 ldr r7, [sp, #0]\n" + " be: 607e str r6, [r7, #4]\n" + " c0: 6897 ldr r7, [r2, #8]\n" + " c2: 9e05 ldr r6, [sp, #20]\n" + " c4: 9705 str r7, [sp, #20]\n" + " c6: 46b6 mov lr, r6\n" + " c8: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " ca: 0000 movs r0, r0\n" + " cc: 0340 lsls r0, r0, #13\n" + " ce: 0000 movs r0, r0\n" + %% (continuation) + " d0: b5f2 push {r1, r4, r5, r6, r7, lr}\n" + " d2: 46c0 nop ; (mov r8, 
r8)\n" + % label 3 + " d4: 6857 ldr r7, [r2, #4]\n" + " d6: 9e05 ldr r6, [sp, #20]\n" + " d8: 9705 str r7, [sp, #20]\n" + " da: 46b6 mov lr, r6\n" + " dc: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + " de: 46c0 nop ; (mov r8, r8)\n" + % label 0 + " e0: 6857 ldr r7, [r2, #4]\n" + " e2: 9e05 ldr r6, [sp, #20]\n" + " e4: 9705 str r7, [sp, #20]\n" + " e6: 46b6 mov lr, r6\n" + " e8: bdf2 pop {r1, r4, r5, r6, r7, pc}\n" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +dump_to_bin(Dump) -> + dump_to_bin0(Dump, addr, []). + +-define(IS_HEX_DIGIT(C), + ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) +). + +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +%% Handle 32-bits undefined instruction +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + InstrA = list_to_integer([H1, H2, H3, H4], 16), + InstrB = list_to_integer([H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<>, <> | Acc]); +%% Handle 16-bit ARM32 Thumb instructions (4 hex digits) +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) +-> + %% Parse 4 hex digits (ARM32 Thumb 16-bit 
instruction) + Instr = list_to_integer([H1, H2, H3, H4], 16), + dump_to_bin0(Rest, instr, [<> | Acc]); +dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, instr, Acc); +dump_to_bin0(<<>>, _, Acc) -> + list_to_binary(lists:reverse(Acc)). diff --git a/tests/libs/jit/jit_dwarf_tests.erl b/tests/libs/jit/jit_dwarf_tests.erl new file mode 100644 index 0000000000..0588ae25f2 --- /dev/null +++ b/tests/libs/jit/jit_dwarf_tests.erl @@ -0,0 +1,276 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. +% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_dwarf_tests). + +-include_lib("eunit/include/eunit.hrl"). +-include("../../../libs/jit/src/opcodes.hrl"). + +basic_dwarf_state_test() -> + % Create a basic DWARF state + State = jit_dwarf:new(jit_armv6m, test_module, jit_stream_binary, 1024), + + % Add some test data + State1 = jit_dwarf:opcode(State, ?OP_FUNC_INFO), + State2 = jit_dwarf:function(State1, get_value, 2), + State3 = jit_dwarf:line(State2, 42), + + % Verify state contains our data + ?assert(is_tuple(State3)), + + % Test stream interface + Stream = jit_dwarf:stream(State3), + ?assert(is_binary(Stream)). 

%% Feeds an opcode, a function and a line into a DWARF state for the armv6m
%% backend and, when jit_dwarf:elf/2 returns an image, checks the fixed fields
%% of its 52-byte ELF header. A `false` result from elf/2 is accepted as is.
elf_generation_test() ->
    Empty = jit_dwarf:new(jit_armv6m, test_module, jit_stream_binary, 1024),
    WithOpcode = jit_dwarf:opcode(Empty, <<"test_opcode/2">>),
    WithFunction = jit_dwarf:function(WithOpcode, test_func, 1),
    WithLine = jit_dwarf:line(WithFunction, 100),

    case jit_dwarf:elf(WithLine, <<>>) of
        {ok, Elf, _WithText} ->
            % Magic bytes first, then enough room for a complete header
            <<16#7f, "ELF", _/binary>> = Elf,
            ?assert(byte_size(Elf) >= 52),

            % Only the inspected fields are named; the others are skipped by
            % byte count so that the pattern still spans exactly 52 bytes.
            <<_:4/binary, Class, Endian, _:10/binary, Type:16/little, Machine:16/little,
                _:12/binary, SHOff:32/little, _:4/binary, EHSize:16/little, _:6/binary,
                SHNum:16/little, _:2/binary, _/binary>> = Elf,

            % ELFCLASS32
            ?assertEqual(1, Class),
            % ELFDATA2LSB
            ?assertEqual(1, Endian),
            % ET_REL
            ?assertEqual(1, Type),
            % EM_ARM
            ?assertEqual(40, Machine),
            % Size of the ELF header itself
            ?assertEqual(52, EHSize),

            % At least six section headers, located after the ELF header
            ?assert(SHNum >= 6),
            ?assert(SHOff > 52),

            % The image must be long enough to hold every 40-byte section header
            MinSize = SHOff + (SHNum * 40),
            ?assert(byte_size(Elf) >= MinSize);
        false ->
            ok
    end.

%% Checks that the section header table announced by the ELF header fits in
%% the image and that its first 40-byte entry is all zeros.
section_header_test() ->
    Empty = jit_dwarf:new(jit_armv6m, test_module, jit_stream_binary, 1024),
    WithMain = jit_dwarf:function(Empty, main, 0),

    case jit_dwarf:elf(WithMain, <<>>) of
        {ok, Elf, _WithText} ->
            % The section table offset sits 32 bytes in; the header count
            % follows 12 bytes after it. The pattern spans the 52-byte header.
            <<_:32/binary, SHOff:32/little, _:12/binary, SHNum:16/little, _:2/binary, _/binary>> =
                Elf,

            TableSize = SHNum * 40,
            ?assert(byte_size(Elf) >= SHOff + TableSize),

            % First entry of the table is expected to be the null section
            <<_:SHOff/binary, FirstHeader:40/binary, _/binary>> = Elf,
            ?assertEqual(<<0:320>>, FirstHeader);
        false ->
            ok
    end.

%% Locates the section-name string table through the header's string table
%% index and checks that it contains the names ".debug_info" and ".shstrtab".
string_table_test() ->
    Empty = jit_dwarf:new(jit_armv6m, string_test, jit_stream_binary, 1024),

    case jit_dwarf:elf(Empty, <<>>) of
        {ok, Elf, _WithText} ->
            % Only the section table offset and the string table index are needed
            <<_:32/binary, SHOff:32/little, _:14/binary, SHStrNdx:16/little, _/binary>> = Elf,

            % Offset and size are the 5th and 6th 32-bit words of the
            % string table's own section header
            EntryOffset = SHOff + (SHStrNdx * 40),
            <<_:EntryOffset/binary, _:16/binary, TableOffset:32/little, TableSize:32/little,
                _/binary>> = Elf,

            <<_:TableOffset/binary, Table:TableSize/binary, _/binary>> = Elf,

            % Names are NUL-separated
            Names = binary:split(Table, <<0>>, [global]),

            ?assert(lists:member(<<".debug_info">>, Names)),
            ?assert(lists:member(<<".shstrtab">>, Names));
        false ->
            ok
    end.

%% Passes native code to jit_dwarf:elf/2 and inspects the third element of the
%% result: it must be an ELF image with nine section headers, must contain the
%% native code bytes, and must be larger than the image built without code.
elf_with_text_test() ->
    Empty = jit_dwarf:new(jit_x86_64, test_module, jit_stream_binary, 1024),

    % Some dummy x86_64 native code (mov rax, 1; ret)
    Code = <<16#48, 16#c7, 16#c0, 16#01, 16#00, 16#00, 16#00, 16#c3>>,

    case jit_dwarf:elf(Empty, Code) of
        {ok, _DebugOnly, Combined} ->
            <<16#7f, "ELF", _/binary>> = Combined,

            % Section header count lives at byte offset 48 of the header
            <<_:48/binary, SHNum:16/little, _:2/binary, _/binary>> = Combined,

            % null + 6 debug sections + .text + shstrtab
            ?assertEqual(9, SHNum),

            % The code bytes must appear somewhere in the image
            ?assertNotEqual(nomatch, binary:match(Combined, Code)),

            % Adding code must make the image grow
            {ok, WithoutCode, _} = jit_dwarf:elf(Empty, <<>>),
            ?assert(byte_size(WithoutCode) < byte_size(Combined));
        false ->
            ok
    end.

%% Checks the properties of the .text section in the image that
%% jit_dwarf:elf/2 returns as the third element of its result when native
%% code is supplied: type SHT_PROGBITS, flags SHF_ALLOC | SHF_EXECINSTR,
%% size equal to the native code size, and address 0.
%% A `false` result from elf/2 is accepted as is.
text_section_properties_test() ->
    State = jit_dwarf:new(jit_aarch64, test_module, jit_stream_binary, 1024),

    % AArch64 native code (mov x0, #42; ret).
    % Each instruction is a 32-bit little-endian word. Without an explicit
    % size an integer segment is only 8 bits wide, which would truncate these
    % two words to the two bytes <<16#40, 16#c0>>.
    NativeCode = <<16#d2800540:32/little, 16#d65f03c0:32/little>>,

    case jit_dwarf:elf(State, NativeCode) of
        false ->
            ok;
        {ok, _DebugOnlyELF, CombinedELF} ->
            % Parse the 52-byte ELF header; everything after it is RestOfFile
            <<_ElfMagic:16/binary, _Type:16/little, _Machine:16/little, _Version:32/little,
                _Entry:32/little, _PHOff:32/little, SHOff:32/little, _Flags:32/little,
                _EHSize:16/little, _PHEntSize:16/little, _PHNum:16/little, _SHEntSize:16/little,
                SHNum:16/little, SHStrNdx:16/little, RestOfFile/binary>> = CombinedELF,

            % File offsets are relative to the start of the file, RestOfFile
            % starts 52 bytes in, hence the "- 52" adjustments below
            SectionHeadersStart = SHOff - 52,
            <<SectionData:SectionHeadersStart/binary, SectionHeaders/binary>> = RestOfFile,

            % Section header of the section-name string table
            StringTableHeaderOffset = SHStrNdx * 40,
            <<_:StringTableHeaderOffset/binary, _StrName:32/little, _StrType:32/little,
                _StrFlags:32/little, _StrAddr:32/little, StrOffset:32/little, StrSize:32/little,
                _/binary>> = SectionHeaders,

            % The string table content is read from the data that precedes
            % the section header table
            StrTableFileOffset = StrOffset - 52,
            <<_:StrTableFileOffset/binary, StringTable:StrSize/binary, _/binary>> = SectionData,

            % Find .text section by scanning all section headers
            TextSectionFound = find_text_section(SectionHeaders, StringTable, SHNum, 0),
            ?assert(TextSectionFound =/= not_found),

            {TextType, TextFlags, TextSize, TextAddr} = TextSectionFound,

            % Verify .text section properties
            SHT_PROGBITS = 1,
            SHF_ALLOC = 2,
            SHF_EXECINSTR = 4,
            ExpectedFlags = SHF_ALLOC bor SHF_EXECINSTR,

            ?assertEqual(SHT_PROGBITS, TextType),
            ?assertEqual(ExpectedFlags, TextFlags),
            ?assertEqual(byte_size(NativeCode), TextSize),
            % Should be 0 for relocatable
            ?assertEqual(0, TextAddr)
    end.
+ +different_architectures_test() -> + % Test elf_with_text with different JIT backends + Backends = [jit_x86_64, jit_aarch64, jit_armv6m], + % Simple nop instruction + NativeCode = <<16#90>>, + + lists:foreach( + fun(Backend) -> + State = jit_dwarf:new(Backend, test_module, jit_stream_binary, 1024), + case jit_dwarf:elf(State, NativeCode) of + false -> + ok; + {ok, _DebugOnlyELF, CombinedELF} -> + % Verify ELF magic and basic structure + <<127, $E, $L, $F, _Rest/binary>> = CombinedELF, + % Verify native code is present + ?assert(binary:match(CombinedELF, NativeCode) =/= nomatch) + end + end, + Backends + ). + +% Helper function to find .text section in ELF +find_text_section(_Headers, _StringTable, 0, _Index) -> + not_found; +find_text_section(Headers, StringTable, Remaining, Index) -> + HeaderOffset = Index * 40, + <<_:HeaderOffset/binary, NameOffset:32/little, Type:32/little, Flags:32/little, Addr:32/little, + _Offset:32/little, Size:32/little, _/binary>> = Headers, + + % Extract section name from string table + SectionName = extract_string_at_offset(StringTable, NameOffset), + + case SectionName of + <<".text">> -> + {Type, Flags, Size, Addr}; + _ -> + find_text_section(Headers, StringTable, Remaining - 1, Index + 1) + end. + +% Helper function to extract null-terminated string at given offset +extract_string_at_offset(StringTable, Offset) -> + <<_:Offset/binary, Rest/binary>> = StringTable, + [String | _] = binary:split(Rest, <<0>>, []), + String. diff --git a/tests/libs/jit/jit_riscv32_asm_tests.erl b/tests/libs/jit/jit_riscv32_asm_tests.erl new file mode 100644 index 0000000000..28a0f4fa58 --- /dev/null +++ b/tests/libs/jit/jit_riscv32_asm_tests.erl @@ -0,0 +1,900 @@ +% +% This file is part of AtomVM. +% +% Copyright 2025 Paul Guyot +% +% Licensed under the Apache License, Version 2.0 (the "License"); +% you may not use this file except in compliance with the License. 
+% You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +% See the License for the specific language governing permissions and +% limitations under the License. +% +% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later +% + +-module(jit_riscv32_asm_tests). + +-include_lib("eunit/include/eunit.hrl"). + +-define(_assertAsmEqual(Bin, Str, Value), + ?_assertEqual(jit_tests_common:asm(riscv32, Bin, Str), Value) +). + +%%----------------------------------------------------------------------------- +%% R-type arithmetic and logical instruction tests +%%----------------------------------------------------------------------------- + +add_test_() -> + [ + ?_assertAsmEqual( + <<16#00628533:32/little>>, "add a0, t0, t1", jit_riscv32_asm:add(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#95aa:16/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0) + ), + ?_assertAsmEqual( + <<16#97fa:16/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5) + ) + ]. + +sub_test_() -> + [ + ?_assertAsmEqual( + <<16#40628533:32/little>>, "sub a0, t0, t1", jit_riscv32_asm:sub(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#8d89:16/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0) + ), + ?_assertAsmEqual( + <<16#41e787b3:32/little>>, "sub a5, a5, t5", jit_riscv32_asm:sub(a5, a5, t5) + ) + ]. + +and_test_() -> + [ + ?_assertAsmEqual( + <<16#0062f533:32/little>>, "and a0, t0, t1", jit_riscv32_asm:and_(a0, t0, t1) + ), + ?_assertAsmEqual( + <<16#8df1:16/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2) + ) + ]. 

%% Each generator below pairs an expected encoding and an assembly string
%% with the value returned by the matching jit_riscv32_asm function.
%% Expected encodings are written as little-endian 32-bit or 16-bit words.

%% or_/3: one 32-bit case and one 16-bit case (destination equals first source).
or_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0062e533:32/little>>, "or a0, t0, t1", jit_riscv32_asm:or_(a0, t0, t1)
        ),
        ?_assertAsmEqual(
            <<16#8dd1:16/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2)
        )
    ].

%% xor_/3: one 32-bit case and one 16-bit case (destination equals first source).
xor_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0062c533:32/little>>, "xor a0, t0, t1", jit_riscv32_asm:xor_(a0, t0, t1)
        ),
        ?_assertAsmEqual(
            <<16#8db1:16/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2)
        )
    ].

%% sll/3: both cases expect 32-bit encodings.
sll_test_() ->
    [
        ?_assertAsmEqual(
            <<16#00629533:32/little>>, "sll a0, t0, t1", jit_riscv32_asm:sll(a0, t0, t1)
        ),
        ?_assertAsmEqual(
            <<16#00c59633:32/little>>, "sll a2, a1, a2", jit_riscv32_asm:sll(a2, a1, a2)
        )
    ].

%% srl/3: both cases expect 32-bit encodings.
srl_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0062d533:32/little>>, "srl a0, t0, t1", jit_riscv32_asm:srl(a0, t0, t1)
        ),
        ?_assertAsmEqual(
            <<16#00c5d633:32/little>>, "srl a2, a1, a2", jit_riscv32_asm:srl(a2, a1, a2)
        )
    ].

%% sra/3: both cases expect 32-bit encodings.
sra_test_() ->
    [
        ?_assertAsmEqual(
            <<16#4062d533:32/little>>, "sra a0, t0, t1", jit_riscv32_asm:sra(a0, t0, t1)
        ),
        ?_assertAsmEqual(
            <<16#40c5d633:32/little>>, "sra a2, a1, a2", jit_riscv32_asm:sra(a2, a1, a2)
        )
    ].

%% slt/3: both cases expect 32-bit encodings.
slt_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0062a533:32/little>>, "slt a0, t0, t1", jit_riscv32_asm:slt(a0, t0, t1)
        ),
        ?_assertAsmEqual(
            <<16#00c5a633:32/little>>, "slt a2, a1, a2", jit_riscv32_asm:slt(a2, a1, a2)
        )
    ].

%% sltu/3: both cases expect 32-bit encodings.
sltu_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0062b533:32/little>>, "sltu a0, t0, t1", jit_riscv32_asm:sltu(a0, t0, t1)
        ),
        ?_assertAsmEqual(
            <<16#00c5b633:32/little>>, "sltu a2, a1, a2", jit_riscv32_asm:sltu(a2, a1, a2)
        )
    ].

%%-----------------------------------------------------------------------------
%% I-type immediate instruction tests
%%-----------------------------------------------------------------------------

%% addi/3: a 32-bit case, a 16-bit case (a1 += -1) and the two extreme
%% immediates exercised here, 2047 and -2048.
addi_test_() ->
    [
        ?_assertAsmEqual(
            <<16#01428513:32/little>>, "addi a0, t0, 20", jit_riscv32_asm:addi(a0, t0, 20)
        ),
        ?_assertAsmEqual(
            <<16#15fd:16/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1)
        ),
        ?_assertAsmEqual(
            <<16#7ff00513:32/little>>, "addi a0, zero, 2047", jit_riscv32_asm:addi(a0, zero, 2047)
        ),
        ?_assertAsmEqual(
            <<16#80000593:32/little>>, "addi a1, zero, -2048", jit_riscv32_asm:addi(a1, zero, -2048)
        )
    ].

%% andi/3: one 32-bit case and one 16-bit case (destination equals source).
andi_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0ff2f513:32/little>>, "andi a0, t0, 255", jit_riscv32_asm:andi(a0, t0, 255)
        ),
        ?_assertAsmEqual(
            <<16#89bd:16/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15)
        )
    ].

%% ori/3: both cases expect 32-bit encodings, including destination == source.
ori_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0ff2e513:32/little>>, "ori a0, t0, 255", jit_riscv32_asm:ori(a0, t0, 255)
        ),
        ?_assertAsmEqual(
            <<16#00f5e593:32/little>>, "ori a1, a1, 15", jit_riscv32_asm:ori(a1, a1, 15)
        )
    ].

%% xori/3: both cases expect 32-bit encodings; the second uses immediate -1.
xori_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0ff2c513:32/little>>, "xori a0, t0, 255", jit_riscv32_asm:xori(a0, t0, 255)
        ),
        ?_assertAsmEqual(
            <<16#fff5c593:32/little>>, "xori a1, a1, -1", jit_riscv32_asm:xori(a1, a1, -1)
        )
    ].

%% slli/3: a 32-bit case, a 16-bit case (shift by 31 in place) and a shift
%% by 0, for which a 32-bit encoding is expected.
slli_test_() ->
    [
        ?_assertAsmEqual(
            <<16#00329513:32/little>>, "slli a0, t0, 3", jit_riscv32_asm:slli(a0, t0, 3)
        ),
        ?_assertAsmEqual(
            <<16#05fe:16/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31)
        ),
        ?_assertAsmEqual(
            <<16#00051513:32/little>>, "slli a0, a0, 0", jit_riscv32_asm:slli(a0, a0, 0)
        )
    ].

%% srli/3: one 32-bit case and one 16-bit case (shift by 31 in place).
srli_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0032d513:32/little>>, "srli a0, t0, 3", jit_riscv32_asm:srli(a0, t0, 3)
        ),
        ?_assertAsmEqual(
            <<16#81fd:16/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31)
        )
    ].

%% srai/3: one 32-bit case and one 16-bit case (shift by 31 in place).
srai_test_() ->
    [
        ?_assertAsmEqual(
            <<16#4032d513:32/little>>, "srai a0, t0, 3", jit_riscv32_asm:srai(a0, t0, 3)
        ),
        ?_assertAsmEqual(
            <<16#85fd:16/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31)
        )
    ].

%% slti/3: both cases expect 32-bit encodings; the second uses immediate -1.
slti_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0142a513:32/little>>, "slti a0, t0, 20", jit_riscv32_asm:slti(a0, t0, 20)
        ),
        ?_assertAsmEqual(
            <<16#fff5a593:32/little>>, "slti a1, a1, -1", jit_riscv32_asm:slti(a1, a1, -1)
        )
    ].

%% sltiu/3: both cases expect 32-bit encodings.
sltiu_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0142b513:32/little>>, "sltiu a0, t0, 20", jit_riscv32_asm:sltiu(a0, t0, 20)
        ),
        ?_assertAsmEqual(
            <<16#00153513:32/little>>, "sltiu a0, a0, 1", jit_riscv32_asm:sltiu(a0, a0, 1)
        )
    ].

%%-----------------------------------------------------------------------------
%% Load instruction tests
%%-----------------------------------------------------------------------------

%% lw/3 and lw/2: the two-argument form is expected to encode like an explicit
%% offset of 0. Offsets 0 and 4 expect 16-bit encodings; -4 and 2047 expect
%% 32-bit encodings.
lw_test_() ->
    [
        ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)),
        ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)),
        ?_assertAsmEqual(<<16#414c:16/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)),
        ?_assertAsmEqual(
            <<16#ffc52503:32/little>>, "lw a0, -4(a0)", jit_riscv32_asm:lw(a0, a0, -4)
        ),
        ?_assertAsmEqual(
            <<16#7ff52503:32/little>>, "lw a0, 2047(a0)", jit_riscv32_asm:lw(a0, a0, 2047)
        )
    ].

%% lh/3 and lh/2: all cases expect 32-bit encodings; the two-argument form is
%% expected to encode like an explicit offset of 0.
lh_test_() ->
    [
        ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0, 0)),
        ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0)),
        ?_assertAsmEqual(<<16#00251583:32/little>>, "lh a1, 2(a0)", jit_riscv32_asm:lh(a1, a0, 2))
    ].

%% lhu/3 and lhu/2: all cases expect 32-bit encodings; the two-argument form
%% is expected to encode like an explicit offset of 0.
lhu_test_() ->
    [
        ?_assertAsmEqual(
            <<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0, 0)
        ),
        ?_assertAsmEqual(<<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0)),
        ?_assertAsmEqual(<<16#00255583:32/little>>, "lhu a1, 2(a0)", jit_riscv32_asm:lhu(a1, a0, 2))
    ].

%% lb/3 and lb/2: all cases expect 32-bit encodings; the two-argument form is
%% expected to encode like an explicit offset of 0.
lb_test_() ->
    [
        ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0, 0)),
        ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0)),
        ?_assertAsmEqual(<<16#00150583:32/little>>, "lb a1, 1(a0)", jit_riscv32_asm:lb(a1, a0, 1))
    ].

%% lbu/3 and lbu/2: all cases expect 32-bit encodings; the two-argument form
%% is expected to encode like an explicit offset of 0.
lbu_test_() ->
    [
        ?_assertAsmEqual(
            <<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0, 0)
        ),
        ?_assertAsmEqual(<<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0)),
        ?_assertAsmEqual(<<16#00154583:32/little>>, "lbu a1, 1(a0)", jit_riscv32_asm:lbu(a1, a0, 1))
    ].

%%-----------------------------------------------------------------------------
%% Store instruction tests
%%-----------------------------------------------------------------------------

%% sw/3 and sw/2: offsets 0 and 4 expect 16-bit encodings, -4 a 32-bit one.
%% NOTE(review): the three-argument calls pass (a0, a1, Offset) while the
%% two-argument call passes (a1, a0), yet both are expected to produce
%% "sw a1, 0(a0)" - confirm the argument order of sw/2 against sw/3.
sw_test_() ->
    [
        ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)),
        ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)),
        ?_assertAsmEqual(<<16#c14c:16/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)),
        ?_assertAsmEqual(<<16#feb52e23:32/little>>, "sw a1, -4(a0)", jit_riscv32_asm:sw(a0, a1, -4))
    ].

%% sh/3 and sh/2: all cases expect 32-bit encodings.
%% NOTE(review): same register-order difference between the two-argument and
%% three-argument calls as in sw_test_/0 - confirm against jit_riscv32_asm.
sh_test_() ->
    [
        ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a0, a1, 0)),
        ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a1, a0)),
        ?_assertAsmEqual(<<16#00b51123:32/little>>, "sh a1, 2(a0)", jit_riscv32_asm:sh(a0, a1, 2))
    ].

%% sb/3 and sb/2: all cases expect 32-bit encodings.
%% NOTE(review): the three-argument calls pass (a0, a1, Offset) while the
%% two-argument call passes (a1, a0), yet both are expected to produce
%% "sb a1, 0(a0)" - confirm the argument order of sb/2 against sb/3.
sb_test_() ->
    [
        ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a0, a1, 0)),
        ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a1, a0)),
        ?_assertAsmEqual(<<16#00b500a3:32/little>>, "sb a1, 1(a0)", jit_riscv32_asm:sb(a0, a1, 1))
    ].

%%-----------------------------------------------------------------------------
%% Branch instruction tests
%%-----------------------------------------------------------------------------

%% beq/3: a forward (+8) and a backward (-4) branch with 32-bit encodings, and
%% a comparison against `zero` with offset 0 that expects a 16-bit encoding.
beq_test_() ->
    [
        ?_assertAsmEqual(
            <<16#00628463:32/little>>, "beq t0, t1, .+8", jit_riscv32_asm:beq(t0, t1, 8)
        ),
        ?_assertAsmEqual(
            <<16#feb50ee3:32/little>>, "beq a0, a1, .-4", jit_riscv32_asm:beq(a0, a1, -4)
        ),
        ?_assertAsmEqual(
            <<16#c101:16/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0)
        )
    ].

%% bne/3: a forward (+8) and a backward (-4) branch, both 32-bit encodings.
bne_test_() ->
    [
        ?_assertAsmEqual(
            <<16#00629463:32/little>>, "bne t0, t1, .+8", jit_riscv32_asm:bne(t0, t1, 8)
        ),
        ?_assertAsmEqual(
            <<16#feb51ee3:32/little>>, "bne a0, a1, .-4", jit_riscv32_asm:bne(a0, a1, -4)
        )
    ].

%% blt/3: a forward (+8) and a backward (-4) branch, both 32-bit encodings.
blt_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0062c463:32/little>>, "blt t0, t1, .+8", jit_riscv32_asm:blt(t0, t1, 8)
        ),
        ?_assertAsmEqual(
            <<16#feb54ee3:32/little>>, "blt a0, a1, .-4", jit_riscv32_asm:blt(a0, a1, -4)
        )
    ].

%% bge/3: a forward (+8) and a backward (-4) branch, both 32-bit encodings.
bge_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0062d463:32/little>>, "bge t0, t1, .+8", jit_riscv32_asm:bge(t0, t1, 8)
        ),
        ?_assertAsmEqual(
            <<16#feb55ee3:32/little>>, "bge a0, a1, .-4", jit_riscv32_asm:bge(a0, a1, -4)
        )
    ].

%% bltu/3: a forward (+8) and a backward (-4) branch, both 32-bit encodings.
bltu_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0062e463:32/little>>, "bltu t0, t1, .+8", jit_riscv32_asm:bltu(t0, t1, 8)
        ),
        ?_assertAsmEqual(
            <<16#feb56ee3:32/little>>, "bltu a0, a1, .-4", jit_riscv32_asm:bltu(a0, a1, -4)
        )
    ].
%% bgeu/3: forward (+8) and backward (-4) branch encodings.
bgeu_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0062f463:32/little>>,
            "bgeu t0, t1, .+8",
            jit_riscv32_asm:bgeu(t0, t1, 8)
        ),
        ?_assertAsmEqual(
            <<16#feb57ee3:32/little>>,
            "bgeu a0, a1, .-4",
            jit_riscv32_asm:bgeu(a0, a1, -4)
        )
    ].

%%-----------------------------------------------------------------------------
%% Jump instruction tests
%%-----------------------------------------------------------------------------

%% jal/2 with `ra` as link register (expected as 16-bit encodings), followed
%% by two call/2 cases that are expected to expand to an auipc + jalr pair.
jal_test_() ->
    [
        ?_assertAsmEqual(
            <<16#2021:16/little>>,
            "jal .+8",
            jit_riscv32_asm:jal(ra, 8)
        ),
        ?_assertAsmEqual(
            <<16#3ff5:16/little>>,
            "jal .-4",
            jit_riscv32_asm:jal(ra, -4)
        ),
        ?_assertAsmEqual(
            <<16#00000517:32/little, 16#9502:16/little>>,
            "auipc a0, 0\njalr a0",
            jit_riscv32_asm:call(a0, 0)
        ),
        ?_assertAsmEqual(
            <<16#00002517:32/little, 16#800500e7:32/little>>,
            "auipc a0, 0x2\njalr -2048(a0)",
            jit_riscv32_asm:call(a0, 16#1800)
        )
    ].

%% jalr/2,3 with `ra` as link register: zero offset (explicit and implicit)
%% is expected as a 16-bit encoding, a non-zero offset as a 32-bit one.
jalr_test_() ->
    [
        ?_assertAsmEqual(
            <<16#9502:16/little>>,
            "jalr a0",
            jit_riscv32_asm:jalr(ra, a0, 0)
        ),
        ?_assertAsmEqual(
            <<16#9502:16/little>>,
            "jalr a0",
            jit_riscv32_asm:jalr(ra, a0)
        ),
        ?_assertAsmEqual(
            <<16#004500e7:32/little>>,
            "jalr 4(a0)",
            jit_riscv32_asm:jalr(ra, a0, 4)
        )
    ].

%%-----------------------------------------------------------------------------
%% Upper immediate instruction tests
%%-----------------------------------------------------------------------------

%% lui/2: small positive immediates and -1 (shown as 0xfffff in the
%% reference assembly), all expected as 16-bit encodings.
lui_test_() ->
    [
        ?_assertAsmEqual(
            <<16#65c9:16/little>>,
            "lui a1, 18",
            jit_riscv32_asm:lui(a1, 18)
        ),
        ?_assertAsmEqual(
            <<16#6505:16/little>>,
            "lui a0, 1",
            jit_riscv32_asm:lui(a0, 1)
        ),
        ?_assertAsmEqual(
            <<16#75fd:16/little>>,
            "lui a1, 0xfffff",
            jit_riscv32_asm:lui(a1, -1)
        )
    ].

%% auipc/2: two small immediates, 32-bit encodings.
auipc_test_() ->
    [
        ?_assertAsmEqual(
            <<16#00012597:32/little>>,
            "auipc a1, 18",
            jit_riscv32_asm:auipc(a1, 18)
        ),
        ?_assertAsmEqual(
            <<16#00001517:32/little>>,
            "auipc a0, 1",
            jit_riscv32_asm:auipc(a0, 1)
        )
    ].
%%-----------------------------------------------------------------------------
%% Pseudo-instruction tests
%%-----------------------------------------------------------------------------

%% nop/0 is expected to be the 4-byte form.
nop_test_() ->
    [
        % The 4-byte NOP is wanted for padding, hence ".option norvc" in the
        % reference assembly to force the non-compressed encoding.
        ?_assertAsmEqual(
            <<16#00000013:32/little>>,
            ".option norvc\nnop",
            jit_riscv32_asm:nop()
        )
    ].

%% li/2 with immediates that are expected to fit a single instruction.
li_small_test_() ->
    [
        ?_assertAsmEqual(
            <<16#4529:16/little>>,
            "li a0, 10",
            jit_riscv32_asm:li(a0, 10)
        ),
        ?_assertAsmEqual(
            <<16#557d:16/little>>,
            "li a0, -1",
            jit_riscv32_asm:li(a0, -1)
        ),
        ?_assertAsmEqual(
            <<16#7ff00513:32/little>>,
            "li a0, 2047",
            jit_riscv32_asm:li(a0, 2047)
        )
    ].

%% li/2 with immediates that are expected to expand to two instructions.
li_large_test_() ->
    [
        % 0x12345 = 74565: lui followed by addi
        ?_assertAsmEqual(
            <<16#6549:16/little, 16#34550513:32/little>>,
            "lui a0, 0x12\naddi a0, a0, 0x345",
            jit_riscv32_asm:li(a0, 16#12345)
        ),
        % -0x80000000 = -2147483648, the smallest signed 32-bit value
        ?_assertAsmEqual(
            <<16#800005b7:32/little, 16#0581:16/little>>,
            "lui a1, 0x80000\nc.addi a1, 0",
            jit_riscv32_asm:li(a1, -16#80000000)
        ),
        % 0x7FFFFFFF = 2147483647, the largest signed 32-bit value
        ?_assertAsmEqual(
            <<16#80000537:32/little, 16#157d:16/little>>,
            "lui a0, 0x80000\naddi a0, a0, -1",
            jit_riscv32_asm:li(a0, 16#7FFFFFFF)
        )
    ].

%% mv/2 with identical source and destination, expected as 16-bit encodings.
mv_test_() ->
    [
        ?_assertAsmEqual(
            <<16#852a:16/little>>,
            "mv a0, a0",
            jit_riscv32_asm:mv(a0, a0)
        ),
        ?_assertAsmEqual(
            <<16#85ae:16/little>>,
            "mv a1, a1",
            jit_riscv32_asm:mv(a1, a1)
        )
    ].

%% not_/2 (trailing underscore in the function name) on a0 and a1.
not_test_() ->
    [
        ?_assertAsmEqual(
            <<16#fff54513:32/little>>,
            "not a0, a0",
            jit_riscv32_asm:not_(a0, a0)
        ),
        ?_assertAsmEqual(
            <<16#fff5c593:32/little>>,
            "not a1, a1",
            jit_riscv32_asm:not_(a1, a1)
        )
    ].

%% neg/2 on a0 and a1.
neg_test_() ->
    [
        ?_assertAsmEqual(
            <<16#40a00533:32/little>>,
            "neg a0, a0",
            jit_riscv32_asm:neg(a0, a0)
        ),
        ?_assertAsmEqual(
            <<16#40b005b3:32/little>>,
            "neg a1, a1",
            jit_riscv32_asm:neg(a1, a1)
        )
    ].
%% j/1: forward (+8) and backward (-4) jumps, expected as 16-bit encodings.
j_test_() ->
    [
        ?_assertAsmEqual(
            <<16#a021:16/little>>,
            "j .+8",
            jit_riscv32_asm:j(8)
        ),
        ?_assertAsmEqual(
            <<16#bff5:16/little>>,
            "j .-4",
            jit_riscv32_asm:j(-4)
        )
    ].

%% jr/1 through a0 and t0.
jr_test_() ->
    [
        ?_assertAsmEqual(
            <<16#8502:16/little>>,
            "jr a0",
            jit_riscv32_asm:jr(a0)
        ),
        ?_assertAsmEqual(
            <<16#8282:16/little>>,
            "jr t0",
            jit_riscv32_asm:jr(t0)
        )
    ].

%% ret/0.
ret_test_() ->
    [
        ?_assertAsmEqual(
            <<16#8082:16/little>>,
            "ret",
            jit_riscv32_asm:ret()
        )
    ].

%%-----------------------------------------------------------------------------
%% M Extension (Multiply/Divide) instruction tests
%%-----------------------------------------------------------------------------

%% mul/3 with the destination equal to the first source, over a mix of
%% a-registers and t-registers.
mul_test_() ->
    [
        ?_assertAsmEqual(
            <<16#02f50533:32/little>>,
            "mul a0, a0, a5",
            jit_riscv32_asm:mul(a0, a0, a5)
        ),
        ?_assertAsmEqual(
            <<16#03f60633:32/little>>,
            "mul a2, a2, t6",
            jit_riscv32_asm:mul(a2, a2, t6)
        ),
        ?_assertAsmEqual(
            <<16#026585b3:32/little>>,
            "mul a1, a1, t1",
            jit_riscv32_asm:mul(a1, a1, t1)
        ),
        ?_assertAsmEqual(
            <<16#02d282b3:32/little>>,
            "mul t0, t0, a3",
            jit_riscv32_asm:mul(t0, t0, a3)
        )
    ].

%%-----------------------------------------------------------------------------
%% System instruction tests
%%-----------------------------------------------------------------------------

%% c_ebreak/0.
c_ebreak_test_() ->
    [
        ?_assertAsmEqual(
            <<16#9002:16/little>>,
            "c.ebreak",
            jit_riscv32_asm:c_ebreak()
        )
    ].

%%-----------------------------------------------------------------------------
%% C Extension - Arithmetic and Logical instruction tests
%%-----------------------------------------------------------------------------

%% c_add/2 with a-register and s0 sources.
c_add_test_() ->
    [
        ?_assertAsmEqual(
            <<16#9532:16/little>>,
            "c.add a0, a2",
            jit_riscv32_asm:c_add(a0, a2)
        ),
        ?_assertAsmEqual(
            <<16#95be:16/little>>,
            "c.add a1, a5",
            jit_riscv32_asm:c_add(a1, a5)
        ),
        ?_assertAsmEqual(
            <<16#9522:16/little>>,
            "c.add a0, s0",
            jit_riscv32_asm:c_add(a0, s0)
        )
    ].
%% c_mv/2 between a-registers and into s0.
c_mv_test_() ->
    [
        ?_assertAsmEqual(
            <<16#8532:16/little>>,
            "c.mv a0, a2",
            jit_riscv32_asm:c_mv(a0, a2)
        ),
        ?_assertAsmEqual(
            <<16#85be:16/little>>,
            "c.mv a1, a5",
            jit_riscv32_asm:c_mv(a1, a5)
        ),
        ?_assertAsmEqual(
            <<16#842a:16/little>>,
            "c.mv s0, a0",
            jit_riscv32_asm:c_mv(s0, a0)
        )
    ].

%% c_sub/2, including the cases where both operands are the same register.
c_sub_test_() ->
    [
        ?_assertAsmEqual(
            <<16#8d09:16/little>>,
            "c.sub a0, a0",
            jit_riscv32_asm:c_sub(a0, a0)
        ),
        ?_assertAsmEqual(
            <<16#8d8d:16/little>>,
            "c.sub a1, a1",
            jit_riscv32_asm:c_sub(a1, a1)
        ),
        ?_assertAsmEqual(
            <<16#8c0d:16/little>>,
            "c.sub s0, a1",
            jit_riscv32_asm:c_sub(s0, a1)
        )
    ].

%% c_and/2 with a0, a5 and s0 as destinations and a1 as source.
c_and_test_() ->
    [
        ?_assertAsmEqual(
            <<16#8d6d:16/little>>,
            "c.and a0, a1",
            jit_riscv32_asm:c_and(a0, a1)
        ),
        ?_assertAsmEqual(
            <<16#8fed:16/little>>,
            "c.and a5, a1",
            jit_riscv32_asm:c_and(a5, a1)
        ),
        ?_assertAsmEqual(
            <<16#8c6d:16/little>>,
            "c.and s0, a1",
            jit_riscv32_asm:c_and(s0, a1)
        )
    ].

%% c_or/2 with a0, a5 and s0 as destinations and a1 as source.
c_or_test_() ->
    [
        ?_assertAsmEqual(
            <<16#8d4d:16/little>>,
            "c.or a0, a1",
            jit_riscv32_asm:c_or(a0, a1)
        ),
        ?_assertAsmEqual(
            <<16#8fcd:16/little>>,
            "c.or a5, a1",
            jit_riscv32_asm:c_or(a5, a1)
        ),
        ?_assertAsmEqual(
            <<16#8c4d:16/little>>,
            "c.or s0, a1",
            jit_riscv32_asm:c_or(s0, a1)
        )
    ].

%% c_xor/2 with a0, a5 and s0 as destinations and a1 as source.
c_xor_test_() ->
    [
        ?_assertAsmEqual(
            <<16#8d2d:16/little>>,
            "c.xor a0, a1",
            jit_riscv32_asm:c_xor(a0, a1)
        ),
        ?_assertAsmEqual(
            <<16#8fad:16/little>>,
            "c.xor a5, a1",
            jit_riscv32_asm:c_xor(a5, a1)
        ),
        ?_assertAsmEqual(
            <<16#8c2d:16/little>>,
            "c.xor s0, a1",
            jit_riscv32_asm:c_xor(s0, a1)
        )
    ].
%%-----------------------------------------------------------------------------
%% C Extension - Immediate instruction tests
%%-----------------------------------------------------------------------------

%% c_addi/2 with positive and negative immediates.
c_addi_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0511:16/little>>,
            "c.addi a0, 4",
            jit_riscv32_asm:c_addi(a0, 4)
        ),
        ?_assertAsmEqual(
            <<16#15fd:16/little>>,
            "c.addi a1, -1",
            jit_riscv32_asm:c_addi(a1, -1)
        ),
        ?_assertAsmEqual(
            <<16#0541:16/little>>,
            "c.addi a0, 16",
            jit_riscv32_asm:c_addi(a0, 16)
        ),
        ?_assertAsmEqual(
            <<16#1561:16/little>>,
            "c.addi a0, -8",
            jit_riscv32_asm:c_addi(a0, -8)
        )
    ].

%% c_andi/2 with positive immediates and -1.
c_andi_test_() ->
    [
        ?_assertAsmEqual(
            <<16#8929:16/little>>,
            "c.andi a0, 10",
            jit_riscv32_asm:c_andi(a0, 10)
        ),
        ?_assertAsmEqual(
            <<16#99fd:16/little>>,
            "c.andi a1, -1",
            jit_riscv32_asm:c_andi(a1, -1)
        ),
        ?_assertAsmEqual(
            <<16#8941:16/little>>,
            "c.andi a0, 16",
            jit_riscv32_asm:c_andi(a0, 16)
        )
    ].

%% c_li/2 with immediates 10, -1, 1 and -32.
c_li_test_() ->
    [
        ?_assertAsmEqual(
            <<16#4529:16/little>>,
            "c.li a0, 10",
            jit_riscv32_asm:c_li(a0, 10)
        ),
        ?_assertAsmEqual(
            <<16#55fd:16/little>>,
            "c.li a1, -1",
            jit_riscv32_asm:c_li(a1, -1)
        ),
        ?_assertAsmEqual(
            <<16#4505:16/little>>,
            "c.li a0, 1",
            jit_riscv32_asm:c_li(a0, 1)
        ),
        ?_assertAsmEqual(
            <<16#5501:16/little>>,
            "c.li a0, -32",
            jit_riscv32_asm:c_li(a0, -32)
        )
    ].

%% c_lui/2; the -1 case is written as 0xfffff in the reference assembly.
c_lui_test_() ->
    [
        ?_assertAsmEqual(
            <<16#6529:16/little>>,
            "c.lui a0, 10",
            jit_riscv32_asm:c_lui(a0, 10)
        ),
        ?_assertAsmEqual(
            <<16#75fd:16/little>>,
            "c.lui a1, 0xfffff",
            jit_riscv32_asm:c_lui(a1, -1)
        ),
        ?_assertAsmEqual(
            <<16#6505:16/little>>,
            "c.lui a0, 1",
            jit_riscv32_asm:c_lui(a0, 1)
        )
    ].
%% c_addi16sp/1: takes only the immediate; the reference assembly uses sp as
%% the register operand.
c_addi16sp_test_() ->
    [
        ?_assertAsmEqual(
            <<16#6141:16/little>>,
            "c.addi16sp sp, 16",
            jit_riscv32_asm:c_addi16sp(16)
        ),
        ?_assertAsmEqual(
            <<16#7101:16/little>>,
            "c.addi16sp sp, -512",
            jit_riscv32_asm:c_addi16sp(-512)
        ),
        ?_assertAsmEqual(
            <<16#6161:16/little>>,
            "c.addi16sp sp, 80",
            jit_riscv32_asm:c_addi16sp(80)
        )
    ].

%% c_addi4spn/2: destination register and immediate (4, 32 and 1020).
c_addi4spn_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0048:16/little>>,
            "c.addi4spn a0, sp, 4",
            jit_riscv32_asm:c_addi4spn(a0, 4)
        ),
        ?_assertAsmEqual(
            <<16#1010:16/little>>,
            "c.addi4spn a2, sp, 32",
            jit_riscv32_asm:c_addi4spn(a2, 32)
        ),
        ?_assertAsmEqual(
            <<16#1ffc:16/little>>,
            "c.addi4spn a5, sp, 1020",
            jit_riscv32_asm:c_addi4spn(a5, 1020)
        )
    ].

%%-----------------------------------------------------------------------------
%% C Extension - Shift instruction tests
%%-----------------------------------------------------------------------------

%% c_slli/2 with shift amounts 3, 31 and 16.
c_slli_test_() ->
    [
        ?_assertAsmEqual(
            <<16#050e:16/little>>,
            "c.slli a0, 3",
            jit_riscv32_asm:c_slli(a0, 3)
        ),
        ?_assertAsmEqual(
            <<16#05fe:16/little>>,
            "c.slli a1, 31",
            jit_riscv32_asm:c_slli(a1, 31)
        ),
        ?_assertAsmEqual(
            <<16#0542:16/little>>,
            "c.slli a0, 16",
            jit_riscv32_asm:c_slli(a0, 16)
        )
    ].

%% c_srli/2 with shift amounts 3, 31 and 16.
c_srli_test_() ->
    [
        ?_assertAsmEqual(
            <<16#810d:16/little>>,
            "c.srli a0, 3",
            jit_riscv32_asm:c_srli(a0, 3)
        ),
        ?_assertAsmEqual(
            <<16#81fd:16/little>>,
            "c.srli a1, 31",
            jit_riscv32_asm:c_srli(a1, 31)
        ),
        ?_assertAsmEqual(
            <<16#8141:16/little>>,
            "c.srli a0, 16",
            jit_riscv32_asm:c_srli(a0, 16)
        )
    ].

%% c_srai/2 with shift amounts 3, 31 and 16.
c_srai_test_() ->
    [
        ?_assertAsmEqual(
            <<16#850d:16/little>>,
            "c.srai a0, 3",
            jit_riscv32_asm:c_srai(a0, 3)
        ),
        ?_assertAsmEqual(
            <<16#85fd:16/little>>,
            "c.srai a1, 31",
            jit_riscv32_asm:c_srai(a1, 31)
        ),
        ?_assertAsmEqual(
            <<16#8541:16/little>>,
            "c.srai a0, 16",
            jit_riscv32_asm:c_srai(a0, 16)
        )
    ].
%%-----------------------------------------------------------------------------
%% C Extension - Load/Store instruction tests
%%-----------------------------------------------------------------------------

%% c_lw/2: the memory operand is passed as a {BaseRegister, Offset} tuple.
c_lw_test_() ->
    [
        ?_assertAsmEqual(
            <<16#4188:16/little>>,
            "c.lw a0, 0(a1)",
            jit_riscv32_asm:c_lw(a0, {a1, 0})
        ),
        ?_assertAsmEqual(
            <<16#41d8:16/little>>,
            "c.lw a4, 4(a1)",
            jit_riscv32_asm:c_lw(a4, {a1, 4})
        ),
        ?_assertAsmEqual(
            <<16#5ffc:16/little>>,
            "c.lw a5, 124(a5)",
            jit_riscv32_asm:c_lw(a5, {a5, 124})
        )
    ].

%% c_sw/2: same operand shape as c_lw/2, with the stored register first.
c_sw_test_() ->
    [
        ?_assertAsmEqual(
            <<16#c188:16/little>>,
            "c.sw a0, 0(a1)",
            jit_riscv32_asm:c_sw(a0, {a1, 0})
        ),
        ?_assertAsmEqual(
            <<16#c1d8:16/little>>,
            "c.sw a4, 4(a1)",
            jit_riscv32_asm:c_sw(a4, {a1, 4})
        ),
        ?_assertAsmEqual(
            <<16#dffc:16/little>>,
            "c.sw a5, 124(a5)",
            jit_riscv32_asm:c_sw(a5, {a5, 124})
        )
    ].

%% c_lwsp/2: register and sp-relative offset (0, 4 and 252).
c_lwsp_test_() ->
    [
        ?_assertAsmEqual(
            <<16#4502:16/little>>,
            "c.lwsp a0, 0(sp)",
            jit_riscv32_asm:c_lwsp(a0, 0)
        ),
        ?_assertAsmEqual(
            <<16#4512:16/little>>,
            "c.lwsp a0, 4(sp)",
            jit_riscv32_asm:c_lwsp(a0, 4)
        ),
        ?_assertAsmEqual(
            <<16#50fe:16/little>>,
            "c.lwsp ra, 252(sp)",
            jit_riscv32_asm:c_lwsp(ra, 252)
        )
    ].

%% c_swsp/2: register and sp-relative offset (0, 4 and 252).
c_swsp_test_() ->
    [
        ?_assertAsmEqual(
            <<16#c02a:16/little>>,
            "c.swsp a0, 0(sp)",
            jit_riscv32_asm:c_swsp(a0, 0)
        ),
        ?_assertAsmEqual(
            <<16#c22a:16/little>>,
            "c.swsp a0, 4(sp)",
            jit_riscv32_asm:c_swsp(a0, 4)
        ),
        ?_assertAsmEqual(
            <<16#dfe6:16/little>>,
            "c.swsp s9, 252(sp)",
            jit_riscv32_asm:c_swsp(s9, 252)
        )
    ].
%%-----------------------------------------------------------------------------
%% C Extension - Branch and Jump instruction tests
%%-----------------------------------------------------------------------------

%% c_beqz/2: forward, backward and zero offsets.
c_beqz_test_() ->
    [
        ?_assertAsmEqual(
            <<16#c111:16/little>>,
            "c.beqz a0, .+4",
            jit_riscv32_asm:c_beqz(a0, 4)
        ),
        ?_assertAsmEqual(
            <<16#dced:16/little>>,
            "c.beqz s1, .-6",
            jit_riscv32_asm:c_beqz(s1, -6)
        ),
        ?_assertAsmEqual(
            <<16#c101:16/little>>,
            "c.beqz a0, .",
            jit_riscv32_asm:c_beqz(a0, 0)
        )
    ].

%% c_bnez/2: forward, backward and zero offsets.
c_bnez_test_() ->
    [
        ?_assertAsmEqual(
            <<16#e111:16/little>>,
            "c.bnez a0, .+4",
            jit_riscv32_asm:c_bnez(a0, 4)
        ),
        ?_assertAsmEqual(
            <<16#fced:16/little>>,
            "c.bnez s1, .-6",
            jit_riscv32_asm:c_bnez(s1, -6)
        ),
        ?_assertAsmEqual(
            <<16#e101:16/little>>,
            "c.bnez a0, .",
            jit_riscv32_asm:c_bnez(a0, 0)
        )
    ].

%% c_j/1: forward, backward and zero offsets.
c_j_test_() ->
    [
        ?_assertAsmEqual(
            <<16#a011:16/little>>,
            "c.j .+4",
            jit_riscv32_asm:c_j(4)
        ),
        ?_assertAsmEqual(
            <<16#bfed:16/little>>,
            "c.j .-6",
            jit_riscv32_asm:c_j(-6)
        ),
        ?_assertAsmEqual(
            <<16#a001:16/little>>,
            "c.j .",
            jit_riscv32_asm:c_j(0)
        )
    ].

%% c_jal/1: forward, backward and zero offsets.
c_jal_test_() ->
    [
        ?_assertAsmEqual(
            <<16#2021:16/little>>,
            "c.jal .+8",
            jit_riscv32_asm:c_jal(8)
        ),
        ?_assertAsmEqual(
            <<16#3ff5:16/little>>,
            "c.jal .-4",
            jit_riscv32_asm:c_jal(-4)
        ),
        ?_assertAsmEqual(
            <<16#2001:16/little>>,
            "c.jal .",
            jit_riscv32_asm:c_jal(0)
        )
    ].

%% c_jr/1 through a0, s0 and ra.
c_jr_test_() ->
    [
        ?_assertAsmEqual(
            <<16#8502:16/little>>,
            "c.jr a0",
            jit_riscv32_asm:c_jr(a0)
        ),
        ?_assertAsmEqual(
            <<16#8402:16/little>>,
            "c.jr s0",
            jit_riscv32_asm:c_jr(s0)
        ),
        ?_assertAsmEqual(
            <<16#8082:16/little>>,
            "c.jr ra",
            jit_riscv32_asm:c_jr(ra)
        )
    ].

%% c_jalr/1 through a0 and s0.
c_jalr_test_() ->
    [
        ?_assertAsmEqual(
            <<16#9502:16/little>>,
            "c.jalr a0",
            jit_riscv32_asm:c_jalr(a0)
        ),
        ?_assertAsmEqual(
            <<16#9402:16/little>>,
            "c.jalr s0",
            jit_riscv32_asm:c_jalr(s0)
        )
    ].
%%-----------------------------------------------------------------------------
%% C Extension - Pseudo-instruction tests
%%-----------------------------------------------------------------------------

%% c_nop/0 is expected to be the 2-byte encoding 0x0001.
c_nop_test_() ->
    [
        ?_assertAsmEqual(
            <<16#0001:16/little>>,
            "c.nop",
            jit_riscv32_asm:c_nop()
        )
    ].
diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl
new file mode 100644
index 0000000000..f398cb3f49
--- /dev/null
+++ b/tests/libs/jit/jit_riscv32_tests.erl
@@ -0,0 +1,3432 @@
%
% This file is part of AtomVM.
%
% Copyright 2025 Paul Guyot
%
% Licensed under the Apache License, Version 2.0 (the "License");
% you may not use this file except in compliance with the License.
% You may obtain a copy of the License at
%
%    http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS,
% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
% See the License for the specific language governing permissions and
% limitations under the License.
%
% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
%

-module(jit_riscv32_tests).

-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
-endif.

-include("jit/include/jit.hrl").
-include("jit/src/term.hrl").
-include("jit/src/default_atoms.hrl").
-include("jit/src/primitives.hrl").

% Backend module under test; the tests in this module call it through ?BACKEND.
-define(BACKEND, jit_riscv32).
% Expected byte sequences are written as objdump-style disassembly listings
% and converted with dump_to_bin/1.
% NOTE(review): this comment previously named
%   arm-elf-objdump -b binary -D dump.bin -M arm
% which does not match the RISC-V listings below. Presumably a RISC-V objdump
% was used, e.g.
%   riscv32-unknown-elf-objdump -b binary -m riscv:rv32 -D dump.bin
% (toolchain prefix and exact flags to be confirmed).

% Primitive index 0 called with [ctx, jit_state]: the result register is
% expected to be t6, and the emitted code must match the listing (function
% pointer loaded from offset 0 of a2, ra/a0/a1/a2 saved and restored around
% the jalr).
call_primitive_0_test() ->
    Dump =
        <<
            " 0: 00062f83 lw t6,0(a2)\n"
            " 4: 1141 addi sp,sp,-16\n"
            " 6: c006 sw ra,0(sp)\n"
            " 8: c22a sw a0,4(sp)\n"
            " a: c42e sw a1,8(sp)\n"
            " c: c632 sw a2,12(sp)\n"
            " e: 9f82 jalr t6\n"
            " 10: 8faa mv t6,a0\n"
            " 12: 4082 lw ra,0(sp)\n"
            " 14: 4512 lw a0,4(sp)\n"
            " 16: 45a2 lw a1,8(sp)\n"
            " 18: 4632 lw a2,12(sp)\n"
            " 1a: 0141 addi sp,sp,16"
        >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {S1, ResultReg} = ?BACKEND:call_primitive(S0, 0, [ctx, jit_state]),
    ?assertEqual(t6, ResultReg),
    Stream = ?BACKEND:stream(S1),
    ?assertEqual(dump_to_bin(Dump), Stream).

% Same as call_primitive_0_test/0 for primitive index 1: only the first
% instruction of the listing differs (load from offset 4 of a2).
call_primitive_1_test() ->
    Dump =
        <<
            " 0: 00462f83 lw t6,4(a2)\n"
            " 4: 1141 addi sp,sp,-16\n"
            " 6: c006 sw ra,0(sp)\n"
            " 8: c22a sw a0,4(sp)\n"
            " a: c42e sw a1,8(sp)\n"
            " c: c632 sw a2,12(sp)\n"
            " e: 9f82 jalr t6\n"
            " 10: 8faa mv t6,a0\n"
            " 12: 4082 lw ra,0(sp)\n"
            " 14: 4512 lw a0,4(sp)\n"
            " 16: 45a2 lw a1,8(sp)\n"
            " 18: 4632 lw a2,12(sp)\n"
            " 1a: 0141 addi sp,sp,16"
        >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {S1, ResultReg} = ?BACKEND:call_primitive(S0, 1, [ctx, jit_state]),
    ?assertEqual(t6, ResultReg),
    Stream = ?BACKEND:stream(S1),
    ?assertEqual(dump_to_bin(Dump), Stream).
% Primitive index 2 called with ctx and three integer arguments: the listing
% expects the integers to be loaded into a1, a2 and a3 before the jalr, and
% the result to end up in t6.
call_primitive_2_args_test() ->
    Dump =
        <<
            " 0: 00862f83 lw t6,8(a2)\n"
            " 4: 1141 addi sp,sp,-16\n"
            " 6: c006 sw ra,0(sp)\n"
            " 8: c22a sw a0,4(sp)\n"
            " a: c42e sw a1,8(sp)\n"
            " c: c632 sw a2,12(sp)\n"
            " e: 02a00593 li a1,42\n"
            " 12: 02b00613 li a2,43\n"
            " 16: 02c00693 li a3,44\n"
            " 1a: 9f82 jalr t6\n"
            " 1c: 8faa mv t6,a0\n"
            " 1e: 4082 lw ra,0(sp)\n"
            " 20: 4512 lw a0,4(sp)\n"
            " 22: 45a2 lw a1,8(sp)\n"
            " 24: 4632 lw a2,12(sp)\n"
            " 26: 0141 addi sp,sp,16"
        >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {S1, ResultReg} = ?BACKEND:call_primitive(S0, 2, [ctx, 42, 43, 44]),
    ?assertEqual(t6, ResultReg),
    Stream = ?BACKEND:stream(S1),
    ?assertEqual(dump_to_bin(Dump), Stream).

% call_primitive_last/3 with ?PRIM_ALLOCATE and five arguments: the listing
% expects the three integer arguments in a2, a3 and a4 and a tail jump (jr)
% with no register save/restore.
call_primitive_5_args_test() ->
    Dump =
        <<
            " 0: 01462f83 lw t6,20(a2)\n"
            " 4: 4641 li a2,16\n"
            " 6: 02000693 li a3,32\n"
            " a: 4709 li a4,2\n"
            " c: 8f82 jr t6"
        >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    S1 = ?BACKEND:call_primitive_last(S0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]),
    Stream = ?BACKEND:stream(S1),
    ?assertEqual(dump_to_bin(Dump), Stream).
% call_primitive/3 with six arguments, two of them native registers passed
% as {free, Reg}. The listing expects the primitive pointer to be fetched via
% an li/add/lw sequence and the two freed registers to be moved into a2 and
% a5.
call_primitive_6_args_test() ->
    Dump =
        <<
            " 0: 01852f83 lw t6,24(a0)\n"
            " 4: 4f0d li t5,3\n"
            " 6: ffff4f13 not t5,t5\n"
            " a: 01efffb3 and t6,t6,t5\n"
            " e: 01c52f03 lw t5,28(a0)\n"
            " 12: 0b800e93 li t4,184\n"
            " 16: 9eb2 add t4,t4,a2\n"
            " 18: 000eae83 lw t4,0(t4)\n"
            " 1c: 1141 addi sp,sp,-16\n"
            " 1e: c006 sw ra,0(sp)\n"
            " 20: c22a sw a0,4(sp)\n"
            " 22: c42e sw a1,8(sp)\n"
            " 24: c632 sw a2,12(sp)\n"
            " 26: 867e mv a2,t6\n"
            " 28: 04000693 li a3,64\n"
            " 2c: 4721 li a4,8\n"
            " 2e: 87fa mv a5,t5\n"
            " 30: 9e82 jalr t4\n"
            " 32: 8eaa mv t4,a0\n"
            " 34: 4082 lw ra,0(sp)\n"
            " 36: 4512 lw a0,4(sp)\n"
            " 38: 45a2 lw a1,8(sp)\n"
            " 3a: 4632 lw a2,12(sp)\n"
            " 3c: 0141 addi sp,sp,16"
        >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    % Load x_reg 0 and mask it to obtain bin_ptr (same pattern as get_list_test)
    {S1, RegA} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
    S2 = ?BACKEND:and_(S1, RegA, ?TERM_PRIMARY_CLEAR_MASK),
    % A second native register, used as the last parameter so that
    % {free, Reg} handling is exercised there too
    {S3, OtherReg} = ?BACKEND:move_to_native_register(S2, {x_reg, 1}),
    % PRIM_BITSTRING_EXTRACT_INTEGER takes 6 arguments
    {S4, _ResultReg} = ?BACKEND:call_primitive(
        S3,
        ?PRIM_BITSTRING_EXTRACT_INTEGER,
        [ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg}]
    ),
    Stream = ?BACKEND:stream(S4),
    ?assertEqual(dump_to_bin(Dump), Stream).
% Three ?PRIM_EXTENDED_REGISTER_PTR calls followed by ?PRIM_PUT_LIST on the
% first two results (dereferenced and freed), storing the result through the
% third. The listing expects each later call to additionally save the native
% registers still in use (t6, then t5 and t6, then t4) on a 32-byte frame.
call_primitive_extended_regs_test() ->
    Dump = <<
        " 0: 04862f83 lw t6,72(a2)\n"
        " 4: 1141 addi sp,sp,-16\n"
        " 6: c006 sw ra,0(sp)\n"
        " 8: c22a sw a0,4(sp)\n"
        " a: c42e sw a1,8(sp)\n"
        " c: c632 sw a2,12(sp)\n"
        " e: 45cd li a1,19\n"
        " 10: 9f82 jalr t6\n"
        " 12: 8faa mv t6,a0\n"
        " 14: 4082 lw ra,0(sp)\n"
        " 16: 4512 lw a0,4(sp)\n"
        " 18: 45a2 lw a1,8(sp)\n"
        " 1a: 4632 lw a2,12(sp)\n"
        " 1c: 0141 addi sp,sp,16\n"
        " 1e: 04862f03 lw t5,72(a2)\n"
        " 22: 1101 addi sp,sp,-32\n"
        " 24: c006 sw ra,0(sp)\n"
        " 26: c22a sw a0,4(sp)\n"
        " 28: c42e sw a1,8(sp)\n"
        " 2a: c632 sw a2,12(sp)\n"
        " 2c: c87e sw t6,16(sp)\n"
        " 2e: 45d1 li a1,20\n"
        " 30: 9f02 jalr t5\n"
        " 32: 8f2a mv t5,a0\n"
        " 34: 4082 lw ra,0(sp)\n"
        " 36: 4512 lw a0,4(sp)\n"
        " 38: 45a2 lw a1,8(sp)\n"
        " 3a: 4632 lw a2,12(sp)\n"
        " 3c: 4fc2 lw t6,16(sp)\n"
        " 3e: 02010113 addi sp,sp,32\n"
        " 42: 04862e83 lw t4,72(a2)\n"
        " 46: 1101 addi sp,sp,-32\n"
        " 48: c006 sw ra,0(sp)\n"
        " 4a: c22a sw a0,4(sp)\n"
        " 4c: c42e sw a1,8(sp)\n"
        " 4e: c632 sw a2,12(sp)\n"
        " 50: c87a sw t5,16(sp)\n"
        " 52: ca7e sw t6,20(sp)\n"
        " 54: 45cd li a1,19\n"
        " 56: 9e82 jalr t4\n"
        " 58: 8eaa mv t4,a0\n"
        " 5a: 4082 lw ra,0(sp)\n"
        " 5c: 4512 lw a0,4(sp)\n"
        " 5e: 45a2 lw a1,8(sp)\n"
        " 60: 4632 lw a2,12(sp)\n"
        " 62: 4f42 lw t5,16(sp)\n"
        " 64: 4fd2 lw t6,20(sp)\n"
        " 66: 02010113 addi sp,sp,32\n"
        " 6a: 03462e03 lw t3,52(a2)\n"
        " 6e: 1101 addi sp,sp,-32\n"
        " 70: c006 sw ra,0(sp)\n"
        " 72: c22a sw a0,4(sp)\n"
        " 74: c42e sw a1,8(sp)\n"
        " 76: c632 sw a2,12(sp)\n"
        " 78: c876 sw t4,16(sp)\n"
        " 7a: 000fa583 lw a1,0(t6)\n"
        " 7e: 000f2603 lw a2,0(t5)\n"
        " 82: 9e02 jalr t3\n"
        " 84: 8e2a mv t3,a0\n"
        " 86: 4082 lw ra,0(sp)\n"
        " 88: 4512 lw a0,4(sp)\n"
        " 8a: 45a2 lw a1,8(sp)\n"
        " 8c: 4632 lw a2,12(sp)\n"
        " 8e: 4ec2 lw t4,16(sp)\n"
        " 90: 02010113 addi sp,sp,32\n"
        " 94: 01cea023 sw t3,0(t4)"
    >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {S1, RegA} = ?BACKEND:call_primitive(S0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
    {S2, RegB} = ?BACKEND:call_primitive(S1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]),
    {S3, RegC} = ?BACKEND:call_primitive(S2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
    PutListArgs = [ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}}],
    {S4, ResultReg} = ?BACKEND:call_primitive(S3, ?PRIM_PUT_LIST, PutListArgs),
    S5 = ?BACKEND:move_to_vm_register(S4, ResultReg, {ptr, RegC}),
    S6 = ?BACKEND:free_native_registers(S5, [ResultReg, {ptr, RegC}]),
    ?BACKEND:assert_all_native_free(S6),
    Stream = ?BACKEND:stream(S6),
    ?assertEqual(dump_to_bin(Dump), Stream).

% Five immediates are first moved into native registers (matched here as t6
% down to t2), then ?PRIM_BITSTRING_INSERT_INTEGER is called with all five
% arguments coming from those registers, two of them as {free, Reg}. The
% listing expects t1 to hold the primitive pointer and only t4/t5/t6 (the
% registers not freed by the call) to be saved and restored.
call_primitive_few_free_regs_test() ->
    Dump = <<
        " 0: 4f85 li t6,1\n"
        " 2: 4f09 li t5,2\n"
        " 4: 4e8d li t4,3\n"
        " 6: 4e11 li t3,4\n"
        " 8: 4395 li t2,5\n"
        " a: 0e400313 li t1,228\n"
        " e: 9332 add t1,t1,a2\n"
        " 10: 00032303 lw t1,0(t1)\n"
        " 14: 1101 addi sp,sp,-32\n"
        " 16: c006 sw ra,0(sp)\n"
        " 18: c22a sw a0,4(sp)\n"
        " 1a: c42e sw a1,8(sp)\n"
        " 1c: c632 sw a2,12(sp)\n"
        " 1e: c876 sw t4,16(sp)\n"
        " 20: ca7a sw t5,20(sp)\n"
        " 22: cc7e sw t6,24(sp)\n"
        " 24: 857a mv a0,t5\n"
        " 26: 85fe mv a1,t6\n"
        " 28: 8672 mv a2,t3\n"
        " 2a: 86f6 mv a3,t4\n"
        " 2c: 871e mv a4,t2\n"
        " 2e: 9302 jalr t1\n"
        " 30: 832a mv t1,a0\n"
        " 32: 4082 lw ra,0(sp)\n"
        " 34: 4512 lw a0,4(sp)\n"
        " 36: 45a2 lw a1,8(sp)\n"
        " 38: 4632 lw a2,12(sp)\n"
        " 3a: 4ec2 lw t4,16(sp)\n"
        " 3c: 4f52 lw t5,20(sp)\n"
        " 3e: 4fe2 lw t6,24(sp)\n"
        " 40: 02010113 addi sp,sp,32"
    >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    % Each match below also asserts which native register gets allocated.
    {S1, t6} = ?BACKEND:move_to_native_register(S0, 1),
    {S2, t5} = ?BACKEND:move_to_native_register(S1, 2),
    {S3, t4} = ?BACKEND:move_to_native_register(S2, 3),
    {S4, t3} = ?BACKEND:move_to_native_register(S3, 4),
    {S5, t2} = ?BACKEND:move_to_native_register(S4, 5),
    InsertArgs = [t5, t6, {free, t3}, t4, {free, t2}],
    {S6, ResultReg} = ?BACKEND:call_primitive(S5, ?PRIM_BITSTRING_INSERT_INTEGER, InsertArgs),
    S7 = ?BACKEND:free_native_registers(S6, [ResultReg, t5, t6, t4]),
    ?BACKEND:assert_all_native_free(S7),
    Stream = ?BACKEND:stream(S7),
    ?assertEqual(dump_to_bin(Dump), Stream).

% decrement_reductions_and_maybe_schedule_next/1 followed by a tail call to
% ?PRIM_CALL_EXT whose last argument is -1. In the listing the `offset`
% argument is expected to be materialised as the immediate 36 in a2.
call_ext_only_test() ->
    Dump = <<
        " 0: 0085af83 lw t6,8(a1)\n"
        " 4: 1ffd addi t6,t6,-1\n"
        " 6: 01f5a423 sw t6,8(a1)\n"
        " a: 000f9b63 bnez t6,0x20\n"
        " e: 00000f97 auipc t6,0x0\n"
        " 12: 0fc9 addi t6,t6,18 # 0x20\n"
        " 14: 0001 nop\n"
        " 16: 01f5a223 sw t6,4(a1)\n"
        " 1a: 00862f83 lw t6,8(a2)\n"
        " 1e: 8f82 jr t6\n"
        " 20: 01062f83 lw t6,16(a2)\n"
        " 24: 02400613 li a2,36\n"
        " 28: 4689 li a3,2\n"
        " 2a: 4709 li a4,2\n"
        " 2c: 57fd li a5,-1\n"
        " 2e: 8f82 jr t6"
    >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    S1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(S0),
    S2 = ?BACKEND:call_primitive_last(S1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]),
    Stream = ?BACKEND:stream(S2),
    ?assertEqual(dump_to_bin(Dump), Stream).

% Tail call to ?PRIM_RAISE_ERROR_TUPLE with five arguments, the last one a
% native register passed as {free, RegA}. The listing expects the primitive
% pointer in t5 and the freed register moved into a4.
call_primitive_last_5_args_test() ->
    Dump = <<
        " 0: 01852f83 lw t6,24(a0)\n"
        " 4: 04c62f03 lw t5,76(a2)\n"
        " 8: 4621 li a2,8\n"
        " a: 2cb00693 li a3,715\n"
        " e: 877e mv a4,t6\n"
        " 10: 8f02 jr t5"
    >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {S1, RegA} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
    RaiseArgs = [ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA}],
    S2 = ?BACKEND:call_primitive_last(S1, ?PRIM_RAISE_ERROR_TUPLE, RaiseArgs),
    Stream = ?BACKEND:stream(S2),
    ?assertEqual(dump_to_bin(Dump), Stream).
% Same sequence as call_ext_only_test/0 except that the last ?PRIM_CALL_EXT
% argument is 10 instead of -1, which only changes the "li a5" line of the
% expected listing.
call_ext_last_test() ->
    Dump = <<
        " 0: 0085af83 lw t6,8(a1)\n"
        " 4: 1ffd addi t6,t6,-1\n"
        " 6: 01f5a423 sw t6,8(a1)\n"
        " a: 000f9b63 bnez t6,0x20\n"
        " e: 00000f97 auipc t6,0x0\n"
        " 12: 0fc9 addi t6,t6,18 # 0x20\n"
        " 14: 0001 nop\n"
        " 16: 01f5a223 sw t6,4(a1)\n"
        " 1a: 00862f83 lw t6,8(a2)\n"
        " 1e: 8f82 jr t6\n"
        " 20: 01062f83 lw t6,16(a2)\n"
        " 24: 02400613 li a2,36\n"
        " 28: 4689 li a3,2\n"
        " 2a: 4709 li a4,2\n"
        " 2c: 47a9 li a5,10\n"
        " 2e: 8f82 jr t6"
    >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    S1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(S0),
    S2 = ?BACKEND:call_primitive_last(S1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]),
    Stream = ?BACKEND:stream(S2),
    ?assertEqual(dump_to_bin(Dump), Stream).

% Tail call to primitive index 0 with one integer argument after ctx and
% jit_state: three instructions are expected (load pointer, load 42 into a2,
% jr).
call_primitive_last_test() ->
    Dump =
        <<
            " 0: 00062f83 lw t6,0(a2)\n"
            " 4: 02a00613 li a2,42\n"
            " 8: 8f82 jr t6"
        >>,
    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    S1 = ?BACKEND:call_primitive_last(S0, 0, [ctx, jit_state, 42]),
    Stream = ?BACKEND:stream(S1),
    ?assertEqual(dump_to_bin(Dump), Stream).
% return_if_not_equal_to_ctx/2 applied to the result of
% ?PRIM_PROCESS_SIGNAL_MESSAGES. Both cases start from a fresh backend state
% built by the setup fun:
%   1. the result register itself (t6) is passed as {free, Reg};
%   2. a copy of it (expected in t5) is passed as {free, Reg}.
% In both listings the tail is a beq against a0 that skips a "mv a0,<reg>;
% ret" pair.
return_if_not_equal_to_ctx_test_() ->
    NewState = fun() ->
        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
    end,
    Cases = fun(State0) ->
        [
            ?_test(begin
                Dump =
                    <<
                        " 0: 05462f83 lw t6,84(a2)\n"
                        " 4: 1141 addi sp,sp,-16\n"
                        " 6: c006 sw ra,0(sp)\n"
                        " 8: c22a sw a0,4(sp)\n"
                        " a: c42e sw a1,8(sp)\n"
                        " c: c632 sw a2,12(sp)\n"
                        " e: 9f82 jalr t6\n"
                        " 10: 8faa mv t6,a0\n"
                        " 12: 4082 lw ra,0(sp)\n"
                        " 14: 4512 lw a0,4(sp)\n"
                        " 16: 45a2 lw a1,8(sp)\n"
                        " 18: 4632 lw a2,12(sp)\n"
                        " 1a: 0141 addi sp,sp,16\n"
                        " 1c: 00af8463 beq t6,a0,0x24\n"
                        " 20: 857e mv a0,t6\n"
                        " 22: 8082 ret"
                    >>,
                {S1, ResultReg} = ?BACKEND:call_primitive(
                    State0,
                    ?PRIM_PROCESS_SIGNAL_MESSAGES,
                    [ctx, jit_state]
                ),
                ?assertEqual(t6, ResultReg),
                S2 = ?BACKEND:return_if_not_equal_to_ctx(S1, {free, ResultReg}),
                Stream = ?BACKEND:stream(S2),
                ?assertEqual(dump_to_bin(Dump), Stream)
            end),
            ?_test(begin
                Dump =
                    <<
                        " 0: 05462f83 lw t6,84(a2)\n"
                        " 4: 1141 addi sp,sp,-16\n"
                        " 6: c006 sw ra,0(sp)\n"
                        " 8: c22a sw a0,4(sp)\n"
                        " a: c42e sw a1,8(sp)\n"
                        " c: c632 sw a2,12(sp)\n"
                        " e: 9f82 jalr t6\n"
                        " 10: 8faa mv t6,a0\n"
                        " 12: 4082 lw ra,0(sp)\n"
                        " 14: 4512 lw a0,4(sp)\n"
                        " 16: 45a2 lw a1,8(sp)\n"
                        " 18: 4632 lw a2,12(sp)\n"
                        " 1a: 0141 addi sp,sp,16\n"
                        " 1c: 8f7e mv t5,t6\n"
                        " 1e: 00af0463 beq t5,a0,0x26\n"
                        " 22: 857a mv a0,t5\n"
                        " 24: 8082 ret"
                    >>,
                {S1, ResultReg} = ?BACKEND:call_primitive(
                    State0,
                    ?PRIM_PROCESS_SIGNAL_MESSAGES,
                    [ctx, jit_state]
                ),
                ?assertEqual(t6, ResultReg),
                {S2, OtherReg} = ?BACKEND:copy_to_native_register(S1, ResultReg),
                ?assertEqual(t5, OtherReg),
                S3 = ?BACKEND:return_if_not_equal_to_ctx(S2, {free, OtherReg}),
                Stream = ?BACKEND:stream(S3),
                ?assertEqual(dump_to_bin(Dump), Stream)
            end)
        ]
    end,
    {setup, NewState, Cases}.
%% move_to_cp/2 applied to {y_reg, 0}; the expected listing is two loads
%% followed by a store to 92(a0).
move_to_cp_test() ->
    Listing =
        <<
            " 0: 01452f03 lw t5,20(a0)\n"
            " 4: 000f2f83 lw t6,0(t5)\n"
            " 8: 05f52e23 sw t6,92(a0)"
        >>,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:move_to_cp(St0, {y_reg, 0}),
    Emitted = ?BACKEND:stream(St1),
    ?assertEqual(dump_to_bin(Listing), Emitted).

%% increment_sp/2 with 7; the expected listing adds 28 to the word at 20(a0).
increment_sp_test() ->
    Listing =
        <<
            " 0: 01452f83 lw t6,20(a0)\n"
            " 4: 0ff1 addi t6,t6,28\n"
            " 6: 01f52a23 sw t6,20(a0)"
        >>,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:increment_sp(St0, 7),
    Emitted = ?BACKEND:stream(St1),
    ?assertEqual(dump_to_bin(Listing), Emitted).

%% if_block/3 over a range of conditions.
%%
%% The fixture loads {x_reg, 0} and {x_reg, 1} into native registers (RegA and
%% RegB); every case then starts from that same state. Each case checks the
%% emitted code and, except where noted, the list reported by used_regs/1:
%% [RegB, RegA] when the condition names RegA directly, [RegB] when it names
%% {free, RegA}.
if_block_test_() ->
    {setup,
        fun() ->
            Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
            {WithA, RegA} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
            {WithAB, RegB} = ?BACKEND:move_to_native_register(WithA, {x_reg, 1}),
            {WithAB, RegA, RegB}
        end,
        fun({State0, RegA, RegB}) ->
            %% The two fixture loads; every expected listing begins with them.
            Prologue = <<
                " 0: 01852f83 lw t6,24(a0)\n"
                " 4: 01c52f03 lw t5,28(a0)\n"
            >>,
            %% Block body shared by all but one case: RegB += 2.
            AddTwo = fun(BSt0) -> ?BACKEND:add(BSt0, RegB, 2) end,
            Both = [RegB, RegA],
            OnlyB = [RegB],
            %% One regular case: emit the block, compare the code against
            %% Prologue ++ Tail, then compare the used registers.
            Check = fun(Cond, Tail, UsedRegs) ->
                ?_test(begin
                    State1 = ?BACKEND:if_block(State0, Cond, AddTwo),
                    Stream = ?BACKEND:stream(State1),
                    ?assertEqual(dump_to_bin(<<Prologue/binary, Tail/binary>>), Stream),
                    ?assertEqual(UsedRegs, ?BACKEND:used_regs(State1))
                end)
            end,
            [
                Check(
                    {RegA, '<', 0},
                    <<
                        " 8: 000fd363 bgez t6,0xe\n"
                        " c: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {RegA, '<', RegB},
                    <<
                        " 8: 01efd363 bge t6,t5,0xe\n"
                        " c: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {RegA, '<', 42},
                    <<
                        " 8: 02a00e93 li t4,42\n"
                        " c: 01dfd363 bge t6,t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                %% Same shape, but a jump is appended after the block and the
                %% used registers are read from the state before that jump.
                ?_test(begin
                    State1 = ?BACKEND:if_block(State0, {RegA, '<', 1024}, AddTwo),
                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
                    Stream = ?BACKEND:stream(State2),
                    Dump = <<
                        Prologue/binary,
                        " 8: 40000e93 li t4,1024\n"
                        " c: 01dfd363 bge t6,t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2\n"
                        " 12: a0fd j 0x100"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream),
                    ?assertEqual(Both, ?BACKEND:used_regs(State1))
                end),
                Check(
                    {RegA, '==', 0},
                    <<
                        " 8: 000f9363 bnez t6,0xe\n"
                        " c: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {{free, RegA}, '==', 0},
                    <<
                        " 8: 000f9363 bnez t6,0xe\n"
                        " c: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {RegA, '==', -1},
                    <<
                        " 8: 5efd li t4,-1\n"
                        " a: 01df9363 bne t6,t4,0x10\n"
                        " e: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {'(int)', RegA, '==', 0},
                    <<
                        " 8: 000f9363 bnez t6,0xe\n"
                        " c: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {'(int)', {free, RegA}, '==', 0},
                    <<
                        " 8: 000f9363 bnez t6,0xe\n"
                        " c: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {RegA, '!=', ?TERM_NIL},
                    <<
                        " 8: 03b00e93 li t4,59\n"
                        " c: 01df8363 beq t6,t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {{free, RegA}, '!=', ?TERM_NIL},
                    <<
                        " 8: 03b00e93 li t4,59\n"
                        " c: 01df8363 beq t6,t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {'(int)', RegA, '!=', 42},
                    <<
                        " 8: 02a00e93 li t4,42\n"
                        " c: 01df8363 beq t6,t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                %% Immediate 1995: the listing materialises it in t4 before the
                %% compare. This case adds 1 instead of 2, appends a jump, and
                %% checks the emitted code only.
                ?_test(begin
                    State1 = ?BACKEND:if_block(
                        State0,
                        {RegA, '!=', 1995},
                        fun(BSt0) -> ?BACKEND:add(BSt0, RegB, 1) end
                    ),
                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
                    Stream = ?BACKEND:stream(State2),
                    Dump = <<
                        Prologue/binary,
                        " 8: 7cb00e93 li t4,1995\n"
                        " c: 01df8363 beq t6,t4,0x12\n"
                        " 10: 0f05 addi t5,t5,1\n"
                        " 12: a0fd j 0x100"
                    >>,
                    ?assertEqual(dump_to_bin(Dump), Stream)
                end),
                Check(
                    {'(int)', {free, RegA}, '!=', 42},
                    <<
                        " 8: 02a00e93 li t4,42\n"
                        " c: 01df8363 beq t6,t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {RegA, '==', ?TERM_NIL},
                    <<
                        " 8: 03b00e93 li t4,59\n"
                        " c: 01df9363 bne t6,t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {{free, RegA}, '==', ?TERM_NIL},
                    <<
                        " 8: 03b00e93 li t4,59\n"
                        " c: 01df9363 bne t6,t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {'(int)', RegA, '==', 42},
                    <<
                        " 8: 02a00e93 li t4,42\n"
                        " c: 01df9363 bne t6,t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {'(int)', {free, RegA}, '==', 42},
                    <<
                        " 8: 02a00e93 li t4,42\n"
                        " c: 01df9363 bne t6,t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {'(bool)', RegA, '==', false},
                    <<
                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
                        " c: 000ec363 bltz t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {'(bool)', {free, RegA}, '==', false},
                    <<
                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
                        " c: 000ec363 bltz t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {'(bool)', RegA, '!=', false},
                    <<
                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
                        " c: 000ed363 bgez t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {'(bool)', {free, RegA}, '!=', false},
                    <<
                        " 8: 01ff9e93 slli t4,t6,0x1f\n"
                        " c: 000ed363 bgez t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {RegA, '&', 16#7, '!=', 0},
                    <<
                        " 8: 007ffe93 andi t4,t6,7\n"
                        " c: 000e8363 beqz t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {RegA, '&', 16#5, '!=', 0},
                    <<
                        " 8: 005ffe93 andi t4,t6,5\n"
                        " c: 000e8363 beqz t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {{free, RegA}, '&', 16#7, '!=', 0},
                    <<
                        " 8: 007ffe93 andi t4,t6,7\n"
                        " c: 000e8363 beqz t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
                    <<
                        " 8: ffffce93 not t4,t6\n"
                        " c: 0ef2 slli t4,t4,0x1c\n"
                        " e: 000e8363 beqz t4,0x14\n"
                        " 12: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                %% With {free, RegA} the listing works on t6 in place.
                Check(
                    {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
                    <<
                        " 8: ffffcf93 not t6,t6\n"
                        " c: 0ff2 slli t6,t6,0x1c\n"
                        " e: 000f8363 beqz t6,0x14\n"
                        " 12: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {RegA, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
                    <<
                        " 8: 8efe mv t4,t6\n"
                        " a: 03f00e13 li t3,63\n"
                        " e: 01cefeb3 and t4,t4,t3\n"
                        " 12: 4e21 li t3,8\n"
                        " 14: 01ce8363 beq t4,t3,0x1a\n"
                        " 18: 0f09 addi t5,t5,2"
                    >>,
                    Both
                ),
                Check(
                    {{free, RegA}, '<', RegB},
                    <<
                        " 8: 01efd363 bge t6,t5,0xe\n"
                        " c: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                Check(
                    {
                        {free, RegA},
                        '&',
                        ?TERM_BOXED_TAG_MASK,
                        '!=',
                        ?TERM_BOXED_POSITIVE_INTEGER
                    },
                    <<
                        " 8: 03f00e93 li t4,63\n"
                        " c: 01dfffb3 and t6,t6,t4\n"
                        " 10: 4ea1 li t4,8\n"
                        " 12: 01df8363 beq t6,t4,0x18\n"
                        " 16: 0f09 addi t5,t5,2"
                    >>,
                    OnlyB
                ),
                %% Mask 16#3 compared against 0: expected to use ANDI.
                Check(
                    {RegA, '&', 16#3, '!=', 0},
                    <<
                        " 8: 003ffe93 andi t4,t6,3\n"
                        " c: 000e8363 beqz t4,0x12\n"
                        " 10: 0f09 addi t5,t5,2"
                    >>,
                    Both
                )
            ]
        end}.
%% if_else_block/4: the "then" branch adds 2 to the second register and the
%% "else" branch adds 4, with a jump over the else branch in between.
if_else_block_test() ->
    Listing =
        <<
            "0: 01852f83 lw t6,24(a0)\n"
            "4: 01c52f03 lw t5,28(a0)\n"
            "8: 03b00e93 li t4,59\n"
            "c: 01df9463 bne t6,t4,0x14\n"
            "10: 0f09 addi t5,t5,2\n"
            "12: a011 j 0x16\n"
            "14: 0f11 addi t5,t5,4"
        >>,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, CondReg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    {St2, AccReg} = ?BACKEND:move_to_native_register(St1, {x_reg, 1}),
    Then = fun(BSt0) -> ?BACKEND:add(BSt0, AccReg, 2) end,
    Else = fun(BSt0) -> ?BACKEND:add(BSt0, AccReg, 4) end,
    St3 = ?BACKEND:if_else_block(St2, {CondReg, '==', ?TERM_NIL}, Then, Else),
    Emitted = ?BACKEND:stream(St3),
    ?assertEqual(dump_to_bin(Listing), Emitted).

%% shift_right/3 in its two forms: on {free, Reg} and on a plain Reg.
shift_right_test_() ->
    [
        %% {free, Reg}: the same register must come back (asserted by the
        %% match on Reg) and the listing shifts t6 in place.
        ?_test(begin
            St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
            {St1, Reg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
            {St2, Reg} = ?BACKEND:shift_right(St1, {free, Reg}, 3),
            Emitted = ?BACKEND:stream(St2),
            Listing =
                <<
                    " 0: 01852f83 lw t6,24(a0)\n"
                    " 4: 003fdf93 srli t6,t6,0x3"
                >>,
            ?assertEqual(dump_to_bin(Listing), Emitted)
        end),
        %% Plain Reg: the result must land in a different register (t5 here).
        ?_test(begin
            St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
            {St1, Reg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
            {St2, Shifted} = ?BACKEND:shift_right(St1, Reg, 3),
            ?assertNotEqual(Shifted, Reg),
            Emitted = ?BACKEND:stream(St2),
            Listing =
                <<
                    " 0: 01852f83 lw t6,24(a0)\n"
                    " 4: 003fdf13 srli t5,t6,0x3"
                >>,
            ?assertEqual(dump_to_bin(Listing), Emitted)
        end)
    ].
%% shift_left/3 by 3 on a register loaded from {x_reg, 0}.
shift_left_test() ->
    Listing =
        <<
            "0: 01852f83 lw t6,24(a0)\n"
            "4: 0f8e slli t6,t6,0x3"
        >>,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    St2 = ?BACKEND:shift_left(St1, Reg, 3),
    Emitted = ?BACKEND:stream(St2),
    ?assertEqual(dump_to_bin(Listing), Emitted).

%% A jump table for labels 0..2, a call_only_or_schedule_next to label 2 and
%% two tail calls; update_branches/1 must patch the table and the branches.
%%
%% NOTE(review): the addi at 0x32 is annotated "# 0x42", which is not an
%% instruction boundary in this listing (the neighbours are 0x40 and 0x44) --
%% confirm this is intended.
call_only_or_schedule_next_and_label_relocation_test() ->
    Listing =
        <<
            " 0: 00000697 auipc a3,0x0\n"
            " 4: 04668067 jr 70(a3) # 0x46\n"
            " 8: 00000697 auipc a3,0x0\n"
            " c: 01068067 jr 16(a3) # 0x18\n"
            " 10: 00000697 auipc a3,0x0\n"
            " 14: 03068067 jr 48(a3) # 0x40\n"
            " 18: 0085af83 lw t6,8(a1)\n"
            " 1c: 1ffd addi t6,t6,-1\n"
            " 1e: 01f5a423 sw t6,8(a1)\n"
            " 22: 000f8663 beqz t6,0x2e\n"
            " 26: a829 j 0x40\n"
            " 28: 0001 nop\n"
            " 2a: 00000013 nop\n"
            " 2e: 00000f97 auipc t6,0x0\n"
            " 32: 0fd1 addi t6,t6,20 # 0x42\n"
            " 34: 0001 nop\n"
            " 36: 01f5a223 sw t6,4(a1)\n"
            " 3a: 00862f83 lw t6,8(a2)\n"
            " 3e: 8f82 jr t6\n"
            " 40: 00062f83 lw t6,0(a2)\n"
            " 44: 8f82 jr t6\n"
            " 46: 00462f83 lw t6,4(a2)\n"
            " 4a: 8f82 jr t6"
        >>,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Table = ?BACKEND:jump_table(St0, 2),
    AtLabel1 = ?BACKEND:add_label(Table, 1),
    AfterCall = ?BACKEND:call_only_or_schedule_next(AtLabel1, 2),
    AtLabel2 = ?BACKEND:add_label(AfterCall, 2),
    AfterPrim0 = ?BACKEND:call_primitive_last(AtLabel2, 0, [ctx, jit_state]),
    % OP_INT_CALL_END
    AtLabel0 = ?BACKEND:add_label(AfterPrim0, 0),
    AfterPrim1 = ?BACKEND:call_primitive_last(AtLabel0, 1, [ctx, jit_state]),
    Patched = ?BACKEND:update_branches(AfterPrim1),
    Emitted = ?BACKEND:stream(Patched),
    ?assertEqual(dump_to_bin(Listing), Emitted).
%% Same sequence as the plain label relocation test, but with 256+ bytes of
%% padding between the jump table and the function bodies, to force the
%% mov_immediate path.
%%
%% NOTE(review): the addi at 0x1a is annotated "# 0x2a", which is not an
%% instruction boundary in this listing (the neighbours are 0x28 and 0x2c) --
%% confirm this is intended.
call_only_or_schedule_next_and_label_relocation_large_gap_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    Table = ?BACKEND:jump_table(St0, 2),
    % Padding: 128 loads of {x_reg, 2} into a3. Each one is expected to take
    % ~2 bytes, i.e. ~256 bytes in total between the table and the code below.
    EmitLoad = fun(_, Acc) ->
        ?BACKEND:move_to_native_register(Acc, {x_reg, 2}, a3)
    end,
    Padded = lists:foldl(EmitLoad, Table, lists:seq(1, 128)),
    AtLabel1 = ?BACKEND:add_label(Padded, 1),
    AfterCall = ?BACKEND:call_only_or_schedule_next(AtLabel1, 2),
    AtLabel2 = ?BACKEND:add_label(AfterCall, 2),
    AfterPrim0 = ?BACKEND:call_primitive_last(AtLabel2, 0, [ctx, jit_state]),
    % OP_INT_CALL_END
    AtLabel0 = ?BACKEND:add_label(AfterPrim0, 0),
    AfterPrim1 = ?BACKEND:call_primitive_last(AtLabel0, 1, [ctx, jit_state]),
    Patched = ?BACKEND:update_branches(AfterPrim1),
    Emitted = ?BACKEND:stream(Patched),
    % Only the tail is compared. It starts at 0x118: a 3 x 8 = 24 byte jump
    % table followed by 128 compressed (2 byte) loads = 256 bytes.
    Listing = <<
        " 0: 0085af83 lw t6,8(a1)\n"
        " 4: 1ffd addi t6,t6,-1\n"
        " 6: 01f5a423 sw t6,8(a1)\n"
        " a: 000f8663 beqz t6,0x16\n"
        " e: a829 j 0x28\n"
        " 10: 0001 nop\n"
        " 12: 00000013 nop\n"
        " 16: 00000f97 auipc t6,0x0\n"
        " 1a: 0fd1 addi t6,t6,20 # 0x2a\n"
        " 1c: 0001 nop\n"
        " 1e: 01f5a223 sw t6,4(a1)\n"
        " 22: 00862f83 lw t6,8(a2)\n"
        " 26: 8f82 jr t6\n"
        " 28: 00062f83 lw t6,0(a2)\n"
        " 2c: 8f82 jr t6\n"
        " 2e: 00462f83 lw t6,4(a2)\n"
        " 32: 8f82 jr t6"
    >>,
    {_Skipped, Tail} = split_binary(Emitted, 16#118),
    ?assertEqual(dump_to_bin(Listing), Tail).
%% A BIF call whose argument is the literal 998238357: the listing builds it
%% with lui + addi. The sequence is primitive 8, primitive 15, a call through
%% the returned function pointer, an error check on the result, and a store
%% of the result into {x_reg, 0}.
call_bif_with_large_literal_integer_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, FuncPtr} = ?BACKEND:call_primitive(St0, 8, [jit_state, 2]),
    {St2, ArgReg} = ?BACKEND:call_primitive(St1, 15, [ctx, 998238357]),
    CallArgs = [ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg}],
    {St3, ResultReg} = ?BACKEND:call_func_ptr(St2, {free, FuncPtr}, CallArgs),
    OnZero = fun(BSt0) ->
        ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
    end,
    St4 = ?BACKEND:if_block(St3, {ResultReg, '==', 0}, OnZero),
    St5 = ?BACKEND:move_to_vm_register(St4, ResultReg, {x_reg, 0}),
    St6 = ?BACKEND:free_native_registers(St5, [ResultReg]),
    ?BACKEND:assert_all_native_free(St6),
    Emitted = ?BACKEND:stream(St6),
    Listing =
        <<
            " 0: 02062f83 lw t6,32(a2)\n"
            " 4: 1141 addi sp,sp,-16\n"
            " 6: c006 sw ra,0(sp)\n"
            " 8: c22a sw a0,4(sp)\n"
            " a: c42e sw a1,8(sp)\n"
            " c: c632 sw a2,12(sp)\n"
            " e: 852e mv a0,a1\n"
            " 10: 4589 li a1,2\n"
            " 12: 9f82 jalr t6\n"
            " 14: 8faa mv t6,a0\n"
            " 16: 4082 lw ra,0(sp)\n"
            " 18: 4512 lw a0,4(sp)\n"
            " 1a: 45a2 lw a1,8(sp)\n"
            " 1c: 4632 lw a2,12(sp)\n"
            " 1e: 0141 addi sp,sp,16\n"
            " 20: 03c62f03 lw t5,60(a2)\n"
            " 24: 1101 addi sp,sp,-32\n"
            " 26: c006 sw ra,0(sp)\n"
            " 28: c22a sw a0,4(sp)\n"
            " 2a: c42e sw a1,8(sp)\n"
            " 2c: c632 sw a2,12(sp)\n"
            " 2e: c87e sw t6,16(sp)\n"
            " 30: 3b7ff5b7 lui a1,0x3b7ff\n"
            " 34: 89558593 addi a1,a1,-1899 # 0x3b7fe895\n"
            " 38: 9f02 jalr t5\n"
            " 3a: 8f2a mv t5,a0\n"
            " 3c: 4082 lw ra,0(sp)\n"
            " 3e: 4512 lw a0,4(sp)\n"
            " 40: 45a2 lw a1,8(sp)\n"
            " 42: 4632 lw a2,12(sp)\n"
            " 44: 4fc2 lw t6,16(sp)\n"
            " 46: 02010113 addi sp,sp,32\n"
            " 4a: 1141 addi sp,sp,-16\n"
            " 4c: c006 sw ra,0(sp)\n"
            " 4e: c22a sw a0,4(sp)\n"
            " 50: c42e sw a1,8(sp)\n"
            " 52: c632 sw a2,12(sp)\n"
            " 54: 4581 li a1,0\n"
            " 56: 4605 li a2,1\n"
            " 58: 4d14 lw a3,24(a0)\n"
            " 5a: 877a mv a4,t5\n"
            " 5c: 9f82 jalr t6\n"
            " 5e: 8faa mv t6,a0\n"
            " 60: 4082 lw ra,0(sp)\n"
            " 62: 4512 lw a0,4(sp)\n"
            " 64: 45a2 lw a1,8(sp)\n"
            " 66: 4632 lw a2,12(sp)\n"
            " 68: 0141 addi sp,sp,16\n"
            " 6a: 000f9763 bnez t6,0x78\n"
            " 6e: 01862f83 lw t6,24(a2)\n"
            " 72: 07200613 li a2,114\n"
            " 76: 8f82 jr t6\n"
            " 78: 01f52c23 sw t6,24(a0)"
        >>,
    ?assertEqual(dump_to_bin(Listing), Emitted).

%% get_list pattern: mask {x_reg, 0} with ?TERM_PRIMARY_CLEAR_MASK, then copy
%% element 1 to {y_reg, 1} and element 0 to {y_reg, 0}.
get_list_test() ->
    Listing =
        <<
            "0: 01852f83 lw t6,24(a0)\n"
            "4: 4f0d li t5,3\n"
            "6: ffff4f13 not t5,t5\n"
            "a: 01efffb3 and t6,t6,t5\n"
            "e: 004fae83 lw t4,4(t6)\n"
            "12: 01452f03 lw t5,20(a0)\n"
            "16: 01df2223 sw t4,4(t5)\n"
            "1a: 000fae83 lw t4,0(t6)\n"
            "1e: 01452f03 lw t5,20(a0)\n"
            "22: 01df2023 sw t4,0(t5)"
        >>,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, ListReg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    St2 = ?BACKEND:and_(St1, ListReg, ?TERM_PRIMARY_CLEAR_MASK),
    St3 = ?BACKEND:move_array_element(St2, ListReg, 1, {y_reg, 1}),
    St4 = ?BACKEND:move_array_element(St3, ListReg, 0, {y_reg, 0}),
    St5 = ?BACKEND:free_native_registers(St4, [ListReg]),
    ?BACKEND:assert_all_native_free(St5),
    Emitted = ?BACKEND:stream(St5),
    ?assertEqual(dump_to_bin(Listing), Emitted).
%% is_integer pattern: when the immediate tag is not the integer tag, jump to
%% Label unless the term is boxed and its header tag is
%% ?TERM_BOXED_POSITIVE_INTEGER. Label is placed at 16#100 afterwards, so both
%% jumps are resolved by update_branches/1.
is_integer_test() ->
    Label = 1,
    Src = {x_reg, 0},
    JumpToLabel = fun(BSt0) -> ?BACKEND:jump_to_label(BSt0, Label) end,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, Src),
    NotSmallInt = fun(MSt0) ->
        MSt1 = ?BACKEND:if_block(
            MSt0, {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, JumpToLabel
        ),
        MSt2 = ?BACKEND:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
        MSt3 = ?BACKEND:move_array_element(MSt2, Reg, 0, Reg),
        ?BACKEND:if_block(
            MSt3,
            {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
            JumpToLabel
        )
    end,
    St2 = ?BACKEND:if_block(
        St1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, NotSmallInt
    ),
    St3 = ?BACKEND:free_native_registers(St2, [Reg]),
    ?BACKEND:assert_all_native_free(St3),
    St4 = ?BACKEND:add_label(St3, Label, 16#100),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Listing =
        <<
            " 0: 01852f83 lw t6,24(a0)\n"
            " 4: ffffcf13 not t5,t6\n"
            " 8: 0f72 slli t5,t5,0x1c\n"
            " a: 020f0f63 beqz t5,0x48\n"
            " e: 8f7e mv t5,t6\n"
            " 10: 4e8d li t4,3\n"
            " 12: 01df7f33 and t5,t5,t4\n"
            " 16: 4e89 li t4,2\n"
            " 18: 01df0663 beq t5,t4,0x24\n"
            " 1c: a0d5 j 0x100\n"
            " 1e: 0001 nop\n"
            " 20: 00000013 nop\n"
            " 24: 4f0d li t5,3\n"
            " 26: ffff4f13 not t5,t5\n"
            " 2a: 01efffb3 and t6,t6,t5\n"
            " 2e: 000faf83 lw t6,0(t6)\n"
            " 32: 03f00f13 li t5,63\n"
            " 36: 01efffb3 and t6,t6,t5\n"
            " 3a: 4f21 li t5,8\n"
            " 3c: 01ef8663 beq t6,t5,0x48\n"
            " 40: a0c1 j 0x100\n"
            " 42: 0001 nop\n"
            " 44: 00000013 nop"
        >>,
    ?assertEqual(dump_to_bin(Listing), Emitted).

%% Emit "if Cond then jump to Label" through backend module MMod and return
%% the state produced by MMod:if_block/3.
cond_jump_to_label(Cond, Label, MMod, MSt0) ->
    Jump = fun(BSt0) -> MMod:jump_to_label(BSt0, Label) end,
    MMod:if_block(MSt0, Cond, Jump).

%% Keep the unoptimized version to test the and case.
%% is_number pattern. Like is_integer, except that the final check is an
%% 'and' of two header-tag tests (positive integer and float), so the jump is
%% taken only when both differ.
is_number_test() ->
    Label = 1,
    Src = {x_reg, 0},
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, Src),
    NotSmallInt = fun(BSt0) ->
        BSt1 = cond_jump_to_label(
            {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, ?BACKEND, BSt0
        ),
        BSt2 = ?BACKEND:and_(BSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
        BSt3 = ?BACKEND:move_array_element(BSt2, Reg, 0, Reg),
        % Only the second operand frees Reg; the first must leave it intact.
        NeitherIntNorFloat =
            {'and', [
                {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
                {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT}
            ]},
        cond_jump_to_label(NeitherIntNorFloat, Label, ?BACKEND, BSt3)
    end,
    St2 = ?BACKEND:if_block(
        St1, {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, NotSmallInt
    ),
    St3 = ?BACKEND:free_native_registers(St2, [Reg]),
    ?BACKEND:assert_all_native_free(St3),
    St4 = ?BACKEND:add_label(St3, Label, 16#100),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Listing =
        <<
            " 0: 01852f83 lw t6,24(a0)\n"
            " 4: ffffcf13 not t5,t6\n"
            " 8: 0f72 slli t5,t5,0x1c\n"
            " a: 040f0763 beqz t5,0x58\n"
            " e: 8f7e mv t5,t6\n"
            " 10: 4e8d li t4,3\n"
            " 12: 01df7f33 and t5,t5,t4\n"
            " 16: 4e89 li t4,2\n"
            " 18: 01df0663 beq t5,t4,0x24\n"
            " 1c: a0d5 j 0x100\n"
            " 1e: 0001 nop\n"
            " 20: 00000013 nop\n"
            " 24: 4f0d li t5,3\n"
            " 26: ffff4f13 not t5,t5\n"
            " 2a: 01efffb3 and t6,t6,t5\n"
            " 2e: 000faf83 lw t6,0(t6)\n"
            " 32: 8f7e mv t5,t6\n"
            " 34: 03f00e93 li t4,63\n"
            " 38: 01df7f33 and t5,t5,t4\n"
            " 3c: 4ea1 li t4,8\n"
            " 3e: 01df0d63 beq t5,t4,0x58\n"
            " 42: 03f00f13 li t5,63\n"
            " 46: 01efffb3 and t6,t6,t5\n"
            " 4a: 4f61 li t5,24\n"
            " 4c: 01ef8663 beq t6,t5,0x58\n"
            " 50: a845 j 0x100\n"
            " 52: 0001 nop\n"
            " 54: 00000013 nop"
        >>,
    ?assertEqual(dump_to_bin(Listing), Emitted).
%% is_boolean pattern with the label placed at 16#100 after the code has been
%% emitted: the listing resolves the jump to a compressed `j` followed by nop
%% padding.
is_boolean_test() ->
    Label = 1,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    NotFalse = fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end,
    NotTrue = fun(BSt0) ->
        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, NotFalse)
    end,
    St2 = ?BACKEND:if_block(St1, {Reg, '!=', ?TRUE_ATOM}, NotTrue),
    St3 = ?BACKEND:free_native_registers(St2, [Reg]),
    ?BACKEND:assert_all_native_free(St3),
    St4 = ?BACKEND:add_label(St3, Label, 16#100),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Listing = <<
        " 0: 01852f83 lw t6,24(a0)\n"
        " 4: 04b00f13 li t5,75\n"
        " 8: 01ef8963 beq t6,t5,0x1a\n"
        " c: 4f2d li t5,11\n"
        " e: 01ef8663 beq t6,t5,0x1a\n"
        " 12: a0fd j 0x100\n"
        " 14: 0001 nop\n"
        " 16: 00000013 nop"
    >>,
    ?assertEqual(dump_to_bin(Listing), Emitted).

%% Same pattern with the label at 16#1000: the listing resolves the jump to a
%% 4-byte `j` followed by one nop.
is_boolean_far_test() ->
    Label = 1,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, Reg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
    NotFalse = fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end,
    NotTrue = fun(BSt0) ->
        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, NotFalse)
    end,
    St2 = ?BACKEND:if_block(St1, {Reg, '!=', ?TRUE_ATOM}, NotTrue),
    St3 = ?BACKEND:free_native_registers(St2, [Reg]),
    ?BACKEND:assert_all_native_free(St3),
    St4 = ?BACKEND:add_label(St3, Label, 16#1000),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Listing =
        <<
            " 0: 01852f83 lw t6,24(a0)\n"
            " 4: 04b00f13 li t5,75\n"
            " 8: 01ef8963 beq t6,t5,0x1a\n"
            " c: 4f2d li t5,11\n"
            " e: 01ef8663 beq t6,t5,0x1a\n"
            " 12: 7ef0006f j 0x1000\n"
            " 16: 00000013 nop"
        >>,
    ?assertEqual(dump_to_bin(Listing), Emitted).
%% is_boolean pattern where the label (at 16#1000) is registered before any
%% code is emitted: the listing uses an auipc + jr pair for the jump.
is_boolean_far_known_test() ->
    Label = 1,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    St1 = ?BACKEND:add_label(St0, Label, 16#1000),
    {St2, Reg} = ?BACKEND:move_to_native_register(St1, {x_reg, 0}),
    NotFalse = fun(BSt1) -> ?BACKEND:jump_to_label(BSt1, Label) end,
    NotTrue = fun(BSt0) ->
        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, NotFalse)
    end,
    St3 = ?BACKEND:if_block(St2, {Reg, '!=', ?TRUE_ATOM}, NotTrue),
    St4 = ?BACKEND:free_native_registers(St3, [Reg]),
    ?BACKEND:assert_all_native_free(St4),
    St5 = ?BACKEND:update_branches(St4),
    Emitted = ?BACKEND:stream(St5),
    Listing =
        <<
            " 0: 01852f83 lw t6,24(a0)\n"
            " 4: 04b00f13 li t5,75\n"
            " 8: 01ef8963 beq t6,t5,0x1a\n"
            " c: 4f2d li t5,11\n"
            " e: 01ef8663 beq t6,t5,0x1a\n"
            " 12: 00001f17 auipc t5,0x1\n"
            " 16: feef0067 jr -18(t5) # 0x1000"
        >>,
    ?assertEqual(dump_to_bin(Listing), Emitted).

%% OP_WAIT_TIMEOUT pattern, built from set_continuation_to_offset/1 and
%% continuation_entry_point/1.
%%
%% NOTE(review): the addi at 0x4 is annotated "# 0x20", which is not an
%% instruction boundary in this listing (the neighbours are 0x1e and 0x22) --
%% confirm this is intended.
wait_timeout_test() ->
    Label = 42,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),

    % First half: set the continuation, then tail-call the wait primitive.
    {St1, ResumeRef} = ?BACKEND:set_continuation_to_offset(St0),
    {St2, TimeoutReg} = ?BACKEND:move_to_native_register(St1, 5000),
    St3 = ?BACKEND:call_primitive_last(St2, ?PRIM_WAIT_TIMEOUT, [
        ctx, jit_state, {free, TimeoutReg}, Label
    ]),

    % Second half: the code placed at the continuation offset.
    St4 = ?BACKEND:add_label(St3, ResumeRef),
    St5 = ?BACKEND:continuation_entry_point(St4),
    {St6, SignalRes} = ?BACKEND:call_primitive(St5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
        ctx, jit_state
    ]),
    St7 = ?BACKEND:return_if_not_equal_to_ctx(St6, {free, SignalRes}),
    % ?WAITING_TIMEOUT_EXPIRED
    {St8, FlagsRes} = ?BACKEND:call_primitive(St7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
    OnZeroFlags = fun(BlockSt) ->
        ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
            ctx, jit_state, Label
        ])
    end,
    St9 = ?BACKEND:if_block(St8, {{free, FlagsRes}, '==', 0}, OnZeroFlags),
    St10 = ?BACKEND:update_branches(St9),

    Emitted = ?BACKEND:stream(St10),
    Listing =
        <<
            " 0: 00000f97 auipc t6,0x0\n"
            " 4: 020f8f93 addi t6,t6,32 # 0x20\n"
            " 8: 01f5a223 sw t6,4(a1)\n"
            " c: 6f85 lui t6,0x1\n"
            " e: 388f8f93 addi t6,t6,904 # 0x1388\n"
            " 12: 07862f03 lw t5,120(a2)\n"
            " 16: 867e mv a2,t6\n"
            " 18: 02a00693 li a3,42\n"
            " 1c: 8f02 jr t5\n"
            " 1e: 05462f83 lw t6,84(a2)\n"
            " 22: 1141 addi sp,sp,-16\n"
            " 24: c006 sw ra,0(sp)\n"
            " 26: c22a sw a0,4(sp)\n"
            " 28: c42e sw a1,8(sp)\n"
            " 2a: c632 sw a2,12(sp)\n"
            " 2c: 9f82 jalr t6\n"
            " 2e: 8faa mv t6,a0\n"
            " 30: 4082 lw ra,0(sp)\n"
            " 32: 4512 lw a0,4(sp)\n"
            " 34: 45a2 lw a1,8(sp)\n"
            " 36: 4632 lw a2,12(sp)\n"
            " 38: 0141 addi sp,sp,16\n"
            " 3a: 00af8463 beq t6,a0,0x42\n"
            " 3e: 857e mv a0,t6\n"
            " 40: 8082 ret\n"
            " 42: 08400f93 li t6,132\n"
            " 46: 9fb2 add t6,t6,a2\n"
            " 48: 000faf83 lw t6,0(t6)\n"
            " 4c: 1141 addi sp,sp,-16\n"
            " 4e: c006 sw ra,0(sp)\n"
            " 50: c22a sw a0,4(sp)\n"
            " 52: c42e sw a1,8(sp)\n"
            " 54: c632 sw a2,12(sp)\n"
            " 56: 4589 li a1,2\n"
            " 58: 9f82 jalr t6\n"
            " 5a: 8faa mv t6,a0\n"
            " 5c: 4082 lw ra,0(sp)\n"
            " 5e: 4512 lw a0,4(sp)\n"
            " 60: 45a2 lw a1,8(sp)\n"
            " 62: 4632 lw a2,12(sp)\n"
            " 64: 0141 addi sp,sp,16\n"
            " 66: 000f9763 bnez t6,0x74\n"
            " 6a: 07c62f83 lw t6,124(a2)\n"
            " 6e: 02a00613 li a2,42\n"
            " 72: 8f82 jr t6"
        >>,
    ?assertEqual(dump_to_bin(Listing), Emitted).
%% OP_WAIT pattern, built from set_continuation_to_label/2.
%% update_branches/1 is not called here, so the listing still contains
%% 0xffffffff words ahead of the store.
%% NOTE(review): the offsets printed for the 0xffffffff rows advance by 2 and
%% 4 alternately although every row is a 4-byte word -- presumably
%% dump_to_bin/1 ignores the offset column; confirm.
wait_test() ->
    Listing =
        <<
            " 0: ffffffff .insn 4, 0xffffffff\n"
            " 4: ffffffff .insn 4, 0xffffffff\n"
            " 6: ffffffff .insn 4, 0xffffffff\n"
            " a: ffffffff .insn 4, 0xffffffff\n"
            " c: ffffffff .insn 4, 0xffffffff\n"
            " 10: ffffffff .insn 4, 0xffffffff\n"
            " 12: ffffffff .insn 4, 0xffffffff\n"
            " 16: ffffffff .insn 4, 0xffffffff\n"
            " 18: ffffffff .insn 4, 0xffffffff\n"
            " 1c: ffffffff .insn 4, 0xffffffff\n"
            " 1e: ffffffff .insn 4, 0xffffffff\n"
            " 22: ffffffff .insn 4, 0xffffffff\n"
            " 24: ffffffff .insn 4, 0xffffffff\n"
            " 28: ffffffff .insn 4, 0xffffffff\n"
            " 2c: 01f5a223 sw t6,4(a1)\n"
            " 30: 07462f83 lw t6,116(a2)\n"
            " 34: 8f82 jr t6"
        >>,
    Label = 2,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),

    Table = ?BACKEND:jump_table(St0, 5),
    AtLabel1 = ?BACKEND:add_label(Table, 1),
    WithCont = ?BACKEND:set_continuation_to_label(AtLabel1, Label),
    Final = ?BACKEND:call_primitive_last(WithCont, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),

    Emitted = ?BACKEND:stream(Final),
    ?assertEqual(dump_to_bin(Listing), Emitted).
%% return_labels_and_lines/2 with two labels (1 at offset 16, 2 at offset 32)
%% and two {Line, Offset} pairs.
return_labels_and_lines_test() ->
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),

    % Labels are registered out of order on purpose: 2 first, then 1.
    St1 = ?BACKEND:add_label(St0, 2, 32),
    St2 = ?BACKEND:add_label(St1, 1, 16),

    % {Line, Offset} pairs, already sorted
    LinesByOffset = [{10, 16}, {20, 32}],

    St3 = ?BACKEND:return_labels_and_lines(St2, LinesByOffset),
    Emitted = ?BACKEND:stream(St3),

    % Going by the expected listing below: 8 bytes of code (auipc 4, addi 2,
    % ret 2) followed by 28 bytes of table data (fourteen 16-bit words), i.e.
    % 36 bytes in total. The size check is only a lower bound; the exact
    % comparison at the end is what pins the layout.
    ?assert(byte_size(Emitted) >= 32),

    % Code: auipc a0, 0 + addi a0, a0, 12 + ret, then the two tables.
    % The words after `ret` presumably encode, for each table, a count
    % followed by (id, offset) entries -- the label ids 1 and 2, the line
    % numbers 10 and 20 and the offsets 0x10 and 0x20 are all visible in the
    % raw halfwords. The mnemonics on those rows are only the disassembler's
    % reading of data.
    Listing =
        <<
            " 0: 00000517 auipc a0,0x0\n"
            " 4: 0531 addi a0,a0,12 # 0xc\n"
            " 6: 8082 ret\n"
            " 8: 0200 addi s0,sp,256\n"
            " a: 0100 addi s0,sp,128\n"
            " c: 0000 unimp\n"
            " e: 1000 addi s0,sp,32\n"
            " 10: 0200 addi s0,sp,256\n"
            " 12: 0000 unimp\n"
            " 14: 2000 fld fs0,0(s0)\n"
            " 16: 0200 addi s0,sp,256\n"
            " 18: 0a00 addi s0,sp,272\n"
            " 1a: 0000 unimp\n"
            " 1c: 1000 addi s0,sp,32\n"
            " 1e: 1400 addi s0,sp,544\n"
            " 20: 0000 unimp\n"
            " 22: 2000 fld fs0,0(s0)"
        >>,
    ?assertEqual(dump_to_bin(Listing), Emitted).
%% gc_bif2 pattern: fetch an imported BIF pointer, then call it with a
%% {free, {x_reg, X}} argument (and a {y_reg, 0} argument before it).
gc_bif2_test() ->
    Listing =
        <<
            " 0: 02062f83 lw t6,32(a2)\n"
            " 4: 1141 addi sp,sp,-16\n"
            " 6: c006 sw ra,0(sp)\n"
            " 8: c22a sw a0,4(sp)\n"
            " a: c42e sw a1,8(sp)\n"
            " c: c632 sw a2,12(sp)\n"
            " e: 852e mv a0,a1\n"
            " 10: 02a00593 li a1,42\n"
            " 14: 9f82 jalr t6\n"
            " 16: 8faa mv t6,a0\n"
            " 18: 4082 lw ra,0(sp)\n"
            " 1a: 4512 lw a0,4(sp)\n"
            " 1c: 45a2 lw a1,8(sp)\n"
            " 1e: 4632 lw a2,12(sp)\n"
            " 20: 0141 addi sp,sp,16\n"
            " 22: 1141 addi sp,sp,-16\n"
            " 24: c006 sw ra,0(sp)\n"
            " 26: c22a sw a0,4(sp)\n"
            " 28: c42e sw a1,8(sp)\n"
            " 2a: c632 sw a2,12(sp)\n"
            " 2c: 4581 li a1,0\n"
            " 2e: 460d li a2,3\n"
            " 30: 01452f03 lw t5,20(a0)\n"
            " 34: 000f2683 lw a3,0(t5)\n"
            " 38: 4d18 lw a4,24(a0)\n"
            " 3a: 9f82 jalr t6\n"
            " 3c: 8faa mv t6,a0\n"
            " 3e: 4082 lw ra,0(sp)\n"
            " 40: 4512 lw a0,4(sp)\n"
            " 42: 45a2 lw a1,8(sp)\n"
            " 44: 4632 lw a2,12(sp)\n"
            " 46: 0141 addi sp,sp,16"
        >>,
    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
    {St1, BifPtr} = ?BACKEND:call_primitive(St0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]),
    BifArgs = [ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}],
    {St2, _ResultReg} = ?BACKEND:call_func_ptr(St1, {free, BifPtr}, BifArgs),

    Emitted = ?BACKEND:stream(St2),
    ?assertEqual(dump_to_bin(Listing), Emitted).
+ +%% Test case where parameter value is in a1 +memory_ensure_free_with_roots_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [ + ctx, jit_state, {free, a1}, 4, 1 + ]), + + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 0b000f93 li t6,176\n" + " 4: 9fb2 add t6,t6,a2\n" + " 6: 000faf83 lw t6,0(t6)\n" + " a: 1141 addi sp,sp,-16\n" + " c: c006 sw ra,0(sp)\n" + " e: c22a sw a0,4(sp)\n" + " 10: c42e sw a1,8(sp)\n" + " 12: c632 sw a2,12(sp)\n" + " 14: 8f2e mv t5,a1\n" + " 16: 867a mv a2,t5\n" + " 18: 4691 li a3,4\n" + " 1a: 4705 li a4,1\n" + " 1c: 9f82 jalr t6\n" + " 1e: 8faa mv t6,a0\n" + " 20: 4082 lw ra,0(sp)\n" + " 22: 4512 lw a0,4(sp)\n" + " 24: 45a2 lw a1,8(sp)\n" + " 26: 4632 lw a2,12(sp)\n" + " 28: 0141 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +call_ext_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + State2 = ?BACKEND:call_primitive_with_cp(State1, 4, [ctx, jit_state, 2, 5, -1]), + ?BACKEND:assert_all_native_free(State2), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f9b63 bnez t6,0x20\n" + " e: 00000f97 auipc t6,0x0\n" + " 12: 0fc9 addi t6,t6,18 # 0x20\n" + " 14: 0001 nop\n" + " 16: 01f5a223 sw t6,4(a1)\n" + " 1a: 00862f83 lw t6,8(a2)\n" + " 1e: 8f82 jr t6\n" + " 20: 0005af03 lw t5,0(a1)\n" + " 24: 000f2f03 lw t5,0(t5)\n" + " 28: 0f62 slli t5,t5,0x18\n" + " 2a: 11800f93 li t6,280\n" + " 2e: 00000013 nop\n" + " 32: 01ff6f33 or t5,t5,t6\n" + " 36: 05e52e23 sw t5,92(a0)\n" + " 3a: 01062f83 lw t6,16(a2)\n" + " 3e: 4609 li a2,2\n" + " 40: 4695 li a3,5\n" + " 42: 577d li a4,-1\n" + " 44: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +call_fun_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0), + FuncReg = {x_reg, 0}, + ArgsCount = 0, + {State2, Reg} = ?BACKEND:move_to_native_register(State1, FuncReg), + {State3, RegCopy} = ?BACKEND:copy_to_native_register(State2, Reg), + State4 = ?BACKEND:if_block( + State3, {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy + ]) + end + ), + State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK), + State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy), + State7 = ?BACKEND:if_block( + State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy + ]) + end + ), + State8 = ?BACKEND:free_native_registers(State7, [RegCopy]), + State9 = ?BACKEND:call_primitive_with_cp(State8, ?PRIM_CALL_FUN, [ + ctx, jit_state, Reg, ArgsCount + ]), + ?BACKEND:assert_all_native_free(State9), + Stream = ?BACKEND:stream(State9), + Dump = + << + " 0: 0085af83 lw t6,8(a1)\n" + " 4: 1ffd addi t6,t6,-1\n" + " 6: 01f5a423 sw t6,8(a1)\n" + " a: 000f9b63 bnez t6,0x20\n" + " e: 00000f97 auipc t6,0x0\n" + " 12: 0fc9 addi t6,t6,18 # 0x20\n" + " 14: 0001 nop\n" + " 16: 01f5a223 sw t6,4(a1)\n" + " 1a: 00862f83 lw t6,8(a2)\n" + " 1e: 8f82 jr t6\n" + " 20: 01852f83 lw t6,24(a0)\n" + " 24: 8f7e mv t5,t6\n" + " 26: 8efa mv t4,t5\n" + " 28: 4e0d li t3,3\n" + " 2a: 01cefeb3 and t4,t4,t3\n" + " 2e: 4e09 li t3,2\n" + " 30: 01ce8a63 beq t4,t3,0x44\n" + " 34: 04c62f83 lw t6,76(a2)\n" + " 38: 03800613 li a2,56\n" + " 3c: 18b00693 li a3,395\n" + " 40: 877a mv a4,t5\n" + " 42: 8f82 jr t6\n" + " 44: 4e8d li t4,3\n" + " 46: fffece93 not t4,t4\n" + " 4a: 01df7f33 and t5,t5,t4\n" + " 4e: 000f2f03 lw 
t5,0(t5)\n" + " 52: 8efa mv t4,t5\n" + " 54: 03f00e13 li t3,63\n" + " 58: 01cefeb3 and t4,t4,t3\n" + " 5c: 4e51 li t3,20\n" + " 5e: 01ce8a63 beq t4,t3,0x72\n" + " 62: 04c62f83 lw t6,76(a2)\n" + " 66: 06600613 li a2,102\n" + " 6a: 18b00693 li a3,395\n" + " 6e: 877a mv a4,t5\n" + " 70: 8f82 jr t6\n" + " 72: 0005ae83 lw t4,0(a1)\n" + " 76: 000eae83 lw t4,0(t4)\n" + " 7a: 0ee2 slli t4,t4,0x18\n" + " 7c: 27000f13 li t5,624\n" + " 80: 00000013 nop\n" + " 84: 01eeeeb3 or t4,t4,t5\n" + " 88: 05d52e23 sw t4,92(a0)\n" + " 8c: 08000f13 li t5,128\n" + " 90: 9f32 add t5,t5,a2\n" + " 92: 000f2f03 lw t5,0(t5)\n" + " 96: 867e mv a2,t6\n" + " 98: 4681 li a3,0\n" + " 9a: 8f02 jr t5" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +move_to_vm_register_test0(State, Source, Dest, Dump) -> + State1 = ?BACKEND:move_to_vm_register(State, Source, Dest), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + ?assertEqual(dump_to_bin(Dump), Stream). + +move_to_vm_register_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + move_to_vm_register_test0(State0, 0, {x_reg, 0}, << + " 0: 4f81 li t6,0\n" + " 2: 01f52c23 sw t6,24(a0)\n" + " 6: a8ed j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {x_reg, extra}, << + " 0: 4f81 li t6,0\n" + " 2: 05f52c23 sw t6,88(a0)\n" + " 6: a8ed j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {ptr, t5}, << + " 0: 4f81 li t6,0\n" + " 2: 01ff2023 sw t6,0(t5)\n" + " 6: a8ed j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {y_reg, 2}, << + " 0: 4f01 li t5,0\n" + " 2: 01452f83 lw t6,20(a0)\n" + " 6: 01efa423 sw t5,8(t6)\n" + " a: a8dd j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 0, {y_reg, 20}, << + " 0: 4f01 li t5,0\n" + " 2: 01452f83 lw t6,20(a0)\n" + " 6: 05efa823 sw t5,80(t6)\n" + " a: a8dd j 0x100" + >>) + end), + 
%% Test: Immediate to x_reg + ?_test(begin + move_to_vm_register_test0(State0, 42, {x_reg, 0}, << + " 0: 02a00f93 li t6,42\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: a8e5 j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {x_reg, extra}, << + " 0: 02a00f93 li t6,42\n" + " 4: 05f52c23 sw t6,88(a0)\n" + " 8: a8e5 j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 2}, << + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 01efa423 sw t5,8(t6)\n" + " c: a8d5 j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 20}, << + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 05efa823 sw t5,80(t6)\n" + " c: a8d5 j 0x100" + >>) + end), + %% Test: Immediate to ptr + ?_test(begin + move_to_vm_register_test0(State0, 99, {ptr, a3}, << + " 0: 06300f93 li t6,99\n" + " 4: 01f6a023 sw t6,0(a3)\n" + " 8: a8e5 j 0x100" + >>) + end), + %% Test: x_reg to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, << + " 0: 01c52f83 lw t6,28(a0)\n" + " 4: 03f52023 sw t6,32(a0)\n" + " 8: a8e5 j 0x100" + >>) + end), + %% Test: x_reg to ptr + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, a1}, << + " 0: 01c52f83 lw t6,28(a0)\n" + " 4: 01f5a023 sw t6,0(a1)\n" + " 8: a8e5 j 0x100" + >>) + end), + %% Test: ptr to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {ptr, t3}, {x_reg, 3}, << + " 0: 000e2f83 lw t6,0(t3)\n" + " 4: 03f52223 sw t6,36(a0)\n" + " 8: a8e5 j 0x100" + >>) + end), + %% Test: x_reg to y_reg + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01452f03 lw t5,20(a0)\n" + " 8: 01ff2223 sw t6,4(t5)\n" + " c: a8d5 j 0x100" + >>) + end), + %% Test: y_reg to x_reg + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 000f2f83 lw t6,0(t5)\n" + " 8: 03f52223 sw t6,36(a0)\n" + " c: a8d5 j 
0x100" + >>) + end), + %% Test: y_reg to y_reg + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 004f2f83 lw t6,4(t5)\n" + " 8: 03f52223 sw t6,36(a0)\n" + " c: a8d5 j 0x100" + >>) + end), + %% Test: Native register to x_reg + ?_test(begin + move_to_vm_register_test0(State0, t4, {x_reg, 0}, << + " 0: 01d52c23 sw t4,24(a0)\n" + " 4: a8f5 j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, t5, {x_reg, extra}, << + " 0: 05e52c23 sw t5,88(a0)\n" + " 4: a8f5 j 0x100" + >>) + end), + %% Test: Native register to ptr + ?_test(begin + move_to_vm_register_test0(State0, t3, {ptr, a3}, << + " 0: 01c6a023 sw t3,0(a3)\n" + " 4: a8f5 j 0x100" + >>) + end), + %% Test: Native register to y_reg + ?_test(begin + move_to_vm_register_test0(State0, a1, {y_reg, 0}, << + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 00bfa023 sw a1,0(t6)\n" + " 8: a8e5 j 0x100" + >>) + end), + %% Test: Large immediate to x_reg (uses lui + addi in RISC-V) + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, << + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01f52c23 sw t6,24(a0)\n" + " c: a8d5 j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, << + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 05f52c23 sw t6,88(a0)\n" + " c: a8d5 j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, << + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01452f03 lw t5,20(a0)\n" + " c: 01ff2423 sw t6,8(t5)\n" + " 10: a8c5 j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, << + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01452f03 lw t5,20(a0)\n" + " c: 05ff2823 sw t6,80(t5)\n" + " 10: a8c5 j 0x100" + >>) + end), + %% Test: 
Large immediate to ptr + ?_test(begin + move_to_vm_register_test0(State0, 16#12345678, {ptr, a3}, << + " 0: 12345fb7 lui t6,0x12345\n" + " 4: 678f8f93 addi t6,t6,1656 # 0x12345678\n" + " 8: 01f6a023 sw t6,0(a3)\n" + " c: a8d5 j 0x100" + >>) + end), + %% Test: x_reg to y_reg (high index) + ?_test(begin + move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, << + " 0: 05452f83 lw t6,84(a0)\n" + " 4: 01452f03 lw t5,20(a0)\n" + " 8: 07ff2e23 sw t6,124(t5)\n" + " c: a8d5 j 0x100" + >>) + end), + %% Test: y_reg to x_reg (high index) + ?_test(begin + move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 07cf2f83 lw t6,124(t5)\n" + " 8: 05f52a23 sw t6,84(a0)\n" + " c: a8d5 j 0x100" + >>) + end), + %% Test: Large y_reg index (32) that exceeds str immediate offset limit + ?_test(begin + move_to_vm_register_test0(State0, 42, {y_reg, 32}, << + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 08000e93 li t4,128\n" + " c: 9efe add t4,t4,t6\n" + " e: 01eea023 sw t5,0(t4)\n" + " 12: a0fd j 0x100" + >>) + end), + %% Test: Negative immediate to x_reg + ?_test(begin + move_to_vm_register_test0(State0, -1, {x_reg, 0}, << + " 0: 5ffd li t6,-1\n" + " 2: 01f52c23 sw t6,24(a0)\n" + " 6: a8ed j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, -100, {x_reg, 0}, << + " 0: f9c00f93 li t6,-100\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: a8e5 j 0x100" + >>) + end), + ?_test(begin + move_to_vm_register_test0(State0, -1000, {x_reg, 0}, << + " 0: c1800f93 li t6,-1000\n" + " 4: 01f52c23 sw t6,24(a0)\n" + " 8: a8e5 j 0x100" + >>) + end) + ] + end}. + +move_array_element_test0(State, Reg, Index, Dest, Dump) -> + State1 = ?BACKEND:move_array_element(State, Reg, Index, Dest), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +move_array_element_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_array_element: reg[x] to x_reg + ?_test(begin + move_array_element_test0(State0, a3, 2, {x_reg, 0}, << + " 0: 0086af83 lw t6,8(a3)\n" + " 4: 01f52c23 sw t6,24(a0)" + >>) + end), + %% move_array_element: reg[x] to ptr + ?_test(begin + move_array_element_test0(State0, a3, 3, {ptr, t4}, << + " 0: 00c6af83 lw t6,12(a3)\n" + " 4: 01fea023 sw t6,0(t4)" + >>) + end), + %% move_array_element: reg[x] to y_reg + ?_test(begin + move_array_element_test0(State0, a3, 1, {y_reg, 2}, << + " 0: 0046af03 lw t5,4(a3)\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 01efa423 sw t5,8(t6)" + >>) + end), + %% move_array_element: reg[x] to native reg (t4) + ?_test(begin + move_array_element_test0(State0, a3, 1, t4, << + " 0: 0046ae83 lw t4,4(a3)" + >>) + end), + %% move_array_element: reg[x] to y_reg + ?_test(begin + move_array_element_test0(State0, a3, 7, {y_reg, 31}, << + " 0: 01c6af03 lw t5,28(a3)\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 07efae23 sw t5,124(t6)" + >>) + end), + %% move_array_element: reg[x] to x_reg + ?_test(begin + move_array_element_test0(State0, a3, 7, {x_reg, 15}, << + " 0: 01c6af83 lw t6,28(a3)\n" + " 4: 05f52a23 sw t6,84(a0)" + >>) + end), + %% move_array_element: reg_x[reg_y] to x_reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), + move_array_element_test0(State1, a3, {free, Reg}, {x_reg, 2}, << + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 01f68fb3 add t6,a3,t6\n" + " a: 000faf83 lw t6,0(t6)\n" + " e: 03f52023 sw t6,32(a0)" + >>) + end), + %% move_array_element: reg_x[reg_y] to pointer (large x reg) + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), + move_array_element_test0(State1, a3, {free, Reg}, {ptr, t4}, << + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 01f68fb3 add t6,a3,t6\n" + " a: 000faf83 lw 
t6,0(t6)\n" + " e: 01fea023 sw t6,0(t4)" + >>) + end), + %% move_array_element: reg_x[reg_y] to y_reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4), + move_array_element_test0(State1, a3, {free, Reg}, {y_reg, 31}, << + " 0: 0106af83 lw t6,16(a3)\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 01f68fb3 add t6,a3,t6\n" + " a: 000faf83 lw t6,0(t6)\n" + " e: 01452f03 lw t5,20(a0)\n" + " 12: 07ff2e23 sw t6,124(t5)" + >>) + end), + %% move_array_element with integer index and x_reg destination + ?_test(begin + {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 008faf03 lw t5,8(t6)\n" + " 8: 03e52623 sw t5,44(a0)" + >>) + end) + ] + end}. + +get_array_element_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% get_array_element: reg[x] to new native reg + ?_test(begin + {State1, Reg} = ?BACKEND:get_array_element(State0, t3, 4), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 010e2f83 lw t6,16(t3)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t6, Reg) + end) + ] + end}. 
+ +move_to_array_element_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_to_array_element/4: x_reg to reg[x] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01f6a423 sw t6,8(a3)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: x_reg to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, t3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 8f72 mv t5,t3\n" + " 6: 0f0a slli t5,t5,0x2\n" + " 8: 01e68f33 add t5,a3,t5\n" + " c: 01ff2023 sw t6,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: ptr to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {ptr, t6}, a3, t3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 000faf83 lw t6,0(t6)\n" + " 4: 8f72 mv t5,t3\n" + " 6: 0f0a slli t5,t5,0x2\n" + " 8: 01e68f33 add t5,a3,t5\n" + " c: 01ff2023 sw t6,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/4: y_reg to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, a3, t3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 008f2f83 lw t6,8(t5)\n" + " 8: 8f72 mv t5,t3\n" + " a: 0f0a slli t5,t5,0x2\n" + " c: 01e68f33 add t5,a3,t5\n" + " 10: 01ff2023 sw t6,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: x_reg to reg[x+offset] + ?_test(begin + State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2, 1), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01f6a423 sw t6,8(a3)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: x_reg to 
reg[x+offset] + ?_test(begin + State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), + State2 = setelement(7, State1, [a3, t3]), + [a3, t3] = ?BACKEND:used_regs(State2), + State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 001e0f13 addi t5,t3,1\n" + " 8: 0f0a slli t5,t5,0x2\n" + " a: 01e68f33 add t5,a3,t5\n" + " e: 01ff2023 sw t6,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_array_element/5: imm to reg[x+offset] + ?_test(begin + State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]), + State2 = setelement(7, State1, [a3, t3]), + [a3, t3] = ?BACKEND:used_regs(State2), + State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1), + Stream = ?BACKEND:stream(State3), + Dump = << + " 0: 02a00f93 li t6,42\n" + " 4: 001e0f13 addi t5,t3,1\n" + " 8: 0f0a slli t5,t5,0x2\n" + " a: 01e68f33 add t5,a3,t5\n" + " e: 01ff2023 sw t6,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. 
+ +move_to_native_register_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + %% move_to_native_register/2: imm + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, 42), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t6, Reg), + Dump = << + " 0: 02a00f93 li t6,42" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: negative value + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, -42), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t6, Reg), + Dump = << + " 0: fd600f93 li t6,-42" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: -255 (boundary case) + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, -255), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t6, Reg), + Dump = << + " 0: f0100f93 li t6,-255" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: -256 (boundary case, fits in immediate for RISC-V) + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256), + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + ?assertEqual(t6, Reg), + Dump = << + " 0: f0000f93 li t6,-256\n" + " 4: a8f5 j 0x100" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {ptr, reg} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, t5}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t5, Reg), + Dump = << + " 0: 000f2f03 lw t5,0(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/2: {x_reg, N} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t6, Reg), + Dump = << + " 0: 02c52f83 lw t6,44(a0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% 
move_to_native_register/2: {y_reg, N} + ?_test(begin + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 3}), + Stream = ?BACKEND:stream(State1), + ?assertEqual(t6, Reg), + Dump = << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 00cf2f83 lw t6,12(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: imm to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, 42, t5), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 02a00f13 li t5,42" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: reg to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, t6, t4), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 8efe mv t4,t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {ptr, reg} to reg + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {ptr, t6}, t3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 000fae03 lw t3,0(t6)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {x_reg, x} to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, a3), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 5114 lw a3,32(a0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% move_to_native_register/3: {y_reg, y} to reg[reg] + ?_test(begin + State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, a1), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 01452f83 lw t6,20(a0)\n" + " 4: 008fa583 lw a1,8(t6)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + %% Test: ptr with offset to fp_reg (term_to_float) + ?_test(begin + {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + State2 = ?BACKEND:move_to_vm_register( + State1, {free, {ptr, RegA, 1}}, {fp_reg, 3} + ), + Stream = ?BACKEND:stream(State2), + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 06052f03 lw t5,96(a0)\n" + " 
8: 004fae83 lw t4,4(t6)\n" + " c: 01df2c23 sw t4,24(t5)\n" + " 10: 008fae83 lw t4,8(t6)\n" + " 14: 01df2e23 sw t4,28(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +add_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:add(State0, Reg, Imm), + % Force emission of literal pool + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + ?assertEqual(dump_to_bin(Dump), Stream). + +add_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + add_test0(State0, a2, 2, << + " 0: 0609 addi a2,a2,2\n" + " 2: a8fd j 0x100" + >>) + end), + ?_test(begin + add_test0(State0, a2, 256, << + " 0: 10000f93 li t6,256\n" + " 4: 967e add a2,a2,t6\n" + " 6: a8ed j 0x100" + >>) + end), + ?_test(begin + add_test0(State0, a2, a3, << + " 0: 9636 add a2,a2,a3\n" + " 2: a8fd j 0x100" + >>) + end) + ] + end}. + +sub_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:sub(State0, Reg, Imm), + % Force emission of literal pool + State2 = ?BACKEND:jump_to_offset(State1, 16#100), + Stream = ?BACKEND:stream(State2), + ?assertEqual(dump_to_bin(Dump), Stream). + +sub_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + sub_test0(State0, a2, 2, << + " 0: 1679 addi a2,a2,-2\n" + " 2: a8fd j 0x100" + >>) + end), + ?_test(begin + sub_test0(State0, a2, 256, << + " 0: 10000f93 li t6,256\n" + " 4: 41f60633 sub a2,a2,t6\n" + " 8: a8e5 j 0x100" + >>) + end), + ?_test(begin + sub_test0(State0, a2, a3, << + " 0: 8e15 sub a2,a2,a3\n" + " 2: a8fd j 0x100" + >>) + end) + ] + end}. + +mul_test0(State0, Reg, Imm, Dump) -> + State1 = ?BACKEND:mul(State0, Reg, Imm), + Stream = ?BACKEND:stream(State1), + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +mul_test_() -> + {setup, + fun() -> + ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)) + end, + fun(State0) -> + [ + ?_test(begin + mul_test0(State0, a2, 2, << + " 0: 0606 slli a2,a2,0x1" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 3, << + " 0: 00161f93 slli t6,a2,0x1\n" + " 4: 00cf8633 add a2,t6,a2" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 4, << + " 0: 060a slli a2,a2,0x2" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 5, << + " 0: 00261f93 slli t6,a2,0x2\n" + " 4: 00cf8633 add a2,t6,a2" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 6, << + " 0: 00161f93 slli t6,a2,0x1\n" + " 4: 00cf8633 add a2,t6,a2\n" + " 8: 0606 slli a2,a2,0x1" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 7, << + " 0: 00361f93 slli t6,a2,0x3\n" + " 4: 40cf8633 sub a2,t6,a2" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 8, << + " 0: 060e slli a2,a2,0x3" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 9, << + " 0: 00361f93 slli t6,a2,0x3\n" + " 4: 00cf8633 add a2,t6,a2" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 10, << + " 0: 00261f93 slli t6,a2,0x2\n" + " 4: 00cf8633 add a2,t6,a2\n" + " 8: 0606 slli a2,a2,0x1" + >>) + end), + ?_test(begin + mul_test0(State0, a2, 11, << + " 0: 4fad li t6,11\n" + " 2: 03f60633 mul a2,a2,t6" + >>) + end) + ] + end}. 
+ +%% Test set_args1 with y_reg pattern +set_args1_y_reg_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Call primitive with y_reg argument to trigger {y_reg, X} pattern in set_args1 + % This mirrors: {MSt2, Value} = MMod:call_primitive(MSt1, ?PRIM_BITSTRING_GET_UTF8, [{free, Src}]) + % but with {y_reg, 5} instead of {free, Src} + {State1, _ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_BITSTRING_GET_UTF8, [ + {y_reg, 5} + ]), + + Stream = ?BACKEND:stream(State1), + % Expected disassembly for loading from y_reg and calling primitive + Dump = << + " 0: 04300f93 li t6,67\n" + " 4: 0f8a slli t6,t6,0x2\n" + " 6: 9fb2 add t6,t6,a2\n" + " 8: 000faf83 lw t6,0(t6)\n" + " c: 1141 addi sp,sp,-16\n" + " e: c006 sw ra,0(sp)\n" + " 10: c22a sw a0,4(sp)\n" + " 12: c42e sw a1,8(sp)\n" + " 14: c632 sw a2,12(sp)\n" + " 16: 01452f03 lw t5,20(a0)\n" + " 1a: 014f2503 lw a0,20(t5)\n" + " 1e: 9f82 jalr t6\n" + " 20: 8faa mv t6,a0\n" + " 22: 4082 lw ra,0(sp)\n" + " 24: 4512 lw a0,4(sp)\n" + " 26: 45a2 lw a1,8(sp)\n" + " 28: 4632 lw a2,12(sp)\n" + " 2a: 0141 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test large Y register read (Y=123, offset=492, exceeds immediate limit) +large_y_reg_read_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Move from a large Y register (123 * 4 = 492 bytes, exceeds immediate limit) + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 123}), + Stream = ?BACKEND:stream(State1), + % Expected: uses helper with temp register for large offset + Dump = << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 1ec00f93 li t6,492\n" + " 8: 9ffa add t6,t6,t5\n" + " a: 000faf83 lw t6,0(t6)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t6, Reg). 
+ +%% Test large Y register write with immediate value +large_y_reg_write_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Move immediate to a large Y register (123 * 4 = 492 bytes) + State1 = ?BACKEND:move_to_vm_register(State0, 42, {y_reg, 123}), + Stream = ?BACKEND:stream(State1), + % Expected: uses helper with temp registers for large offset + Dump = << + " 0: 02a00f13 li t5,42\n" + " 4: 01452f83 lw t6,20(a0)\n" + " 8: 1ec00e93 li t4,492\n" + " c: 9efe add t4,t4,t6\n" + " e: 01eea023 sw t5,0(t4)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test large Y register read with limited registers (uses IP_REG fallback) +large_y_reg_read_register_exhaustion_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate most available registers to simulate near-exhaustion (leave 1 for the y_reg helper) + {State1, _} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, _} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, _} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, _} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + % Leave one register available so the y_reg helper can work, but it will need IP_REG fallback + {StateFinal, ResultReg} = ?BACKEND:move_to_native_register(State5, {y_reg, 35}), + Stream = ?BACKEND:stream(StateFinal), + % Expected: uses t0+t1 fallback sequence when temps are exhausted + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 01452283 lw t0,20(a0)\n" + " 18: 08c00313 li t1,140\n" + " 1c: 9316 add t1,t1,t0\n" + " 1e: 00032303 lw t1,0(t1)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t1, ResultReg). 
+ +%% Test large Y register write with register exhaustion (uses t1/t0 fallback) +large_y_reg_write_register_exhaustion_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Get a source register first + {State1, SrcReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + % Allocate most remaining registers to simulate exhaustion + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + % Try to write to large Y register when only one temp register is available + StateFinal = ?BACKEND:move_to_vm_register(State5, SrcReg, {y_reg, 50}), + Stream = ?BACKEND:stream(StateFinal), + % Expected: uses t1/t0 fallback sequence + Dump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 01452303 lw t1,20(a0)\n" + " 18: 0c800293 li t0,200\n" + " 1c: 929a add t0,t0,t1\n" + " 1e: 01f2a023 sw t6,0(t0)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test boundary case: Y=31 (124 bytes, exactly at limit, should use direct addressing) +y_reg_boundary_direct_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 31}), + Stream = ?BACKEND:stream(State1), + % Expected: uses direct addressing since 31 * 4 = 124 < 2048 + Dump = << + " 0: 01452f03 lw t5,20(a0)\n" + " 4: 07cf2f83 lw t6,124(t5)" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t6, Reg). 
+ +%% Test debugger function +debugger_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:debugger(State0), + Stream = ?BACKEND:stream(State1), + Dump = << + " 0: 9002 ebreak" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +and_register_exhaustion_negative_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate all available registers to simulate register exhaustion + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + % Test negative immediate (-4) which should use NOT+AND with t0 as temp + StateResult = ?BACKEND:and_(StateNoRegs, t6, -4), + Stream = ?BACKEND:stream(StateResult), + ExpectedDump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 428d li t0,3\n" + " 1a: fff2c293 not t0,t0\n" + " 1e: 005fffb3 and t6,t6,t0" + >>, + ?assertEqual(dump_to_bin(ExpectedDump), Stream). 
+ +and_register_exhaustion_positive_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Allocate all available registers to simulate register exhaustion + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {StateNoRegs, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + % Test positive immediate (0x3F) which should use AND with t0 as temp + StateResult = ?BACKEND:and_(StateNoRegs, t6, 16#3F), + Stream = ?BACKEND:stream(StateResult), + ExpectedDump = << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 03f00293 li t0,63\n" + " 1c: 005fffb3 and t6,t6,t0" + >>, + ?assertEqual(dump_to_bin(ExpectedDump), Stream). + +jump_table_large_labels_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 512), + Stream = ?BACKEND:stream(State1), + % RISC-V: Each jump table entry is 8 bytes (AUIPC + JALR) + ?assertEqual((512 + 1) * 8, byte_size(Stream)). 
+ +alloc_boxed_integer_fragment_small_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ + ctx, {avm_int64_t, 42} + ]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State1), + Dump = + << + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 02a00593 li a1,42\n" + " 12: 4601 li a2,0\n" + " 14: 9f82 jalr t6\n" + " 16: 8faa mv t6,a0\n" + " 18: 4082 lw ra,0(sp)\n" + " 1a: 4512 lw a0,4(sp)\n" + " 1c: 45a2 lw a1,8(sp)\n" + " 1e: 4632 lw a2,12(sp)\n" + " 20: 0141 addi sp,sp,16" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +alloc_boxed_integer_fragment_large_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [ + ctx, {avm_int64_t, 16#123456789ABCDEF0} + ]), + % Add a call primitive last to emit literal pool + State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [ + ctx, jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg} + ]), + ?assertEqual(t6, ResultReg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 03c62f83 lw t6,60(a2)\n" + " 4: 1141 addi sp,sp,-16\n" + " 6: c006 sw ra,0(sp)\n" + " 8: c22a sw a0,4(sp)\n" + " a: c42e sw a1,8(sp)\n" + " c: c632 sw a2,12(sp)\n" + " e: 9abce5b7 lui a1,0x9abce\n" + " 12: ef058593 addi a1,a1,-272 # 0x9abcdef0\n" + " 16: 12345637 lui a2,0x12345\n" + " 1a: 67860613 addi a2,a2,1656 # 0x12345678\n" + " 1e: 9f82 jalr t6\n" + " 20: 8faa mv t6,a0\n" + " 22: 4082 lw ra,0(sp)\n" + " 24: 4512 lw a0,4(sp)\n" + " 26: 45a2 lw a1,8(sp)\n" + " 28: 4632 lw a2,12(sp)\n" + " 2a: 0141 addi sp,sp,16\n" + " 2c: 04c62f03 lw t5,76(a2)\n" + " 30: 03000613 li a2,48\n" + " 34: 28b00693 li a3,651\n" + " 38: 877e mv 
a4,t6\n" + " 3a: 8f02 jr t5" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +%% Test for stack alignment issue in call_func_ptr +%% RISC-V maintains 16-byte stack alignment (RISC-V calling convention) +call_func_ptr_stack_alignment_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, _ResultReg} = ?BACKEND:call_func_ptr(State4, {free, t3}, [42]), + Stream = ?BACKEND:stream(State5), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 1101 addi sp,sp,-32\n" + " 12: c006 sw ra,0(sp)\n" + " 14: c22a sw a0,4(sp)\n" + " 16: c42e sw a1,8(sp)\n" + " 18: c632 sw a2,12(sp)\n" + " 1a: c876 sw t4,16(sp)\n" + " 1c: ca7a sw t5,20(sp)\n" + " 1e: cc7e sw t6,24(sp)\n" + " 20: 02a00513 li a0,42\n" + " 24: 9e02 jalr t3\n" + " 26: 8e2a mv t3,a0\n" + " 28: 4082 lw ra,0(sp)\n" + " 2a: 4512 lw a0,4(sp)\n" + " 2c: 45a2 lw a1,8(sp)\n" + " 2e: 4632 lw a2,12(sp)\n" + " 30: 4ec2 lw t4,16(sp)\n" + " 32: 4f52 lw t5,20(sp)\n" + " 34: 4fe2 lw t6,24(sp)\n" + " 36: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). 
+ +%% Test for register exhaustion issue in call_func_ptr with 5+ arguments +%% When all registers are used and we call a function with 5+ args, +%% set_args needs temporary registers but none are available +call_func_ptr_register_exhaustion_test_() -> + {setup, + fun() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + + % Allocate all available registers to simulate register pressure + {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}), + {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}), + {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}), + {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}), + {State6, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}), + State6 + end, + fun(State6) -> + [ + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, 3, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 468d li a3,3\n" + " 2e: 4705 li a4,1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, 
_ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, 1, t1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 4685 li a3,1\n" + " 2e: 871a mv a4,t1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, t5}, + [ctx, jit_state, {free, t2}, t1, 1] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: 1101 addi sp,sp,-32\n" + " 1a: c006 sw ra,0(sp)\n" + " 1c: c22a sw a0,4(sp)\n" + " 1e: c42e sw a1,8(sp)\n" + " 20: c632 sw a2,12(sp)\n" + " 22: c81a sw t1,16(sp)\n" + " 24: ca72 sw t3,20(sp)\n" + " 26: cc76 sw t4,24(sp)\n" + " 28: ce7e sw t6,28(sp)\n" + " 2a: 861e mv a2,t2\n" + " 2c: 869a mv a3,t1\n" + " 2e: 4705 li a4,1\n" + " 30: 9f02 jalr t5\n" + " 32: 8f2a mv t5,a0\n" + " 34: 4082 lw ra,0(sp)\n" + " 36: 4512 lw a0,4(sp)\n" + " 38: 45a2 lw a1,8(sp)\n" + " 3a: 4632 lw a2,12(sp)\n" + " 3c: 4342 lw t1,16(sp)\n" + " 3e: 4e52 lw t3,20(sp)\n" + " 40: 4ee2 lw 
t4,24(sp)\n" + " 42: 4ff2 lw t6,28(sp)\n" + " 44: 02010113 addi sp,sp,32" + >>, + ?assertEqual(dump_to_bin(Dump), Stream), + ?assertEqual(t5, ResultReg) + end), + ?_test(begin + {State7, _ResultReg} = ?BACKEND:call_func_ptr( + State6, + {free, a1}, + [t5, a3] + ), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: c006 sw ra,0(sp)\n" + " 1e: c22a sw a0,4(sp)\n" + " 20: c42e sw a1,8(sp)\n" + " 22: c632 sw a2,12(sp)\n" + " 24: c81a sw t1,16(sp)\n" + " 26: ca1e sw t2,20(sp)\n" + " 28: cc72 sw t3,24(sp)\n" + " 2a: ce76 sw t4,28(sp)\n" + " 2c: d07a sw t5,32(sp)\n" + " 2e: d27e sw t6,36(sp)\n" + " 30: 832e mv t1,a1\n" + " 32: 857a mv a0,t5\n" + " 34: 85b6 mv a1,a3\n" + " 36: 9302 jalr t1\n" + " 38: c42a sw a0,8(sp)\n" + " 3a: 4082 lw ra,0(sp)\n" + " 3c: 4512 lw a0,4(sp)\n" + " 3e: 45a2 lw a1,8(sp)\n" + " 40: 4632 lw a2,12(sp)\n" + " 42: 4342 lw t1,16(sp)\n" + " 44: 43d2 lw t2,20(sp)\n" + " 46: 4e62 lw t3,24(sp)\n" + " 48: 4ef2 lw t4,28(sp)\n" + " 4a: 5f02 lw t5,32(sp)\n" + " 4c: 5f92 lw t6,36(sp)\n" + " 4e: 03010113 addi sp,sp,48" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + {State7, ResultReg} = ?BACKEND:call_func_ptr( + State6, + {primitive, 2}, + [{free, t5}, a3] + ), + ?assertEqual(ResultReg, t5), + Stream = ?BACKEND:stream(State7), + Dump = + << + " 0: 01852f83 lw t6,24(a0)\n" + " 4: 01c52f03 lw t5,28(a0)\n" + " 8: 02052e83 lw t4,32(a0)\n" + " c: 02452e03 lw t3,36(a0)\n" + " 10: 02852383 lw t2,40(a0)\n" + " 14: 02c52303 lw t1,44(a0)\n" + " 18: fd010113 addi sp,sp,-48\n" + " 1c: c006 sw ra,0(sp)\n" + " 1e: c22a sw a0,4(sp)\n" + " 20: c42e sw a1,8(sp)\n" + " 22: c632 sw a2,12(sp)\n" + " 24: c81a sw t1,16(sp)\n" + " 26: ca1e sw t2,20(sp)\n" + " 28: cc72 sw t3,24(sp)\n" + " 2a: ce76 sw t4,28(sp)\n" + " 2c: d07e 
sw t6,32(sp)\n" + " 2e: 00862303 lw t1,8(a2)\n" + " 32: 857a mv a0,t5\n" + " 34: 85b6 mv a1,a3\n" + " 36: 9302 jalr t1\n" + " 38: 8f2a mv t5,a0\n" + " 3a: 4082 lw ra,0(sp)\n" + " 3c: 4512 lw a0,4(sp)\n" + " 3e: 45a2 lw a1,8(sp)\n" + " 40: 4632 lw a2,12(sp)\n" + " 42: 4342 lw t1,16(sp)\n" + " 44: 43d2 lw t2,20(sp)\n" + " 46: 4e62 lw t3,24(sp)\n" + " 48: 4ef2 lw t4,28(sp)\n" + " 4a: 5f82 lw t6,32(sp)\n" + " 4c: 03010113 addi sp,sp,48" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ] + end}. + +%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test_() -> + [ + ?_test(begin + % Test 1: jump_to_continuation at offset 0 + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, a0}), + Stream = ?BACKEND:stream(State1), + % Expected: riscv32 PIC sequence + Dump = + << + " 0: 00000f97 auipc t6,0x0\n" + " 4: 9faa add t6,t6,a0\n" + " 6: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + % Test 2: jump_to_continuation after jump table (non-zero relative address) + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + % Generate a jump table for 3 labels (4 entries * 8 bytes = 32 bytes) + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:jump_to_continuation(State1, {free, a0}), + Stream = ?BACKEND:stream(State2), + % Expected: jump table (32 bytes) + jump_to_continuation + % NetOffset = 0 - 32 = -32 (0xFFFFFFE0) + Dump = + << + " 0: ffffffff .insn 4, 0xffffffff\n" + " 4: ffffffff .insn 4, 0xffffffff\n" + " 8: ffffffff .insn 4, 0xffffffff\n" + " c: ffffffff .insn 4, 0xffffffff\n" + " 10: ffffffff .insn 4, 0xffffffff\n" + " 14: ffffffff .insn 4, 0xffffffff\n" + " 18: ffffffff .insn 4, 0xffffffff\n" + " 1c: ffffffff .insn 4, 0xffffffff\n" + " 20: 00000f97 auipc t6,0x0\n" + " 24: 1f81 addi t6,t6,-32 # 0x0\n" + " 26: 9faa add t6,t6,a0\n" + " 28: 8f82 jr t6" + >>, + 
?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. + +%% Mimic part of add.beam +add_beam_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_table(State0, 3), + State2 = ?BACKEND:add_label(State1, 1), + State3 = ?BACKEND:move_to_vm_register(State2, 16#9f, {x_reg, 1}), + State4 = ?BACKEND:move_to_vm_register(State3, 16#8f, {x_reg, 0}), + State5 = ?BACKEND:call_only_or_schedule_next(State4, 2), + State6 = ?BACKEND:add_label(State5, 2), + {State7, ResultReg} = ?BACKEND:call_primitive(State6, ?PRIM_ALLOCATE, [ + ctx, jit_state, 1, 0, 1 + ]), + State8 = ?BACKEND:if_block(State7, {'(bool)', {free, ResultReg}, '==', false}, fun(BSt0) -> + ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset]) + end), + State9 = ?BACKEND:move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + State10 = ?BACKEND:call_or_schedule_next(State9, 3), + State11 = ?BACKEND:add_label(State10, 3), + State12 = ?BACKEND:call_primitive_last(State11, ?PRIM_RETURN, [ + ctx, jit_state + ]), + % OP_INT_CALL_END + State13 = ?BACKEND:add_label(State12, 0), + State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]), + State15 = ?BACKEND:update_branches(State14), + Stream = ?BACKEND:stream(State15), + Dump = + << + % jump table (new 8-byte format) + " 0: 00000697 auipc a3,0x0\n" + " 4: 0e068067 jr 224(a3) # 0xe0\n" + " 8: 00000697 auipc a3,0x0\n" + " c: 01868067 jr 24(a3) # 0x20\n" + " 10: 00000697 auipc a3,0x0\n" + " 14: 04868067 jr 72(a3) # 0x58\n" + " 18: 00000697 auipc a3,0x0\n" + " 1c: 0c268067 jr 194(a3) # 0xda\n" + % label 1 + % {move,{integer,9},{x,1}}. + " 20: 09f00f93 li t6,159\n" + " 24: 01f52e23 sw t6,28(a0)\n" + % {move,{integer,8},{x,0}} + " 28: 08f00f93 li t6,143\n" + " 2c: 01f52c23 sw t6,24(a0)\n" + % {call_only,2,{f,2}}. 
+ " 30: 0085af83 lw t6,8(a1)\n" + " 34: 1ffd addi t6,t6,-1\n" + " 36: 01f5a423 sw t6,8(a1)\n" + " 3a: 000f8663 beqz t6,0x46\n" + " 3e: a829 j 0x58\n" + " 40: 0001 nop\n" + " 42: 00000013 nop\n" + " 46: 00000f97 auipc t6,0x0\n" + " 4a: 0fd1 addi t6,t6,20 # 0x5a\n" + " 4c: 0001 nop\n" + " 4e: 01f5a223 sw t6,4(a1)\n" + " 52: 00862f83 lw t6,8(a2)\n" + " 56: 8f82 jr t6\n" + % label 2 + % {allocate,1,1}. + " 58: 01462f83 lw t6,20(a2)\n" + " 5c: 1141 addi sp,sp,-16\n" + " 5e: c006 sw ra,0(sp)\n" + " 60: c22a sw a0,4(sp)\n" + " 62: c42e sw a1,8(sp)\n" + " 64: c632 sw a2,12(sp)\n" + " 66: 4605 li a2,1\n" + " 68: 4681 li a3,0\n" + " 6a: 4705 li a4,1\n" + " 6c: 9f82 jalr t6\n" + " 6e: 8faa mv t6,a0\n" + " 70: 4082 lw ra,0(sp)\n" + " 72: 4512 lw a0,4(sp)\n" + " 74: 45a2 lw a1,8(sp)\n" + " 76: 4632 lw a2,12(sp)\n" + " 78: 0141 addi sp,sp,16\n" + " 7a: 01ff9f13 slli t5,t6,0x1f\n" + " 7e: 000f4763 bltz t5,0x8c\n" + " 82: 01862f83 lw t6,24(a2)\n" + " 86: 08600613 li a2,134\n" + " 8a: 8f82 jr t6\n" + % {init_yregs,{list,[{y,0}]}}. 
+ %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}), + " 8c: 03b00f13 li t5,59\n" + " 90: 01452f83 lw t6,20(a0)\n" + " 94: 01efa023 sw t5,0(t6)\n" + % {call,1,{f,3}} + %% call_or_schedule_next(State9, 3), + " 98: 0005af03 lw t5,0(a1)\n" + " 9c: 000f2f03 lw t5,0(t5)\n" + " a0: 0f62 slli t5,t5,0x18\n" + " a2: 36800f93 li t6,872\n" + " a6: 00000013 nop\n" + " aa: 01ff6f33 or t5,t5,t6\n" + " ae: 05e52e23 sw t5,92(a0)\n" + " b2: 0085af83 lw t6,8(a1)\n" + " b6: 1ffd addi t6,t6,-1\n" + " b8: 01f5a423 sw t6,8(a1)\n" + " bc: 000f8663 beqz t6,0xc8\n" + " c0: a829 j 0xda\n" + " c2: 0001 nop\n" + " c4: 00000013 nop\n" + " c8: 00000f97 auipc t6,0x0\n" + " cc: 0fd1 addi t6,t6,20 # 0xdc\n" + " ce: 0001 nop\n" + " d0: 01f5a223 sw t6,4(a1)\n" + " d4: 00862f83 lw t6,8(a2)\n" + " d8: 8f82 jr t6\n" + %% (continuation) + % label 3 + " da: 00462f83 lw t6,4(a2)\n" + " de: 8f82 jr t6\n" + % label 0 + " e0: 00462f83 lw t6,4(a2)\n" + " e4: 8f82 jr t6" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + +dump_to_bin(Dump) -> + dump_to_bin0(Dump, addr, []). + +-define(IS_HEX_DIGIT(C), + ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F)) +). 
+ +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<>, addr, Acc) when ?IS_HEX_DIGIT(N) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, hex, Acc); +%% Handle RISC-V 32-bit instructions (8 consecutive hex digits) +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + %% RISC-V instructions are 32-bit little-endian + Instr = list_to_integer([H1, H2, H3, H4, H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<> | Acc]); +%% Handle 32-bits undefined instruction (ARM format with space: "1234 5678") +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) andalso + ?IS_HEX_DIGIT(H5) andalso + ?IS_HEX_DIGIT(H6) andalso + ?IS_HEX_DIGIT(H7) andalso + ?IS_HEX_DIGIT(H8) +-> + InstrA = list_to_integer([H1, H2, H3, H4], 16), + InstrB = list_to_integer([H5, H6, H7, H8], 16), + dump_to_bin0(Rest, instr, [<>, <> | Acc]); +%% Handle 16-bit ARM32 Thumb instructions (4 hex digits) +dump_to_bin0(<>, hex, Acc) when + (Sp =:= $\t orelse Sp =:= $\s) andalso + ?IS_HEX_DIGIT(H1) andalso + ?IS_HEX_DIGIT(H2) andalso + ?IS_HEX_DIGIT(H3) andalso + ?IS_HEX_DIGIT(H4) +-> + %% Parse 4 hex digits (ARM32 Thumb 16-bit instruction) + Instr = list_to_integer([H1, H2, H3, H4], 16), + dump_to_bin0(Rest, instr, [<> | 
Acc]); +dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, addr, Acc); +dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) -> + dump_to_bin0(Tail, instr, Acc); +dump_to_bin0(<<>>, _, Acc) -> + list_to_binary(lists:reverse(Acc)). diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl index 72a356ae3c..56206f7eac 100644 --- a/tests/libs/jit/jit_tests.erl +++ b/tests/libs/jit/jit_tests.erl @@ -62,13 +62,73 @@ <<0, 0, 0, 3, 0, 0, 0, 2, 15, 255, 0, 16>> ). -compile_minimal_x86_64_test() -> +% Code chunk from bool_min2.erl - tests tail-call cache optimization +% This module has multiple return opcodes which trigger the tail-call cache: +% - The first return creates a cached implementation +% - Subsequent returns use jump_to_offset to jump back to the cached code +-define(CODE_CHUNK_3, + <<16#00, 16#00, 16#00, 16#10, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#B2, 16#00, + 16#00, 16#00, 16#09, 16#00, 16#00, 16#00, 16#03, 16#01, 16#10, 16#99, 16#10, 16#02, 16#12, + 16#22, 16#00, 16#01, 16#20, 16#0C, 16#10, 16#00, 16#AC, 16#17, 16#10, 16#04, 16#40, 16#32, + 16#23, 16#40, 16#32, 16#33, 16#40, 16#32, 16#13, 16#40, 16#42, 16#43, 16#40, 16#32, 16#03, + 16#99, 16#20, 16#04, 16#50, 16#45, 16#04, 16#10, 16#65, 16#40, 16#03, 16#04, 16#40, 16#42, + 16#23, 16#40, 16#42, 16#33, 16#40, 16#32, 16#13, 16#40, 16#42, 16#43, 16#40, 16#42, 16#03, + 16#99, 16#30, 16#04, 16#50, 16#45, 16#04, 16#10, 16#65, 16#99, 16#20, 16#7D, 16#05, 16#10, + 16#00, 16#57, 16#04, 16#10, 16#57, 16#03, 16#10, 16#03, 16#12, 16#10, 16#13, 16#01, 16#30, + 16#99, 16#40, 16#02, 16#12, 16#72, 16#50, 16#01, 16#40, 16#99, 16#50, 16#0B, 16#05, 16#10, + 16#03, 16#13, 16#03, 16#0B, 16#05, 16#10, 16#23, 16#33, 16#13, 16#0B, 16#05, 16#20, 16#57, + 16#03, 16#20, 16#57, 16#13, 16#20, 16#03, 16#0A, 16#05, 16#30, 16#43, 16#13, 16#0B, 16#05, + 16#20, 16#57, 16#03, 16#20, 16#57, 16#13, 16#20, 16#03, 16#13, 
16#01, 16#50, 16#99, 16#60, + 16#02, 16#12, 16#B2, 16#10, 16#01, 16#60, 16#3B, 16#03, 16#55, 16#17, 16#40, 16#32, 16#85, + 16#42, 16#75, 16#01, 16#70, 16#40, 16#11, 16#03, 16#13, 16#01, 16#80, 16#40, 16#01, 16#03, + 16#13, 16#03>> +). +-define(ATU8_CHUNK_3, + <<16#FF, 16#FF, 16#FF, 16#F5, 16#90, 16#62, 16#6F, 16#6F, 16#6C, 16#5F, 16#6D, 16#69, 16#6E, + 16#32, 16#50, 16#73, 16#74, 16#61, 16#72, 16#74, 16#50, 16#66, 16#61, 16#6C, 16#73, 16#65, + 16#40, 16#74, 16#72, 16#75, 16#65, 16#60, 16#65, 16#72, 16#6C, 16#61, 16#6E, 16#67, 16#10, + 16#2B, 16#10, 16#66, 16#30, 16#61, 16#6E, 16#64, 16#20, 16#6F, 16#72, 16#30, 16#6E, 16#6F, + 16#74, 16#B0, 16#6F, 16#6E, 16#65, 16#5F, 16#69, 16#66, 16#5F, 16#74, 16#72, 16#75, 16#65>> +). +-define(TYPE_CHUNK_3, + <<16#00, 16#00, 16#00, 16#03, 16#00, 16#00, 16#00, 16#03, 16#0F, 16#FF, 16#30, 16#20, 16#00, + 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, + 16#00, 16#01, 16#00, 16#01>> +). +-define(LINE_CHUNK_3, + <<16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#07, 16#00, + 16#00, 16#00, 16#06, 16#00, 16#00, 16#00, 16#00, 16#41, 16#51, 16#61, 16#81, 16#91, 16#B1>> +). + +-ifdef(JIT_DWARF). +compile_stream_setup(CodeChunk) -> + Stream0 = jit_dwarf:new(jit_x86_64, test_module, jit_stream_binary, 0), + <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk, + Stream1 = jit_dwarf:append( + Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) + ), + Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_dwarf, Stream1), + {LabelsCount, Stream2}. + +compile_stream_finalize(Stream3) -> + DwarfStream = jit_x86_64:stream(Stream3), + jit_dwarf:stream(DwarfStream). +-else. 
+compile_stream_setup(CodeChunk) -> Stream0 = jit_stream_binary:new(0), - <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_0, + <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk, Stream1 = jit_stream_binary:append( Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) ), Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), + {LabelsCount, Stream2}. + +compile_stream_finalize(Stream3) -> + jit_x86_64:stream(Stream3). +-endif. + +compile_minimal_x86_64_test() -> + {LabelsCount, Stream2} = compile_stream_setup(?CODE_CHUNK_0), {_LabelsCount, Stream3} = jit:compile( ?CODE_CHUNK_0, fun(_) -> undefined end, @@ -77,7 +137,7 @@ compile_minimal_x86_64_test() -> jit_x86_64, Stream2 ), - Stream4 = jit_x86_64:stream(Stream3), + Stream4 = compile_stream_finalize(Stream3), <<16:32, LabelsCount:32, ?JIT_FORMAT_VERSION:16, 1:16, ?JIT_ARCH_X86_64:16, ?JIT_VARIANT_PIC:16, 0:32, Code/binary>> = Stream4, {JumpTable, _} = split_binary(Code, (LabelsCount + 1) * 5), @@ -105,24 +165,27 @@ check_labels_table0(N, <>) -> check_labels_table0 check_lines_table(<>) -> ok. -term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> - % Compile CODE_CHUNK_1 which contains a typed register for term_to_int optimization - Stream0 = jit_stream_binary:new(0), - <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_1, - Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) - ), - Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), +backend_to_arch(jit_x86_64) -> ?JIT_ARCH_X86_64; +backend_to_arch(jit_aarch64) -> ?JIT_ARCH_AARCH64; +backend_to_arch(jit_armv6m) -> ?JIT_ARCH_ARMV6M. 
+ +compile_stream_for_backend(Backend, CodeChunk, AtomChunk, TypeChunk) -> + {LabelsCount, Stream2} = compile_stream_setup(CodeChunk), - AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_1), + AtomResolver = jit_precompile:atom_resolver(AtomChunk), LiteralResolver = fun(_) -> test_literal end, - TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_1), + TypeResolver = jit_precompile:type_resolver(TypeChunk), % Compile with typed register support - {_LabelsCount, Stream3} = jit:compile( - ?CODE_CHUNK_1, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2 + {LabelsCount, Stream3} = jit:compile( + CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2 + ), + compile_stream_finalize(Stream3). + +term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> + CompiledCode = compile_stream_for_backend( + jit_x86_64, ?CODE_CHUNK_1, ?ATU8_CHUNK_1, ?TYPE_CHUNK_1 ), - CompiledCode = jit_x86_64:stream(Stream3), % Check the reading of x[1] is immediatly followed by a shift right. % 15c: 4c 8b 5f 38 mov 0x38(%rdi),%r11 @@ -183,23 +246,9 @@ term_to_int_verify_is_match_state_typed_optimization_x86_64_test() -> ok. 
verify_is_function_typed_optimization_x86_64_test() -> - % Compile CODE_CHUNK_1 which contains a typed register for term_to_int optimization - Stream0 = jit_stream_binary:new(0), - <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_2, - Stream1 = jit_stream_binary:append( - Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC) - ), - Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1), - - AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_2), - LiteralResolver = fun(_) -> test_literal end, - TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_2), - - % Compile with typed register support - {_LabelsCount, Stream3} = jit:compile( - ?CODE_CHUNK_2, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2 + CompiledCode = compile_stream_for_backend( + jit_x86_64, ?CODE_CHUNK_2, ?ATU8_CHUNK_2, ?TYPE_CHUNK_2 ), - CompiledCode = jit_x86_64:stream(Stream3), % Check that call to allocate is directly followed by the building the cp % for call @@ -250,3 +299,36 @@ verify_is_function_typed_optimization_x86_64_test() -> ) ), ok. 
+ +tail_call_cache_armv6m_test() -> + CompiledCode = compile_stream_for_backend( + jit_armv6m, ?CODE_CHUNK_3, ?ATU8_CHUNK_3, ?TYPE_CHUNK_3 + ), + + % Check that we have the following pattern: + % 8c: 278c movs r7, #140 @ 0x8c + % 8e: 6816 ldr r6, [r2, #0] + % 90: 463a mov r2, r7 + % 92: 4b02 ldr r3, [pc, #8] @ (0x9c) + % 94: 9f05 ldr r7, [sp, #20] + % 96: 9605 str r6, [sp, #20] + % 98: 46be mov lr, r7 + + % Check for the first return implementation (call_primitive_last for PRIM_RETURN) + ?assertMatch( + {_, _}, + binary:match( + CompiledCode, + <<16#278c:16/little, 16#6816:16/little, 16#463a:16/little, 16#4b02:16/little, + 16#9f05:16/little, 16#9605:16/little, 16#46be:16/little>> + ) + ), + + % Check for tail-call cache jump: ldr r7, [pc, #0] followed by b.n (backward branch) + % 29c: 4f00 ldr r7, [pc, #0] @ (0x2a0) + % 29e: e6f5 b.n 0x8c + ?assertMatch( + {_, _}, + binary:match(CompiledCode, <<16#4f00:16/little, 16#e6f5:16/little>>) + ), + ok. diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl index cfabfcf15f..cf989e746d 100644 --- a/tests/libs/jit/jit_tests_common.erl +++ b/tests/libs/jit/jit_tests_common.erl @@ -77,6 +77,8 @@ asm(Arch, Bin, Str) -> find_binutils(Arch) -> ArchStr = atom_to_list(Arch), BinutilsList = [ + {ArchStr ++ "-esp-elf-as", ArchStr ++ "-esp-elf-objdump"}, + {ArchStr ++ "-unknown-elf-as", ArchStr ++ "-unknown-elf-objdump"}, {ArchStr ++ "-elf-as", ArchStr ++ "-elf-objdump"}, {ArchStr ++ "-none-eabi-as", ArchStr ++ "-none-eabi-objdump"}, {ArchStr ++ "-linux-gnu-as", ArchStr ++ "-linux-gnu-objdump"} @@ -104,6 +106,8 @@ get_asm_header(arm) -> get_asm_header(aarch64) -> ".text\n"; get_asm_header(x86_64) -> + ".text\n"; +get_asm_header(riscv32) -> ".text\n". %% Get architecture-specific assembler flags @@ -113,7 +117,9 @@ get_as_flags(arm) -> get_as_flags(aarch64) -> ""; get_as_flags(x86_64) -> - "--64". + "--64"; +get_as_flags(riscv32) -> + "-march=rv32imac". 
%% Parse objdump output lines and extract binary data -spec asm_lines([binary()], binary(), atom()) -> binary(). diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl index abdb0d6773..9aa86b6427 100644 --- a/tests/libs/jit/jit_x86_64_tests.erl +++ b/tests/libs/jit/jit_x86_64_tests.erl @@ -820,17 +820,35 @@ if_else_block_test() -> >>, ?assertEqual(dump_to_bin(Dump), Stream). -shift_right_test() -> - State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), - {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), - State2 = ?BACKEND:shift_right(State1, Reg, 3), - Stream = ?BACKEND:stream(State2), - Dump = - << - " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" - " 4: 48 c1 e8 03 shr $0x3,%rax" - >>, - ?assertEqual(dump_to_bin(Dump), Stream). +shift_right_test_() -> + [ + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, Reg} = ?BACKEND:shift_right(State1, {free, Reg}, 3), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 48 c1 e8 03 shr $0x3,%rax" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end), + ?_test(begin + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}), + {State2, OtherReg} = ?BACKEND:shift_right(State1, Reg, 3), + ?assertNotEqual(OtherReg, Reg), + Stream = ?BACKEND:stream(State2), + Dump = + << + " 0: 48 8b 47 30 mov 0x30(%rdi),%rax\n" + " 4: 49 89 c3 mov %rax,%r11\n" + " 7: 49 c1 eb 03 shr $0x3,%r11" + >>, + ?assertEqual(dump_to_bin(Dump), Stream) + end) + ]. shift_left_test() -> State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), @@ -1559,6 +1577,21 @@ move_to_array_element_test_() -> ] end}. 
+%% Test jump_to_continuation optimization for intra-module returns +jump_to_continuation_test() -> + State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)), + State1 = ?BACKEND:jump_to_continuation(State0, {free, rax}), + Stream = ?BACKEND:stream(State1), + % Expected: leaq -0x7(%rip), %rax; addq %rax, %rax; jmpq *%rax + % With default offset 0, NetOffset = 0 - 0 = 0, but RIP-relative needs adjustment for instruction length + Dump = + << + " 0: 48 8d 05 f9 ff ff ff lea -0x7(%rip),%rax\n" + " 7: 48 01 c0 add %rax,%rax\n" + " a: ff e0 jmpq *%rax" + >>, + ?assertEqual(dump_to_bin(Dump), Stream). + dump_to_bin(Dump) -> dump_to_bin0(Dump, addr, []). diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl index a435ab17e0..5411862592 100644 --- a/tests/libs/jit/tests.erl +++ b/tests/libs/jit/tests.erl @@ -27,8 +27,13 @@ start() -> etest:test([ jit_tests, + jit_dwarf_tests, jit_aarch64_tests, jit_aarch64_asm_tests, + jit_armv6m_tests, + jit_armv6m_asm_tests, + jit_riscv32_tests, + jit_riscv32_asm_tests, jit_x86_64_tests, jit_x86_64_asm_tests ]). diff --git a/tests/test.c b/tests/test.c index 79aa2ec121..5db01bfff0 100644 --- a/tests/test.c +++ b/tests/test.c @@ -708,6 +708,16 @@ int test_modules_execution(bool beam, bool skip, int count, char **item) perror("Error: cannot find aarch64 directory"); return EXIT_FAILURE; } +#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M + if (chdir("armv6m") != 0) { + perror("Error: cannot find armv6m directory"); + return EXIT_FAILURE; + } +#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32 + if (chdir("riscv32") != 0) { + perror("Error: cannot find riscv32 directory"); + return EXIT_FAILURE; + } #else #error Unknown JIT target #endif