diff --git a/.github/workflows/build-and-test.yaml b/.github/workflows/build-and-test.yaml
index fcc45fd033..5a983dc3e2 100644
--- a/.github/workflows/build-and-test.yaml
+++ b/.github/workflows/build-and-test.yaml
@@ -321,7 +321,7 @@ jobs:
           cmake_opts_other: "-DAVM_DISABLE_JIT=OFF"
           jit_target_arch: "aarch64"
 
-        # armhf build
+        # armhf builds
         - os: "ubuntu-24.04"
           cc: "arm-linux-gnueabihf-gcc"
           cxx: "arm-linux-gnueabihf-g++"
@@ -336,6 +336,21 @@ jobs:
           arch: "armhf"
           library-arch: arm-linux-gnueabihf
 
+        - os: "ubuntu-24.04"
+          cc: "arm-linux-gnueabihf-gcc"
+          cxx: "arm-linux-gnueabihf-g++"
+          # -D_FILE_OFFSET_BITS=64 is required for the atomvm:posix_readdir/1 test to work;
+          # otherwise readdir fails because 64-bit inode numbers do not fit in a 32-bit ino_t
+          cflags: "-mcpu=cortex-a7 -mfloat-abi=hard -O2 -mthumb -mthumb-interwork -D_FILE_OFFSET_BITS=64"
+          otp: "28"
+          elixir_version: "1.17"
+          rebar3_version: "3.24.0"
+          cmake_opts_other: "-DAVM_DISABLE_JIT=OFF -DAVM_JIT_TARGET_ARCH=armv6m -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/armhf_toolchain.cmake"
+          compiler_pkgs: "crossbuild-essential-armhf libc6-dbg:armhf zlib1g-dev:armhf libmbedtls-dev:armhf qemu-user qemu-user-binfmt binfmt-support"
+          arch: "armhf"
+          library-arch: arm-linux-gnueabihf
+          jit_target_arch: "armv6m"
+
         # s390x build
         - os: "ubuntu-24.04"
           cc: "s390x-linux-gnu-gcc"
@@ -351,6 +366,19 @@ jobs:
           arch: "s390x"
           library-arch: s390x-linux-gnu
 
+        # riscv32-ilp32 build
+        - os: "ubuntu-24.04"
+          cc: "riscv32-unknown-linux-gnu-gcc"
+          cxx: "riscv32-unknown-linux-gnu-g++"
+          cflags: "-O2"
+          otp: "28"
+          elixir_version: "1.17"
+          rebar3_version: "3.24.0"
+          cmake_opts_other: "-DAVM_WARNINGS_ARE_ERRORS=ON -DCMAKE_TOOLCHAIN_FILE=${RUNNER_TEMP}/riscv32_ilp32_toolchain.cmake"
+          compiler_pkgs: "qemu-user qemu-user-binfmt binfmt-support"
+          arch: "riscv32"
+          library-arch: riscv32-linux-gnu-ilp32
+
     env:
       ImageOS: ${{ matrix.container == 'ubuntu:20.04' && 'ubuntu20' || matrix.os == 'ubuntu-20.04' && 'ubuntu20' || matrix.os == 'ubuntu-22.04' && 'ubuntu22' || matrix.os == 'ubuntu-24.04' && 'ubuntu24' || 'ubuntu24' }}
       CC: ${{ matrix.cc }}
@@ -371,7 +399,7 @@ jobs:
       run: sudo dpkg --add-architecture i386
 
     - name: "Setup cross compilation architecture"
-      if: matrix.library-arch != ''
+      if: matrix.library-arch != '' && matrix.library-arch != 'riscv32-linux-gnu-ilp32'
       run: |
         sudo dpkg --add-architecture ${{ matrix.arch }}
         cat > ${RUNNER_TEMP}/cross-compile-sources.list <<EOF
@@ -396,6 +424,73 @@ jobs:
         set(MBEDTLS_LIBRARIES_DIR /usr/lib/${{ matrix.library-arch }})
         EOF
 
+    - name: "Setup cross compilation architecture (riscv32)"
+      if: matrix.library-arch == 'riscv32-linux-gnu-ilp32'
+      run: |
+        sudo dpkg --add-architecture ${{ matrix.arch }}
+
+        # Download toolchain and libraries from release
+        gh release download riscv-toolchain-2025.10.18 \
+          -R pguyot/crossbuild-essential-riscv32 \
+          --pattern 'riscv32-gnu-toolchain-ilp32_2025.10.18_amd64.deb' \
+          --pattern 'libc6-ilp32_2.39-0ubuntu1_riscv32.deb' \
+          --pattern 'libc6-dev-ilp32_2.39-0ubuntu1_riscv32.deb' \
+          --pattern 'libc6-dbg-ilp32_2.39-0ubuntu1_riscv32.deb' \
+          --pattern 'zlib1g-ilp32_1.3.1-0ubuntu1_riscv32.deb' \
+          --pattern 'zlib1g-dev-ilp32_1.3.1-0ubuntu1_riscv32.deb' \
+          --pattern 'libmbedcrypto7-ilp32_2.28.8-0ubuntu1_riscv32.deb' \
+          --pattern 'libmbedtls-dev-ilp32_2.28.8-0ubuntu1_riscv32.deb' \
+          --pattern 'libmbedtls14-ilp32_2.28.8-0ubuntu1_riscv32.deb' \
+          --pattern 'libmbedx509-1-ilp32_2.28.8-0ubuntu1_riscv32.deb'
+
+        # Install the toolchain
+        sudo dpkg -i riscv32-gnu-toolchain-ilp32_2025.10.18_amd64.deb
+
+        # Add to PATH for all subsequent steps
+        echo "/opt/riscv32-ilp32/bin" >> $GITHUB_PATH
+
+        # Install the libs
+        sudo dpkg -i libc6-ilp32_2.39-0ubuntu1_riscv32.deb
+        sudo dpkg -i libc6-dev-ilp32_2.39-0ubuntu1_riscv32.deb
+        sudo dpkg -i libc6-dbg-ilp32_2.39-0ubuntu1_riscv32.deb
+
+        sudo dpkg -i zlib1g-ilp32_1.3.1-0ubuntu1_riscv32.deb
+        sudo dpkg -i zlib1g-dev-ilp32_1.3.1-0ubuntu1_riscv32.deb
+
+        sudo dpkg -i libmbedcrypto7-ilp32_2.28.8-0ubuntu1_riscv32.deb
+        sudo dpkg -i libmbedtls-dev-ilp32_2.28.8-0ubuntu1_riscv32.deb
+        sudo dpkg -i libmbedtls14-ilp32_2.28.8-0ubuntu1_riscv32.deb
+        sudo dpkg -i libmbedx509-1-ilp32_2.28.8-0ubuntu1_riscv32.deb
+
+        sudo sed -i '/Types: deb/a Architectures: amd64' /etc/apt/sources.list.d/ubuntu.sources
+
+        cat > ${RUNNER_TEMP}/${{ matrix.arch }}_toolchain.cmake <<EOF
+        set(CMAKE_SYSTEM_NAME Linux)
+        set(CMAKE_C_LIBRARY_ARCHITECTURE ${{ matrix.library-arch }})
+
+        set(ZLIB_LIBRARY /usr/lib/${{ matrix.library-arch }}/libz.so)
+        set(ZLIB_INCLUDE_DIR /usr/riscv32-linux-gnu/include)
+
+        set(MBEDTLS_ROOT_DIR /usr)
+        set(MBEDTLS_LIBRARIES_DIR /usr/lib/${{ matrix.library-arch }})
+
+        include_directories(SYSTEM /usr/riscv32-linux-gnu/include)
+        EOF
+
+        # Set up qemu-user binfmt to find libraries
+        sudo ln -s /opt/riscv32-ilp32/sysroot/lib/ld-linux-riscv32-ilp32.so.1 /lib/ld-linux-riscv32-ilp32.so.1
+        sudo mkdir -p /usr/gnemul
+        sudo ln -s /opt/riscv32-ilp32/sysroot /usr/gnemul/qemu-riscv32
+
+        # Copy cross-compiled libraries to sysroot for qemu-user
+        sudo cp /usr/lib/${{ matrix.library-arch }}/libz.so.1* /opt/riscv32-ilp32/sysroot/lib/
+        sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedtls.so.14 /opt/riscv32-ilp32/sysroot/lib/
+        sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedcrypto.so.7 /opt/riscv32-ilp32/sysroot/lib/
+        sudo cp /usr/lib/${{ matrix.library-arch }}/libmbedx509.so.1 /opt/riscv32-ilp32/sysroot/lib/
+
+      env:
+        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
     - name: "APT update"
       run: sudo apt update -y
 
@@ -586,7 +681,7 @@ jobs:
 
     - name: "Test: test_jit.avm with valgrind"
       if: matrix.library-arch == '' && matrix.otp != '21' && matrix.otp != '22'
-      timeout-minutes: 30
+      timeout-minutes: 60
       working-directory: build
       run: |
         ulimit -c unlimited
diff --git a/.github/workflows/pico-build.yaml b/.github/workflows/pico-build.yaml
index 9cf01d045a..2e9de47109 100644
--- a/.github/workflows/pico-build.yaml
+++ b/.github/workflows/pico-build.yaml
@@ -41,7 +41,17 @@ jobs:
     strategy:
       matrix:
         board: ["pico", "pico_w", "pico2"]
+        platform: [""]
         language: ["cpp"]
+        jit: ["", "-DAVM_DISABLE_JIT=OFF"]
+        include:
+          - board: "pico2"
+            platform: "-DPICO_PLATFORM=rp2350-riscv"
+            jit: ""
+
+          - board: "pico2"
+            platform: "-DPICO_PLATFORM=rp2350-riscv"
+            jit: "-DAVM_DISABLE_JIT=OFF"
 
     steps:
     - name: Checkout repo
@@ -57,6 +67,16 @@ jobs:
             libnewlib-arm-none-eabi libstdc++-arm-none-eabi-newlib \
             erlang-base erlang-dev erlang-dialyzer erlang-eunit rebar3
 
+    - name: Install riscv32 toolchain
+      if: matrix.platform == '-DPICO_PLATFORM=rp2350-riscv'
+      run: |
+        sudo mkdir -p /opt
+        cd /opt
+        sudo wget https://github.com/raspberrypi/pico-sdk-tools/releases/download/v2.2.0-3/riscv-toolchain-15-x86_64-lin.tar.gz
+        sudo tar xzf riscv-toolchain-15-x86_64-lin.tar.gz
+        ls /opt
+        echo "/opt/riscv-toolchain-15-x86_64-lin/bin" >> $GITHUB_PATH
+
     - name: "Git config safe.directory for codeql"
       run: git config --global --add safe.directory /__w/AtomVM/AtomVM
 
@@ -74,7 +94,7 @@ jobs:
         set -euo pipefail
         mkdir build
         cd build
-        cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }}
+        cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.platform }} ${{ matrix.jit }}
         ninja
 
     - name: "Perform CodeQL Analysis"
@@ -97,7 +117,7 @@ jobs:
         mkdir build.nosmp
         cd build.nosmp
         # TODO: fix all warnings and enable -DAVM_WARNINGS_ARE_ERRORS=ON
-        cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} -DAVM_DISABLE_SMP=1
+        cmake .. -G Ninja -DPICO_BOARD=${{ matrix.board }} ${{ matrix.jit }} -DAVM_DISABLE_SMP=1
         cmake --build . --target=rp2_tests
 
     - name: Run tests with rp2040js
@@ -112,7 +132,7 @@ jobs:
         npx tsx run-tests.ts ../build.nosmp/tests/rp2_tests.uf2 ../build.nosmp/tests/test_erl_sources/rp2_test_modules.uf2
 
     - name: Build atomvmlib.uf2
-      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w'
+      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == ''
       shell: bash
       run: |
         set -euo pipefail
@@ -122,7 +142,7 @@ jobs:
         make atomvmlib-${{ matrix.board }}.uf2
 
     - name: Rename AtomVM and write sha256sum
-      if: startsWith(github.ref, 'refs/tags/')
+      if: startsWith(github.ref, 'refs/tags/') && matrix.platform == '' && matrix.jit == ''
       shell: bash
       run: |
         pushd src/platforms/rp2/build
@@ -137,7 +157,7 @@ jobs:
         popd
 
     - name: Rename atomvmlib and write sha256sum
-      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w'
+      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == ''
       shell: bash
       run: |
         pushd build/libs
@@ -148,7 +168,7 @@ jobs:
 
     - name: Release (Pico & Pico2)
       uses: softprops/action-gh-release@v1
-      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w'
+      if: startsWith(github.ref, 'refs/tags/') && matrix.board != 'pico_w' && matrix.platform == '' && matrix.jit == ''
       with:
         draft: true
         fail_on_unmatched_files: true
@@ -160,7 +180,7 @@ jobs:
 
     - name: Release (PicoW)
       uses: softprops/action-gh-release@v1
-      if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w'
+      if: startsWith(github.ref, 'refs/tags/') && matrix.board == 'pico_w' && matrix.platform == '' && matrix.jit == ''
       with:
         draft: true
         fail_on_unmatched_files: true
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6ba351373d..875a16349b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -33,6 +33,7 @@ option(AVM_DISABLE_SMP "Disable SMP." OFF)
 option(AVM_DISABLE_TASK_DRIVER "Disable task driver support." OFF)
 option(AVM_DISABLE_JIT "Disable just in time compilation." ON)
 option(AVM_ENABLE_PRECOMPILED "Enable execution of precompiled code, even if JIT is disabled." OFF)
+option(AVM_DISABLE_JIT_DWARF "Disable DWARF debug and profiling info for JIT." ON)
 option(AVM_USE_32BIT_FLOAT "Use 32 bit floats." OFF)
 option(AVM_VERBOSE_ABORT "Print module and line number on VM abort" OFF)
 option(AVM_RELEASE "Build an AtomVM release" OFF)
@@ -57,12 +58,14 @@ if (NOT AVM_DISABLE_JIT AND NOT DEFINED AVM_JIT_TARGET_ARCH)
         set(AVM_JIT_TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR})
     elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^arm64|aarch64$")
         set(AVM_JIT_TARGET_ARCH "aarch64")
+    elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$")
+        set(AVM_JIT_TARGET_ARCH "armv6m")
     else()
-        message(FATAL "JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}")
+        message(FATAL_ERROR "JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}")
     endif()
 endif()
 
-set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON")
+set(AVM_PRECOMPILED_TARGETS "x86_64;aarch64;armv6m;armv6m+float32;riscv32" CACHE STRING "Targets to precompile code to if AVM_DISABLE_JIT is OFF or AVM_ENABLE_PRECOMPILED is ON")
 
 if((${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") OR
    (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") OR
diff --git a/CMakeModules/BuildErlang.cmake b/CMakeModules/BuildErlang.cmake
index e19fb6d622..986a2900a1 100644
--- a/CMakeModules/BuildErlang.cmake
+++ b/CMakeModules/BuildErlang.cmake
@@ -22,7 +22,6 @@ macro(pack_archive avm_name)
 
     set(multiValueArgs ERLC_FLAGS MODULES)
     cmake_parse_arguments(PACK_ARCHIVE "" "" "${multiValueArgs}" ${ARGN})
-    list(JOIN PACK_ARCHIVE_ERLC_FLAGS " " PACK_ARCHIVE_ERLC_FLAGS)
     foreach(module_name IN LISTS ${PACK_ARCHIVE_MODULES} PACK_ARCHIVE_MODULES PACK_ARCHIVE_UNPARSED_ARGUMENTS)
         add_custom_command(
             OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam
@@ -77,10 +76,13 @@ macro(pack_precompiled_archive avm_name)
         else()
             set(jit_deps "jit")
         endif()
-        foreach(jit_target_arch ${AVM_PRECOMPILED_TARGETS})
+        foreach(jit_target_arch_variant ${AVM_PRECOMPILED_TARGETS})
             set(pack_precompile_archive_${avm_name}_beams "")
+            # Extract base architecture for module dependencies
+            string(REGEX REPLACE "\\+.*$" "" jit_target_arch "${jit_target_arch_variant}")
             set(jit_compiler_modules
                 ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit.beam
+                ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam
                 ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_precompile.beam
                 ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_stream_binary.beam
                 ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${jit_target_arch}.beam
@@ -89,14 +91,14 @@ macro(pack_precompiled_archive avm_name)
 
             foreach(module_name IN LISTS ${PACK_ARCHIVE_MODULES} PACK_ARCHIVE_MODULES PACK_ARCHIVE_UNPARSED_ARGUMENTS)
                 add_custom_command(
-                    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/${module_name}.beam
-                    COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/
-                        && erl -pa ${CMAKE_BINARY_DIR}/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${jit_target_arch} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/ ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam
+                    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/${module_name}.beam
+                    COMMAND mkdir -p ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/
+                        && erl -pa ${CMAKE_BINARY_DIR}/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${jit_target_arch_variant} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/ ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam
                     DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/beams/${module_name}.beam ${jit_compiler_modules} ${jit_deps}
-                    COMMENT "Compiling ${module_name}.beam to ${jit_target_arch}"
+                    COMMENT "Compiling ${module_name}.beam to ${jit_target_arch_variant}"
                     VERBATIM
                 )
-                set(pack_precompile_archive_${avm_name}_beams ${pack_precompile_archive_${avm_name}_beams} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch}/${module_name}.beam)
+                set(pack_precompile_archive_${avm_name}_beams ${pack_precompile_archive_${avm_name}_beams} ${CMAKE_CURRENT_BINARY_DIR}/beams/${jit_target_arch_variant}/${module_name}.beam)
             endforeach()
 
             if(AVM_RELEASE)
@@ -106,20 +108,20 @@ macro(pack_precompiled_archive avm_name)
             endif()
 
             add_custom_command(
-                OUTPUT ${avm_name}-${jit_target_arch}.avm
+                OUTPUT ${avm_name}-${jit_target_arch_variant}.avm
                 DEPENDS ${pack_precompile_archive_${avm_name}_beams} PackBEAM
-                COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch}.avm ${pack_precompile_archive_${avm_name}_beams}
-                COMMENT "Packing archive ${avm_name}-${jit_target_arch}.avm"
+                COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch_variant}.avm ${pack_precompile_archive_${avm_name}_beams}
+                COMMENT "Packing archive ${avm_name}-${jit_target_arch_variant}.avm"
                 VERBATIM
             )
             add_custom_target(
-                ${avm_name}_${jit_target_arch} ALL
-                DEPENDS ${avm_name}-${jit_target_arch}.avm
+                ${avm_name}_${jit_target_arch_variant} ALL
+                DEPENDS ${avm_name}-${jit_target_arch_variant}.avm
             )
             # Ensure source beams are built before precompilation
-            add_dependencies(${avm_name}_${jit_target_arch} ${avm_name}_emu)
+            add_dependencies(${avm_name}_${jit_target_arch_variant} ${avm_name}_emu)
             # Make main target depend on precompiled targets
-            add_dependencies(${avm_name} ${avm_name}_${jit_target_arch})
+            add_dependencies(${avm_name} ${avm_name}_${jit_target_arch_variant})
         endforeach()
     endif()
 endmacro()
@@ -159,23 +161,23 @@ macro(pack_lib avm_name)
     set(target_deps ${avm_name}.avm)
 
     if(NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED)
-        foreach(jit_target_arch ${AVM_PRECOMPILED_TARGETS})
+        foreach(jit_target_arch_variant ${AVM_PRECOMPILED_TARGETS})
             # Build JIT archives list for this specific target architecture
-            set(pack_lib_${avm_name}_jit_archives_${jit_target_arch} ${CMAKE_BINARY_DIR}/libs/jit/src/jit-${jit_target_arch}.avm)
+            set(pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant} ${CMAKE_BINARY_DIR}/libs/jit/src/jit-${jit_target_arch_variant}.avm)
             foreach(archive_name ${ARGN})
                 if(${archive_name} STREQUAL "estdlib")
-                    set(pack_lib_${avm_name}_jit_archives_${jit_target_arch} ${pack_lib_${avm_name}_jit_archives_${jit_target_arch}} ${CMAKE_BINARY_DIR}/libs/${archive_name}/src/${archive_name}-${jit_target_arch}.avm)
+                    set(pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant} ${pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant}} ${CMAKE_BINARY_DIR}/libs/${archive_name}/src/${archive_name}-${jit_target_arch_variant}.avm)
                 endif()
             endforeach()
 
             add_custom_command(
-                OUTPUT ${avm_name}-${jit_target_arch}.avm
+                OUTPUT ${avm_name}-${jit_target_arch_variant}.avm
                 DEPENDS ${pack_lib_${avm_name}_archive_targets} PackBEAM
-                COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch}.avm ${pack_lib_${avm_name}_jit_archives_${jit_target_arch}} ${pack_lib_${avm_name}_archives}
-                COMMENT "Packing lib ${avm_name}-${jit_target_arch}.avm"
+                COMMAND ${CMAKE_BINARY_DIR}/tools/packbeam/PackBEAM -a ${INCLUDE_LINES} ${avm_name}-${jit_target_arch_variant}.avm ${pack_lib_${avm_name}_jit_archives_${jit_target_arch_variant}} ${pack_lib_${avm_name}_archives}
+                COMMENT "Packing lib ${avm_name}-${jit_target_arch_variant}.avm"
                 VERBATIM
             )
-            set(target_deps ${target_deps} ${avm_name}-${jit_target_arch}.avm)
+            set(target_deps ${target_deps} ${avm_name}-${jit_target_arch_variant}.avm)
         endforeach()
     endif()
     add_custom_command(
@@ -194,6 +196,24 @@ macro(pack_lib avm_name)
     )
     set(target_deps ${target_deps} ${avm_name}-pico.uf2 ${avm_name}-pico2.uf2)
 
+    if((NOT AVM_DISABLE_JIT OR AVM_ENABLE_PRECOMPILED) AND "armv6m" IN_LIST AVM_PRECOMPILED_TARGETS)
+        add_custom_command(
+            OUTPUT ${avm_name}-armv6m-pico.uf2
+            DEPENDS ${avm_name}-armv6m.avm UF2Tool
+            COMMAND ${CMAKE_BINARY_DIR}/tools/uf2tool/uf2tool create -o ${avm_name}-armv6m-pico.uf2 -s 0x10100000 ${avm_name}-armv6m.avm
+            COMMENT "Creating UF2 file ${avm_name}-armv6m-pico.uf2"
+            VERBATIM
+        )
+        add_custom_command(
+            OUTPUT ${avm_name}-armv6m-pico2.uf2
+            DEPENDS ${avm_name}-armv6m.avm UF2Tool
+            COMMAND ${CMAKE_BINARY_DIR}/tools/uf2tool/uf2tool create -o ${avm_name}-armv6m-pico2.uf2 -f data -s 0x10100000 ${avm_name}-armv6m.avm
+            COMMENT "Creating UF2 file ${avm_name}-armv6m-pico2.uf2"
+            VERBATIM
+        )
+        set(target_deps ${target_deps} ${avm_name}-armv6m-pico.uf2 ${avm_name}-armv6m-pico2.uf2)
+    endif()
+
     add_custom_target(
         ${avm_name} ALL
         DEPENDS ${target_deps}
diff --git a/libs/estdlib/src/code_server.erl b/libs/estdlib/src/code_server.erl
index 427d5fa529..69aa359327 100644
--- a/libs/estdlib/src/code_server.erl
+++ b/libs/estdlib/src/code_server.erl
@@ -174,11 +174,12 @@ load(Module) ->
                             BackendModule,
                             BackendState0
                         ),
-                        Stream1 = BackendModule:stream(BackendState1),
+                        BackendState2 = BackendModule:flush(BackendState1),
+                        Stream1 = BackendModule:stream(BackendState2),
                         code_server:set_native_code(Module, LabelsCount, Stream1),
                         End = erlang:system_time(millisecond),
                         io:format("~B ms (bytecode: ~B bytes, native code: ~B bytes)\n", [
-                            End - Start, byte_size(Code), BackendModule:offset(BackendState1)
+                            End - Start, byte_size(Code), BackendModule:offset(BackendState2)
                         ])
                     catch
                         T:V:S ->
diff --git a/libs/jit/include/jit.hrl b/libs/jit/include/jit.hrl
index 427fa40aec..81ff1c42c2 100644
--- a/libs/jit/include/jit.hrl
+++ b/libs/jit/include/jit.hrl
@@ -22,7 +22,10 @@
 
 -define(JIT_ARCH_X86_64, 1).
 -define(JIT_ARCH_AARCH64, 2).
+-define(JIT_ARCH_ARMV6M, 3).
+-define(JIT_ARCH_RISCV32, 4).
 
 -define(JIT_VARIANT_PIC, 1).
+-define(JIT_VARIANT_FLOAT32, 2).
 
 -define(MAX_REG, 16).
diff --git a/libs/jit/src/CMakeLists.txt b/libs/jit/src/CMakeLists.txt
index a5810feff9..df155f9d0d 100644
--- a/libs/jit/src/CMakeLists.txt
+++ b/libs/jit/src/CMakeLists.txt
@@ -24,16 +24,27 @@ include(BuildErlang)
 
 set(ERLANG_MODULES
     jit
+    jit_dwarf
     jit_precompile
     jit_stream_binary
     jit_stream_mmap
     jit_aarch64
     jit_aarch64_asm
+    jit_armv6m
+    jit_armv6m_asm
+    jit_riscv32
+    jit_riscv32_asm
     jit_x86_64
     jit_x86_64_asm
 )
 
-pack_precompiled_archive(jit ${ERLANG_MODULES})
+if (NOT AVM_DISABLE_JIT_DWARF)
+    set(erlc_flags -DJIT_DWARF)
+else()
+    set(erlc_flags)
+endif()
+
+pack_precompiled_archive(jit ERLC_FLAGS ${erlc_flags} MODULES ${ERLANG_MODULES})
 
 include(../../../version.cmake)
 
diff --git a/libs/jit/src/compact_term.hrl b/libs/jit/src/compact_term.hrl
new file mode 100644
index 0000000000..3739b4404d
--- /dev/null
+++ b/libs/jit/src/compact_term.hrl
@@ -0,0 +1,52 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+
+-define(COMPACT_LITERAL, 0).
+-define(COMPACT_INTEGER, 1).
+-define(COMPACT_ATOM, 2).
+-define(COMPACT_XREG, 3).
+-define(COMPACT_YREG, 4).
+-define(COMPACT_LABEL, 5).
+-define(COMPACT_EXTENDED, 7).
+-define(COMPACT_LARGE_LITERAL, 8).
+-define(COMPACT_LARGE_INTEGER, 9).
+-define(COMPACT_LARGE_ATOM, 10).
+-define(COMPACT_LARGE_XREG, 11).
+-define(COMPACT_LARGE_YREG, 12).
+
+% OTP-20+ format
+-define(COMPACT_EXTENDED_LIST, 16#17).
+-define(COMPACT_EXTENDED_FP_REGISTER, 16#27).
+-define(COMPACT_EXTENDED_ALLOCATION_LIST, 16#37).
+-define(COMPACT_EXTENDED_LITERAL, 16#47).
+% https://github.com/erlang/otp/blob/master/lib/compiler/src/beam_asm.erl#L433
+-define(COMPACT_EXTENDED_TYPED_REGISTER, 16#57).
+
+-define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_WORDS, 0).
+-define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FLOATS, 1).
+-define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FUNS, 2).
+
+-define(COMPACT_LARGE_IMM_MASK, 16#18).
+-define(COMPACT_11BITS_VALUE, 16#8).
+-define(COMPACT_NBITS_VALUE, 16#18).
+
+-define(COMPACT_LARGE_INTEGER_11BITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_11BITS_VALUE)).
+-define(COMPACT_LARGE_INTEGER_NBITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_NBITS_VALUE)).
diff --git a/libs/jit/src/jit.erl b/libs/jit/src/jit.erl
index d30f52e7ed..83cb0c0d8d 100644
--- a/libs/jit/src/jit.erl
+++ b/libs/jit/src/jit.erl
@@ -24,13 +24,15 @@
     stream/1,
     backend/1,
     beam_chunk_header/3,
-    compile/6
+    compile/6,
+    decode_value64/1
 ]).
 
 % NIFs
 -export([
     stream_module/0,
-    backend_module/0
+    backend_module/0,
+    variant/0
 ]).
 
 -export_type([
@@ -45,38 +47,7 @@
 -include("opcodes.hrl").
 -include("primitives.hrl").
 -include("term.hrl").
-
--define(COMPACT_LITERAL, 0).
--define(COMPACT_INTEGER, 1).
--define(COMPACT_ATOM, 2).
--define(COMPACT_XREG, 3).
--define(COMPACT_YREG, 4).
--define(COMPACT_LABEL, 5).
--define(COMPACT_EXTENDED, 7).
--define(COMPACT_LARGE_LITERAL, 8).
--define(COMPACT_LARGE_INTEGER, 9).
--define(COMPACT_LARGE_ATOM, 10).
--define(COMPACT_LARGE_XREG, 11).
--define(COMPACT_LARGE_YREG, 12).
-
-% OTP-20+ format
--define(COMPACT_EXTENDED_LIST, 16#17).
--define(COMPACT_EXTENDED_FP_REGISTER, 16#27).
--define(COMPACT_EXTENDED_ALLOCATION_LIST, 16#37).
--define(COMPACT_EXTENDED_LITERAL, 16#47).
-% https://github.com/erlang/otp/blob/master/lib/compiler/src/beam_asm.erl#L433
--define(COMPACT_EXTENDED_TYPED_REGISTER, 16#57).
-
--define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_WORDS, 0).
--define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FLOATS, 1).
--define(COMPACT_EXTENDED_ALLOCATOR_LIST_TAG_FUNS, 2).
-
--define(COMPACT_LARGE_IMM_MASK, 16#18).
--define(COMPACT_11BITS_VALUE, 16#8).
--define(COMPACT_NBITS_VALUE, 16#18).
-
--define(COMPACT_LARGE_INTEGER_11BITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_11BITS_VALUE)).
--define(COMPACT_LARGE_INTEGER_NBITS, (?COMPACT_LARGE_INTEGER bor ?COMPACT_NBITS_VALUE)).
+-include("compact_term.hrl").
 
 -define(BOXED_FUN_SIZE, 3).
 -define(FLOAT_SIZE_64, 2).
@@ -99,7 +70,8 @@
     labels_count :: pos_integer(),
     atom_resolver :: fun((integer()) -> atom()),
     literal_resolver :: fun((integer()) -> any()),
-    type_resolver :: fun((integer()) -> any())
+    type_resolver :: fun((integer()) -> any()),
+    tail_cache :: [{tuple(), non_neg_integer()}]
 }).
 
 -type stream() :: any().
@@ -112,6 +84,20 @@
 -define(ASSERT_ALL_NATIVE_FREE(St), ok).
 -define(ASSERT(Expr), ok).
 
+-ifdef(JIT_DWARF).
+-define(DWARF_OPCODE(MMod, MSt, Opcode), MMod:dwarf_opcode(MSt, Opcode)).
+-define(DWARF_LABEL(MMod, MSt, Label), MMod:dwarf_label(MSt, Label)).
+-define(DWARF_FUNCTION(MMod, MSt, FunctionName, Arity),
+    MMod:dwarf_function(MSt, (State0#state.atom_resolver)(FunctionName), Arity)
+).
+-define(DWARF_LINE(MMod, MSt, Line), MMod:dwarf_line(MSt, Line)).
+-else.
+-define(DWARF_OPCODE(_MMod, MSt, _Opcode), MSt).
+-define(DWARF_LABEL(_MMod, MSt, _Label), MSt).
+-define(DWARF_FUNCTION(_MMod, MSt, _FunctionName, _Arity), MSt).
+-define(DWARF_LINE(_MMod, MSt, _Line), MSt).
+-endif.
+
 %%-----------------------------------------------------------------------------
 %% @param   LabelsCount number of labels
 %% @param   Arch code for the architecture
@@ -141,7 +127,8 @@ compile(
         labels_count = LabelsCount,
         atom_resolver = AtomResolver,
         literal_resolver = LiteralResolver,
-        type_resolver = TypeResolver
+        type_resolver = TypeResolver,
+        tail_cache = []
     },
     {State1, MSt2} = first_pass(Opcodes, MMod, MSt1, State0),
     MSt3 = second_pass(MMod, MSt2, State1),
@@ -159,32 +146,46 @@ compile(CodeChunk, _AtomResolver, _LiteralResolver, _TypeResolver, _MMod, _MSt)
     error(badarg, [CodeChunk]).
 
 % 1
-first_pass(
-    <<?OP_LABEL, Rest0/binary>>, MMod, MSt0, State0
-) ->
-    ?ASSERT_ALL_NATIVE_FREE(MSt0),
+first_pass(<<?OP_LABEL, Rest0/binary>>, MMod, MSt, State0) ->
     {Label, Rest1} = decode_literal(Rest0),
     ?TRACE("OP_LABEL ~p\n", [Label]),
+    MSt0 = ?DWARF_LABEL(MMod, MSt, Label),
+    ?ASSERT_ALL_NATIVE_FREE(MSt0),
     MSt1 = MMod:add_label(MSt0, Label),
     ?ASSERT_ALL_NATIVE_FREE(MSt1),
     first_pass(Rest1, MMod, MSt1, State0);
 % 2
-first_pass(<<?OP_FUNC_INFO, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_FUNC_INFO, Rest0/binary>>, MMod, MSt, #state{tail_cache = TC} = State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"func_info/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {_ModuleAtom, Rest1} = decode_atom(Rest0),
     {_FunctionName, Rest2} = decode_atom(Rest1),
     {_Arity, Rest3} = decode_literal(Rest2),
     ?TRACE("OP_FUNC_INFO ~p, ~p, ~p\n", [_ModuleAtom, _FunctionName, _Arity]),
-    % Implement function clause at the previous label. (TODO: optimize it out to save space)
-    MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RAISE_ERROR, [
-        ctx, jit_state, offset, ?FUNCTION_CLAUSE_ATOM
-    ]),
-    ?ASSERT_ALL_NATIVE_FREE(MSt1),
-    first_pass(Rest3, MMod, MSt1, State0);
+    % Implement function clause at the previous label; cache the raise tail after the offset load.
+    Offset = MMod:offset(MSt0),
+    {MSt1, OffsetReg} = MMod:move_to_native_register(MSt0, Offset),
+    TailCacheKey = {call_primitive_last, ?PRIM_RAISE_ERROR, [OffsetReg, ?FUNCTION_CLAUSE_ATOM]},
+    State1 =
+        case lists:keyfind(TailCacheKey, 1, TC) of
+            false ->
+                MSt3 = MMod:call_primitive_last(MSt1, ?PRIM_RAISE_ERROR, [
+                    ctx, jit_state, {free, OffsetReg}, ?FUNCTION_CLAUSE_ATOM
+                ]),
+                State0#state{tail_cache = [{TailCacheKey, MMod:offset(MSt1)} | TC]};
+            {TailCacheKey, CacheOffset} ->
+                MSt2 = MMod:jump_to_offset(MSt1, CacheOffset),
+                MSt3 = MMod:free_native_registers(MSt2, [OffsetReg]),
+                State0
+        end,
+    MSt4 = ?DWARF_FUNCTION(MMod, MSt3, _FunctionName, _Arity),
+    ?ASSERT_ALL_NATIVE_FREE(MSt4),
+    first_pass(Rest3, MMod, MSt4, State1);
 % 3
 first_pass(
-    <<?OP_INT_CALL_END>>, MMod, MSt0, #state{labels_count = LabelsCount} = State
+    <<?OP_INT_CALL_END>>, MMod, MSt, #state{labels_count = LabelsCount} = State
 ) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"int_call_end/0">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     ?TRACE("OP_INT_CALL_END\n", []),
     MSt1 = MMod:add_label(MSt0, LabelsCount),
@@ -193,7 +194,8 @@ first_pass(
     ]),
     {State, MSt2};
 % 4
-first_pass(<<?OP_CALL, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CALL, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {_Arity, Rest1} = decode_literal(Rest0),
     {Label, Rest2} = decode_label(Rest1),
@@ -202,28 +204,61 @@ first_pass(<<?OP_CALL, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt1),
     first_pass(Rest2, MMod, MSt1, State0);
 % 5
-first_pass(<<?OP_CALL_LAST, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CALL_LAST, Rest0/binary>>, MMod, MSt, #state{tail_cache = TC} = State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_last/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {_Arity, Rest1} = decode_literal(Rest0),
     {Label, Rest2} = decode_label(Rest1),
     {NWords, Rest3} = decode_literal(Rest2),
     ?TRACE("OP_CALL_LAST ~p, ~p, ~p\n", [_Arity, Label, NWords]),
-    MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}),
-    MSt2 = MMod:increment_sp(MSt1, NWords + 1),
-    MSt3 = MMod:call_only_or_schedule_next(MSt2, Label),
+    % Reuse a previously emitted identical call_last sequence when one is cached.
+    SeqKey = {op_call_last, NWords, Label},
+    {MSt3, State1} =
+        case lists:keyfind(SeqKey, 1, TC) of
+            false ->
+                % Remember where this sequence starts so later occurrences can jump here.
+                SeqOffset = MMod:offset(MSt0),
+                MSt1 = MMod:move_to_cp(MSt0, {y_reg, NWords}),
+                MSt2 = MMod:increment_sp(MSt1, NWords + 1),
+                % The final call uses the same cache key as OP_CALL_ONLY.
+                CallKey = {op_call_only, Label},
+                case lists:keyfind(CallKey, 1, TC) of
+                    false ->
+                        CallOffset = MMod:offset(MSt2),
+                        Emitted = MMod:call_only_or_schedule_next(MSt2, Label),
+                        NewTC = [{CallKey, CallOffset}, {SeqKey, SeqOffset} | TC],
+                        {Emitted, State0#state{tail_cache = NewTC}};
+                    {CallKey, CallOffset} ->
+                        Jumped = MMod:jump_to_offset(MSt2, CallOffset),
+                        {Jumped, State0#state{tail_cache = [{SeqKey, SeqOffset} | TC]}}
+                end;
+            {SeqKey, SeqOffset} ->
+                {MMod:jump_to_offset(MSt0, SeqOffset), State0}
+        end,
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
-    first_pass(Rest3, MMod, MSt3, State0);
+    first_pass(Rest3, MMod, MSt3, State1);
 % 6
-first_pass(<<?OP_CALL_ONLY, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CALL_ONLY, Rest0/binary>>, MMod, MSt, #state{tail_cache = TC} = State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_only/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {_Arity, Rest1} = decode_literal(Rest0),
     {Label, Rest2} = decode_label(Rest1),
     ?TRACE("OP_CALL_ONLY ~p, ~p\n", [_Arity, Label]),
-    MSt1 = MMod:call_only_or_schedule_next(MSt0, Label),
+    % Reuse code already emitted for the same target label when it is cached.
+    CacheKey = {op_call_only, Label},
+    {MSt1, State1} =
+        case lists:keyfind(CacheKey, 1, TC) of
+            false ->
+                NewTC = [{CacheKey, MMod:offset(MSt0)} | TC],
+                {MMod:call_only_or_schedule_next(MSt0, Label), State0#state{tail_cache = NewTC}};
+            {CacheKey, Cached} ->
+                {MMod:jump_to_offset(MSt0, Cached), State0}
+        end,
     ?ASSERT_ALL_NATIVE_FREE(MSt1),
-    first_pass(Rest2, MMod, MSt1, State0);
+    first_pass(Rest2, MMod, MSt1, State1);
 % 7
-first_pass(<<?OP_CALL_EXT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CALL_EXT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_ext/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Arity, Rest1} = decode_literal(Rest0),
     {Index, Rest2} = decode_literal(Rest1),
@@ -235,7 +270,8 @@ first_pass(<<?OP_CALL_EXT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 8
-first_pass(<<?OP_CALL_EXT_LAST, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CALL_EXT_LAST, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_ext_last/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Arity, Rest1} = decode_literal(Rest0),
     {Index, Rest2} = decode_literal(Rest1),
@@ -248,7 +284,8 @@ first_pass(<<?OP_CALL_EXT_LAST, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest3, MMod, MSt2, State0);
 % 9
-first_pass(<<?OP_BIF0, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BIF0, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bif0/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Bif, Rest1} = decode_literal(Rest0),
     {MSt1, FuncPtr} = MMod:call_primitive(MSt0, ?PRIM_GET_IMPORTED_BIF, [
@@ -264,7 +301,8 @@ first_pass(<<?OP_BIF0, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest2, MMod, MSt5, State0);
 % 10
-first_pass(<<?OP_BIF1, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BIF1, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bif1/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {FailLabel, Rest1} = decode_label(Rest0),
     {Bif, Rest2} = decode_literal(Rest1),
@@ -281,7 +319,8 @@ first_pass(<<?OP_BIF1, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest4, MMod, MSt5, State0);
 % 11
-first_pass(<<?OP_BIF2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BIF2, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bif2/5">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {FailLabel, Rest1} = decode_label(Rest0),
     {Bif, Rest2} = decode_literal(Rest1),
@@ -299,7 +338,8 @@ first_pass(<<?OP_BIF2, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest5, MMod, MSt6, State0);
 % 12
-first_pass(<<?OP_ALLOCATE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_ALLOCATE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"allocate/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {StackNeed, Rest1} = decode_literal(Rest0),
     {Live, Rest2} = decode_literal(Rest1),
@@ -311,7 +351,8 @@ first_pass(<<?OP_ALLOCATE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 13
-first_pass(<<?OP_ALLOCATE_HEAP, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_ALLOCATE_HEAP, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"allocate_heap/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {StackNeed, Rest1} = decode_literal(Rest0),
     {HeapNeed, Rest2} = decode_allocator_list(MMod, Rest1),
@@ -324,7 +365,8 @@ first_pass(<<?OP_ALLOCATE_HEAP, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest3, MMod, MSt2, State0);
 % 16
-first_pass(<<?OP_TEST_HEAP, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_TEST_HEAP, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"test_heap/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {HeapNeed, Rest1} = decode_allocator_list(MMod, Rest0),
     {Live, Rest2} = decode_literal(Rest1),
@@ -336,7 +378,8 @@ first_pass(<<?OP_TEST_HEAP, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 18
-first_pass(<<?OP_DEALLOCATE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_DEALLOCATE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"deallocate/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {NWords, Rest1} = decode_literal(Rest0),
     ?TRACE("OP_DEALLOCATE ~p\n", [NWords]),
@@ -347,16 +390,45 @@ first_pass(<<?OP_DEALLOCATE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest1, MMod, MSt2, State0);
 % 19
-first_pass(<<?OP_RETURN, Rest/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_RETURN, Rest/binary>>, MMod, MSt, #state{tail_cache = TC} = State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"return/0">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     ?TRACE("OP_RETURN\n", []),
-    MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RETURN, [
-        ctx, jit_state
-    ]),
-    ?ASSERT_ALL_NATIVE_FREE(MSt1),
-    first_pass(Rest, MMod, MSt1, State0);
+    % Returns that stay inside this module jump straight to the continuation;
+    % every other return goes through the PRIM_RETURN primitive.
+    {MSt1, CpReg} = MMod:move_to_native_register(MSt0, cp),
+    {MSt2, ThisModuleReg} = MMod:get_module_index(MSt1),
+    % cp >> 24 is the module index stored in cp
+    {MSt3, CpModuleReg} = MMod:shift_right(MSt2, CpReg, 24),
+    % Compare it with the index of the current module
+    SameModule = {{free, CpModuleReg}, '==', {free, ThisModuleReg}},
+    % Same module: fast intra-module return
+    FastReturn = fun(BSt0) ->
+        % (cp band 16#FFFFFF) >> 2 is the offset of the continuation
+        BSt1 = MMod:and_(BSt0, CpReg, 16#FFFFFF),
+        {BSt2, ContReg} = MMod:shift_right(BSt1, {free, CpReg}, 2),
+        % Jump to continuation (this is a tail call)
+        MMod:jump_to_continuation(BSt2, {free, ContReg})
+    end,
+    MSt4 = MMod:if_block(MSt3, SameModule, FastReturn),
+    % CpReg was released only inside the block above, so free it here as well.
+    MSt5 = MMod:free_native_registers(MSt4, [CpReg]),
+    % Different module: slow path, shared between returns via the tail cache.
+    SlowKey = {call_primitive_last, ?PRIM_RETURN},
+    {MSt6, State1} =
+        case lists:keyfind(SlowKey, 1, TC) of
+            false ->
+                SlowOffset = MMod:offset(MSt5),
+                Emitted = MMod:call_primitive_last(MSt5, ?PRIM_RETURN, [ctx, jit_state]),
+                {Emitted, State0#state{tail_cache = [{SlowKey, SlowOffset} | TC]}};
+            {SlowKey, Cached} ->
+                {MMod:jump_to_offset(MSt5, Cached), State0}
+        end,
+    ?ASSERT_ALL_NATIVE_FREE(MSt6),
+    first_pass(Rest, MMod, MSt6, State1);
 % 20
-first_pass(<<?OP_SEND, Rest/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_SEND, Rest/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"send/0">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     ?TRACE("OP_SEND\n", []),
     {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_SEND, [
@@ -366,7 +438,8 @@ first_pass(<<?OP_SEND, Rest/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest, MMod, MSt2, State0);
 % 21
-first_pass(<<?OP_REMOVE_MESSAGE, Rest/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_REMOVE_MESSAGE, Rest/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"remove_message/0">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     ?TRACE("OP_REMOVE_MESSAGE\n", []),
     {MSt1, Reg1} = MMod:call_primitive(MSt0, ?PRIM_CANCEL_TIMEOUT, [
@@ -384,7 +457,8 @@ first_pass(<<?OP_REMOVE_MESSAGE, Rest/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest, MMod, MSt6, State0);
 % 22
-first_pass(<<?OP_TIMEOUT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_TIMEOUT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"timeout/0">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     ?TRACE("OP_TIMEOUT\n", []),
     {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_TIMEOUT, [
@@ -394,7 +468,8 @@ first_pass(<<?OP_TIMEOUT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest0, MMod, MSt2, State0);
 % 23
-first_pass(<<?OP_LOOP_REC, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_LOOP_REC, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"loop_rec/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
@@ -410,7 +485,8 @@ first_pass(<<?OP_LOOP_REC, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt7),
     first_pass(Rest2, MMod, MSt7, State0);
 % 24
-first_pass(<<?OP_LOOP_REC_END, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_LOOP_REC_END, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"loop_rec_end/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     ?TRACE("OP_LOOP_REC_END ~p\n", [Label]),
@@ -426,7 +502,8 @@ first_pass(<<?OP_LOOP_REC_END, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest1, MMod, MSt5, State0);
 % 25
-first_pass(<<?OP_WAIT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_WAIT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"wait/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     ?TRACE("OP_WAIT ~p\n", [Label]),
@@ -435,7 +512,8 @@ first_pass(<<?OP_WAIT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest1, MMod, MSt2, State0);
 % 26
-first_pass(<<?OP_WAIT_TIMEOUT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_WAIT_TIMEOUT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"wait_timeout/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, OffsetRef0} = MMod:set_continuation_to_offset(MSt0),
@@ -461,7 +539,8 @@ first_pass(<<?OP_WAIT_TIMEOUT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt9),
     first_pass(Rest2, MMod, MSt9, State0);
 % 39
-first_pass(<<?OP_IS_LT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_LT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_lt/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -477,7 +556,8 @@ first_pass(<<?OP_IS_LT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest3, MMod, MSt5, State0);
 % 40
-first_pass(<<?OP_IS_GE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_GE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_ge/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -493,7 +573,8 @@ first_pass(<<?OP_IS_GE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest3, MMod, MSt5, State0);
 % 41
-first_pass(<<?OP_IS_EQUAL, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_EQUAL, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_eq/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -512,7 +593,8 @@ first_pass(<<?OP_IS_EQUAL, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest3, MMod, MSt5, State0);
 % 42
-first_pass(<<?OP_IS_NOT_EQUAL, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_NOT_EQUAL, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_ne/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -526,7 +608,8 @@ first_pass(<<?OP_IS_NOT_EQUAL, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest3, MMod, MSt5, State0);
 % 43
-first_pass(<<?OP_IS_EQ_EXACT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_EQ_EXACT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_eq_exact/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -555,7 +638,8 @@ first_pass(<<?OP_IS_EQ_EXACT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest3, MMod, MSt5, State0);
 % 44
-first_pass(<<?OP_IS_NOT_EQ_EXACT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_NOT_EQ_EXACT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_ne_exact/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -580,7 +664,8 @@ first_pass(<<?OP_IS_NOT_EQ_EXACT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest3, MMod, MSt5, State0);
 % 45
-first_pass(<<?OP_IS_INTEGER, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_INTEGER, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_integer/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -591,7 +676,8 @@ first_pass(<<?OP_IS_INTEGER, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 46
-first_pass(<<?OP_IS_FLOAT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_FLOAT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_float/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -600,7 +686,8 @@ first_pass(<<?OP_IS_FLOAT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 47
-first_pass(<<?OP_IS_NUMBER, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_NUMBER, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_number/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -633,7 +720,8 @@ first_pass(<<?OP_IS_NUMBER, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt4),
     first_pass(Rest2, MMod, MSt4, State0);
 % 48
-first_pass(<<?OP_IS_ATOM, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_ATOM, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_atom/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -645,7 +733,8 @@ first_pass(<<?OP_IS_ATOM, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest2, MMod, MSt3, State0);
 % 49
-first_pass(<<?OP_IS_PID, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_PID, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_pid/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -656,7 +745,8 @@ first_pass(<<?OP_IS_PID, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 50
-first_pass(<<?OP_IS_REFERENCE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_REFERENCE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_reference/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -678,7 +768,8 @@ first_pass(<<?OP_IS_REFERENCE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt8),
     first_pass(Rest2, MMod, MSt8, State0);
 % 51
-first_pass(<<?OP_IS_PORT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_PORT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_port/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -689,7 +780,8 @@ first_pass(<<?OP_IS_PORT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 52
-first_pass(<<?OP_IS_NIL, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_NIL, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_nil/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -700,7 +792,8 @@ first_pass(<<?OP_IS_NIL, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt4),
     first_pass(Rest2, MMod, MSt4, State0);
 % 53
-first_pass(<<?OP_IS_BINARY, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_BINARY, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_binary/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -710,7 +803,8 @@ first_pass(<<?OP_IS_BINARY, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest2, MMod, MSt3, State0);
 % 55
-first_pass(<<?OP_IS_LIST, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_LIST, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_list/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -728,7 +822,8 @@ first_pass(<<?OP_IS_LIST, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest2, MMod, MSt3, State0);
 % 56
-first_pass(<<?OP_IS_NONEMPTY_LIST, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_NONEMPTY_LIST, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_nonempty_list/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -740,7 +835,8 @@ first_pass(<<?OP_IS_NONEMPTY_LIST, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest2, MMod, MSt3, State0);
 % 57
-first_pass(<<?OP_IS_TUPLE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_TUPLE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_tuple/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -749,7 +845,8 @@ first_pass(<<?OP_IS_TUPLE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 58
-first_pass(<<?OP_TEST_ARITY, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_TEST_ARITY, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"test_arity/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -758,13 +855,13 @@ first_pass(<<?OP_TEST_ARITY, Rest0/binary>>, MMod, MSt0, State0) ->
     {MSt2, Reg} = MMod:move_to_native_register(MSt1, Arg1),
     MSt3 = MMod:and_(MSt2, Reg, ?TERM_PRIMARY_CLEAR_MASK),
     MSt4 = MMod:move_array_element(MSt3, Reg, 0, Reg),
-    MSt5 = MMod:shift_right(MSt4, Reg, 6),
-    MSt6 = cond_jump_to_label({Reg, '!=', Arity}, Label, MMod, MSt5),
-    MSt7 = MMod:free_native_registers(MSt6, [Reg]),
-    ?ASSERT_ALL_NATIVE_FREE(MSt7),
-    first_pass(Rest3, MMod, MSt7, State0);
+    {MSt5, ArityReg} = MMod:shift_right(MSt4, {free, Reg}, 6),
+    MSt6 = cond_jump_to_label({{free, ArityReg}, '!=', Arity}, Label, MMod, MSt5),
+    ?ASSERT_ALL_NATIVE_FREE(MSt6),
+    first_pass(Rest3, MMod, MSt6, State0);
 % 59
-first_pass(<<?OP_SELECT_VAL, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_SELECT_VAL, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"select_val/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {DefaultLabel, Rest2} = decode_label(Rest1),
@@ -794,7 +891,8 @@ first_pass(<<?OP_SELECT_VAL, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest4, MMod, MSt3, State0);
 % 60
-first_pass(<<?OP_SELECT_TUPLE_ARITY, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_SELECT_TUPLE_ARITY, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"select_tuple_arity/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {DefaultLabel, Rest2} = decode_label(Rest1),
@@ -818,16 +916,27 @@ first_pass(<<?OP_SELECT_TUPLE_ARITY, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest4, MMod, MSt5, State0);
 % 61
-first_pass(<<?OP_JUMP, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_JUMP, Rest0/binary>>, MMod, MSt, #state{tail_cache = TC} = State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"jump/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     ?TRACE("OP_JUMP ~p\n", [Label]),
-    MSt1 = MMod:call_only_or_schedule_next(MSt0, Label),
-    ?ASSERT_ALL_NATIVE_FREE(MSt1),
-    first_pass(Rest1, MMod, MSt1, State0);
+    % Jumps are emitted like call_only and share its tail cache entries.
+    CacheKey = {op_call_only, Label},
+    {MSt1, State1} =
+        case lists:keyfind(CacheKey, 1, TC) of
+            false ->
+                NewTC = [{CacheKey, MMod:offset(MSt0)} | TC],
+                {MMod:call_only_or_schedule_next(MSt0, Label), State0#state{tail_cache = NewTC}};
+            {CacheKey, Cached} ->
+                {MMod:jump_to_offset(MSt0, Cached), State0}
+        end,
+    ?ASSERT_ALL_NATIVE_FREE(MSt1),
+    first_pass(Rest1, MMod, MSt1, State1);
 % 62
 % Same implementation as OP_TRY, to confirm.
-first_pass(<<?OP_CATCH, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CATCH, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"catch/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0),
     {Label, Rest2} = decode_label(Rest1),
@@ -836,7 +945,8 @@ first_pass(<<?OP_CATCH, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 63
-first_pass(<<?OP_CATCH_END, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CATCH_END, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"catch_end/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0),
     ?TRACE("OP_CATCH_END ~p\n", [Dest]),
@@ -847,7 +957,8 @@ first_pass(<<?OP_CATCH_END, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest1, MMod, MSt5, State0);
 % 64
-first_pass(<<?OP_MOVE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_MOVE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"move/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Source, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, Dest, Rest2} = decode_dest(Rest1, MMod, MSt1),
@@ -857,7 +968,8 @@ first_pass(<<?OP_MOVE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt4),
     first_pass(Rest2, MMod, MSt4, State0);
 % 65
-first_pass(<<?OP_GET_LIST, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_GET_LIST, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_list/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, List, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, HeadDest, Rest2} = decode_dest(Rest1, MMod, MSt1),
@@ -873,7 +985,8 @@ first_pass(<<?OP_GET_LIST, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt10),
     first_pass(Rest3, MMod, MSt10, State0);
 % 66
-first_pass(<<?OP_GET_TUPLE_ELEMENT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_GET_TUPLE_ELEMENT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_tuple_element/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Source, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {Element, Rest2} = decode_literal(Rest1),
@@ -886,7 +999,8 @@ first_pass(<<?OP_GET_TUPLE_ELEMENT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest3, MMod, MSt6, State0);
 % 67
-first_pass(<<?OP_SET_TUPLE_ELEMENT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_SET_TUPLE_ELEMENT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"set_tuple_element/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, NewElement, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, Tuple, Rest2} = decode_compact_term(Rest1, MMod, MSt1, State0),
@@ -899,7 +1013,8 @@ first_pass(<<?OP_SET_TUPLE_ELEMENT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest3, MMod, MSt6, State0);
 % 69
-first_pass(<<?OP_PUT_LIST, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_PUT_LIST, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_list/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Head, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, Tail, Rest2} = decode_compact_term(Rest1, MMod, MSt1, State0),
@@ -914,7 +1029,8 @@ first_pass(<<?OP_PUT_LIST, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt7),
     first_pass(Rest3, MMod, MSt7, State0);
 % 72
-first_pass(<<?OP_BADMATCH, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BADMATCH, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"badmatch/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     ?TRACE("OP_BADMATCH ~p\n", [Arg1]),
@@ -924,7 +1040,8 @@ first_pass(<<?OP_BADMATCH, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest1, MMod, MSt2, State0);
 % 73
-first_pass(<<?OP_IF_END, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IF_END, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"if_end/0">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     ?TRACE("OP_IF_END\n", []),
     MSt1 = MMod:call_primitive_last(MSt0, ?PRIM_RAISE_ERROR, [
@@ -933,7 +1050,8 @@ first_pass(<<?OP_IF_END, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt1),
     first_pass(Rest0, MMod, MSt1, State0);
 % 74
-first_pass(<<?OP_CASE_END, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CASE_END, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"case_end/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     ?TRACE("OP_CASE_END ~p\n", [Arg1]),
@@ -943,7 +1061,8 @@ first_pass(<<?OP_CASE_END, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest1, MMod, MSt2, State0);
 % 75
-first_pass(<<?OP_CALL_FUN, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CALL_FUN, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_fun/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {ArgsCount, Rest1} = decode_literal(Rest0),
     ?TRACE("OP_CALL_FUN ~p\n", [ArgsCount]),
@@ -956,7 +1075,8 @@ first_pass(<<?OP_CALL_FUN, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt4),
     first_pass(Rest1, MMod, MSt4, State0);
 % 77
-first_pass(<<?OP_IS_FUNCTION, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_FUNCTION, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_function/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -965,7 +1085,8 @@ first_pass(<<?OP_IS_FUNCTION, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 78
-first_pass(<<?OP_CALL_EXT_ONLY, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CALL_EXT_ONLY, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_ext_only/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Arity, Rest1} = decode_literal(Rest0),
     {Index, Rest2} = decode_literal(Rest1),
@@ -975,7 +1096,8 @@ first_pass(<<?OP_CALL_EXT_ONLY, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 96
-first_pass(<<?OP_FMOVE, ?COMPACT_EXTENDED_FP_REGISTER, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_FMOVE, ?COMPACT_EXTENDED_FP_REGISTER, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fmove/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {FPRegIndex, Rest1} = decode_literal(Rest0),
     {MSt1, Dest, Rest2} = decode_dest(Rest1, MMod, MSt0),
@@ -985,7 +1107,8 @@ first_pass(<<?OP_FMOVE, ?COMPACT_EXTENDED_FP_REGISTER, Rest0/binary>>, MMod, MSt
     MSt4 = MMod:free_native_registers(MSt3, [ResultReg, Dest]),
     ?ASSERT_ALL_NATIVE_FREE(MSt4),
     first_pass(Rest2, MMod, MSt4, State0);
-first_pass(<<?OP_FMOVE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_FMOVE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fmove/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {FPReg, Rest2} = decode_fp_register(Rest1),
@@ -998,7 +1121,8 @@ first_pass(<<?OP_FMOVE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest2, MMod, MSt6, State0);
 % 97
-first_pass(<<?OP_FCONV, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_FCONV, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fconv/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, Reg} = MMod:move_to_native_register(MSt1, SrcValue),
@@ -1019,23 +1143,28 @@ first_pass(<<?OP_FCONV, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt8),
     first_pass(Rest2, MMod, MSt8, State0);
 % 98
-first_pass(<<?OP_FADD, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_FADD, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fadd/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     first_pass_float3(?PRIM_FADD, Rest0, MMod, MSt0, State0);
 % 99
-first_pass(<<?OP_FSUB, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_FSUB, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fsub/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     first_pass_float3(?PRIM_FSUB, Rest0, MMod, MSt0, State0);
 % 100
-first_pass(<<?OP_FMUL, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_FMUL, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fmul/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     first_pass_float3(?PRIM_FMUL, Rest0, MMod, MSt0, State0);
 % 101
-first_pass(<<?OP_FDIV, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_FDIV, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fdiv/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     first_pass_float3(?PRIM_FDIV, Rest0, MMod, MSt0, State0);
 % 102
-first_pass(<<?OP_FNEGATE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_FNEGATE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"fnegate/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {_Label, Rest1} = decode_label(Rest0),
     {{fp_reg, FPRegIndex1}, Rest2} = decode_fp_register(Rest1),
@@ -1048,7 +1177,8 @@ first_pass(<<?OP_FNEGATE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest3, MMod, MSt2, State0);
 % 104
-first_pass(<<?OP_TRY, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_TRY, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0),
     {Label, Rest2} = decode_label(Rest1),
@@ -1057,7 +1187,8 @@ first_pass(<<?OP_TRY, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 105
-first_pass(<<?OP_TRY_END, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_TRY_END, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try_end/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0),
     ?TRACE("OP_TRY_END ~p\n", [Dest]),
@@ -1066,7 +1197,8 @@ first_pass(<<?OP_TRY_END, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest1, MMod, MSt3, State0);
 % 106
-first_pass(<<?OP_TRY_CASE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_TRY_CASE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try_case/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0),
     ?TRACE("OP_TRY_CASE ~p\n", [Dest]),
@@ -1075,7 +1207,8 @@ first_pass(<<?OP_TRY_CASE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest1, MMod, MSt3, State0);
 % 107
-first_pass(<<?OP_TRY_CASE_END, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_TRY_CASE_END, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"try_case_end/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     ?TRACE("OP_TRY_CASE_END ~p\n", [Arg1]),
@@ -1085,7 +1218,8 @@ first_pass(<<?OP_TRY_CASE_END, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest1, MMod, MSt2, State0);
 % 108
-first_pass(<<?OP_RAISE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_RAISE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"raise/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Stacktrace, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, ExcValue, Rest2} = decode_compact_term(Rest1, MMod, MSt1, State0),
@@ -1096,7 +1230,8 @@ first_pass(<<?OP_RAISE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest2, MMod, MSt3, State0);
 % 112
-first_pass(<<?OP_APPLY, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_APPLY, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"apply/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Arity, Rest1} = decode_literal(Rest0),
     {MSt1, Module} = read_any_xreg(Arity, MMod, MSt0),
@@ -1111,7 +1246,8 @@ first_pass(<<?OP_APPLY, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest1, MMod, MSt6, State0);
 % 113
-first_pass(<<?OP_APPLY_LAST, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_APPLY_LAST, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"apply_last/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Arity, Rest1} = decode_literal(Rest0),
     {NWords, Rest2} = decode_literal(Rest1),
@@ -1129,7 +1265,8 @@ first_pass(<<?OP_APPLY_LAST, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt8),
     first_pass(Rest2, MMod, MSt8, State0);
 % 114
-first_pass(<<?OP_IS_BOOLEAN, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_BOOLEAN, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_boolean/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1142,7 +1279,8 @@ first_pass(<<?OP_IS_BOOLEAN, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt4),
     first_pass(Rest2, MMod, MSt4, State0);
 % 115
-first_pass(<<?OP_IS_FUNCTION2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_FUNCTION2, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_function2/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1156,23 +1294,25 @@ first_pass(<<?OP_IS_FUNCTION2, Rest0/binary>>, MMod, MSt0, State0) ->
         MSt6,
         {IndexOrModuleReg, '&', ?TERM_IMMED2_TAG_MASK, '!=', ?TERM_IMMED2_ATOM},
         fun(BSt0) ->
-            BSt1 = MMod:shift_right(BSt0, IndexOrModuleReg, 4),
+            {BSt1, IndexReg} = MMod:shift_right(BSt0, {free, IndexOrModuleReg}, 4),
             {BSt2, FunArity} = MMod:call_primitive(BSt1, ?PRIM_MODULE_GET_FUN_ARITY, [
-                ModuleReg, IndexOrModuleReg
+                ModuleReg, {free, IndexReg}
             ]),
             cond_jump_to_label({'(int)', {free, FunArity}, '!=', Arity}, Label, MMod, BSt2)
         end,
         fun(BSt0) ->
-            {BSt1, FunArity} = MMod:get_array_element(BSt0, FuncPtr, 3),
-            BSt2 = MMod:shift_right(BSt1, FunArity, 4),
-            cond_jump_to_label({'(int)', {free, FunArity}, '!=', Arity}, Label, MMod, BSt2)
+            BSt1 = MMod:free_native_registers(BSt0, [IndexOrModuleReg]),
+            {BSt2, FunArity} = MMod:get_array_element(BSt1, FuncPtr, 3),
+            {BSt3, FunArityReg} = MMod:shift_right(BSt2, {free, FunArity}, 4),
+            cond_jump_to_label({'(int)', {free, FunArityReg}, '!=', Arity}, Label, MMod, BSt3)
         end
     ),
-    MSt8 = MMod:free_native_registers(MSt7, [FuncPtr, IndexOrModuleReg, ModuleReg, Arity]),
+    MSt8 = MMod:free_native_registers(MSt7, [FuncPtr, ModuleReg, Arity]),
     ?ASSERT_ALL_NATIVE_FREE(MSt8),
     first_pass(Rest3, MMod, MSt8, State0);
 % 117
-first_pass(<<?OP_BS_GET_INTEGER2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_GET_INTEGER2, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_integer2/7">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0),
@@ -1210,7 +1350,8 @@ first_pass(<<?OP_BS_GET_INTEGER2, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt18),
     first_pass(Rest7, MMod, MSt18, State0);
 % 118
-first_pass(<<?OP_BS_GET_FLOAT2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_GET_FLOAT2, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_float2/7">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0),
@@ -1247,7 +1388,8 @@ first_pass(<<?OP_BS_GET_FLOAT2, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt17),
     first_pass(Rest7, MMod, MSt17, State0);
 % 119
-first_pass(<<?OP_BS_GET_BINARY2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_GET_BINARY2, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_binary2/7">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0),
@@ -1257,7 +1399,7 @@ first_pass(<<?OP_BS_GET_BINARY2, Rest0/binary>>, MMod, MSt0, State0) ->
     {FlagsValue, Rest6} = decode_literal(Rest5),
     {MSt3, MatchStateRegPtr} = verify_is_match_state_and_get_ptr(MMod, MSt2, Src),
     {MSt4, BSBinaryReg0} = MMod:get_array_element(MSt3, MatchStateRegPtr, 1),
-    {MSt5, BSOffsetReg} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2),
+    {MSt5, BSOffsetReg0} = MMod:get_array_element(MSt4, MatchStateRegPtr, 2),
     MSt6 =
         if
             Unit =/= 8 ->
@@ -1271,22 +1413,22 @@ first_pass(<<?OP_BS_GET_BINARY2, Rest0/binary>>, MMod, MSt0, State0) ->
             true ->
                 MSt5
         end,
-    MSt7 = MMod:if_block(MSt6, {BSOffsetReg, '&', 16#7, '!=', 0}, fun(BlockSt) ->
+    MSt7 = MMod:if_block(MSt6, {BSOffsetReg0, '&', 16#7, '!=', 0}, fun(BlockSt) ->
         MMod:call_primitive_last(BlockSt, ?PRIM_RAISE_ERROR, [ctx, jit_state, offset, ?BADARG_ATOM])
     end),
-    MSt8 = MMod:shift_right(MSt7, BSOffsetReg, 3),
+    {MSt8, BSOffsetReg1} = MMod:shift_right(MSt7, {free, BSOffsetReg0}, 3),
     MSt9 = MMod:and_(MSt8, BSBinaryReg0, ?TERM_PRIMARY_CLEAR_MASK),
     {MSt10, SizeReg} = MMod:get_array_element(MSt9, {free, BSBinaryReg0}, 1),
     {MSt13, SizeValue} =
         if
             Size =:= ?ALL_ATOM ->
-                MSt11 = MMod:sub(MSt10, SizeReg, BSOffsetReg),
+                MSt11 = MMod:sub(MSt10, SizeReg, BSOffsetReg1),
                 {MSt11, SizeReg};
             is_integer(Size) ->
                 % SizeReg is binary size
                 % SizeVal is a constant
                 MSt11 = MMod:sub(MSt10, SizeReg, Size bsl 4),
-                MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg}, Fail, MMod, MSt11),
+                MSt12 = cond_jump_to_label({{free, SizeReg}, '<', BSOffsetReg1}, Fail, MMod, MSt11),
                 {MSt12, Size bsl 4};
             true ->
                 {MSt11, SizeValReg} = MMod:move_to_native_register(MSt10, Size),
@@ -1294,20 +1436,20 @@ first_pass(<<?OP_BS_GET_BINARY2, Rest0/binary>>, MMod, MSt0, State0) ->
                     MSt11,
                     {SizeValReg, '==', ?ALL_ATOM},
                     fun(BSt0) ->
-                        BSt1 = MMod:sub(BSt0, SizeReg, BSOffsetReg),
+                        BSt1 = MMod:sub(BSt0, SizeReg, BSOffsetReg1),
                         MMod:free_native_registers(BSt1, [SizeValReg])
                     end,
                     fun(BSt0) ->
                         {BSt1, SizeValReg} = term_to_int(SizeValReg, 0, MMod, BSt0),
                         BSt2 = MMod:sub(BSt1, SizeReg, SizeValReg),
-                        BSt3 = cond_jump_to_label({SizeReg, '<', BSOffsetReg}, Fail, MMod, BSt2),
+                        BSt3 = cond_jump_to_label({SizeReg, '<', BSOffsetReg1}, Fail, MMod, BSt2),
                         BSt4 = MMod:move_to_native_register(BSt3, SizeValReg, SizeReg),
                         MMod:free_native_registers(BSt4, [SizeValReg])
                     end
                 ),
                 {MSt12, SizeReg}
         end,
-    {MSt14, NewOffsetReg} = MMod:copy_to_native_register(MSt13, BSOffsetReg),
+    {MSt14, NewOffsetReg} = MMod:copy_to_native_register(MSt13, BSOffsetReg1),
     MSt15 = MMod:add(MSt14, NewOffsetReg, SizeValue),
     MSt16 = MMod:shift_left(MSt15, NewOffsetReg, 3),
     % Write new offset
@@ -1324,7 +1466,7 @@ first_pass(<<?OP_BS_GET_BINARY2, Rest0/binary>>, MMod, MSt0, State0) ->
         BSBinaryReg1, Live, {free, HeapSizeReg}, MMod, MSt23
     ),
     {MSt25, ResultTerm} = MMod:call_primitive(MSt24, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [
-        ctx, {free, BSBinaryReg2}, {free, BSOffsetReg}, {free, SizeValue}
+        ctx, {free, BSBinaryReg2}, {free, BSOffsetReg1}, {free, SizeValue}
     ]),
     {MSt26, Dest, Rest7} = decode_dest(Rest6, MMod, MSt25),
     ?TRACE("OP_BS_GET_BINARY2 ~p,~p,~p,~p,~p,~p,~p\n", [
@@ -1335,7 +1477,8 @@ first_pass(<<?OP_BS_GET_BINARY2, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt28),
     first_pass(Rest7, MMod, MSt28, State0);
 % 120
-first_pass(<<?OP_BS_SKIP_BITS2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_SKIP_BITS2, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_bits2/5">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0),
@@ -1365,7 +1508,8 @@ first_pass(<<?OP_BS_SKIP_BITS2, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt15),
     first_pass(Rest5, MMod, MSt15, State0);
 % 121
-first_pass(<<?OP_BS_TEST_TAIL2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_TEST_TAIL2, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_test_tail2/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0),
@@ -1383,7 +1527,8 @@ first_pass(<<?OP_BS_TEST_TAIL2, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt10),
     first_pass(Rest3, MMod, MSt10, State0);
 % 124
-first_pass(<<?OP_GC_BIF1, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_GC_BIF1, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"gc_bif1/5">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {FailLabel, Rest1} = decode_label(Rest0),
     {Live, Rest2} = decode_literal(Rest1),
@@ -1408,7 +1553,8 @@ first_pass(<<?OP_GC_BIF1, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt7),
     first_pass(Rest5, MMod, MSt7, State0);
 % 125
-first_pass(<<?OP_GC_BIF2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_GC_BIF2, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"gc_bif2/6">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {FailLabel, Rest1} = decode_label(Rest0),
     {Live, Rest2} = decode_literal(Rest1),
@@ -1434,7 +1580,8 @@ first_pass(<<?OP_GC_BIF2, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt8),
     first_pass(Rest6, MMod, MSt8, State0);
 % 129
-first_pass(<<?OP_IS_BITSTR, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_BITSTR, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_bitstr/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1460,7 +1607,8 @@ first_pass(<<?OP_IS_BITSTR, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt8),
     first_pass(Rest2, MMod, MSt8, State0);
 % 132
-first_pass(<<?OP_BS_MATCH_STRING, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_MATCH_STRING, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_match_string/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt0, State0),
@@ -1480,7 +1628,8 @@ first_pass(<<?OP_BS_MATCH_STRING, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt9),
     first_pass(Rest4, MMod, MSt9, State0);
 % 133
-first_pass(<<?OP_BS_INIT_WRITABLE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_INIT_WRITABLE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_init_writable/0">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     ?TRACE("OP_BS_INIT_WRITABLE\n", []),
     HeapSize = term_binary_heap_size(0, MMod),
@@ -1497,7 +1646,8 @@ first_pass(<<?OP_BS_INIT_WRITABLE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest0, MMod, MSt6, State0);
 % 136
-first_pass(<<?OP_TRIM, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_TRIM, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"trim/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {NWords, Rest1} = decode_literal(Rest0),
     {_NRemaining, Rest2} = decode_literal(Rest1),
@@ -1506,7 +1656,8 @@ first_pass(<<?OP_TRIM, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt1),
     first_pass(Rest2, MMod, MSt1, State0);
 % 138
-first_pass(<<?OP_BS_GET_UTF8, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_GET_UTF8, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_utf8/5">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1521,7 +1672,8 @@ first_pass(<<?OP_BS_GET_UTF8, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest5, MMod, MSt6, State0);
 % 139
-first_pass(<<?OP_BS_SKIP_UTF8, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_SKIP_UTF8, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_utf8/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1533,7 +1685,8 @@ first_pass(<<?OP_BS_SKIP_UTF8, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest4, MMod, MSt3, State0);
 % 140
-first_pass(<<?OP_BS_GET_UTF16, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_GET_UTF16, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_utf16/5">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1550,7 +1703,8 @@ first_pass(<<?OP_BS_GET_UTF16, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest5, MMod, MSt6, State0);
 % 141
-first_pass(<<?OP_BS_SKIP_UTF16, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_SKIP_UTF16, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_utf16/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1564,7 +1718,8 @@ first_pass(<<?OP_BS_SKIP_UTF16, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest4, MMod, MSt3, State0);
 % 142
-first_pass(<<?OP_BS_GET_UTF32, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_GET_UTF32, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_utf32/5">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1581,7 +1736,8 @@ first_pass(<<?OP_BS_GET_UTF32, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest5, MMod, MSt6, State0);
 % 143
-first_pass(<<?OP_BS_SKIP_UTF32, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_SKIP_UTF32, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_skip_utf32/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1595,7 +1751,8 @@ first_pass(<<?OP_BS_SKIP_UTF32, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest4, MMod, MSt3, State0);
 % 152
-first_pass(<<?OP_GC_BIF3, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_GC_BIF3, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"gc_bif3/7">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {FailLabel, Rest1} = decode_label(Rest0),
     {Live, Rest2} = decode_literal(Rest1),
@@ -1630,12 +1787,14 @@ first_pass(
 ) ->
     {Line, Rest1} = decode_literal(Rest0),
     ?TRACE("OP_LINE ~p\n", [Line]),
-    Offset = MMod:offset(MSt),
-    first_pass(Rest1, MMod, MSt, State0#state{
+    MSt0 = ?DWARF_LINE(MMod, MSt, Line),
+    Offset = MMod:offset(MSt0),
+    first_pass(Rest1, MMod, MSt0, State0#state{
         line_offsets = [{Line, Offset} | AccLines]
     });
 % 154
-first_pass(<<?OP_PUT_MAP_ASSOC, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_PUT_MAP_ASSOC, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_map_assoc/5">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {_Label, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1716,7 +1875,8 @@ first_pass(<<?OP_PUT_MAP_ASSOC, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt18),
     first_pass(Rest6, MMod, MSt18, State0);
 % 155
-first_pass(<<?OP_PUT_MAP_EXACT, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_PUT_MAP_EXACT, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_map_exact/5">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {_Label, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1790,7 +1950,8 @@ first_pass(<<?OP_PUT_MAP_EXACT, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt14),
     first_pass(Rest6, MMod, MSt14, State0);
 % 156
-first_pass(<<?OP_IS_MAP, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_IS_MAP, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_map/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1799,7 +1960,8 @@ first_pass(<<?OP_IS_MAP, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest2, MMod, MSt2, State0);
 % 157
-first_pass(<<?OP_HAS_MAP_FIELDS, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_HAS_MAP_FIELDS, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"has_map_fields/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1845,7 +2007,8 @@ first_pass(<<?OP_HAS_MAP_FIELDS, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt7),
     first_pass(Rest5, MMod, MSt7, State0);
 % 158
-first_pass(<<?OP_GET_MAP_ELEMENTS, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_GET_MAP_ELEMENTS, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_map_elements/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1902,8 +2065,9 @@ first_pass(<<?OP_GET_MAP_ELEMENTS, Rest0/binary>>, MMod, MSt0, State0) ->
     first_pass(Rest6, MMod, MSt14, State0);
 % 159
 first_pass(
-    <<?OP_IS_TAGGED_TUPLE, Rest0/binary>>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0
+    <<?OP_IS_TAGGED_TUPLE, Rest0/binary>>, MMod, MSt, #state{atom_resolver = AtomResolver} = State0
 ) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"is_tagged_tuple/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Label, Rest1} = decode_label(Rest0),
     {MSt1, Arg1, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -1915,13 +2079,13 @@ first_pass(
         {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, Label, MMod, MSt2
     ),
     MSt4 = MMod:and_(MSt3, Reg, ?TERM_PRIMARY_CLEAR_MASK),
-    {MSt5, TagReg} = MMod:get_array_element(MSt4, Reg, 0),
+    {MSt5, TagReg0} = MMod:get_array_element(MSt4, Reg, 0),
     MSt6 = cond_jump_to_label(
-        {TagReg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_TUPLE}, Label, MMod, MSt5
+        {TagReg0, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_TUPLE}, Label, MMod, MSt5
     ),
-    MSt7 = MMod:shift_right(MSt6, TagReg, 6),
-    MSt8 = cond_jump_to_label({TagReg, '!=', Arity}, Label, MMod, MSt7),
-    MSt9 = MMod:free_native_registers(MSt8, [TagReg]),
+    {MSt7, TagReg1} = MMod:shift_right(MSt6, {free, TagReg0}, 6),
+    MSt8 = cond_jump_to_label({TagReg1, '!=', Arity}, Label, MMod, MSt7),
+    MSt9 = MMod:free_native_registers(MSt8, [TagReg1]),
     MSt10 = MMod:move_array_element(MSt9, Reg, 1, Reg),
     {MSt11, AtomReg} =
         case maps:find(AtomResolver(AtomIndex), ?DEFAULT_ATOMS) of
@@ -1938,7 +2102,8 @@ first_pass(
     ?ASSERT_ALL_NATIVE_FREE(MSt14),
     first_pass(Rest4, MMod, MSt14, State0);
 % 160
-first_pass(<<?OP_BUILD_STACKTRACE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BUILD_STACKTRACE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"build_stacktrace/0">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, ResultReg} = MMod:call_primitive(MSt0, ?PRIM_STACKTRACE_BUILD, [ctx]),
     MSt2 = MMod:move_to_vm_register(MSt1, ResultReg, {x_reg, 0}),
@@ -1946,7 +2111,8 @@ first_pass(<<?OP_BUILD_STACKTRACE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest0, MMod, MSt3, State0);
 % 161
-first_pass(<<?OP_RAW_RAISE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_RAW_RAISE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"raw_raise/0">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, ExClassReg} = MMod:move_to_native_register(MSt0, {x_reg, 0}),
     MSt2 = MMod:if_block(MSt1, {ExClassReg, '==', ?ERROR_ATOM}, fun(BSt0) ->
@@ -1962,7 +2128,8 @@ first_pass(<<?OP_RAW_RAISE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest0, MMod, MSt5, State0);
 % 162
-first_pass(<<?OP_GET_HD, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_GET_HD, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_hd/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1),
@@ -1974,7 +2141,8 @@ first_pass(<<?OP_GET_HD, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest3, MMod, MSt6, State0);
 % 163
-first_pass(<<?OP_GET_TL, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_GET_TL, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"get_tl/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, SrcValue, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, Dest, Rest3} = decode_dest(Rest1, MMod, MSt1),
@@ -1986,7 +2154,8 @@ first_pass(<<?OP_GET_TL, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest3, MMod, MSt6, State0);
 % 164
-first_pass(<<?OP_PUT_TUPLE2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_PUT_TUPLE2, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"put_tuple2/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0),
     {ListSize, Rest2} = decode_extended_list_header(Rest1),
@@ -2011,7 +2180,8 @@ first_pass(<<?OP_PUT_TUPLE2, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt7),
     first_pass(Rest3, MMod, MSt7, State0);
 % 165
-first_pass(<<?OP_BS_GET_TAIL, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_GET_TAIL, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_tail/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Src, Rest1} = decode_typed_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, Dest, Rest2} = decode_dest(Rest1, MMod, MSt1),
@@ -2034,7 +2204,8 @@ first_pass(<<?OP_BS_GET_TAIL, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt14),
     first_pass(Rest3, MMod, MSt14, State0);
 % 166
-first_pass(<<?OP_BS_START_MATCH3, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_START_MATCH3, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_start_match3/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, Src, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -2047,7 +2218,8 @@ first_pass(<<?OP_BS_START_MATCH3, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest4, MMod, MSt5, State0);
 % 167
-first_pass(<<?OP_BS_GET_POSITION, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_GET_POSITION, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_get_position/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Src, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, Dest, Rest2} = decode_dest(Rest1, MMod, MSt1),
@@ -2063,7 +2235,8 @@ first_pass(<<?OP_BS_GET_POSITION, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt9),
     first_pass(Rest3, MMod, MSt9, State0);
 % 168
-first_pass(<<?OP_BS_SET_POSITION, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_SET_POSITION, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_set_position/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Src, Rest1} = decode_typed_compact_term(Rest0, MMod, MSt0, State0),
     {MSt2, Pos, Rest2} = decode_typed_compact_term(Rest1, MMod, MSt1, State0),
@@ -2075,7 +2248,8 @@ first_pass(<<?OP_BS_SET_POSITION, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest2, MMod, MSt6, State0);
 % 169
-first_pass(<<?OP_SWAP, Rest0/binary>>, MMod, MSt0, State) ->
+first_pass(<<?OP_SWAP, Rest0/binary>>, MMod, MSt, State) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"swap/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, ArgA, Rest1} = decode_dest(Rest0, MMod, MSt0),
     {MSt2, ArgB, Rest2} = decode_dest(Rest1, MMod, MSt1),
@@ -2087,7 +2261,8 @@ first_pass(<<?OP_SWAP, Rest0/binary>>, MMod, MSt0, State) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest2, MMod, MSt6, State);
 % 170
-first_pass(<<?OP_BS_START_MATCH4, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_START_MATCH4, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_start_match4/4">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_atom_or_label(Rest0, State0),
     {Live, Rest2} = decode_literal(Rest1),
@@ -2108,7 +2283,8 @@ first_pass(<<?OP_BS_START_MATCH4, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt5),
     first_pass(Rest4, MMod, MSt5, State0);
 % 171
-first_pass(<<?OP_MAKE_FUN3, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_MAKE_FUN3, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"make_fun3/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {FunIndex, Rest1} = decode_literal(Rest0),
     {MSt1, Dest, Rest2} = decode_dest(Rest1, MMod, MSt0),
@@ -2136,7 +2312,8 @@ first_pass(<<?OP_MAKE_FUN3, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt7),
     first_pass(Rest4, MMod, MSt7, State0);
 % 172
-first_pass(<<?OP_INIT_YREGS, Rest0/binary>>, MMod, MSt0, State) ->
+first_pass(<<?OP_INIT_YREGS, Rest0/binary>>, MMod, MSt, State) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"init_yregs/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {ListSize, Rest1} = decode_extended_list_header(Rest0),
     ?TRACE("OP_INIT_YREGS ~p\n", [ListSize]),
@@ -2153,7 +2330,8 @@ first_pass(<<?OP_INIT_YREGS, Rest0/binary>>, MMod, MSt0, State) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt1),
     first_pass(Rest2, MMod, MSt1, State);
 % 173
-first_pass(<<?OP_RECV_MARKER_BIND, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_RECV_MARKER_BIND, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_bind/2">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, RegA, Rest1} = decode_dest(Rest0, MMod, MSt0),
     {MSt2, RegB, Rest2} = decode_dest(Rest1, MMod, MSt1),
@@ -2162,7 +2340,8 @@ first_pass(<<?OP_RECV_MARKER_BIND, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest2, MMod, MSt3, State0);
 % 174
-first_pass(<<?OP_RECV_MARKER_CLEAR, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_RECV_MARKER_CLEAR, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_clear/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, RegA, Rest1} = decode_dest(Rest0, MMod, MSt0),
     ?TRACE("OP_RECV_MARKER_CLEAR ~p\n", [RegA]),
@@ -2170,7 +2349,8 @@ first_pass(<<?OP_RECV_MARKER_CLEAR, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt2),
     first_pass(Rest1, MMod, MSt2, State0);
 % 175
-first_pass(<<?OP_RECV_MARKER_RESERVE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_RECV_MARKER_RESERVE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_reserve/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Dest, Rest1} = decode_dest(Rest0, MMod, MSt0),
     ?TRACE("OP_RECV_MARKER_RESERVE ~p\n", [Dest]),
@@ -2180,7 +2360,8 @@ first_pass(<<?OP_RECV_MARKER_RESERVE, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt3),
     first_pass(Rest1, MMod, MSt3, State0);
 % 176
-first_pass(<<?OP_RECV_MARKER_USE, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_RECV_MARKER_USE, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"recv_marker_use/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, RegA, Rest1} = decode_dest(Rest0, MMod, MSt0),
     ?TRACE("OP_RECV_MARKER_USE ~p\n", [RegA]),
@@ -2189,8 +2370,9 @@ first_pass(<<?OP_RECV_MARKER_USE, Rest0/binary>>, MMod, MSt0, State0) ->
     first_pass(Rest1, MMod, MSt2, State0);
 % 177
 first_pass(
-    <<?OP_BS_CREATE_BIN, Rest0/binary>>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0
+    <<?OP_BS_CREATE_BIN, Rest0/binary>>, MMod, MSt, #state{atom_resolver = AtomResolver} = State0
 ) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_create_bin/6">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {Alloc, Rest2} = decode_allocator_list(MMod, Rest1),
@@ -2262,15 +2444,19 @@ first_pass(
                 {MSt7, (BinaryTotalSize div 8),
                     term_binary_heap_size((BinaryTotalSize div 8), MMod) + Alloc};
             true ->
-                MSt8 = MMod:shift_right(MSt7, BinaryTotalSize, 3),
-                {MSt9, BinaryTotalSize0} = MMod:copy_to_native_register(MSt8, BinaryTotalSize),
-                {MSt10, AllocSizeReg} = term_binary_heap_size({free, BinaryTotalSize0}, MMod, MSt9),
+                {MSt8, BinaryTotalSizeBytes} = MMod:shift_right(MSt7, {free, BinaryTotalSize}, 3),
+                {MSt9, BinaryTotalSizeBytes0} = MMod:copy_to_native_register(
+                    MSt8, BinaryTotalSizeBytes
+                ),
+                {MSt10, AllocSizeReg} = term_binary_heap_size(
+                    {free, BinaryTotalSizeBytes0}, MMod, MSt9
+                ),
                 case Alloc of
                     0 ->
-                        {MSt10, BinaryTotalSize, AllocSizeReg};
+                        {MSt10, BinaryTotalSizeBytes, AllocSizeReg};
                     _ ->
                         MSt11 = MMod:add(MSt10, AllocSizeReg, Alloc),
-                        {MSt11, BinaryTotalSize, AllocSizeReg}
+                        {MSt11, BinaryTotalSizeBytes, AllocSizeReg}
                 end
         end,
     {MSt13, MemoryEnsureFreeReg} = MMod:call_primitive(
@@ -2318,7 +2504,8 @@ first_pass(
     ?ASSERT_ALL_NATIVE_FREE(MSt19),
     first_pass(Rest7, MMod, MSt19, State1);
 % 178
-first_pass(<<?OP_CALL_FUN2, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_CALL_FUN2, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"call_fun2/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Tag, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     {ArgsCount, Rest2} = decode_literal(Rest1),
@@ -2334,7 +2521,8 @@ first_pass(<<?OP_CALL_FUN2, Rest0/binary>>, MMod, MSt0, State0) ->
     ?ASSERT_ALL_NATIVE_FREE(MSt6),
     first_pass(Rest3, MMod, MSt6, State0);
 % 180
-first_pass(<<?OP_BADRECORD, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BADRECORD, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"badrecord/1">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {MSt1, Arg1, Rest1} = decode_compact_term(Rest0, MMod, MSt0, State0),
     ?TRACE("OP_BADRECORD ~p\n", [Arg1]),
@@ -2345,8 +2533,9 @@ first_pass(<<?OP_BADRECORD, Rest0/binary>>, MMod, MSt0, State0) ->
     first_pass(Rest1, MMod, MSt2, State0);
 % 181
 first_pass(
-    <<?OP_UPDATE_RECORD, Rest0/binary>>, MMod, MSt0, #state{atom_resolver = AtomResolver} = State0
+    <<?OP_UPDATE_RECORD, Rest0/binary>>, MMod, MSt, #state{atom_resolver = AtomResolver} = State0
 ) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"update_record/5">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {HintAtomIndex, Rest1} = decode_atom(Rest0),
     Hint = AtomResolver(HintAtomIndex),
@@ -2422,7 +2611,8 @@ first_pass(
     ?ASSERT_ALL_NATIVE_FREE(MSt11),
     first_pass(Rest6, MMod, MSt11, State0);
 % 182
-first_pass(<<?OP_BS_MATCH, Rest0/binary>>, MMod, MSt0, State0) ->
+first_pass(<<?OP_BS_MATCH, Rest0/binary>>, MMod, MSt, State0) ->
+    MSt0 = ?DWARF_OPCODE(MMod, MSt, <<"bs_match/3">>),
     ?ASSERT_ALL_NATIVE_FREE(MSt0),
     {Fail, Rest1} = decode_label(Rest0),
     {MSt1, MatchState, Rest2} = decode_compact_term(Rest1, MMod, MSt0, State0),
@@ -2879,33 +3069,32 @@ first_pass_bs_match_binary(
                 ])
         end,
     MatchedBytes = MatchedBits div 8,
-    {MSt2, BSOffseBytesReg} = MMod:copy_to_native_register(MSt1, BSOffsetReg),
-    MSt3 = MMod:shift_right(MSt2, BSOffseBytesReg, 3),
-    {MSt4, RemainingBytesReg} = MMod:get_array_element(MSt3, BSBinaryReg, 1),
-    MSt5 = MMod:sub(MSt4, RemainingBytesReg, BSOffseBytesReg),
-    MSt6 = cond_jump_to_label({RemainingBytesReg, '<', MatchedBytes}, Fail, MMod, MSt5),
-    MSt7 = MMod:free_native_registers(MSt6, [RemainingBytesReg]),
-    {MSt8, HeapSizeReg} = MMod:call_primitive(MSt7, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [
+    {MSt2, BSOffseBytesReg} = MMod:shift_right(MSt1, BSOffsetReg, 3),
+    {MSt3, RemainingBytesReg} = MMod:get_array_element(MSt2, BSBinaryReg, 1),
+    MSt4 = MMod:sub(MSt3, RemainingBytesReg, BSOffseBytesReg),
+    MSt5 = cond_jump_to_label({RemainingBytesReg, '<', MatchedBytes}, Fail, MMod, MSt4),
+    MSt6 = MMod:free_native_registers(MSt5, [RemainingBytesReg]),
+    {MSt7, HeapSizeReg} = MMod:call_primitive(MSt6, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [
         BSBinaryReg, MatchedBytes
     ]),
-    {MSt9, NewMatchState} = memory_ensure_free_with_extra_root(
-        MatchState, Live, {free, HeapSizeReg}, MMod, MSt8
+    {MSt8, NewMatchState} = memory_ensure_free_with_extra_root(
+        MatchState, Live, {free, HeapSizeReg}, MMod, MSt7
     ),
     % Restore BSBinaryReg as it may have been gc'd as well
-    {MSt10, MatchStateReg0} = MMod:copy_to_native_register(MSt9, NewMatchState),
-    MSt11 = MMod:and_(MSt10, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK),
-    MSt12 = MMod:move_array_element(MSt11, MatchStateReg0, 1, BSBinaryReg),
-    MSt13 = MMod:free_native_registers(MSt12, [MatchStateReg0]),
-    {MSt14, ResultTerm} = MMod:call_primitive(MSt13, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [
+    {MSt9, MatchStateReg0} = MMod:copy_to_native_register(MSt8, NewMatchState),
+    MSt10 = MMod:and_(MSt9, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK),
+    MSt11 = MMod:move_array_element(MSt10, MatchStateReg0, 1, BSBinaryReg),
+    MSt12 = MMod:free_native_registers(MSt11, [MatchStateReg0]),
+    {MSt13, ResultTerm} = MMod:call_primitive(MSt12, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [
         ctx, BSBinaryReg, {free, BSOffseBytesReg}, MatchedBytes
     ]),
-    MSt15 = MMod:and_(MSt14, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK),
-    {MSt16, Dest, Rest5} = decode_dest(Rest4, MMod, MSt15),
+    MSt14 = MMod:and_(MSt13, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt15, Dest, Rest5} = decode_dest(Rest4, MMod, MSt14),
     ?TRACE("~p},", [Dest]),
-    MSt17 = MMod:move_to_vm_register(MSt16, ResultTerm, Dest),
-    MSt18 = MMod:free_native_registers(MSt17, [ResultTerm]),
-    MSt19 = MMod:add(MSt18, BSOffsetReg, MatchedBits),
-    {J0 - 5, Rest5, NewMatchState, BSOffsetReg, MSt19}.
+    MSt16 = MMod:move_to_vm_register(MSt15, ResultTerm, Dest),
+    MSt17 = MMod:free_native_registers(MSt16, [ResultTerm]),
+    MSt18 = MMod:add(MSt17, BSOffsetReg, MatchedBits),
+    {J0 - 5, Rest5, NewMatchState, BSOffsetReg, MSt18}.
 
 first_pass_bs_match_get_tail(MatchState, BSBinaryReg, BSOffsetReg, J0, Rest0, MMod, MSt0) ->
     {Live, Rest1} = decode_literal(Rest0),
@@ -2925,32 +3114,33 @@ do_get_tail(
     MatchState, Live, BSOffsetReg, BSBinaryReg, MMod, MSt0
 ) ->
     MSt1 = cond_raise_badarg({BSOffsetReg, '&', 2#111, '!=', 0}, MMod, MSt0),
-    {MSt2, BSOffseBytesReg} = MMod:copy_to_native_register(MSt1, BSOffsetReg),
-    MSt3 = MMod:shift_right(MSt2, BSOffseBytesReg, 3),
-    {MSt4, TailBytesReg0} = MMod:get_array_element(MSt3, BSBinaryReg, 1),
-    MSt5 = MMod:sub(MSt4, TailBytesReg0, BSOffseBytesReg),
-    {MSt6, HeapSizeReg} = MMod:call_primitive(MSt5, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [
+    {MSt2, BSOffseBytesReg} = MMod:shift_right(MSt1, BSOffsetReg, 3),
+    {MSt3, TailBytesReg0} = MMod:get_array_element(MSt2, BSBinaryReg, 1),
+    MSt4 = MMod:sub(MSt3, TailBytesReg0, BSOffseBytesReg),
+    {MSt5, HeapSizeReg} = MMod:call_primitive(MSt4, ?PRIM_TERM_SUB_BINARY_HEAP_SIZE, [
         BSBinaryReg, {free, TailBytesReg0}
     ]),
-    {MSt7, NewMatchState} = memory_ensure_free_with_extra_root(
-        MatchState, Live, {free, HeapSizeReg}, MMod, MSt6
+    {MSt6, NewMatchState} = memory_ensure_free_with_extra_root(
+        MatchState, Live, {free, HeapSizeReg}, MMod, MSt5
     ),
     % Restore BSBinaryReg as it may have been gc'd as well
-    {MSt8, MatchStateReg0} = MMod:copy_to_native_register(MSt7, NewMatchState),
-    MSt9 = MMod:and_(MSt8, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK),
-    MSt10 = MMod:move_array_element(MSt9, MatchStateReg0, 1, BSBinaryReg),
-    MSt11 = MMod:free_native_registers(MSt10, [MatchStateReg0]),
-    MSt12 = MMod:and_(MSt11, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK),
-    {MSt13, TailBytesReg1} = MMod:get_array_element(MSt12, BSBinaryReg, 1),
-    MSt14 = MMod:sub(MSt13, TailBytesReg0, BSOffseBytesReg),
-    MSt15 = MMod:add(MSt14, BSBinaryReg, ?TERM_PRIMARY_BOXED),
-    {MSt16, ResultTerm} = MMod:call_primitive(MSt15, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [
+    {MSt7, MatchStateReg0} = MMod:copy_to_native_register(MSt6, NewMatchState),
+    MSt8 = MMod:and_(MSt7, MatchStateReg0, ?TERM_PRIMARY_CLEAR_MASK),
+    MSt9 = MMod:move_array_element(MSt8, MatchStateReg0, 1, BSBinaryReg),
+    MSt10 = MMod:free_native_registers(MSt9, [MatchStateReg0]),
+    MSt11 = MMod:and_(MSt10, BSBinaryReg, ?TERM_PRIMARY_CLEAR_MASK),
+    {MSt12, TailBytesReg1} = MMod:get_array_element(MSt11, BSBinaryReg, 1),
+    % Subtract from the size re-read after GC (TailBytesReg1): TailBytesReg0 was
+    % released when passed as {free, ...} to the heap-size primitive above.
+    MSt13 = MMod:sub(MSt12, TailBytesReg1, BSOffseBytesReg),
+    MSt14 = MMod:add(MSt13, BSBinaryReg, ?TERM_PRIMARY_BOXED),
+    {MSt15, ResultTerm} = MMod:call_primitive(MSt14, ?PRIM_TERM_MAYBE_CREATE_SUB_BINARY, [
         ctx, BSBinaryReg, {free, BSOffseBytesReg}, TailBytesReg1
     ]),
-    MSt17 = MMod:shift_left(MSt16, TailBytesReg1, 3),
-    MSt18 = MMod:add(MSt17, BSOffsetReg, TailBytesReg1),
-    MSt19 = MMod:free_native_registers(MSt18, [TailBytesReg1]),
-    {MSt19, ResultTerm, NewMatchState}.
+    MSt16 = MMod:shift_left(MSt15, TailBytesReg1, 3),
+    MSt17 = MMod:add(MSt16, BSOffsetReg, TailBytesReg1),
+    MSt18 = MMod:free_native_registers(MSt17, [TailBytesReg1]),
+    {MSt18, ResultTerm, NewMatchState}.
 
 first_pass_bs_match_equal_colon_equal(
     Fail, MatchState, BSBinaryReg, BSOffsetReg, J0, Rest0, MMod, MSt0
@@ -2978,9 +3166,8 @@ first_pass_bs_match_equal_colon_equal(
                 {MSt5, IntValue} = MMod:get_array_element(MSt4, {free, Result}, 1),
                 cond_jump_to_label({{free, IntValue}, '!=', PatternValue}, Fail, MMod, MSt5);
             _ ->
-                MSt4 = MMod:shift_right(MSt3, Result, 4),
-                MSt5 = cond_jump_to_label({Result, '!=', PatternValue}, Fail, MMod, MSt4),
-                MMod:free_native_registers(MSt5, [Result])
+                {MSt4, ResultInt} = MMod:shift_right(MSt3, {free, Result}, 4),
+                cond_jump_to_label({{free, ResultInt}, '!=', PatternValue}, Fail, MMod, MSt4)
         end,
     MSt7 = MMod:add(MSt6, BSOffsetReg, Size),
     {J0 - 3, Rest3, MatchState, BSOffsetReg, MSt7}.
@@ -3221,8 +3408,8 @@ term_to_int({literal, Val}, _FailLabel, _MMod, MSt0) when is_integer(Val) ->
 % Optimized case: when we have type information showing this is an integer, skip the type check
 term_to_int({typed, Term, {t_integer, _Range}}, _FailLabel, MMod, MSt0) ->
     {MSt1, Reg} = MMod:move_to_native_register(MSt0, Term),
-    MSt2 = MMod:shift_right(MSt1, Reg, 4),
-    {MSt2, Reg};
+    {MSt2, IntReg} = MMod:shift_right(MSt1, {free, Reg}, 4),
+    {MSt2, IntReg};
 term_to_int({typed, Term, _NonIntegerType}, FailLabel, MMod, MSt0) ->
     % Type information shows it's not an integer, fall back to generic path
     term_to_int(Term, FailLabel, MMod, MSt0);
@@ -3231,8 +3418,8 @@ term_to_int(Term, FailLabel, MMod, MSt0) ->
     MSt2 = cond_raise_badarg_or_jump_to_fail_label(
         {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, FailLabel, MMod, MSt1
     ),
-    MSt3 = MMod:shift_right(MSt2, Reg, 4),
-    {MSt3, Reg}.
+    {MSt3, IntReg} = MMod:shift_right(MSt2, {free, Reg}, 4),
+    {MSt3, IntReg}.
 
 first_pass_float3(Primitive, Rest0, MMod, MSt0, State0) ->
     {Label, Rest1} = decode_label(Rest0),
@@ -3590,8 +3777,8 @@ term_get_tuple_arity(Tuple, MMod, MSt0) ->
         end,
     MSt2 = MMod:and_(MSt1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
     MSt3 = MMod:move_array_element(MSt2, Reg, 0, Reg),
-    MSt4 = MMod:shift_right(MSt3, Reg, 6),
-    {MSt4, Reg}.
+    {MSt4, ArityReg} = MMod:shift_right(MSt3, {free, Reg}, 6),
+    {MSt4, ArityReg}.
 
 term_get_map_size(Map, MMod, MSt0) ->
     {MSt1, MapKeys} = term_get_map_keys(Map, MMod, MSt0),
@@ -3638,7 +3825,7 @@ term_binary_heap_size({free, Reg}, MMod, MSt0) ->
                     {Reg, '<', ?REFC_BINARY_MIN_32},
                     fun(BSt0) ->
                         BSt1 = MMod:add(BSt0, Reg, 3),
-                        BSt2 = MMod:shift_right(BSt1, Reg, 2),
+                        {BSt2, Reg} = MMod:shift_right(BSt1, {free, Reg}, 2),
                         MMod:add(BSt2, Reg, 1 + ?BINARY_HEADER_SIZE)
                     end,
                     fun(BSt0) ->
@@ -3654,7 +3841,7 @@ term_binary_heap_size({free, Reg}, MMod, MSt0) ->
                     {Reg, '<', ?REFC_BINARY_MIN_64},
                     fun(BSt0) ->
                         BSt1 = MMod:add(BSt0, Reg, 7),
-                        BSt2 = MMod:shift_right(BSt1, Reg, 3),
+                        {BSt2, Reg} = MMod:shift_right(BSt1, {free, Reg}, 3),
                         MMod:add(BSt2, Reg, 1 + ?BINARY_HEADER_SIZE)
                     end,
                     fun(BSt0) ->
@@ -3702,9 +3889,16 @@ stream(MaxSize) ->
 backend_module() ->
     erlang:nif_error(undefined).
 
+%% @doc Get the JIT variant suitable for runtime compilation (stub replaced by a NIF)
+%% @return The JIT variant for this platform and float precision
+-spec variant() -> non_neg_integer().
+variant() ->
+    erlang:nif_error(undefined).
+
 %% @doc Instantiate backend for this platform
 %% @return A tuple with the backend module and the backend state for this platform
 backend({StreamModule, Stream}) ->
     BackendModule = ?MODULE:backend_module(),
-    BackendState = BackendModule:new(?JIT_VARIANT_PIC, StreamModule, Stream),
+    Variant = ?MODULE:variant(),
+    BackendState = BackendModule:new(Variant, StreamModule, Stream),
     {BackendModule, BackendState}.
diff --git a/libs/jit/src/jit_aarch64.erl b/libs/jit/src/jit_aarch64.erl
index 3449a0a997..9b35569f5d 100644
--- a/libs/jit/src/jit_aarch64.erl
+++ b/libs/jit/src/jit_aarch64.erl
@@ -37,6 +37,8 @@
     call_primitive_with_cp/3,
     return_if_not_equal_to_ctx/2,
     jump_to_label/2,
+    jump_to_continuation/2,
+    jump_to_offset/2,
     if_block/3,
     if_else_block/4,
     shift_right/3,
@@ -70,10 +72,26 @@
     add_label/3
 ]).
 
+-ifdef(JIT_DWARF).
+-export([
+    dwarf_opcode/2,
+    dwarf_label/2,
+    dwarf_function/3,
+    dwarf_line/2,
+    dwarf_ctx_register/0
+]).
+-endif.
+
+-compile([warnings_as_errors]).
+
 -include_lib("jit.hrl").
 
 -include("primitives.hrl").
 
+-ifdef(JIT_DWARF).
+-include("jit_dwarf.hrl").
+-endif.
+
 %-define(ASSERT(Expr), true = Expr).
 -define(ASSERT(_Expr), ok).
 
@@ -133,7 +151,8 @@
     branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}],
     available_regs :: [aarch64_register()],
     used_regs :: [aarch64_register()],
-    labels :: [{integer() | reference(), integer()}]
+    labels :: [{integer() | reference(), integer()}],
+    variant :: non_neg_integer()
 }).
 
 -type state() :: #state{}.
@@ -155,7 +174,8 @@
     | {'(int)', maybe_free_aarch64_register(), '!=', aarch64_register() | integer()}
     | {'(bool)', maybe_free_aarch64_register(), '==', false}
     | {'(bool)', maybe_free_aarch64_register(), '!=', false}
-    | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', integer()}.
+    | {maybe_free_aarch64_register(), '&', non_neg_integer(), '!=', integer()}
+    | {{free, aarch64_register()}, '==', {free, aarch64_register()}}.
 
 % ctx->e is 0x28
 % ctx->x is 0x30
@@ -167,6 +187,13 @@
 -define(X_REG(N), {?CTX_REG, 16#30 + (N * ?WORD_SIZE)}).
 -define(CP, {?CTX_REG, 16#B8}).
 -define(FP_REGS, {?CTX_REG, 16#C0}).
+-define(FP_REG_OFFSET(State, F),
+    (F *
+        case (State)#state.variant band ?JIT_VARIANT_FLOAT32 of
+            0 -> 8;
+            _ -> 4
+        end)
+).
 -define(BS, {?CTX_REG, 16#C8}).
 -define(BS_OFFSET, {?CTX_REG, 16#D0}).
 -define(JITSTATE_MODULE, {?JITSTATE_REG, 0}).
@@ -188,6 +215,8 @@
 -define(PARAMETER_REGS, [r0, r1, r2, r3, r4, r5]).
 -define(SCRATCH_REGS, [r7, r8, r9, r10, r11, r12, r13, r14, r15, r3, r4, r5, r6, r17]).
 
+-include("jit_backend_dwarf_impl.hrl").
+
 %%-----------------------------------------------------------------------------
 %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e.
 %% sizeof(uintptr_t)
@@ -216,7 +245,7 @@ word_size() -> ?WORD_SIZE.
 %% @return New backend state
 %%-----------------------------------------------------------------------------
 -spec new(any(), module(), stream()) -> state().
-new(_Variant, StreamModule, Stream) ->
+new(Variant, StreamModule, Stream) ->
     #state{
         stream_module = StreamModule,
         stream = Stream,
@@ -224,7 +253,8 @@ new(_Variant, StreamModule, Stream) ->
         offset = StreamModule:offset(Stream),
         available_regs = ?AVAILABLE_REGS,
         used_regs = [],
-        labels = []
+        labels = [],
+        variant = Variant
     }.
 
 %%-----------------------------------------------------------------------------
@@ -520,6 +550,47 @@ jump_to_label(
             State#state{stream = Stream1, branches = [Reloc | AccBranches]}
     end.
 
+jump_to_offset(#state{stream_module = StreamModule, stream = Stream0} = State, TargetOffset) ->
+    % Unconditional branch; the displacement is relative to the current
+    % position in the stream, where the instruction is about to be appended.
+    Displacement = TargetOffset - StreamModule:offset(Stream0),
+    Branch = jit_aarch64_asm:b(Displacement),
+    State#state{stream = StreamModule:append(Stream0, Branch)}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit an indirect jump to a continuation whose offset is held in a
+%% register. This is used for optimized intra-module returns.
+%% @end
+%% @param State current backend state
+%% @param OffsetReg register containing the continuation offset
+%% @return Updated backend state, with all native registers released
+%%-----------------------------------------------------------------------------
+jump_to_continuation(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        offset = BaseOffset,
+        available_regs = [Scratch | _]
+    } = State,
+    {free, OffsetReg}
+) ->
+    % Target = native_code_base + continuation offset, where
+    % native_code_base = current_pc + (BaseOffset - current stream offset)
+    BaseDelta = BaseOffset - StreamModule:offset(Stream0),
+
+    % adr: PC-relative computation of the native code base into the scratch register
+    LoadBase = jit_aarch64_asm:adr(Scratch, BaseDelta),
+    % add: base + continuation offset gives the absolute target address
+    AddOffset = jit_aarch64_asm:add(Scratch, Scratch, OffsetReg),
+    % br: indirect branch to the computed address
+    Branch = jit_aarch64_asm:br(Scratch),
+
+    Stream1 = StreamModule:append(
+        Stream0, <<LoadBase/binary, AddOffset/binary, Branch/binary>>
+    ),
+    % Tail jump: nothing stays live afterwards, so release every register
+    State#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}.
+
 %% @private
 -spec rewrite_branch_instruction(
     jit_aarch64_asm:cc() | {tbz | tbnz, atom(), 0..63} | {cbz, atom()}, integer()
@@ -783,6 +854,20 @@ if_block_cond(
     State1 = if_block_free_reg(RegOrTuple, State0),
     State2 = State1#state{stream = Stream1},
     {State2, ne, byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {{free, Reg1}, '==', {free, Reg2}}
+) ->
+    % Compare two free registers
+    I1 = jit_aarch64_asm:cmp(Reg1, Reg2),
+    I2 = jit_aarch64_asm:bcc(ne, 0),
+    Code = <<I1/binary, I2/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    % Free both registers
+    State1 = if_block_free_reg({free, Reg1}, State0),
+    State2 = if_block_free_reg({free, Reg2}, State1),
+    State3 = State2#state{stream = Stream1},
+    {State3, ne, byte_size(I1)};
 if_block_cond(
     #state{stream_module = StreamModule, stream = Stream0} = State0,
     {'(bool)', RegOrTuple, '==', false}
@@ -924,13 +1009,29 @@ merge_used_regs(State, []) ->
 %% @param Shift number of bits to shift
-%% @return new state
+%% @return tuple of the new state and the register holding the shifted value
 %%-----------------------------------------------------------------------------
--spec shift_right(state(), aarch64_register(), non_neg_integer()) -> state().
-shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when
+-spec shift_right(#state{}, maybe_free_aarch64_register(), non_neg_integer()) ->
+    {#state{}, aarch64_register()}.
+shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when
     ?IS_GPR(Reg) andalso is_integer(Shift)
 ->
     I = jit_aarch64_asm:lsr(Reg, Reg, Shift),
     Stream1 = StreamModule:append(Stream0, I),
-    State#state{stream = Stream1}.
+    {State#state{stream = Stream1}, Reg};
+shift_right(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [ResultReg | T],
+        used_regs = UR
+    } = State,
+    Reg,
+    Shift
+) when
+    ?IS_GPR(Reg) andalso is_integer(Shift)
+->
+    I = jit_aarch64_asm:lsr(ResultReg, Reg, Shift),
+    Stream1 = StreamModule:append(Stream0, I),
+    {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}.
 
 %%-----------------------------------------------------------------------------
 %% @doc Emit a shift register left by a fixed number of bits, effectively
@@ -1273,7 +1374,7 @@ move_to_vm_register(
 ) ->
     I1 = jit_aarch64_asm:ldr(Reg, {Reg, ?WORD_SIZE}),
     I2 = jit_aarch64_asm:ldr(Temp, ?FP_REGS),
-    I3 = jit_aarch64_asm:str(Reg, {Temp, F * ?WORD_SIZE}),
+    I3 = jit_aarch64_asm:str(Reg, {Temp, ?FP_REG_OFFSET(State0, F)}),
     Code = <<I1/binary, I2/binary, I3/binary>>,
     Stream1 = StreamModule:append(Stream0, Code),
     State1 = free_native_register(State0, Reg),
@@ -1550,7 +1651,19 @@ move_to_array_element(
 %% @param Value value to move (can be an immediate, vm register, pointer, or native register)
 %% @return Tuple of {Updated backend state, Native register containing the value}
 %%-----------------------------------------------------------------------------
--spec move_to_native_register(state(), value()) -> {state(), aarch64_register()}.
+-spec move_to_native_register(state(), value() | cp) -> {state(), aarch64_register()}.
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    cp
+) ->
+    I1 = jit_aarch64_asm:ldr(Reg, ?CP),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
 move_to_native_register(State, Reg) when is_atom(Reg) ->
     {State, Reg};
 move_to_native_register(
@@ -2230,3 +2343,14 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label)
 -spec add_label(state(), integer() | reference(), integer()) -> state().
 add_label(#state{labels = Labels} = State, Label, Offset) ->
     State#state{labels = [{Label, Offset} | Labels]}.
+
+-ifdef(JIT_DWARF).
+%%-----------------------------------------------------------------------------
+%% @doc Return the DWARF register number for the ctx parameter
+%% @returns The DWARF register number where ctx is passed (x0, named r0 in this backend)
+%% @end
+%%-----------------------------------------------------------------------------
+-spec dwarf_ctx_register() -> non_neg_integer().
+dwarf_ctx_register() ->
+    ?DWARF_X0_REG_AARCH64.
+-endif.
diff --git a/libs/jit/src/jit_armv6m.erl b/libs/jit/src/jit_armv6m.erl
new file mode 100644
index 0000000000..7343473b18
--- /dev/null
+++ b/libs/jit/src/jit_armv6m.erl
@@ -0,0 +1,3178 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_armv6m).
+
+-export([
+    word_size/0,
+    new/3,
+    stream/1,
+    offset/1,
+    debugger/1,
+    used_regs/1,
+    available_regs/1,
+    free_native_registers/2,
+    assert_all_native_free/1,
+    jump_table/2,
+    update_branches/1,
+    call_primitive/3,
+    call_primitive_last/3,
+    call_primitive_with_cp/3,
+    return_if_not_equal_to_ctx/2,
+    jump_to_label/2,
+    jump_to_continuation/2,
+    jump_to_offset/2,
+    if_block/3,
+    if_else_block/4,
+    shift_right/3,
+    shift_left/3,
+    move_to_vm_register/3,
+    move_to_native_register/2,
+    move_to_native_register/3,
+    move_to_cp/2,
+    move_array_element/4,
+    move_to_array_element/4,
+    move_to_array_element/5,
+    set_bs/2,
+    copy_to_native_register/2,
+    get_array_element/3,
+    increment_sp/2,
+    set_continuation_to_label/2,
+    set_continuation_to_offset/1,
+    continuation_entry_point/1,
+    get_module_index/1,
+    and_/3,
+    or_/3,
+    add/3,
+    sub/3,
+    mul/3,
+    decrement_reductions_and_maybe_schedule_next/1,
+    call_or_schedule_next/2,
+    call_only_or_schedule_next/2,
+    call_func_ptr/3,
+    return_labels_and_lines/2,
+    add_label/2,
+    add_label/3
+]).
+
+-ifdef(JIT_DWARF).
+-export([
+    dwarf_opcode/2,
+    dwarf_label/2,
+    dwarf_function/3,
+    dwarf_line/2,
+    dwarf_ctx_register/0
+]).
+-endif.
+
+-compile([warnings_as_errors]).
+
+-include_lib("jit.hrl").
+
+-include("primitives.hrl").
+
+-ifdef(JIT_DWARF).
+-include("jit_dwarf.hrl").
+-endif.
+
+-define(ASSERT(Expr), true = Expr).
+
+%% ARMv6-M AAPCS32 ABI: r0-r3 are used for argument passing and return value.
+%% r0-r1 form a double-word for 64-bit returns, additional args passed on stack.
+%% r4-r11 are callee-saved registers (must be preserved across calls),
+%% r12 (IP) is intra-procedure-call scratch register,
+%% r13 (SP) is stack pointer,
+%% r14 (LR) is link register,
+%% r15 (PC) is program counter.
+%% ARMv6-M has no floating-point unit, so no FP registers available.
+%%
+%% See: Arm® Architecture Procedure Call Standard (AAPCS32)
+%% https://developer.arm.com/documentation/ihi0042/latest/
+%%
+%% Registers used by the JIT backend (ARMv6-M Thumb):
+%%   - Argument/return: r0-r3
+%%   - Callee-saved: r4-r11 (must preserve)
+%%   - Scratch: r12 (IP) - intra-procedure call
+%%   - Stack pointer: r13 (SP)
+%%   - Link register: r14 (LR)
+%%   - Program counter: r15 (PC)
+%%   - Extra scratch outside the allocatable set: r12 (IP); see ?AVAILABLE_REGS
+%%
+%% Note: ARMv6-M Thumb instructions are mostly 16-bit with limited
+%% register access (many instructions only work with r0-r7).
+%%
+%% For more details, refer to the AAPCS32 Procedure Call Standard.
+
+-type armv6m_register() ::
+    r0
+    | r1
+    | r2
+    | r3
+    | r4
+    | r5
+    | r6
+    | r7
+    | r8
+    | r9
+    | r10
+    | r11
+    | r12
+    | r13
+    | r14
+    | r15.
+
+-define(IS_GPR(Reg),
+    (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse
+        Reg =:= r5 orelse Reg =:= r6 orelse Reg =:= r7 orelse Reg =:= r8 orelse Reg =:= r9 orelse
+        Reg =:= r10 orelse Reg =:= r11 orelse Reg =:= r12 orelse Reg =:= r13 orelse Reg =:= r14 orelse
+        Reg =:= r15)
+).
+
+-type stream() :: any().
+
+-record(state, {
+    stream_module :: module(),
+    stream :: stream(),
+    offset :: non_neg_integer(),
+    branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}],
+    available_regs :: [armv6m_register()],
+    used_regs :: [armv6m_register()],
+    labels :: [{integer() | reference(), integer()}],
+    variant :: non_neg_integer(),
+    literal_pool :: [{non_neg_integer(), armv6m_register(), non_neg_integer()}]
+}).
+
+-type state() :: #state{}.
+-type immediate() :: non_neg_integer().
+-type vm_register() ::
+    {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, armv6m_register()}.
+-type value() :: immediate() | vm_register() | armv6m_register() | {ptr, armv6m_register()}.
+-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}.
+
+-type maybe_free_armv6m_register() ::
+    {free, armv6m_register()} | armv6m_register().
+
+-type condition() ::
+    {armv6m_register(), '<', integer()}
+    | {maybe_free_armv6m_register(), '<', armv6m_register()}
+    | {maybe_free_armv6m_register(), '==', integer()}
+    | {maybe_free_armv6m_register(), '!=', armv6m_register() | integer()}
+    | {'(int)', maybe_free_armv6m_register(), '==', integer()}
+    | {'(int)', maybe_free_armv6m_register(), '!=', armv6m_register() | integer()}
+    | {'(bool)', maybe_free_armv6m_register(), '==', false}
+    | {'(bool)', maybe_free_armv6m_register(), '!=', false}
+    | {maybe_free_armv6m_register(), '&', non_neg_integer(), '!=', integer()}
+    | {{free, armv6m_register()}, '==', {free, armv6m_register()}}.
+
+% ctx->e is 0x14
+% ctx->x is 0x18
+-define(CTX_REG, r0).
+-define(NATIVE_INTERFACE_REG, r2).
+-define(Y_REGS, {?CTX_REG, 16#14}).
+-define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}).
+-define(CP, {?CTX_REG, 16#5C}).
+-define(FP_REGS, {?CTX_REG, 16#60}).
+-define(BS, {?CTX_REG, 16#64}).
+-define(BS_OFFSET, {?CTX_REG, 16#68}).
+% JITSTATE is on stack, accessed via stack offset
+% These macros now expect a register that contains the jit_state pointer
+-define(JITSTATE_MODULE(Reg), {Reg, 0}).
+-define(JITSTATE_CONTINUATION(Reg), {Reg, 16#4}).
+-define(JITSTATE_REDUCTIONCOUNT(Reg), {Reg, 16#8}).
+-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}).
+-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}).
+
+-define(JUMP_TABLE_ENTRY_SIZE, 12).
+
+% ARMv6-M ABI specific
+%% ARMv6-M register mappings
+
+%% IP can be used as an additional scratch register
+-define(IP_REG, r12).
+
+%% Stack offset for function prolog: push {r1,r4,r5,r6,r7,lr}
+%% r1 (JITSTATE_REG) is at SP+0 after push
+-define(STACK_OFFSET_JITSTATE, 0).
+
+-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127).
+-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000).
+-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255).
+-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000).
+-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X),
+    is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000
+).
+
+%% ARMv6-M register allocation:
+%% - r0: context pointer (reserved)
+%% - r1, r3: available (r1 saved/restored, r3 can be parameter)
+%% - r2: parameter register (not available for scratch)
+%% - r4-r7: callee-saved (saved/restored on entry/exit)
+%% - r8-r11: high registers, limited Thumb access
+%% - r12: intra-procedure call scratch
+%% - r13 (SP), r14 (LR), r15 (PC): special purpose
+%% Reorder to match AArch64 test expectations (r7 first)
+-define(AVAILABLE_REGS, [r7, r6, r5, r4, r3, r1]).
+-define(PARAMETER_REGS, [r0, r1, r2, r3]).
+-define(SCRATCH_REGS, [r7, r6, r5, r4, r3, r2, r1, r0, r12]).
+
+-include("jit_backend_dwarf_impl.hrl").
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the word size in bytes, i.e. sizeof(term), which is the same as
+%% sizeof(uintptr_t).
+%%
+%% C code equivalent is:
+%% #if UINTPTR_MAX == UINT32_MAX
+%%    #define TERM_BYTES 4
+%% #elif UINTPTR_MAX == UINT64_MAX
+%%    #define TERM_BYTES 8
+%% #else
+%%    #error "Term size must be either 32 bit or 64 bit."
+%% #endif
+%%
+%% @end
+%% @return Word size in bytes (this backend always returns 4)
+%%-----------------------------------------------------------------------------
+-spec word_size() -> 4 | 8.
+word_size() -> 4.
+
+%%-----------------------------------------------------------------------------
+%% @doc Create a new backend state for the provided variant, module and stream.
+%% @end
+%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC)
+%% @param StreamModule module used to append to and query the stream
+%% @param Stream stream state; its current offset is recorded in the new state
+%% @return New backend state with every register of ?AVAILABLE_REGS free
+%%-----------------------------------------------------------------------------
+-spec new(any(), module(), stream()) -> state().
+new(Variant, StreamModule, Stream) ->
+    #state{
+        stream_module = StreamModule,
+        stream = Stream,
+        branches = [],
+        offset = StreamModule:offset(Stream),
+        available_regs = ?AVAILABLE_REGS,
+        used_regs = [],
+        labels = [],
+        variant = Variant,
+        literal_pool = []
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the stream object held by the backend state.
+%% @end
+%% @param State current backend state
+%% @return The stream object
+%%-----------------------------------------------------------------------------
+-spec stream(state()) -> stream().
+stream(#state{stream = CurrentStream}) ->
+    CurrentStream.
+
+%%-----------------------------------------------------------------------------
+%% @doc Ask the stream module for the current offset of the stream.
+%% @end
+%% @param State current backend state
+%% @return The current offset
+%%-----------------------------------------------------------------------------
+-spec offset(state()) -> non_neg_integer().
+offset(#state{stream_module = Mod, stream = CurrentStream}) ->
+    Mod:offset(CurrentStream).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a breakpoint instruction (bkpt 0). This is meant for debugging
+%% and is not used in production.
+%% @end
+%% @param State current backend state
+%% @return The updated backend state
+%%-----------------------------------------------------------------------------
+-spec debugger(state()) -> state().
+debugger(#state{stream_module = Mod, stream = Stream} = State) ->
+    Breakpoint = jit_armv6m_asm:bkpt(0),
+    State#state{stream = Mod:append(Stream, Breakpoint)}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the native registers currently marked as used in the state.
+%% This is meant for debugging and is not used in production.
+%% @end
+%% @param State current backend state
+%% @return The list of used registers
+%%-----------------------------------------------------------------------------
+-spec used_regs(state()) -> [armv6m_register()].
+used_regs(#state{used_regs = InUse}) -> InUse.
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the native scratch registers currently marked as available in
+%% the state. This is meant for debugging and is not used in production.
+%% @end
+%% @param State current backend state
+%% @return The list of available registers
+%%-----------------------------------------------------------------------------
+-spec available_regs(state()) -> [armv6m_register()].
+available_regs(#state{available_regs = Free}) -> Free.
+
+%%-----------------------------------------------------------------------------
+%% @doc Free every native register found in a list. Entries may be registers,
+%% pointers to registers, or other values, which are ignored.
+%% @end
+%% @param State current backend state
+%% @param Regs list of registers or other values
+%% @return The updated backend state
+%%-----------------------------------------------------------------------------
+-spec free_native_registers(state(), [value()]) -> state().
+free_native_registers(State, Regs) ->
+    % Thread the state through the list, releasing one entry at a time;
+    % free_native_register/2 decides per entry whether anything is released.
+    Release = fun(Reg, StateAcc) -> free_native_register(StateAcc, Reg) end,
+    lists:foldl(Release, State, Regs).
+
+-spec free_native_register(state(), value()) -> state().
+% A pointer is released through the register that holds it.
+free_native_register(State, {ptr, Reg}) ->
+    free_native_register(State, Reg);
+% Atoms are handed to free_reg/3, which returns the updated register lists.
+free_native_register(#state{available_regs = Avail, used_regs = InUse} = State, Reg) when
+    is_atom(Reg)
+->
+    {NewAvail, NewInUse} = free_reg(Avail, InUse, Reg),
+    State#state{available_regs = NewAvail, used_regs = NewInUse};
+% Anything else (immediates, vm registers) leaves the state unchanged.
+free_native_register(State, _NotARegister) ->
+    State.
+
+%%-----------------------------------------------------------------------------
+%% @doc Assert that all native scratch registers are available. This is used
+%% for debugging and not in production.
+%% @end
+%% @param State current backend state
+%% @return ok (fails with function_clause unless the state is all-free)
+%%-----------------------------------------------------------------------------
+-spec assert_all_native_free(state()) -> ok.
+assert_all_native_free(#state{
+    available_regs = ?AVAILABLE_REGS, used_regs = []
+}) ->
+    ok.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit the jump table at the beginning of the module. Branches will be
+%% updated afterwards with update_branches/1. Emit branches for labels from
+%% 0 (special entry for lines and labels information) to LabelsCount included
+%% (special entry for OP_INT_CALL_END).
+%%
+%% On this platform, each jump table entry is 12 bytes.
+%% ```
+%% ldr r3, pc+4
+%% push {r1, r4, r5, r6, r7, lr}
+%% add pc, pc, r3
+%% nop
+%% offset_to_label0
+%% ```
+%%
+%% @end
+%% @param State current backend state
+%% @param LabelsCount number of labels in the module.
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec jump_table(state(), pos_integer()) -> state().
+jump_table(State, LabelsCount) ->
+    jump_table0(State, 0, LabelsCount).
+
+jump_table0(State, N, LabelsCount) when N > LabelsCount ->
+    State;
+jump_table0(
+    #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State,
+    N,
+    LabelsCount
+) ->
+    % Emit the entry with a zero placeholder for the 32-bit offset word (patched later)
+    I1 = jit_armv6m_asm:ldr(r3, {pc, 4}),
+    I2 = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]),
+    I3 = jit_armv6m_asm:add(pc, r3),
+    I4 = jit_armv6m_asm:nop(),
+
+    JumpEntry = <<I1/binary, I2/binary, I3/binary, I4/binary, 0:32>>,
+    Stream1 = StreamModule:append(Stream0, JumpEntry),
+
+    % Add relocation for the data entry so update_branches/1 can patch the jump target
+    DataOffset = StreamModule:offset(Stream1) - 4,
+    % Calculate the offset of the add instruction (3rd instruction, at offset 4 from entry start)
+    EntryStartOffset = StreamModule:offset(Stream1) - 12,
+    AddInstrOffset = EntryStartOffset + 4,
+    DataReloc = {N, DataOffset, {jump_table_data, AddInstrOffset}},
+    UpdatedState = State#state{stream = Stream1, branches = [DataReloc | Branches]},
+
+    jump_table0(UpdatedState, N + 1, LabelsCount).
+
+%%-----------------------------------------------------------------------------
+%% @doc Rewrite the stream to patch every recorded branch with its label offset.
+%% @end
+%% @param State current backend state
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec update_branches(state()) -> state().
+update_branches(#state{branches = []} = State) ->
+    State;
+update_branches(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        branches = [{Label, Offset, Type} | BranchesT],
+        labels = Labels
+    } = State
+) ->
+    {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
+    Rel = LabelOffset - Offset,
+    NewInstr =
+        case Type of
+            {adr, Reg} when Rel rem 4 =:= 0 -> jit_armv6m_asm:adr(Reg, Rel);
+            {adr, Reg} when Rel rem 4 =:= 2 -> jit_armv6m_asm:adr(Reg, Rel + 2);
+            {far_branch, Size, TempReg} ->
+                % Check if branch can now be optimized to near branch
+                if
+                    Rel >= -2044 andalso Rel =< 2050 andalso (Rel rem 2) =:= 0 ->
+                        % Optimize to near branch: b + nops to fill original size
+                        DirectBranch = jit_armv6m_asm:b(Rel),
+                        % Fill remaining bytes with NOPs
+                        NopCount = (Size - 2) div 2,
+                        Nops = <<<<(jit_armv6m_asm:nop())/binary>> || _ <- lists:seq(1, NopCount)>>,
+                        <<DirectBranch/binary, Nops/binary>>;
+                    true ->
+                        % Keep far branch sequence, calculate correct ldr immediate and update literal
+
+                        % Set thumb bit for bx instruction - target address must be odd for Thumb mode
+                        % So we subtract 1 less
+                        % ldr requires an aligned PC
+                        % add rx, pc doesn't and reads pc+4 whatever the alignment
+
+                        case {TempReg, Size} of
+                            {?IP_REG, 18} ->
+                                % 18-byte sequence with alignment
+                                % Unaligned
+                                I1 = jit_armv6m_asm:push([r0]),
+                                % Aligned
+                                I2 = jit_armv6m_asm:ldr(r0, {pc, 8}),
+                                I3 = jit_armv6m_asm:mov(?IP_REG, r0),
+                                I4 = jit_armv6m_asm:pop([r0]),
+                                I5 = jit_armv6m_asm:add(?IP_REG, pc),
+                                I6 = jit_armv6m_asm:bx(?IP_REG),
+                                I7 = jit_armv6m_asm:nop(),
+                                RelativeOffset = LabelOffset - Offset - 11,
+                                I8 = <<RelativeOffset:32/little>>,
+                                <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary, I6/binary,
+                                    I7/binary, I8/binary>>;
+                            {?IP_REG, 16} ->
+                                % 16-byte sequence without alignment
+                                % Aligned
+                                I1 = jit_armv6m_asm:push([r0]),
+                                % Unaligned
+                                I2 = jit_armv6m_asm:ldr(r0, {pc, 8}),
+                                I3 = jit_armv6m_asm:mov(?IP_REG, r0),
+                                I4 = jit_armv6m_asm:pop([r0]),
+                                I5 = jit_armv6m_asm:add(?IP_REG, pc),
+                                I6 = jit_armv6m_asm:bx(?IP_REG),
+                                RelativeOffset = LabelOffset - Offset - 11,
+                                I7 = <<RelativeOffset:32/little>>,
+                                <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary, I6/binary,
+                                    I7/binary>>;
+                            {_, 12} ->
+                                % 12-byte sequence with alignment
+                                % Aligned
+                                I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}),
+                                I2 = jit_armv6m_asm:add(TempReg, pc),
+                                I3 = jit_armv6m_asm:bx(TempReg),
+                                I4 = jit_armv6m_asm:nop(),
+                                RelativeOffset = LabelOffset - Offset - 5,
+                                I5 = <<RelativeOffset:32/little>>,
+                                <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
+                            {_, 10} ->
+                                % 10-byte sequence without alignment
+                                % Unaligned
+                                I1 = jit_armv6m_asm:ldr(TempReg, {pc, 4}),
+                                I2 = jit_armv6m_asm:add(TempReg, pc),
+                                I3 = jit_armv6m_asm:bx(TempReg),
+                                RelativeOffset = LabelOffset - Offset - 5,
+                                I4 = <<RelativeOffset:32/little>>,
+                                <<I1/binary, I2/binary, I3/binary, I4/binary>>
+                        end
+                end;
+            {jump_table_data, AddInstrOffset} ->
+                % Calculate offset from 'add pc, pc, r3' instruction + 4 to target label
+                % PC when add instruction executes
+                AddPC = AddInstrOffset + 4,
+                RelativeOffset = LabelOffset - AddPC,
+                <<RelativeOffset:32/little>>
+        end,
+    Stream1 = StreamModule:replace(Stream0, Offset, NewInstr),
+    update_branches(State#state{stream = Stream1, branches = BranchesT}).
+
+%%-----------------------------------------------------------------------------
+%% @doc Generate code to load a primitive function pointer into a register
+%% @param Primitive index of the primitive, read at ?NATIVE_INTERFACE_REG + Primitive * 4
+%% @param TargetReg register to load the function pointer into
+%% @return Binary instruction sequence
+%%-----------------------------------------------------------------------------
+-spec load_primitive_ptr(non_neg_integer(), armv6m_register()) -> binary().
+load_primitive_ptr(Primitive, TargetReg) ->
+    case Primitive of
+        0 ->
+            jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, 0});
+        N when N * 4 =< 124 ->
+            jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, N * 4});
+        N when N * 4 < 256 ->
+            % Can encode N * 4 directly in movs instruction (8-bit immediate limit)
+            I1 = jit_armv6m_asm:movs(TargetReg, N * 4),
+            I2 = jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, TargetReg}),
+            <<I1/binary, I2/binary>>;
+        N ->
+            % N * 4 >= 256: load N, shift left by 2. NOTE(review): assumes N =< 255 (movs imm8)
+            I1 = jit_armv6m_asm:movs(TargetReg, N),
+            I2 = jit_armv6m_asm:lsls(TargetReg, TargetReg, 2),
+            I3 = jit_armv6m_asm:ldr(TargetReg, {?NATIVE_INTERFACE_REG, TargetReg}),
+            <<I1/binary, I2/binary, I3/binary>>
+    end.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a call (call with return) to a primitive with arguments. This
+%% function converts arguments and passes them following the backend ABI
+%% convention. It also saves scratch registers we need to preserve.
+%% @end
+%% @param State current backend state
+%% @param Primitive index to the primitive to call
+%% @param Args arguments to pass to the primitive
+%% @return Tuple of {Updated backend state, native register} from call_func_ptr/3
+%%-----------------------------------------------------------------------------
+-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), armv6m_register()}.
+call_primitive(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [TempReg | RestRegs],
+        used_regs = UsedRegs
+    } = State,
+    Primitive,
+    Args
+) ->
+    % Load the function pointer into the first available register and mark it used
+    PrepCall = load_primitive_ptr(Primitive, TempReg),
+    Stream1 = StreamModule:append(Stream0, PrepCall),
+    StateCall = State#state{
+        stream = Stream1,
+        available_regs = RestRegs,
+        used_regs = [TempReg | UsedRegs]
+    },
+    call_func_ptr(StateCall, {free, TempReg}, Args);
+call_primitive(
+    #state{available_regs = []} = State,
+    Primitive,
+    Args
+) ->
+    call_func_ptr(State, {primitive, Primitive}, Args).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a jump (call without return) to a primitive with arguments. This
+%% function converts arguments and passes them following the backend ABI
+%% convention.
+%% @end
+%% @param State current backend state
+%% @param Primitive index to the primitive to call
+%% @param Args arguments to pass to the primitive
+%% @return Updated backend state (all registers freed, literal pool flushed)
+%%-----------------------------------------------------------------------------
+call_primitive_last(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State0,
+    Primitive,
+    Args
+) ->
+    % The function pointer needs a register that is neither a parameter register nor a
+    % register currently holding an argument. `--` is right-associative, so the
+    % subtraction is parenthesized to remove both sets from ?AVAILABLE_REGS.
+    ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)),
+    ArgsRegs = args_regs(Args),
+    ScratchRegs = (?AVAILABLE_REGS -- ArgsRegs) -- ParamRegs,
+    [Temp | AvailableRegs1] = ScratchRegs,
+    UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1,
+    PrepCall = load_primitive_ptr(Primitive, Temp),
+    Stream1 = StreamModule:append(Stream0, PrepCall),
+
+    State1 = State0#state{
+        stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs
+    },
+
+    % Replace the special `offset` argument with the stream offset after the pointer load
+    Args1 = lists:map(
+        fun(Arg) ->
+            case Arg of
+                offset -> StreamModule:offset(Stream1);
+                _ -> Arg
+            end
+        end,
+        Args
+    ),
+
+    % Handle arguments differently for 5+ arguments - use direct call without register preservation
+    State4 =
+        case Args1 of
+            [Arg1, Arg2, Arg3, Arg4, Arg5 | Arg6L] ->
+                State2 =
+                    case Arg6L of
+                        [Arg6] ->
+                            set_stack_args(State1, Arg5, Arg6);
+                        [] ->
+                            set_stack_args(State1, Arg5, undefined)
+                    end,
+                State3 = set_registers_args(State2, [Arg1, Arg2, Arg3, Arg4], 8),
+                #state{stream = Stream2} = State3,
+                % Call the function pointer directly
+                Call = jit_armv6m_asm:blx(Temp),
+                Stream3 = StreamModule:append(Stream2, Call),
+                % Deallocate stack space that was allocated for 5+ arguments
+                DeallocateArgs = jit_armv6m_asm:add(sp, sp, 8),
+                Stream4 = StreamModule:append(Stream3, DeallocateArgs),
+                % Return: pop prolog registers and return
+                PopCode = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]),
+                Stream5 = StreamModule:append(Stream4, PopCode),
+                State3#state{stream = Stream5};
+            [FirstArg, jit_state | ArgsT] ->
+                % For 4 or fewer args, use tail call
+                ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT],
+                State2 = set_registers_args(State1, ArgsForTailCall, 0),
+                tail_call_with_jit_state_registers_only(State2, Temp)
+        end,
+    State5 = State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []},
+    flush_literal_pool(State5).
+
+%%-----------------------------------------------------------------------------
+%% @doc Tail call to address in register, restoring prolog registers including
+%% jit_state in r1. Only use when target function expects jit_state as second parameter.
+%% Function prolog saves: push {r1,r4,r5,r6,r7,lr}
+%% @end
+%% @param State current backend state
+%% @param Reg register containing the target address
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+tail_call_with_jit_state_registers_only(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State,
+    Reg
+) ->
+    % Standard tail call for 4 or fewer arguments
+    % First restore LR from stack (so target function can return properly)
+    % Choose temp register to avoid conflict with Reg
+    TempReg =
+        case Reg of
+            r7 -> r6;
+            _ -> r7
+        end,
+    % Load saved LR (stack slot sp+20) to temp
+    RestoreLRToTemp = jit_armv6m_asm:ldr(TempReg, {sp, 20}),
+    % Overwrite that same slot with the function pointer so the final pop loads it into pc
+    OverwriteLR = jit_armv6m_asm:str(Reg, {sp, 20}),
+    % Move saved LR to LR register
+    RestoreLR = jit_armv6m_asm:mov(lr, TempReg),
+    % Pop what the prolog pushed ({r1,r4,r5,r6,r7,lr}); the lr slot now holds the target,
+    % so this restores jit_state in r1 and branches to the target via pc
+    PopCode = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]),
+
+    Code = <<RestoreLRToTemp/binary, OverwriteLR/binary, RestoreLR/binary, PopCode/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a return of a value if it's not equal to ctx: compare Reg with
+%% ctx and, unless equal, move it to r0 and pop the prolog registers into pc.
+%% Used to break out to the scheduler, e.g. after processing signal messages.
+%% @end
+%% @param State current backend state
+%% @param Reg register to compare to (should be {free, Reg} as it's always freed)
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+return_if_not_equal_to_ctx(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    {free, Reg}
+) ->
+    I1 = jit_armv6m_asm:cmp(Reg, ?CTX_REG),
+    I3 =
+        case Reg of
+            % Return value is already in r0, no move needed
+            r0 -> <<>>;
+            % Move to r0 (return register)
+            _ -> jit_armv6m_asm:mov(r0, Reg)
+        end,
+    I4 = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]),
+    I2 = jit_armv6m_asm:bcc(eq, 2 + byte_size(I3) + byte_size(I4)),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
+    {AvailableRegs1, UsedRegs1} = free_reg(
+        AvailableRegs0, UsedRegs0, Reg
+    ),
+    State#state{
+        stream = Stream1,
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a jump to a label. The offset of the relocation is saved and will
+%% be updated with `update_branches/1`.
+%% @end
+%% @param State current backend state
+%% @param Label to jump to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+jump_to_label(
+    #state{stream_module = StreamModule, stream = Stream0, labels = Labels} = State0, Label
+) ->
+    LabelLookupResult = lists:keyfind(Label, 1, Labels),
+    Offset = StreamModule:offset(Stream0),
+    {State1, CodeBlock} = branch_to_label_code(State0, Offset, Label, LabelLookupResult),
+    Stream1 = StreamModule:append(Stream0, CodeBlock),
+    State2 = State1#state{stream = Stream1},
+    flush_literal_pool(State2).
+
+jump_to_offset(#state{stream_module = Mod, stream = Stream} = State0, TargetOffset) ->
+    % Encode the branch relative to the current end of the stream, then flush literals.
+    Here = Mod:offset(Stream),
+    Branch = branch_to_offset_code(State0, Here, TargetOffset),
+    State1 = State0#state{stream = Mod:append(Stream, Branch)},
+    flush_literal_pool(State1).
+
+%%-----------------------------------------------------------------------------
+%% @doc Jump to address in continuation pointer register
+%% The continuation points to a function prologue, so we need to compute
+%% the target address using PIC and use function epilogue to jump.
+%% @end
+%% @param State current backend state
+%% @param OffsetReg register containing the offset value, passed as {free, OffsetReg}
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+jump_to_continuation(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _],
+        offset = BaseOffset
+    } = State0,
+    {free, OffsetReg}
+) ->
+    % ARMv6-M PIC implementation using one temp register:
+    % 1. Use ADR to get PC into temp register
+    % 2. Add PC to OffsetReg to get intermediate value
+    % 3. Load base offset immediate into temp
+    % 4. Add base offset to get final target address
+    % 5. Use function epilogue pattern to jump
+
+    AdrOffset = StreamModule:offset(Stream0),
+    % ADR Temp, +4 stores PC+4 in Temp
+    I1 = jit_armv6m_asm:adr(Temp, 4),
+
+    % Add PC to OffsetReg: OffsetReg = OffsetReg + PC
+    I2 = jit_armv6m_asm:adds(OffsetReg, OffsetReg, Temp),
+
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+
+    % PC is aligned down to 4-byte boundary
+    AdrPC = (AdrOffset + 4) band (bnot 3),
+
+    % Calculate what we need to add: BaseOffset - AdrPC + 1 for thumb bit
+    ImmediateValue = BaseOffset - AdrPC + 1,
+
+    % Generate mov_immediate to load the calculated base offset into Temp
+    State1 = mov_immediate(State0#state{stream = Stream1}, Temp, ImmediateValue),
+
+    % Add base offset to get final target address: OffsetReg = OffsetReg + BaseOffset
+    I3 = jit_armv6m_asm:adds(OffsetReg, OffsetReg, Temp),
+
+    % Function epilogue pattern:
+    % Load saved LR to temp register (LR is at sp+20)
+    I4 = jit_armv6m_asm:ldr(Temp, {sp, 20}),
+    % Store target address to LR position on stack
+    I5 = jit_armv6m_asm:str(OffsetReg, {sp, 20}),
+    % Move saved LR to LR register
+    I6 = jit_armv6m_asm:mov(lr, Temp),
+    % Pop what the prolog pushed ({r1,r4,r5,r6,r7,lr}); the lr slot now holds the target,
+    % so this restores jit_state in r1 and branches to the target via pc
+    I7 = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]),
+
+    Code = <<I3/binary, I4/binary, I5/binary, I6/binary, I7/binary>>,
+    Stream2 = StreamModule:append(State1#state.stream, Code),
+    % Free all registers as this is a terminal instruction
+    State2 = State1#state{stream = Stream2, available_regs = ?AVAILABLE_REGS, used_regs = []},
+    flush_literal_pool(State2).
+
+%% Build the code for an unconditional branch from stream offset `Offset' to
+%% stream offset `TargetOffset'.
+%%
+%% When the distance fits (-2044..2050 bytes from the branch instruction), a
+%% single B instruction is returned. Otherwise a register-based sequence is
+%% returned, using the first available register:
+%%     ldr Temp, [pc, 4]; add Temp, pc; bx Temp; [nop]; <32-bit literal>
+%% The nop is only emitted when `Offset' is 4-byte aligned, so that the
+%% literal always sits on a 4-byte boundary.
+branch_to_offset_code(_State, Offset, TargetOffset) when
+    (TargetOffset - Offset) >= -2044 andalso (TargetOffset - Offset) =< 2050
+->
+    jit_armv6m_asm:b(TargetOffset - Offset);
+branch_to_offset_code(#state{available_regs = [TempReg | _]}, Offset, TargetOffset) ->
+    Padding =
+        case Offset rem 4 of
+            0 -> jit_armv6m_asm:nop();
+            _ -> <<>>
+        end,
+    % Value added to pc by the sequence to reach the target
+    Literal = TargetOffset - Offset - 5,
+    <<
+        (jit_armv6m_asm:ldr(TempReg, {pc, 4}))/binary,
+        (jit_armv6m_asm:add(TempReg, pc))/binary,
+        (jit_armv6m_asm:bx(TempReg))/binary,
+        Padding/binary,
+        Literal:32/little
+    >>.
+
+%% Build the code for a branch to `Label', given the result of looking the
+%% label up in the known labels (`{Label, LabelOffset}' or `false').
+%% Returns `{State, Code}'.
+%%
+%% - Known label: the code comes from `branch_to_offset_code/3'.
+%% - Unknown label, a register is available: a far branch sequence with a zero
+%%   placeholder literal is emitted and a `far_branch' relocation is prepended
+%%   to the branches of the state.
+%% - Unknown label, no register available: same, but r0 is preserved with
+%%   push/pop and `?IP_REG' carries the address.
+%% In both far sequences a nop is inserted when needed so that the placeholder
+%% literal is 4-byte aligned.
+branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) ->
+    {State, branch_to_offset_code(State, Offset, LabelOffset)};
+branch_to_label_code(
+    #state{available_regs = [TempReg | _], branches = Branches} = State, Offset, Label, false
+) ->
+    % Three 16-bit instructions precede the literal: pad when starting aligned.
+    Padding =
+        case Offset rem 4 of
+            0 -> jit_armv6m_asm:nop();
+            _ -> <<>>
+        end,
+    Sequence = <<
+        (jit_armv6m_asm:ldr(TempReg, {pc, 4}))/binary,
+        (jit_armv6m_asm:add(TempReg, pc))/binary,
+        (jit_armv6m_asm:bx(TempReg))/binary,
+        Padding/binary,
+        % Placeholder offset
+        0:32/little
+    >>,
+    Reloc = {Label, Offset, {far_branch, byte_size(Sequence), TempReg}},
+    {State#state{branches = [Reloc | Branches]}, Sequence};
+branch_to_label_code(
+    #state{available_regs = [], branches = Branches} = State, Offset, Label, false
+) ->
+    % Six 16-bit instructions precede the literal: pad when starting unaligned.
+    Padding =
+        case Offset rem 4 of
+            0 -> <<>>;
+            _ -> jit_armv6m_asm:nop()
+        end,
+    Sequence = <<
+        (jit_armv6m_asm:push([r0]))/binary,
+        (jit_armv6m_asm:ldr(r0, {pc, 8}))/binary,
+        (jit_armv6m_asm:mov(?IP_REG, r0))/binary,
+        (jit_armv6m_asm:pop([r0]))/binary,
+        (jit_armv6m_asm:add(?IP_REG, pc))/binary,
+        (jit_armv6m_asm:bx(?IP_REG))/binary,
+        Padding/binary,
+        % Placeholder offset
+        0:32/little
+    >>,
+    Reloc = {Label, Offset, {far_branch, byte_size(Sequence), ?IP_REG}},
+    {State#state{branches = [Reloc | Branches]}, Sequence};
+branch_to_label_code(#state{available_regs = []}, _Offset, _Label, LabelLookup) ->
+    error({no_available_registers, LabelLookup}).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit an if block, i.e. emit a test of a condition and conditionally
+%% execute a block. With `{'and', CondList}', every condition is tested in
+%% turn and the block is only executed if all of them hold. Each condition
+%% emits a placeholder conditional branch which is patched, once the block has
+%% been emitted, to jump to the end of the block.
+%% @end
+%% @param State current backend state
+%% @param Cond condition to test
+%% @param BlockFn function to emit the block that may be executed
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state().
+if_block(#state{stream_module = StreamModule} = State0, {'and', CondList}, BlockFn) ->
+    % Emit every test, remembering where each placeholder branch lives.
+    {Patches, State1} = lists:mapfoldl(
+        fun(Cond, AccState) ->
+            Start = StreamModule:offset(AccState#state.stream),
+            {NextState, CC, Delta} = if_block_cond(AccState, Cond),
+            {{Start + Delta, CC}, NextState}
+        end,
+        State0,
+        CondList
+    ),
+    State2 = BlockFn(State1),
+    BlockEnd = StreamModule:offset(State2#state.stream),
+    % Patch from the last condition back to the first one.
+    Patched = lists:foldr(
+        fun({BranchAt, CC}, AccStream) ->
+            Branch = jit_armv6m_asm:bcc(CC, BlockEnd - BranchAt),
+            StreamModule:replace(AccStream, BranchAt, Branch)
+        end,
+        State2#state.stream,
+        Patches
+    ),
+    merge_used_regs(State2#state{stream = Patched}, State1#state.used_regs);
+if_block(#state{stream_module = StreamModule, stream = Stream0} = State0, Cond, BlockFn) ->
+    Start = StreamModule:offset(Stream0),
+    {State1, CC, Delta} = if_block_cond(State0, Cond),
+    BranchAt = Start + Delta,
+    State2 = BlockFn(State1),
+    Stream2 = State2#state.stream,
+    BlockEnd = StreamModule:offset(Stream2),
+    %% Make the placeholder conditional branch jump to the end of the block
+    Branch = jit_armv6m_asm:bcc(CC, BlockEnd - BranchAt),
+    Patched = StreamModule:replace(Stream2, BranchAt, Branch),
+    merge_used_regs(State2#state{stream = Patched}, State1#state.used_regs).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit an if else block, i.e. emit a test of a condition and
+%% conditionally execute a block or another block. The true block is followed
+%% by an unconditional branch over the else block; both that branch and the
+%% conditional branch of the test are emitted as placeholders and patched once
+%% their targets are known. The else block starts from the register
+%% allocation that was in effect right after the test.
+%% @end
+%% @param State current backend state
+%% @param Cond condition to test
+%% @param BlockTrueFn function to emit the block that is executed if condition is true
+%% @param BlockFalseFn function to emit the block that is executed if condition is false
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) ->
+    state().
+if_else_block(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    Cond,
+    BlockTrueFn,
+    BlockFalseFn
+) ->
+    CondStart = StreamModule:offset(Stream0),
+    {CondState, CC, BranchDelta} = if_block_cond(State0, Cond),
+    CondBranchAt = CondStart + BranchDelta,
+    TrueState = BlockTrueFn(CondState),
+    TrueStream = TrueState#state.stream,
+    %% Placeholder for the branch that skips the else block
+    SkipElseAt = StreamModule:offset(TrueStream),
+    WithSkip = StreamModule:append(TrueStream, jit_armv6m_asm:b(0)),
+    %% The else block starts right after it: point the conditional branch there
+    ElseStart = StreamModule:offset(WithSkip),
+    CondBranch = jit_armv6m_asm:bcc(CC, ElseStart - CondBranchAt),
+    ElseStream0 = StreamModule:replace(WithSkip, CondBranchAt, CondBranch),
+    %% Emit the else block with the registers as they were after the test
+    ElseState0 = TrueState#state{
+        stream = ElseStream0,
+        used_regs = CondState#state.used_regs,
+        available_regs = CondState#state.available_regs
+    },
+    ElseState = BlockFalseFn(ElseState0),
+    ElseStream = ElseState#state.stream,
+    %% Point the skip branch at the end of the else block
+    EndOffset = StreamModule:offset(ElseStream),
+    SkipElse = jit_armv6m_asm:b(EndOffset - SkipElseAt),
+    FinalStream = StreamModule:replace(ElseStream, SkipElseAt, SkipElse),
+    merge_used_regs(ElseState#state{stream = FinalStream}, TrueState#state.used_regs).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit the test of an if block condition followed by a placeholder
+%% conditional branch (with a zero offset). The branch is taken when the
+%% condition does NOT hold, so that callers can patch it to skip the block.
+%%
+%% Registers passed as `{free, Reg}' are released in the returned state.
+%% @end
+%% @param State current backend state
+%% @param Cond condition to test
+%% @return `{NewState, CC, BranchDelta}' where `CC' is the condition code of
+%% the placeholder branch and `BranchDelta' is the distance in bytes between
+%% the stream offset on entry and the placeholder branch. Callers patch the
+%% branch at `OffsetOnEntry + BranchDelta', so the delta must account for
+%% every instruction emitted before the branch, including immediate loads.
+%%-----------------------------------------------------------------------------
+-spec if_block_cond(state(), condition()) -> {state(), jit_armv6m_asm:cc(), non_neg_integer()}.
+if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) ->
+    %% Compare register with 0
+    I1 = jit_armv6m_asm:cmp(Reg, 0),
+    %% Skip the block if positive or zero (N flag clear)
+    I2 = jit_armv6m_asm:bcc(pl, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State1 = State0#state{stream = Stream1},
+    {State1, pl, byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {Reg, '<', Val}
+) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 ->
+    % Immediate fits in the 8 bits of cmp
+    I1 = jit_armv6m_asm:cmp(Reg, Val),
+    % ge = greater than or equal
+    I2 = jit_armv6m_asm:bcc(ge, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State1 = State0#state{stream = Stream1},
+    {State1, ge, byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {Reg, '<', Val}
+) when is_atom(Reg), is_integer(Val) ->
+    % Immediate does not fit in cmp: load it in a temporary register first.
+    % The code emitted by mov_immediate precedes the branch, so it must be
+    % included in the returned delta.
+    Offset0 = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    Offset1 = StreamModule:offset(Stream1),
+    I1 = jit_armv6m_asm:cmp(Reg, Temp),
+    % ge = greater than or equal
+    I2 = jit_armv6m_asm:bcc(ge, 0),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
+    State2 = State1#state{stream = Stream2},
+    {State2, ge, Offset1 - Offset0 + byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '<', RegB}
+) when is_atom(RegB) ->
+    Reg = if_block_cond_reg(RegOrTuple),
+    I1 = jit_armv6m_asm:cmp(Reg, RegB),
+    % ge = greater than or equal
+    I2 = jit_armv6m_asm:bcc(ge, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, ge, byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0}
+) ->
+    Reg = if_block_cond_reg(RegOrTuple),
+    %% Compare register with 0
+    I1 = jit_armv6m_asm:cmp(Reg, 0),
+    %% Skip the block if not equal
+    I2 = jit_armv6m_asm:bcc(ne, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, ne, byte_size(I1)};
+%% Delegate (int) forms to regular forms since we only have 32-bit words
+if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) ->
+    if_block_cond(State, {RegOrTuple, '==', 0});
+if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) ->
+    if_block_cond(State, {RegOrTuple, '==', Val});
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '!=', Val}
+) when (is_integer(Val) andalso Val >= 0 andalso Val =< 255) orelse ?IS_GPR(Val) ->
+    Reg = if_block_cond_reg(RegOrTuple),
+    I1 = jit_armv6m_asm:cmp(Reg, Val),
+    I2 = jit_armv6m_asm:bcc(eq, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, eq, byte_size(I1)};
+if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) ->
+    if_block_cond(State, {RegOrTuple, '!=', Val});
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '==', Val}
+) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->
+    Reg = if_block_cond_reg(RegOrTuple),
+    I1 = jit_armv6m_asm:cmp(Reg, Val),
+    I2 = jit_armv6m_asm:bcc(ne, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, ne, byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {{free, RegA}, '==', {free, RegB}}
+) ->
+    % Compare two free registers: cmp RegA, RegB; bne <target>
+    I1 = jit_armv6m_asm:cmp(RegA, RegB),
+    Stream1 = StreamModule:append(Stream0, I1),
+    I2 = jit_armv6m_asm:bcc(ne, 0),
+    Stream2 = StreamModule:append(Stream1, I2),
+    State1 = State0#state{stream = Stream2},
+    State2 = if_block_free_reg({free, RegA}, State1),
+    State3 = if_block_free_reg({free, RegB}, State2),
+    {State3, ne, byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '==', Val}
+) when is_integer(Val) ->
+    % Immediate does not fit in cmp: load it in a temporary register first
+    Offset0 = StreamModule:offset(Stream0),
+    Reg = if_block_cond_reg(RegOrTuple),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    Offset1 = StreamModule:offset(Stream1),
+    I1 = jit_armv6m_asm:cmp(Reg, Temp),
+    I2 = jit_armv6m_asm:bcc(ne, 0),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, ne, Offset1 - Offset0 + byte_size(I1)};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '!=', Val}
+) when is_integer(Val) ->
+    % Immediate does not fit in cmp: load it in a temporary register first
+    Offset0 = StreamModule:offset(Stream0),
+    Reg = if_block_cond_reg(RegOrTuple),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    Offset1 = StreamModule:offset(Stream1),
+    I1 = jit_armv6m_asm:cmp(Reg, Temp),
+    I2 = jit_armv6m_asm:bcc(eq, 0),
+    Stream2 = StreamModule:append(Stream1, <<I1/binary, I2/binary>>),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, eq, Offset1 - Offset0 + byte_size(I1)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {'(bool)', RegOrTuple, '==', false}
+) ->
+    Reg = if_block_cond_reg(RegOrTuple),
+    % Test bit 0 by shifting it to the MSB, which sets the N flag accordingly
+    I1 = jit_armv6m_asm:lsls(Temp, Reg, 31),
+    % Skip the block if negative (bit was 1/true)
+    I2 = jit_armv6m_asm:bcc(mi, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, mi, byte_size(I1)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {'(bool)', RegOrTuple, '!=', false}
+) ->
+    Reg = if_block_cond_reg(RegOrTuple),
+    % Test bit 0 by shifting it to the MSB, which sets the N flag accordingly
+    I1 = jit_armv6m_asm:lsls(Temp, Reg, 31),
+    % Skip the block if positive (bit was 0/false)
+    I2 = jit_armv6m_asm:bcc(pl, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, pl, byte_size(I1)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {RegOrTuple, '&', Val, '!=', 0}
+) ->
+    Reg = if_block_cond_reg(RegOrTuple),
+    % Test bits - optimize for low bits masks that can use lsls
+    {TestCode, BranchCond} =
+        case bit_test_optimization(Val) of
+            {low_bits_mask, BitCount} ->
+                % Low bits mask: use lsls to shift high bits away
+                ShiftAmount = 32 - BitCount,
+                TestCode0 = jit_armv6m_asm:lsls(Temp, Reg, ShiftAmount),
+                % skip the block if zero (no low bit was set)
+                {TestCode0, eq};
+            no_optimization ->
+                % General case: use mov+tst
+                TestCode0 = jit_armv6m_asm:movs(Temp, Val),
+                TestCode1 = jit_armv6m_asm:tst(Reg, Temp),
+                {<<TestCode0/binary, TestCode1/binary>>, eq}
+        end,
+    I2 = jit_armv6m_asm:bcc(BranchCond, 0),
+    Stream1 = StreamModule:append(Stream0, <<TestCode/binary, I2/binary>>),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, BranchCond, byte_size(TestCode)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {Reg, '&', 16#F, '!=', 16#F}
+) when ?IS_GPR(Reg) ->
+    % Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
+    % Invert into Temp: the low 4 bits are all zero iff they were all set
+    I1 = jit_armv6m_asm:mvns(Temp, Reg),
+    % Keep only the low 4 bits (32 - 4), setting the Z flag
+    I2 = jit_armv6m_asm:lsls(Temp, Temp, 28),
+    I3 = jit_armv6m_asm:bcc(eq, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    State1 = State0#state{stream = Stream1},
+    {State1, eq, byte_size(I1) + byte_size(I2)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State0,
+    {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F}
+) when ?IS_GPR(Reg) ->
+    % Special case Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG
+    % Reg is free, so it can be overwritten instead of using a temporary
+    I1 = jit_armv6m_asm:mvns(Reg, Reg),
+    % Keep only the low 4 bits (32 - 4), setting the Z flag
+    I2 = jit_armv6m_asm:lsls(Reg, Reg, 28),
+    I3 = jit_armv6m_asm:bcc(eq, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    State1 = State0#state{stream = Stream1},
+    State2 = if_block_free_reg(RegTuple, State1),
+    {State2, eq, byte_size(I1) + byte_size(I2)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | AT]
+    } = State0,
+    {Reg, '&', Mask, '!=', Val}
+) when ?IS_GPR(Reg) ->
+    % Copy Reg to Temp and AND the copy with the mask. Temp is withheld from
+    % the available registers while and_ runs so that and_ cannot reuse it.
+    OffsetBefore = StreamModule:offset(Stream0),
+    I1 = jit_armv6m_asm:mov(Temp, Reg),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State1 = State0#state{stream = Stream1},
+    State2 = and_(State1#state{available_regs = AT}, Temp, Mask),
+    Stream2 = State2#state.stream,
+    % Compare with value
+    I2 = jit_armv6m_asm:cmp(Temp, Val),
+    Stream3 = StreamModule:append(Stream2, I2),
+    OffsetAfter = StreamModule:offset(Stream3),
+    I3 = jit_armv6m_asm:bcc(eq, 0),
+    Stream4 = StreamModule:append(Stream3, I3),
+    State3 = State2#state{stream = Stream4, available_regs = [Temp | State2#state.available_regs]},
+    {State3, eq, OffsetAfter - OffsetBefore};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State0,
+    {{free, Reg} = RegTuple, '&', Mask, '!=', Val}
+) when ?IS_GPR(Reg) ->
+    % Reg is free: AND it with the mask in place
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = and_(State0, Reg, Mask),
+    Stream1 = State1#state.stream,
+    % Compare with value
+    I2 = jit_armv6m_asm:cmp(Reg, Val),
+    Stream2 = StreamModule:append(Stream1, I2),
+    OffsetAfter = StreamModule:offset(Stream2),
+    I3 = jit_armv6m_asm:bcc(eq, 0),
+    Stream3 = StreamModule:append(Stream2, I3),
+    State3 = State1#state{stream = Stream3},
+    State4 = if_block_free_reg(RegTuple, State3),
+    {State4, eq, OffsetAfter - OffsetBefore}.
+
+%% Return the register designated by a condition operand, which is either a
+%% register or a `{free, Register}' tuple.
+if_block_cond_reg({free, Reg}) -> Reg;
+if_block_cond_reg(Reg) -> Reg.
+
+%% Release the register of a condition operand when it was passed as
+%% `{free, Reg}': `free_reg/3' computes the new available and used register
+%% lists. A plain register operand leaves the state untouched.
+-spec if_block_free_reg(armv6m_register() | {free, armv6m_register()}, state()) -> state().
+if_block_free_reg({free, Reg}, State) ->
+    #state{available_regs = Available0, used_regs = Used0} = State,
+    {Available1, Used1} = free_reg(Available0, Used0, Reg),
+    State#state{available_regs = Available1, used_regs = Used1};
+if_block_free_reg(Reg, State) when ?IS_GPR(Reg) ->
+    State.
+
+%% Tell whether testing a register against `Mask' can be done with a single
+%% lsls instruction. This is the case for the recognized masks made of the N
+%% lowest bits, for which `{low_bits_mask, N}' is returned. Any other value
+%% yields `no_optimization'.
+-spec bit_test_optimization(non_neg_integer()) ->
+    {low_bits_mask, non_neg_integer()} | no_optimization.
+bit_test_optimization(Mask) ->
+    case Mask of
+        % ?TERM_PRIMARY_MASK
+        16#3 -> {low_bits_mask, 2};
+        16#7 -> {low_bits_mask, 3};
+        % ?TERM_IMMED_TAG_MASK
+        16#F -> {low_bits_mask, 4};
+        % ?TERM_BOXED_TAG_MASK or ?TERM_IMMED2_TAG_MASK
+        16#3F -> {low_bits_mask, 6};
+        _ -> no_optimization
+    end.
+
+%% Make sure every register of the given list is marked as used in the state:
+%% a register that is not yet in the used registers is prepended to them and
+%% removed from the available registers. Registers already used are skipped.
+-spec merge_used_regs(state(), [armv6m_register()]) -> state().
+merge_used_regs(State, Regs) ->
+    lists:foldl(
+        fun(Reg, #state{used_regs = Used, available_regs = Available} = Acc) ->
+            case lists:member(Reg, Used) of
+                true ->
+                    Acc;
+                false ->
+                    Acc#state{
+                        used_regs = [Reg | Used],
+                        available_regs = lists:delete(Reg, Available)
+                    }
+            end
+        end,
+        State,
+        Regs
+    ).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a logical shift right of a register by a fixed number of bits,
+%% effectively dividing it by 2^Shift. A register passed as `{free, Reg}' is
+%% shifted in place. Otherwise the result is written to the first available
+%% register, which is then marked as used, and the source is left unchanged.
+%% @end
+%% @param State current state
+%% @param Reg register to shift
+%% @param Shift number of bits to shift
+%% @return tuple of the new state and the register holding the result
+%%-----------------------------------------------------------------------------
+-spec shift_right(#state{}, maybe_free_armv6m_register(), non_neg_integer()) ->
+    {#state{}, armv6m_register()}.
+shift_right(#state{stream_module = StreamModule, stream = Stream} = State, {free, Reg}, Shift) when
+    ?IS_GPR(Reg) andalso is_integer(Shift)
+->
+    Shifted = StreamModule:append(Stream, jit_armv6m_asm:lsrs(Reg, Reg, Shift)),
+    {State#state{stream = Shifted}, Reg};
+shift_right(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream,
+        available_regs = [Dest | Remaining],
+        used_regs = Used
+    } = State,
+    Reg,
+    Shift
+) when
+    ?IS_GPR(Reg) andalso is_integer(Shift)
+->
+    Shifted = StreamModule:append(Stream, jit_armv6m_asm:lsrs(Dest, Reg, Shift)),
+    {State#state{stream = Shifted, available_regs = Remaining, used_regs = [Dest | Used]}, Dest}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a logical shift left of a register, in place, by a fixed number
+%% of bits, effectively multiplying it by 2^Shift.
+%% @end
+%% @param State current state
+%% @param Reg register to shift
+%% @param Shift number of bits to shift
+%% @return new state
+%%-----------------------------------------------------------------------------
+shift_left(#state{stream_module = StreamModule, stream = Stream} = State, Reg, Shift) when
+    is_atom(Reg)
+->
+    Shifted = StreamModule:append(Stream, jit_armv6m_asm:lsls(Reg, Reg, Shift)),
+    State#state{stream = Shifted}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a call to a function pointer with arguments. This function converts
+%% arguments and passes them following the backend ABI convention.
+%% @end
+%% @param State current backend state
+%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex}
+%% @param Args arguments to pass to the function
+%% @return Updated backend state and return register
+%%-----------------------------------------------------------------------------
+-spec call_func_ptr(state(), {free, armv6m_register()} | {primitive, non_neg_integer()}, [arg()]) ->
+    {state(), armv6m_register()}.
+call_func_ptr(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State0,
+    FuncPtrTuple,
+    Args
+) ->
+    FreeRegs = lists:flatmap(
+        fun
+            ({free, {ptr, Reg}}) -> [Reg];
+            ({free, Reg}) when is_atom(Reg) -> [Reg];
+            (_) -> []
+        end,
+        [FuncPtrTuple | Args]
+    ),
+    UsedRegs1 = UsedRegs0 -- FreeRegs,
+    SavedRegsBase = [?CTX_REG, ?NATIVE_INTERFACE_REG | UsedRegs1],
+
+    % Calculate available registers for potential padding
+    FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS),
+    AvailableRegs1 = FreeGPRegs ++ AvailableRegs0,
+
+    % Add padding register if odd number to maintain 8-byte stack alignment per ARM AAPCS
+    SavedRegs =
+        case (length(SavedRegsBase) rem 2) =:= 1 of
+            true when AvailableRegs1 /= [] ->
+                [PaddingReg | _] = AvailableRegs1,
+                SavedRegsBase ++ [PaddingReg];
+            _ ->
+                PaddingReg = undefined,
+                SavedRegsBase
+        end,
+
+    Stream1 = push_registers(SavedRegs, StreamModule, Stream0),
+
+    % Set up arguments following ARM AAPCS calling convention
+    % First four args are passed in r0-r3, but 5th and 6th are passed
+    % on the stack.
+    Args1 = lists:map(
+        fun(Arg) ->
+            case Arg of
+                offset -> StreamModule:offset(Stream1);
+                _ -> Arg
+            end
+        end,
+        Args
+    ),
+    {RegArgs0, StackArgs} =
+        case Args1 of
+            [Arg1, Arg2, Arg3, Arg4 | StackArgs0] -> {[Arg1, Arg2, Arg3, Arg4], StackArgs0};
+            _ -> {Args, []}
+        end,
+    RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0),
+    StackArgsRegs = lists:flatmap(fun arg_to_reg_list/1, StackArgs),
+
+    % We pushed registers to stack, so we can use these registers we saved
+    % and the currently available registers to push values to the stack.
+    SetArgsPushStackAvailableArgs = (UsedRegs1 -- (RegArgsRegs ++ StackArgsRegs)) ++ AvailableRegs0,
+    State1 = State0#state{
+        available_regs = SetArgsPushStackAvailableArgs,
+        used_regs = ?AVAILABLE_REGS -- SetArgsPushStackAvailableArgs,
+        stream = Stream1
+    },
+    State2 =
+        case StackArgs of
+            [] -> State1;
+            [Arg5] -> set_stack_args(State1, Arg5, undefined);
+            [Arg5, Args6] -> set_stack_args(State1, Arg5, Args6)
+        end,
+
+    SetArgsRegsOnlyAvailableArgs = State2#state.available_regs,
+    ParameterRegs = parameter_regs(RegArgs0),
+    {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} =
+        case FuncPtrTuple of
+            {free, FuncPtrReg0} ->
+                % If FuncPtrReg is in parameter regs, we must swap it with a free reg.
+                case lists:member(FuncPtrReg0, ParameterRegs) of
+                    true ->
+                        case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of
+                            [] ->
+                                % Swap SetArgsRegsOnlyAvailableArgs with a reg used in RegArgs0
+                                % that is not in ParameterRegs
+                                [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs,
+                                [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs,
+                                MovInstr1 = jit_armv6m_asm:mov(NewArgReg, FuncPtrReg1),
+                                MovInstr2 = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0),
+                                SetArgsAvailableArgs1 =
+                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
+                                        [FuncPtrReg0],
+                                RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg),
+                                {
+                                    StreamModule:append(
+                                        State2#state.stream, <<MovInstr1/binary, MovInstr2/binary>>
+                                    ),
+                                    SetArgsAvailableArgs1,
+                                    FuncPtrReg1,
+                                    RegArgs1
+                                };
+                            [FuncPtrReg1 | _] ->
+                                MovInstr = jit_armv6m_asm:mov(FuncPtrReg1, FuncPtrReg0),
+                                SetArgsAvailableArgs1 =
+                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
+                                        [FuncPtrReg0],
+                                {
+                                    StreamModule:append(State2#state.stream, MovInstr),
+                                    SetArgsAvailableArgs1,
+                                    FuncPtrReg1,
+                                    RegArgs0
+                                }
+                        end;
+                    false ->
+                        SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
+                        {State2#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0}
+                end;
+            {primitive, Primitive} ->
+                [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs,
+                SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
+                PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0),
+                Stream2 = StreamModule:append(State2#state.stream, PrepCall),
+                {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0}
+        end,
+
+    State3 = State2#state{
+        available_regs = SetArgsAvailableRegs,
+        used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs,
+        stream = Stream3
+    },
+
+    StackOffset =
+        case StackArgs of
+            [] -> length(SavedRegs) * 4;
+            _ -> length(SavedRegs) * 4 + 8
+        end,
+    State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset),
+    Stream4 = State4#state.stream,
+
+    % Call the function pointer (using BLX for call with return)
+    Call = jit_armv6m_asm:blx(FuncPtrReg),
+    Stream5 = StreamModule:append(Stream4, Call),
+
+    % For result, we need a free register (including FuncPtrReg) but ideally
+    % not the one used for padding. If none are available (all 8 registers
+    % were pushed to the stack), we write the result to the stack position
+    % of FuncPtrReg
+    {Stream6, UsedRegs2} =
+        case length(SavedRegs) of
+            8 when element(1, FuncPtrTuple) =:= free ->
+                % We use original FuncPtrReg then as we know it's available.
+                % Calculate stack offset: register number * 4 bytes
+                ResultReg = element(2, FuncPtrTuple),
+                StoreResultStackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4,
+                StoreResult = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset}),
+                {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]};
+            8 when PaddingReg =/= undefined ->
+                % We use PaddingReg then as we know it's available.
+                % Calculate stack offset: register number * 4 bytes
+                ResultReg = PaddingReg,
+                StoreResultStackOffset = jit_armv6m_asm:reg_to_num(ResultReg) * 4,
+                StoreResult = jit_armv6m_asm:str(r0, {sp, StoreResultStackOffset}),
+                {StreamModule:append(Stream5, StoreResult), [PaddingReg | UsedRegs1]};
+            _ ->
+                % Use any free that is not in SavedRegs
+                [ResultReg | _] = AvailableRegs1 -- SavedRegs,
+                MoveResult = jit_armv6m_asm:mov(ResultReg, r0),
+                {StreamModule:append(Stream5, MoveResult), [ResultReg | UsedRegs1]}
+        end,
+
+    % Deallocate stack space if we allocated it for 5+ arguments
+    Stream7 =
+        case length(Args) >= 5 of
+            true ->
+                DeallocateArgs = jit_armv6m_asm:add(sp, 8),
+                StreamModule:append(Stream6, DeallocateArgs);
+            false ->
+                Stream6
+        end,
+
+    Stream8 = pop_registers(lists:reverse(SavedRegs), StreamModule, Stream7),
+
+    AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1),
+    AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2),
+    {
+        State4#state{
+            stream = Stream8,
+            available_regs = AvailableRegs3,
+            used_regs = UsedRegs2
+        },
+        ResultReg
+    }.
+
+% Register referenced by a call argument, as a list: [Reg] for
+% {free, {ptr, Reg}}, {free, Reg} and bare atoms, [] for anything else.
+arg_to_reg_list({free, {ptr, PtrReg}}) -> [PtrReg];
+arg_to_reg_list({free, Arg}) -> [Arg || is_atom(Arg)];
+arg_to_reg_list(Arg) -> [Arg || is_atom(Arg)].
+
+% Append a single PUSH of SavedRegs to the stream.
+% An empty list emits nothing and returns the stream unchanged.
+push_registers([], _StreamModule, Stream0) ->
+    Stream0;
+push_registers([_ | _] = SavedRegs, StreamModule, Stream0) ->
+    StreamModule:append(Stream0, jit_armv6m_asm:push(SavedRegs)).
+
+% Append a single POP of SavedRegs to the stream.
+% An empty list emits nothing and returns the stream unchanged.
+pop_registers([], _StreamModule, Stream0) ->
+    Stream0;
+pop_registers([_ | _] = SavedRegs, StreamModule, Stream0) ->
+    StreamModule:append(Stream0, jit_armv6m_asm:pop(SavedRegs)).
+
+%% @doc Store the 5th and, when present, the 6th call argument in a fresh
+%% 8-byte stack slot: Arg5 at sp+0 and Arg6 at sp+4. With only five arguments
+%% (Arg6 =:= undefined) the word at sp+4 is left as padding, so sp is lowered
+%% by 8 bytes in both cases and stays 8-byte aligned.
+set_stack_args(
+    #state{stream_module = StreamModule, stream = Stream0} = State0, Arg5, Arg6
+) ->
+    % Store one argument at {sp, Offset} and release the register that held it.
+    Spill = fun
+        (StateIn, {free, Reg}, Offset) when is_atom(Reg) ->
+            % Already in a native register: store it as is.
+            Store = jit_armv6m_asm:str(Reg, {sp, Offset}),
+            StreamOut = StreamModule:append(StateIn#state.stream, Store),
+            free_native_register(StateIn#state{stream = StreamOut}, Reg);
+        (StateIn, Arg, Offset) ->
+            % Anything else goes through a native register first; a
+            % {free, NonNativeValue} wrapper is dropped before the move.
+            Value =
+                case Arg of
+                    {free, Inner} -> Inner;
+                    _ -> Arg
+                end,
+            {StateMoved, Reg} = move_to_native_register(StateIn, Value),
+            Store = jit_armv6m_asm:str(Reg, {sp, Offset}),
+            StreamOut = StreamModule:append(StateMoved#state.stream, Store),
+            free_native_register(StateMoved#state{stream = StreamOut}, Reg)
+    end,
+    % Reserve the 8 bytes once, whatever the number of stack arguments.
+    Reserve = jit_armv6m_asm:sub(sp, sp, 8),
+    State1 = State0#state{stream = StreamModule:append(Stream0, Reserve)},
+    % Arg6 (if any) is stored first, then Arg5.
+    State2 =
+        case Arg6 of
+            undefined -> State1;
+            _ -> Spill(State1, Arg6, 4)
+        end,
+    Spill(State2, Arg5, 0).
+
+% Same as set_registers_args/4, with the parameter registers derived from Args.
+set_registers_args(State0, Args, StackOffset) ->
+    set_registers_args(State0, Args, parameter_regs(Args), StackOffset).
+
+% Emit the moves that place Args into ParamRegs and update the register
+% bookkeeping accordingly.
+%
+% State0: backend state; its used_regs are excluded from the scratch set.
+% Args: call arguments to place in registers.
+% ParamRegs: registers receiving the arguments.
+% StackOffset: passed through unchanged to set_registers_args0/6.
+% Returns the state with the code appended, ParamRegs marked as used and the
+% registers of {free, _} arguments removed from used_regs.
+set_registers_args(
+    #state{used_regs = UsedRegs} = State0,
+    Args,
+    ParamRegs,
+    StackOffset
+) ->
+    ArgsRegs = args_regs(Args),
+    AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs,
+    State1 = set_registers_args0(
+        State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset
+    ),
+    % Registers handed over as {free, _} are dropped from the used set.
+    NewUsedRegs = lists:foldl(
+        fun
+            ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed);
+            ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed);
+            (_, AccUsed) -> AccUsed
+        end,
+        UsedRegs,
+        Args
+    ),
+    State1#state{
+        % `--` is right-associative: without the parentheses this evaluates as
+        % ?AVAILABLE_REGS -- (ParamRegs -- NewUsedRegs), which leaves registers
+        % that are still in use listed in available_regs.
+        available_regs = (?AVAILABLE_REGS -- ParamRegs) -- NewUsedRegs,
+        used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs)
+    }.
+
+% Registers that receive Args, in argument order, taken from ?PARAMETER_REGS.
+parameter_regs(Args) -> parameter_regs0(Args, ?PARAMETER_REGS, []).
+
+% Assign parameter registers to arguments, in order. A 32-bit argument takes
+% the next register. An avm_int64_t takes a register pair that starts on an
+% even register (AAPCS32 double-word alignment), so r1 is left unused when it
+% would come first. The result lists the low register of a pair first.
+parameter_regs0([], _Remaining, Assigned) ->
+    lists:reverse(Assigned);
+parameter_regs0([{avm_int64_t, _} | _] = Args, [r1 | [r2, r3 | _] = Aligned], Assigned) ->
+    % Odd start: skip r1 and retry from r2.
+    parameter_regs0(Args, Aligned, Assigned);
+parameter_regs0([{avm_int64_t, _} | Args], [Low, High | Remaining], Assigned) when
+    Low =:= r0, High =:= r1; Low =:= r2, High =:= r3
+->
+    parameter_regs0(Args, Remaining, [High, Low | Assigned]);
+parameter_regs0([_Arg | Args], [Reg | Remaining], Assigned) ->
+    parameter_regs0(Args, Remaining, [Reg | Assigned]).
+
+% Substitute Reg2 for the first argument that is Reg1 or {free, Reg1}; the
+% {free, _} wrapper is not kept on the substituted entry. Fails with
+% function_clause when Reg1 does not occur in Args.
+replace_reg(Args, Reg1, Reg2) -> replace_reg0(Args, Reg1, Reg2, []).
+
+replace_reg0([Arg | Rest], Reg, Replacement, Seen) when Arg =:= Reg; Arg =:= {free, Reg} ->
+    lists:reverse(Seen, [Replacement | Rest]);
+replace_reg0([Arg | Rest], Reg, Replacement, Seen) ->
+    replace_reg0(Rest, Reg, Replacement, [Arg | Seen]).
+
+set_registers_args0(State, [], [], [], _AvailGP, _StackOffset) -> % all three lists consumed: done
+    State;
+set_registers_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset) -> % strip {free, _} and retry
+    set_registers_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset);
+set_registers_args0(
+    State, [ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset
+) -> % ctx is wanted in ?CTX_REG and is listed there already: nothing to emit
+    set_registers_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset);
+% 64-bit integer literal: split into low and high 32-bit immediates, one register each
+set_registers_args0(
+    State,
+    [{avm_int64_t, Value} | ArgsT],
+    ArgsRegs,
+    ParamRegs,
+    AvailGP,
+    StackOffset
+) when is_integer(Value) ->
+    LowPart = Value band 16#FFFFFFFF,
+    HighPart = (Value bsr 32) band 16#FFFFFFFF,
+    set_registers_args0(
+        State, [LowPart, HighPart | ArgsT], [imm | ArgsRegs], ParamRegs, AvailGP, StackOffset
+    ); % one argument became two, so an extra `imm` is prepended to ArgsRegs
+% The target is ?CTX_REG, which is needed to access x_reg/y_reg/fp_reg: assert
+% that no remaining argument still reads from it before overwriting it
+set_registers_args0(
+    State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset
+) ->
+    false = lists:member(?CTX_REG, ArgsRegs),
+    State1 = set_registers_args1(State, Arg, ?CTX_REG, StackOffset),
+    set_registers_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset);
+set_registers_args0(
+    #state{stream_module = StreamModule} = State0,
+    [Arg | ArgsT],
+    [_ArgReg | ArgsRegsT],
+    [ParamReg | ParamRegsT],
+    AvailGP,
+    StackOffset
+) ->
+    case lists:member(ParamReg, ArgsRegsT) of % does a later argument still read ParamReg?
+        false ->
+            State1 = set_registers_args1(State0, Arg, ParamReg, StackOffset),
+            set_registers_args0(State1, ArgsT, ArgsRegsT, ParamRegsT, AvailGP, StackOffset);
+        true ->
+            [Avail | AvailGPT] = AvailGP, % badmatch if no scratch register is left
+            I = jit_armv6m_asm:mov(Avail, ParamReg), % keep ParamReg's current value in the scratch register
+            Stream1 = StreamModule:append(State0#state.stream, I),
+            State1 = set_registers_args1(
+                State0#state{stream = Stream1}, Arg, ParamReg, StackOffset
+            ),
+            NewArgsT = replace_reg(ArgsT, ParamReg, Avail), % the first later use now reads the copy
+            set_registers_args0(
+                State1, NewArgsT, ArgsRegsT, ParamRegsT, AvailGPT, StackOffset
+            )
+    end.
+
+% Emit the code that brings one argument into its parameter register.
+% Clauses are tried in order: the argument may already be the target register,
+% or be jit_state, a VM register, a {ptr, Reg} indirection, another native
+% register or a 32-bit integer. StackOffset is only used for jit_state.
+set_registers_args1(State, SameReg, SameReg, _StackOffset) ->
+    % Already in place.
+    State;
+set_registers_args1(
+    #state{stream_module = Mod, stream = Stream} = State, jit_state, ParamReg, StackOffset
+) ->
+    % jit_state is loaded from the stack at ?STACK_OFFSET_JITSTATE + StackOffset.
+    Load = jit_armv6m_asm:ldr(ParamReg, {sp, ?STACK_OFFSET_JITSTATE + StackOffset}),
+    State#state{stream = Mod:append(Stream, Load)};
+% For tail calls, jit_state will be restored by pop - skip generating load instruction
+set_registers_args1(State, jit_state_tail_call, r1, _StackOffset) ->
+    State;
+set_registers_args1(
+    #state{stream_module = Mod, stream = Stream} = State, {x_reg, extra}, Reg, _StackOffset
+) ->
+    Load = jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)),
+    State#state{stream = Mod:append(Stream, Load)};
+set_registers_args1(
+    #state{stream_module = Mod, stream = Stream} = State, {x_reg, X}, Reg, _StackOffset
+) ->
+    Load = jit_armv6m_asm:ldr(Reg, ?X_REG(X)),
+    State#state{stream = Mod:append(Stream, Load)};
+set_registers_args1(
+    #state{stream_module = Mod, stream = Stream} = State, {ptr, Source}, Reg, _StackOffset
+) ->
+    Load = jit_armv6m_asm:ldr(Reg, {Source, 0}),
+    State#state{stream = Mod:append(Stream, Load)};
+set_registers_args1(
+    #state{stream_module = Mod, stream = Stream, available_regs = Scratch} = State,
+    {y_reg, Y},
+    Reg,
+    _StackOffset
+) ->
+    State#state{stream = Mod:append(Stream, ldr_y_reg(Reg, Y, Scratch))};
+set_registers_args1(
+    #state{stream_module = Mod, stream = Stream} = State, SrcReg, Reg, _StackOffset
+) when ?IS_GPR(SrcReg) ->
+    Move = jit_armv6m_asm:mov(Reg, SrcReg),
+    State#state{stream = Mod:append(Stream, Move)};
+set_registers_args1(State, Value, Reg, _StackOffset) when ?IS_SIGNED_OR_UNSIGNED_INT32_T(Value) ->
+    mov_immediate(State, Reg, Value).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg)
+%% from an immediate, a native register or another vm register.
+%% @end
+%% @param State current backend state
+%% @param Src value to move to vm register
+%% @param Dest vm register to move to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) ->
+    state().
+% Source is a native register (any atom): store it straight to the destination
+move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) ->
+    I1 = jit_armv6m_asm:str(Src, ?X_REG(?MAX_REG)), % `extra` is addressed as x register ?MAX_REG
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) ->
+    I1 = jit_armv6m_asm:str(Src, ?X_REG(X)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) ->
+    I1 = jit_armv6m_asm:str(Src, {Reg, 0}), % store at the address held in Reg
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when
+    is_atom(Src)
+->
+    Code = str_y_reg(Src, Y, Temp1, AT), % first available register and the rest are handed to str_y_reg/4
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
+    State0#state{stream = Stream1};
+% Small integer (0..255) to y_reg: movs into Temp2 first, then store it with str_y_reg/4
+move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when
+    is_integer(N), N >= 0, N =< 255
+->
+    I1 = jit_armv6m_asm:movs(Temp2, N),
+    YCode = str_y_reg(Temp2, Y, Temp1, AT),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, <<I1/binary, YCode/binary>>),
+    State0#state{stream = Stream1};
+% Small integer (0..255) to any other destination: movs into a temporary, then store that register
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
+    is_integer(N), N >= 0, N =< 255
+->
+    I1 = jit_armv6m_asm:movs(Temp, N),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0}; % Temp was only reserved for the nested store
+% Any other integer: materialized by mov_immediate/3 into a temporary, then stored as a register
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
+    is_integer(N)
+->
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N),
+    State2 = move_to_vm_register(State1, Temp, Dest),
+    State2#state{available_regs = AR0};
+% Source is a VM register or {ptr, Reg}: load it into a temporary, then store that register
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) ->
+    I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(?MAX_REG)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) ->
+    I1 = jit_armv6m_asm:ldr(Temp, ?X_REG(X)),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) ->
+    I1 = jit_armv6m_asm:ldr(Temp, {Reg, 0}),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) ->
+    Code = ldr_y_reg(Temp, Y, AT),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+% term_to_float: copy the words at byte offsets 4 (and 8) from Reg into fp_reg F, then free Reg
+move_to_vm_register(
+    #state{
+        stream_module = StreamModule,
+        available_regs = [Temp1, Temp2 | _],
+        stream = Stream0,
+        variant = Variant
+    } =
+        State0,
+    {free, {ptr, Reg, 1}},
+    {fp_reg, F}
+) ->
+    I1 = jit_armv6m_asm:ldr(Temp1, ?FP_REGS), % Temp1: base address of the fp registers
+    I2 = jit_armv6m_asm:ldr(Temp2, {Reg, 4}), % Temp2: first 32-bit word of the payload
+    case Variant band ?JIT_VARIANT_FLOAT32 of
+        0 ->
+            % Double precision: write both 32-bit parts
+            I3 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8}),
+            I4 = jit_armv6m_asm:ldr(Temp2, {Reg, 8}),
+            I5 = jit_armv6m_asm:str(Temp2, {Temp1, F * 8 + 4}),
+            Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
+        _ ->
+            % Single precision: write only first 32-bit part
+            I3 = jit_armv6m_asm:str(Temp2, {Temp1, F * 4}),
+            Code = <<I1/binary, I2/binary, I3/binary>>
+    end,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State1 = free_native_register(State0, Reg), % Reg was passed as {free, _}
+    State1#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move of an array element (reg[x]) to a vm or a native register.
+%% The element is read at byte offset Index * 4 from the base register. When
+%% the index is passed as `{free, IndexReg}', IndexReg is scaled in place, used
+%% as the temporary holding the element and released afterwards.
+%% @end
+%% @param State current backend state
+%% @param Reg base register of the array; `{free, Reg}' is accepted when the
+%%   destination is a y_reg, in which case Reg is overwritten with the element
+%% @param Index index in the array, as an integer or as `{free, IndexReg}'
+%% @param Dest vm or native register to move to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec move_array_element(
+    state(),
+    armv6m_register() | {free, armv6m_register()},
+    non_neg_integer() | armv6m_register() | {free, armv6m_register()},
+    vm_register() | armv6m_register()
+) -> state().
+% Constant index, x_reg destination: load into a temporary, then store.
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Index,
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}),
+    I2 = jit_armv6m_asm:str(Temp, ?X_REG(X)),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+% Constant index, destination is the address held in Dest.
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Index,
+    {ptr, Dest}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_armv6m_asm:ldr(Temp, {Reg, Index * 4}),
+    I2 = jit_armv6m_asm:str(Temp, {Dest, 0}),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+% Constant index, y_reg destination: Temp2 holds the element, Temp1 and the
+% remaining available registers are handed to str_y_reg/4.
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} =
+        State,
+    Reg,
+    Index,
+    {y_reg, Y}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_armv6m_asm:ldr(Temp2, {Reg, Index * 4}),
+    YCode = str_y_reg(Temp2, Y, Temp1, AT),
+    Code = <<I1/binary, YCode/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+% Constant index, y_reg destination, base passed as {free, Reg}: the base
+% register itself receives the element.
+% NOTE(review): unlike the {free, IndexReg} clauses, Reg is not released from
+% used_regs here - confirm that callers free it themselves.
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} =
+        State,
+    {free, Reg},
+    Index,
+    {y_reg, Y}
+) when is_integer(Index) ->
+    I1 = jit_armv6m_asm:ldr(Reg, {Reg, Index * 4}),
+    YCode = str_y_reg(Reg, Y, Temp, AT),
+    Code = <<I1/binary, YCode/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+% Constant index, native register destination: a single load.
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest
+) when is_atom(Dest) andalso is_integer(Index) ->
+    I1 = jit_armv6m_asm:ldr(Dest, {Reg, Index * 4}),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+% Index in a register handed over as {free, IndexReg}: scale it by 4 in place,
+% load the element into it, store it to the x_reg and release IndexReg.
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(IndexReg) ->
+    I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2),
+    I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}),
+    I3 = jit_armv6m_asm:str(IndexReg, ?X_REG(X)),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    };
+% Same, with the destination being the address held in PtrReg.
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {ptr, PtrReg}
+) when is_atom(IndexReg) ->
+    I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2),
+    I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}),
+    I3 = jit_armv6m_asm:str(IndexReg, {PtrReg, 0}),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    };
+% Same, with a y_reg destination written through str_y_reg/4.
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | AT] = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {y_reg, Y}
+) when is_atom(IndexReg) ->
+    I1 = jit_armv6m_asm:lsls(IndexReg, IndexReg, 2),
+    I2 = jit_armv6m_asm:ldr(IndexReg, {Reg, IndexReg}),
+    YCode = str_y_reg(IndexReg, Y, Temp, AT),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, YCode/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    }.
+
+%% @doc Load reg[Index] into a native register and return that register.
+%% With `{free, Reg}' the base register itself receives the element; otherwise
+%% the first available register is taken and marked as used.
+-spec get_array_element(state(), armv6m_register() | {free, armv6m_register()}, non_neg_integer()) ->
+    {state(), armv6m_register()}.
+get_array_element(#state{stream_module = Mod, stream = Stream} = State, {free, BaseReg}, Index) ->
+    Load = jit_armv6m_asm:ldr(BaseReg, {BaseReg, Index * 4}),
+    {State#state{stream = Mod:append(Stream, <<Load/binary>>)}, BaseReg};
+get_array_element(
+    #state{
+        stream_module = Mod,
+        stream = Stream,
+        available_regs = [DestReg | StillFree],
+        used_regs = InUse
+    } = State,
+    BaseReg,
+    Index
+) ->
+    Load = jit_armv6m_asm:ldr(DestReg, {BaseReg, Index * 4}),
+    NewState = State#state{
+        stream = Mod:append(Stream, <<Load/binary>>),
+        available_regs = StillFree,
+        used_regs = [DestReg | InUse]
+    },
+    {NewState, DestReg}.
+
+%% @doc Store a value into Reg[Index]. The value is stored directly when it
+%% is already in a native register; anything else is first copied into one
+%% with copy_to_native_register/2, which is released after the store. Index is
+%% either an integer or a native register holding the index.
+-spec move_to_array_element(
+    state(), integer() | vm_register() | armv6m_register(), armv6m_register(), non_neg_integer()
+) -> state().
+move_to_array_element(
+    #state{stream_module = Mod, stream = Stream} = State0, ValueReg, Reg, Index
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
+    % Constant index: a single store at byte offset Index * 4.
+    Store = jit_armv6m_asm:str(ValueReg, {Reg, Index * 4}),
+    State0#state{stream = Mod:append(Stream, Store)};
+move_to_array_element(
+    #state{stream_module = Mod, stream = Stream, available_regs = [Scaled | _]} = State0,
+    ValueReg,
+    Reg,
+    IndexReg
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) ->
+    % Index in a register: scale a copy by 4 so IndexReg itself is left intact.
+    Code = <<
+        (jit_armv6m_asm:mov(Scaled, IndexReg))/binary,
+        (jit_armv6m_asm:lsls(Scaled, Scaled, 2))/binary,
+        (jit_armv6m_asm:str(ValueReg, {Reg, Scaled}))/binary
+    >>,
+    State0#state{stream = Mod:append(Stream, Code)};
+move_to_array_element(State0, Value, Reg, Index) ->
+    % Copy the value into a native register, store it, then release the copy.
+    {State1, ValueReg} = copy_to_native_register(State0, Value),
+    free_native_register(move_to_array_element(State1, ValueReg, Reg, Index), ValueReg).
+
+% Store Value into BaseReg[IndexReg + Offset]. Offset is counted in 32-bit
+% elements: it is added to the index before the sum is scaled by 4.
+%
+% State: current backend state
+% Value: native register, or any value copy_to_native_register/2 accepts
+% BaseReg: base register of the array
+% IndexReg: index, as an integer or a native register
+% Offset: integer number of elements added to the index
+% Returns the updated backend state.
+move_to_array_element(State, Value, BaseReg, IndexReg, Offset) when
+    is_integer(IndexReg) andalso is_integer(Offset)
+->
+    % Constant index: fold the offset into it. The previous guard
+    % (`Offset div 8 =:= 0`) combined with adding `Offset div 8` always added 0,
+    % silently dropping the offset, unlike the register-index clauses below.
+    move_to_array_element(State, Value, BaseReg, IndexReg + Offset);
+move_to_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    ValueReg,
+    BaseReg,
+    IndexReg,
+    Offset
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
+    % Temp = (IndexReg + Offset) * 4, used as the register offset of the store.
+    I1 = jit_armv6m_asm:adds(Temp, IndexReg, Offset),
+    I2 = jit_armv6m_asm:lsls(Temp, Temp, 2),
+    I3 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp}),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    State#state{stream = Stream1};
+move_to_array_element(
+    State0,
+    Value,
+    BaseReg,
+    IndexReg,
+    Offset
+) ->
+    % Value is not usable as is: copy it into a native register first, and
+    % pick the temporary only afterwards so the two cannot collide.
+    {State1, ValueReg} = copy_to_native_register(State0, Value),
+    [Temp | _] = State1#state.available_regs,
+    I1 = jit_armv6m_asm:adds(Temp, IndexReg, Offset),
+    I2 = jit_armv6m_asm:lsls(Temp, Temp, 2),
+    I3 = jit_armv6m_asm:str(ValueReg, {BaseReg, Temp}),
+    Stream1 = (State1#state.stream_module):append(
+        State1#state.stream, <<I1/binary, I2/binary, I3/binary>>
+    ),
+    State2 = State1#state{stream = Stream1},
+    free_native_register(State2, ValueReg).
+
+%% @doc Bring a value into a native register and return that register.
+%% A native register is returned as is, and `{ptr, Reg}' is loaded over Reg
+%% itself; in both cases no register is allocated. Every other value takes the
+%% first available register, which is moved to used_regs. An `{fp_reg, F}'
+%% source takes two registers and is returned as `{fp, RegA, RegB}'.
+-spec move_to_native_register(state(), value() | cp) ->
+    {state(), armv6m_register() | {fp, armv6m_register(), armv6m_register()}}.
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    cp
+) ->
+    % `cp` must be matched before the generic atom clause below.
+    I1 = jit_armv6m_asm:ldr(Reg, ?CP),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+move_to_native_register(State, Reg) when is_atom(Reg) ->
+    % Already a native register: nothing to emit.
+    {State, Reg};
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}
+) when is_atom(Reg) ->
+    % Dereference in place: Reg is overwritten with the word it points to.
+    I1 = jit_armv6m_asm:ldr(Reg, {Reg, 0}),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1}, Reg};
+move_to_native_register(
+    #state{
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State0,
+    Imm
+) when
+    is_integer(Imm)
+->
+    % Reserve the register first, then let move_to_native_register/3 load it.
+    State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT},
+    {move_to_native_register(State1, Imm, Reg), Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {x_reg, extra}
+) ->
+    I1 = jit_armv6m_asm:ldr(Reg, ?X_REG(?MAX_REG)),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {x_reg, X}
+) when
+    X < ?MAX_REG
+->
+    I1 = jit_armv6m_asm:ldr(Reg, ?X_REG(X)),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {y_reg, Y}
+) ->
+    % The remaining available registers are handed to ldr_y_reg/3.
+    Code = ldr_y_reg(Reg, Y, AvailT),
+    Stream1 = StreamModule:append(Stream0, Code),
+    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [RegA, RegB | AvailT],
+        used_regs = Used
+    } = State,
+    {fp_reg, F}
+) ->
+    % RegB first holds the fp registers base, then is overwritten by the second
+    % word; RegA receives the first word.
+    I1 = jit_armv6m_asm:ldr(RegB, ?FP_REGS),
+    I2 = jit_armv6m_asm:ldr(RegA, {RegB, F * 8}),
+    I3 = jit_armv6m_asm:ldr(RegB, {RegB, F * 8 + 4}),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    {
+        State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]},
+        {fp, RegA, RegB}
+    }.
+
+%% @doc Load a value into a native register chosen by the caller. Register
+%% bookkeeping (available_regs / used_regs) is left untouched. An `{fp_reg, F}'
+%% source requires the destination to be a pair `{fp, RegA, RegB}'.
+-spec move_to_native_register(
+    state(),
+    value(),
+    armv6m_register() | {fp, armv6m_register(), armv6m_register()}
+) -> state().
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, RegSrc, RegDst
+) when is_atom(RegSrc) ->
+    I = jit_armv6m_asm:mov(RegDst, RegSrc),
+    State#state{stream = StreamModule:append(Stream0, I)};
+move_to_native_register(State, ValSrc, RegDst) when is_integer(ValSrc) ->
+    mov_immediate(State, RegDst, ValSrc);
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}, RegDst
+) when ?IS_GPR(Reg) ->
+    % Load the word Reg points to; Reg itself is preserved.
+    I1 = jit_armv6m_asm:ldr(RegDst, {Reg, 0}),
+    State#state{stream = StreamModule:append(Stream0, I1)};
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, extra}, RegDst
+) ->
+    I1 = jit_armv6m_asm:ldr(RegDst, ?X_REG(?MAX_REG)),
+    State#state{stream = StreamModule:append(Stream0, I1)};
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, RegDst
+) when
+    X < ?MAX_REG
+->
+    I1 = jit_armv6m_asm:ldr(RegDst, ?X_REG(X)),
+    State#state{stream = StreamModule:append(Stream0, I1)};
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = AT} = State,
+    {y_reg, Y},
+    RegDst
+) ->
+    % The whole available list is handed to ldr_y_reg/3.
+    Code = ldr_y_reg(RegDst, Y, AT),
+    State#state{stream = StreamModule:append(Stream0, Code)};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State,
+    {fp_reg, F},
+    {fp, RegA, RegB}
+) ->
+    % RegB first holds the fp registers base, then is overwritten by the second
+    % word; RegA receives the first word.
+    I1 = jit_armv6m_asm:ldr(RegB, ?FP_REGS),
+    I2 = jit_armv6m_asm:ldr(RegA, {RegB, F * 8}),
+    I3 = jit_armv6m_asm:ldr(RegB, {RegB, F * 8 + 4}),
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    State#state{stream = StreamModule:append(Stream0, Code)}.
+
+-spec copy_to_native_register(state(), value()) -> {state(), armv6m_register()}.
+copy_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [CopyReg | Rest],
+        used_regs = Used
+    } = State,
+    Reg
+) when is_atom(Reg) ->
+    % A native register is duplicated into a newly allocated one.
+    Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:mov(CopyReg, Reg)),
+    {State#state{stream = Stream1, available_regs = Rest, used_regs = [CopyReg | Used]}, CopyReg};
+copy_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [CopyReg | Rest],
+        used_regs = Used
+    } = State,
+    {ptr, Reg}
+) when is_atom(Reg) ->
+    % The word Reg points to is loaded into a newly allocated register.
+    Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:ldr(CopyReg, {Reg, 0})),
+    {State#state{stream = Stream1, available_regs = Rest, used_regs = [CopyReg | Used]}, CopyReg};
+copy_to_native_register(State, Value) ->
+    move_to_native_register(State, Value).
+
+move_to_cp(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Tmp | Others]} = State,
+    {y_reg, Y}
+) ->
+    % Fetch the y register into a scratch register (which stays available),
+    % then store it to ?CP.
+    LoadY = ldr_y_reg(Tmp, Y, Others),
+    StoreCp = jit_armv6m_asm:str(Tmp, ?CP),
+    State#state{stream = StreamModule:append(Stream0, <<LoadY/binary, StoreCp/binary>>)}.
+
+increment_sp(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Tmp | _]} = State,
+    Offset
+) ->
+    % Read-modify-write of ?Y_REGS; Offset is scaled by 4 before being added.
+    Load = jit_armv6m_asm:ldr(Tmp, ?Y_REGS),
+    Bump = jit_armv6m_asm:adds(Tmp, Offset * 4),
+    Store = jit_armv6m_asm:str(Tmp, ?Y_REGS),
+    Code = <<Load/binary, Bump/binary, Store/binary>>,
+    State#state{stream = StreamModule:append(Stream0, Code)}.
+
+set_continuation_to_label(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        offset = JumpTableOffset,
+        available_regs = [Temp1, Temp2 | _]
+    } = State,
+    Label
+) ->
+    % Offset of the jump table entry for Label: this is what the continuation will point to
+    JumpTableEntryOffset = (Label * ?JUMP_TABLE_ENTRY_SIZE) + JumpTableOffset,
+    % The entry is reached at run time as (value captured by adr) + (constant computed here)
+    AdrOffset = StreamModule:offset(Stream0),
+    % adr Temp1, 4 stores the 4-byte aligned PC value in Temp1.
+    % For example, if AdrOffset is 0x0808034c, Temp1 will contain 0x08080350
+    I1 = jit_armv6m_asm:adr(Temp1, 4),
+    Stream1 = StreamModule:append(Stream0, I1),
+    % Stream offset matching the value Temp1 holds at run time
+    AdrPC = (AdrOffset + 4) band (bnot 3),
+
+    % Calculate what we need to load: JumpTableEntryOffset - AdrPC + 1 (for thumb bit)
+    ImmediateValue = JumpTableEntryOffset + 1 - AdrPC,
+
+    % Load the calculated offset in Temp2 (mov_immediate may go through the literal pool)
+    State1 = mov_immediate(State#state{stream = Stream1}, Temp2, ImmediateValue),
+
+    % Add PC + offset (with thumb bit set), load jit_state, and store continuation
+    I2 = jit_armv6m_asm:adds(Temp2, Temp2, Temp1),
+    I3 = jit_armv6m_asm:ldr(Temp1, {sp, ?STACK_OFFSET_JITSTATE}),
+    I4 = jit_armv6m_asm:str(Temp2, ?JITSTATE_CONTINUATION(Temp1)),
+    % Temp1 and Temp2 are only scratch here: available_regs is left unchanged
+    Code = <<I2/binary, I3/binary, I4/binary>>,
+    Stream2 = StreamModule:append(State1#state.stream, Code),
+    State1#state{stream = Stream2}.
+
+%% @doc Set the continuation to an offset that is not known yet.
+%% Returns a reference so the adr instruction can later be patched by
+%% update_branches. This is only used with OP_WAIT_TIMEOUT and the offset is
+%% after the current code and not too far, so on Thumb adr is sufficient.
+set_continuation_to_offset(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [AddrReg, JitStateReg | _],
+        branches = Branches
+    } = State
+) ->
+    Ref = make_ref(),
+    % Placeholder adr; its position is recorded so it can be rewritten later
+    AdrPos = StreamModule:offset(Stream0),
+    Adr = jit_armv6m_asm:adr(AddrReg, 4),
+    % Adding 1 to the 4-byte aligned address sets the thumb bit (LSB = 1)
+    SetThumb = jit_armv6m_asm:adds(AddrReg, AddrReg, 1),
+    % The jit_state pointer is read from the stack; the continuation is stored in it
+    LoadJitState = jit_armv6m_asm:ldr(JitStateReg, {sp, ?STACK_OFFSET_JITSTATE}),
+    StoreCont = jit_armv6m_asm:str(AddrReg, ?JITSTATE_CONTINUATION(JitStateReg)),
+    Code = <<Adr/binary, SetThumb/binary, LoadJitState/binary, StoreCont/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    {State#state{stream = Stream1, branches = [{Ref, AdrPos, {adr, AddrReg}} | Branches]}, Ref}.
+
+%% @doc Implement a continuation entry point: pad the stream to a 4-byte
+%% boundary if needed, then emit the push {r1, r4-r7, lr} prolog.
+-spec continuation_entry_point(#state{}) -> #state{}.
+continuation_entry_point(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State
+) ->
+    % Offsets are expected to be even: any remainder other than 0 or 2 crashes
+    % with a case_clause error.
+    Aligned =
+        case StreamModule:offset(Stream0) rem 4 of
+            0 -> Stream0;
+            2 -> StreamModule:append(Stream0, <<0:16>>)
+        end,
+    % Prolog saving r1, r4-r7 and lr.
+    Push = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]),
+    State#state{stream = StreamModule:append(Aligned, Push)}.
+
+get_module_index(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [IndexReg, JitStateReg | Rest],
+        used_regs = Used
+    } = State
+) ->
+    % Chain of loads: jit_state (from the stack) -> module -> module index.
+    LoadJitState = jit_armv6m_asm:ldr(JitStateReg, {sp, ?STACK_OFFSET_JITSTATE}),
+    LoadModule = jit_armv6m_asm:ldr(IndexReg, ?JITSTATE_MODULE(JitStateReg)),
+    LoadIndex = jit_armv6m_asm:ldr(IndexReg, ?MODULE_INDEX(IndexReg)),
+    Code = <<LoadJitState/binary, LoadModule/binary, LoadIndex/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    % Only IndexReg is marked as used; JitStateReg was scratch and stays
+    % available for the caller.
+    NewState = State#state{
+        stream = Stream1,
+        available_regs = [JitStateReg | Rest],
+        used_regs = [IndexReg | Used]
+    },
+    {NewState, IndexReg}.
+
+%% @doc Perform an AND of a register with an immediate.
+%% 16#FFFFFF uses two shifts and values in -256..-1, such as
+%% ?TERM_PRIMARY_CLEAR_MASK (-4), use BICS, so both avoid the literal pool.
+%% Without a free register, r0 is borrowed and preserved through ?IP_REG.
+and_(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, 16#FFFFFF) ->
+    % Keep the low 24 bits: shift the top 8 bits out, then shift zeros back in
+    I1 = jit_armv6m_asm:lsls(Reg, Reg, 8),
+    I2 = jit_armv6m_asm:lsrs(Reg, Reg, 8),
+    State0#state{stream = StreamModule:append(Stream0, <<I1/binary, I2/binary>>)};
+and_(
+    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
+    Reg,
+    Val
+) when Val < 0 andalso Val >= -256 ->
+    % bnot Val is in 0..255 here: load it with a single movs, then clear the bits
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, bnot (Val)),
+    I = jit_armv6m_asm:bics(Reg, Temp),
+    Stream2 = StreamModule:append(State1#state.stream, I),
+    State1#state{available_regs = [Temp | AT], stream = Stream2};
+and_(
+    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
+    Reg,
+    Val
+) ->
+    % General mask: Temp is withheld while mov_immediate loads the value
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
+    I = jit_armv6m_asm:ands(Reg, Temp),
+    Stream2 = StreamModule:append(State1#state.stream, I),
+    State1#state{available_regs = [Temp | AT], stream = Stream2};
+and_(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = []} = State0,
+    Reg,
+    Val
+) when Val < 0 andalso Val >= -256 ->
+    % No available registers: borrow r0 as temp and keep its value in r12
+    % NOTE(review): like the clause below, this assumes Reg is not r0 - confirm
+    Save = jit_armv6m_asm:mov(?IP_REG, r0),
+    Stream1 = StreamModule:append(Stream0, Save),
+    % Load the inverted mask into r0 (bnot Val is in 0..255, a single movs)
+    State1 = mov_immediate(State0#state{stream = Stream1}, r0, bnot (Val)),
+    Stream2 = State1#state.stream,
+    % Perform BICS operation
+    I = jit_armv6m_asm:bics(Reg, r0),
+    Stream3 = StreamModule:append(Stream2, I),
+    % Restore r0 from r12
+    Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
+    Stream4 = StreamModule:append(Stream3, Restore),
+    % Return the state produced by mov_immediate, as the clauses above do
+    State1#state{stream = Stream4};
+and_(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = []} = State0,
+    Reg,
+    Val
+) ->
+    % No available registers: borrow r0 as temp and keep its value in r12
+    Save = jit_armv6m_asm:mov(?IP_REG, r0),
+    Stream1 = StreamModule:append(Stream0, Save),
+    % Load immediate value into r0. Values outside -255..255 are queued in the
+    % literal pool, which mov_immediate records in the state it returns.
+    State1 = mov_immediate(State0#state{stream = Stream1}, r0, Val),
+    Stream2 = State1#state.stream,
+    % Perform ANDS operation
+    I = jit_armv6m_asm:ands(Reg, r0),
+    Stream3 = StreamModule:append(Stream2, I),
+    % Restore r0 from r12
+    Restore = jit_armv6m_asm:mov(r0, ?IP_REG),
+    Stream4 = StreamModule:append(Stream3, Restore),
+    % Build on State1 (not State0) so the queued literal pool entry is kept
+    State1#state{stream = Stream4}.
+
+or_(
+    #state{stream_module = StreamModule, available_regs = [Scratch | Rest]} = State0,
+    Reg,
+    Val
+) ->
+    % Scratch is withheld from mov_immediate, then handed back at the end.
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Scratch, Val),
+    Orr = jit_armv6m_asm:orrs(Reg, Scratch),
+    Stream2 = StreamModule:append(State1#state.stream, Orr),
+    State1#state{available_regs = [Scratch | Rest], stream = Stream2}.
+
+add(#state{stream_module = StreamModule, stream = Stream0} = State0, Reg, Val) when
+    is_atom(Val) orelse (Val >= 0 andalso Val =< 255)
+->
+    % Val is an atom (a register name) or an integer in 0..255: emit adds directly.
+    Adds = jit_armv6m_asm:adds(Reg, Reg, Val),
+    State0#state{stream = StreamModule:append(Stream0, Adds)};
+add(#state{stream_module = StreamModule, available_regs = [Tmp | Rest]} = State0, Reg, Val) ->
+    % Any other value is first loaded in a scratch register.
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Tmp, Val),
+    Adds = jit_armv6m_asm:adds(Reg, Reg, Tmp),
+    Stream2 = StreamModule:append(State1#state.stream, Adds),
+    State1#state{available_regs = [Tmp | Rest], stream = Stream2}.
+
+mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
+    Val >= 0 andalso Val =< 255
+->
+    % 0..255: a single movs.
+    Movs = jit_armv6m_asm:movs(Reg, Val),
+    State#state{stream = StreamModule:append(Stream0, Movs)};
+mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
+    Val >= -255 andalso Val < 0
+->
+    % -255..-1: load the magnitude, then negate.
+    Movs = jit_armv6m_asm:movs(Reg, -Val),
+    Negs = jit_armv6m_asm:negs(Reg, Reg),
+    State#state{stream = StreamModule:append(Stream0, <<Movs/binary, Negs/binary>>)};
+mov_immediate(
+    #state{stream_module = StreamModule, stream = Stream0, literal_pool = Pool} = State, Reg, Val
+) ->
+    % Anything else: emit a placeholder ldr that flush_literal_pool/1 patches later.
+    LdrAddr = StreamModule:offset(Stream0),
+    Stream1 = StreamModule:append(Stream0, jit_armv6m_asm:ldr(Reg, {pc, 0})),
+    State#state{stream = Stream1, literal_pool = [{LdrAddr, Reg, Val} | Pool]}.
+
+%% Emit the pending literal pool, if any, and patch the ldr instructions that reference it.
+flush_literal_pool(#state{literal_pool = []} = State) ->
+    State;
+flush_literal_pool(
+    #state{stream_module = StreamModule, stream = Stream0, literal_pool = Pool} = State
+) ->
+    % The pool must start on a 4-byte boundary: pad with one halfword if needed.
+    Offset = StreamModule:offset(Stream0),
+    Aligned =
+        case Offset rem 4 of
+            0 -> Stream0;
+            _ -> StreamModule:append(Stream0, <<0:16>>)
+        end,
+    % Entries were prepended, so reverse to emit them in the order they were
+    % requested. Each step writes one literal and patches its placeholder ldr.
+    EmitLiteral = fun({LdrAddr, Reg, Val}, Acc0) ->
+        LiteralAddr = StreamModule:offset(Acc0),
+        % Base used for the pc-relative load: word-aligned ldr address plus 4
+        LdrPC = (LdrAddr band (bnot 3)) + 4,
+        Ldr = jit_armv6m_asm:ldr(Reg, {pc, LiteralAddr - LdrPC}),
+        % Append the 32-bit little-endian literal, then rewrite the ldr in place
+        Acc1 = StreamModule:append(Acc0, <<Val:32/little>>),
+        StreamModule:replace(Acc1, LdrAddr, Ldr)
+    end,
+    InOrder = lists:reverse(Pool),
+    Stream2 = lists:foldl(EmitLiteral, Aligned, InOrder),
+    % Everything has been emitted: start over with an empty pool
+    State#state{stream = Stream2, literal_pool = []}.
+
+sub(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) when
+    is_atom(Val) orelse (Val >= 0 andalso Val =< 255)
+->
+    % Val is an atom (a register name) or an integer in 0..255: emit subs directly.
+    Subs = jit_armv6m_asm:subs(Reg, Reg, Val),
+    State#state{stream = StreamModule:append(Stream0, Subs)};
+sub(#state{stream_module = StreamModule, available_regs = [Tmp | Rest]} = State0, Reg, Val) ->
+    % Any other value is first loaded in a scratch register.
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Tmp, Val),
+    Subs = jit_armv6m_asm:subs(Reg, Reg, Tmp),
+    Stream2 = StreamModule:append(State1#state.stream, Subs),
+    State1#state{available_regs = [Tmp | Rest], stream = Stream2}.
+
+mul(State, _Reg, 1) ->
+    State;
+mul(State, Reg, 2) ->
+    shift_left(State, Reg, 1);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 3) ->
+    I1 = jit_armv6m_asm:lsls(Temp, Reg, 1), % Temp = Reg * 2
+    I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), % Reg = Reg * 2 + Reg
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 4) ->
+    shift_left(State, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 5) ->
+    I1 = jit_armv6m_asm:lsls(Temp, Reg, 2), % Temp = Reg * 4
+    I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), % Reg = Reg * 4 + Reg
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State0, Reg, 6) ->
+    State1 = mul(State0, Reg, 3), % 6 = 3 * 2
+    mul(State1, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 7) ->
+    I1 = jit_armv6m_asm:lsls(Temp, Reg, 3), % Temp = Reg * 8
+    I2 = jit_armv6m_asm:subs(Reg, Temp, Reg), % Reg = Reg * 8 - Reg
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 8) ->
+    shift_left(State, Reg, 3);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 9) ->
+    I1 = jit_armv6m_asm:lsls(Temp, Reg, 3), % Temp = Reg * 8
+    I2 = jit_armv6m_asm:adds(Reg, Temp, Reg), % Reg = Reg * 8 + Reg
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State0, Reg, 10) ->
+    State1 = mul(State0, Reg, 5), % 10 = 5 * 2
+    mul(State1, Reg, 2);
+mul(#state{available_regs = [Temp | _]} = State, Reg, 15) ->
+    I1 = jit_armv6m_asm:lsls(Temp, Reg, 4), % Temp = Reg * 16
+    I2 = jit_armv6m_asm:subs(Reg, Temp, Reg), % Reg = Reg * 16 - Reg
+    Stream1 = (State#state.stream_module):append(State#state.stream, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+mul(State, Reg, 16) ->
+    shift_left(State, Reg, 4);
+mul(State, Reg, 32) ->
+    shift_left(State, Reg, 5);
+mul(State, Reg, 64) ->
+    shift_left(State, Reg, 6);
+mul(
+    #state{stream_module = StreamModule, available_regs = [Temp | AT]} = State0,
+    Reg,
+    Val
+) ->
+    % General case: load the constant in a scratch register and multiply with muls
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, Val),
+    Stream1 = State1#state.stream,
+    I = jit_armv6m_asm:muls(Reg, Temp),
+    Stream2 = StreamModule:append(Stream1, I),
+    State1#state{stream = Stream2, available_regs = [Temp | State1#state.available_regs]}.
+
+%%
+%% Analysis of AArch64 pattern and ARM Thumb mapping:
+%%
+%% AArch64 layout (from call_ext_only_test):
+%%   0x0-0x8:  Decrement reductions, store back
+%%   0xc:      b.ne 0x20   ; Branch if reductions != 0 to continuation
+%%   0x10-0x1c: adr/str/ldr/br sequence for scheduling next process
+%%   0x20:     [CONTINUATION POINT] - Actual function starts here
+%%
+%% ARM Thumb equivalent should be:
+%%   0x0-0x6:  Decrement reductions, store back
+%%   0x8:      bne continuation_after_prolog ; Branch OVER the prolog if reductions != 0
+%%   0xa-0x?:  adr/str/ldr/blx sequence for scheduling
+%%   continuation: push {r1,r4-r7,lr}        ; PROLOG (only executed when scheduled)
+%%   continuation_after_prolog: [actual function body]
+%%
+%% Key insight: When reductions != 0, we branch PAST the prolog directly to the function.
+%% When reductions == 0, we schedule next process, and when we resume, we execute the prolog
+%% then continue to the function body.
+%%
+-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().
+decrement_reductions_and_maybe_schedule_next(
+    #state{
+        stream_module = StreamModule, stream = Stream0, available_regs = [Temp, TempJitState | _]
+    } = State0
+) ->
+    % Load jit_state pointer from stack
+    I0 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}),
+    % Load reduction count
+    I1 = jit_armv6m_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)),
+    % Decrement reduction count
+    I2 = jit_armv6m_asm:subs(Temp, Temp, 1),
+    % Store back the decremented value
+    I3 = jit_armv6m_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)),
+    Stream1 = StreamModule:append(Stream0, <<I0/binary, I1/binary, I2/binary, I3/binary>>),
+    BNEOffset = StreamModule:offset(Stream1),
+    % Branch if reduction count is not zero (offset 0 is a placeholder, rewritten below)
+    I4 = jit_armv6m_asm:bcc(ne, 0),
+    % Set continuation to the prolog emitted below (this adr is a placeholder too)
+    ADROffset = BNEOffset + byte_size(I4),
+    I5 = jit_armv6m_asm:adr(Temp, 4),
+    I6 = jit_armv6m_asm:adds(Temp, Temp, 1),
+    I7 = jit_armv6m_asm:str(Temp, ?JITSTATE_CONTINUATION(TempJitState)),
+    % Append the instructions to the stream
+    Stream2 = StreamModule:append(Stream1, <<I4/binary, I5/binary, I6/binary, I7/binary>>),
+    State1 = State0#state{stream = Stream2},
+    State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]),
+    % Add the prolog at the continuation point (where scheduled execution resumes)
+    #state{stream = Stream3} = State2,
+    CurrentOffset = StreamModule:offset(Stream3),
+    % Ensure continuation point is 4-byte aligned by adding NOP if necessary
+    {AlignedContinuationOffset, Stream3_5} =
+        case CurrentOffset rem 4 of
+            % Already 4-byte aligned
+            0 ->
+                {CurrentOffset, Stream3};
+            2 ->
+                % Add NOP to achieve 4-byte alignment
+                NOPPadded = StreamModule:append(Stream3, jit_armv6m_asm:nop()),
+                {StreamModule:offset(NOPPadded), NOPPadded};
+            _ ->
+                error({unexpected_alignment, CurrentOffset})
+        end,
+    Prolog = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]),
+    Stream4 = StreamModule:append(Stream3_5, Prolog),
+    % Both targets are now known: the prolog and the code that follows it
+    ContinuationAfterPrologOffset = StreamModule:offset(Stream4),
+    % Rewrite the branch to skip over the prolog (branch to continuation_after_prolog)
+    NewI4 = jit_armv6m_asm:bcc(ne, ContinuationAfterPrologOffset - BNEOffset),
+    % Rewrite the adr to point to the aligned continuation point (prolog location)
+    % The ADR instruction uses PC aligned down to 4-byte boundary
+    ADRAlignedOffset = ADROffset band (bnot 3),
+    ADRImmediate = AlignedContinuationOffset - ADRAlignedOffset,
+    NewI5 = jit_armv6m_asm:adr(Temp, ADRImmediate),
+    Stream5 = StreamModule:replace(
+        Stream4, BNEOffset, <<NewI4/binary, NewI5/binary>>
+    ),
+    merge_used_regs(State2#state{stream = Stream5}, State1#state.used_regs).
+
+-spec call_or_schedule_next(state(), non_neg_integer()) -> state().
+call_or_schedule_next(State, Label) ->
+    % cp is stored first, with a placeholder that is patched once the call is emitted.
+    {WithCp, PatchOffset, OffsetReg} = set_cp(State),
+    rewrite_cp_offset(call_only_or_schedule_next(WithCp, Label), PatchOffset, OffsetReg).
+
+call_only_or_schedule_next(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp, TempJitState | _]
+    } = State0,
+    Label
+) ->
+    % Load jit_state pointer from stack
+    I0 = jit_armv6m_asm:ldr(TempJitState, {sp, ?STACK_OFFSET_JITSTATE}),
+    % Load reduction count
+    I1 = jit_armv6m_asm:ldr(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)),
+    % Decrement reduction count
+    I2 = jit_armv6m_asm:subs(Temp, Temp, 1),
+    % Store back the decremented value
+    I3 = jit_armv6m_asm:str(Temp, ?JITSTATE_REDUCTIONCOUNT(TempJitState)),
+    Stream1 = StreamModule:append(Stream0, <<I0/binary, I1/binary, I2/binary, I3/binary>>),
+    % Use trampoline technique: branch if zero (eq) to skip over the long branch
+    % If not zero, we want to continue execution at Label
+    % If zero, we want to fall through to scheduling code
+
+    % Look up label once to avoid duplicate lookup in helper
+    LabelLookupResult = lists:keyfind(Label, 1, State0#state.labels),
+
+    BccOffset = StreamModule:offset(Stream1),
+
+    State4 =
+        case LabelLookupResult of
+            {Label, LabelOffset} ->
+                % Label is known, check if we can optimize the conditional branch
+                % Rel is the distance from the bcc instruction itself to the label
+                Rel = LabelOffset - BccOffset,
+
+                if
+                    Rel >= -252 andalso Rel =< 258 andalso (Rel rem 2) =:= 0 ->
+                        % Near branch: use direct conditional branch
+
+                        % Branch if NOT zero (ne)
+                        I4 = jit_armv6m_asm:bcc(ne, Rel),
+                        Stream2 = StreamModule:append(Stream1, I4),
+                        State0#state{stream = Stream2};
+                    true ->
+                        % Far branch: use trampoline with helper
+                        % The far sequence starts right after the 2-byte bcc
+                        FarSeqOffset = BccOffset + 2,
+                        {State1, FarCodeBlock} = branch_to_label_code(
+                            State0, FarSeqOffset, Label, LabelLookupResult
+                        ),
+                        FarSeqSize = byte_size(FarCodeBlock),
+                        % Skip over the far branch sequence if zero (eq)
+                        I4 = jit_armv6m_asm:bcc(eq, FarSeqSize + 2),
+                        Stream2 = StreamModule:append(Stream1, I4),
+                        Stream3 = StreamModule:append(Stream2, FarCodeBlock),
+                        State1#state{stream = Stream3}
+                end;
+            false ->
+                % Label not known yet: same trampoline as the far branch case above
+                FarSeqOffset = BccOffset + 2,
+                {State1, FarCodeBlock} = branch_to_label_code(State0, FarSeqOffset, Label, false),
+                FarSeqSize = byte_size(FarCodeBlock),
+                I4 = jit_armv6m_asm:bcc(eq, FarSeqSize + 2),
+                Stream2 = StreamModule:append(Stream1, I4),
+                Stream3 = StreamModule:append(Stream2, FarCodeBlock),
+                State1#state{stream = Stream3}
+        end,
+    State5 = set_continuation_to_label(State4, Label), % path taken when the count reached zero
+    call_primitive_last(State5, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).
+
+call_primitive_with_cp(State, Primitive, Args) ->
+    % cp is stored first, with a placeholder that is patched once the call is emitted.
+    {WithCp, PatchOffset, OffsetReg} = set_cp(State),
+    rewrite_cp_offset(call_primitive_last(WithCp, Primitive, Args), PatchOffset, OffsetReg).
+
+-spec set_cp(state()) -> {state(), non_neg_integer(), armv6m_register()}.
+set_cp(State0) ->
+    % Load the module index in Reg; Stream0 is the stream *after* that code was emitted
+    {
+        #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State1,
+        Reg
+    } = get_module_index(
+        State0
+    ),
+    % Get a temporary register from available registers (it is not marked as used)
+    [TempReg | _] = AvailRegs,
+    % Stream position where the sequence below starts
+    Offset = StreamModule:offset(Stream0),
+    % build cp with module_index << 24
+    I1 = jit_armv6m_asm:lsls(Reg, Reg, 24),
+    % Emit a single nop as placeholder for offset load instruction
+    I2 = jit_armv6m_asm:nop(),
+    MOVOffset = Offset + byte_size(I1), % position of the nop placeholder
+    % OR the module index with the offset (loaded in temp register)
+    I3 = jit_armv6m_asm:orrs(Reg, TempReg),
+    I4 = jit_armv6m_asm:str(Reg, ?CP),
+    Code = <<I1/binary, I2/binary, I3/binary, I4/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State2 = State1#state{stream = Stream1},
+    State3 = free_native_register(State2, Reg),
+    {State3, MOVOffset, TempReg}. % the caller passes MOVOffset and TempReg to rewrite_cp_offset/3
+
+-spec rewrite_cp_offset(state(), non_neg_integer(), armv6m_register()) -> state().
+rewrite_cp_offset(
+    #state{stream_module = StreamModule, stream = Stream0, offset = CodeOffset} = State0,
+    RewriteOffset,
+    TempReg
+) ->
+    CurrentOffset = StreamModule:offset(Stream0),
+    AlignedOffset = (CurrentOffset + 3) band (bnot 3),
+    PaddingSize = AlignedOffset - CurrentOffset,
+    % Execution resumes at the next 4-byte aligned offset, where the prolog goes.
+    % The value loaded in TempReg is that offset, relative to CodeOffset, shifted left by 2.
+    Delta0 = AlignedOffset - CodeOffset,
+    OffsetImm0 = Delta0 bsl 2,
+
+    % Check if offset fits in movs immediate (0-255)
+    {NewMoveInstr, Stream1} =
+        if
+            OffsetImm0 =< 255 ->
+                PaddedStream =
+                    if
+                        PaddingSize > 0 ->
+                            StreamModule:append(Stream0, <<0:16>>);
+                        true ->
+                            Stream0
+                    end,
+                {jit_armv6m_asm:movs(TempReg, OffsetImm0), PaddedStream};
+            true ->
+                % Too large for movs: store the value as a literal at the aligned offset.
+                Delta1 = Delta0 + 4, % the resume point moves past the 4-byte literal
+                OffsetImm1 = Delta1 bsl 2,
+                % Emit the padding, then the 32-bit literal pointing to the
+                % position right after itself
+                StreamWithLiteral = StreamModule:append(
+                    Stream0, <<0:(PaddingSize * 8), OffsetImm1:32/little>>
+                ),
+
+                % Compute PC-relative offset for ldr instruction
+                PCValue = (RewriteOffset + 4) band (bnot 3),
+                PCRelOffset = AlignedOffset - PCValue,
+                LdrInstr = jit_armv6m_asm:ldr(TempReg, {pc, PCRelOffset}),
+                {LdrInstr, StreamWithLiteral}
+        end,
+    Stream2 = StreamModule:replace(Stream1, RewriteOffset, NewMoveInstr), % overwrite the placeholder
+    Prolog = jit_armv6m_asm:push([r1, r4, r5, r6, r7, lr]),
+    Stream3 = StreamModule:append(Stream2, Prolog),
+    State0#state{stream = Stream3}.
+
+set_bs(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Zero | _]} = State0,
+    TermReg
+) ->
+    StoreTerm = jit_armv6m_asm:str(TermReg, ?BS), % ?BS receives the term
+    ClearReg = jit_armv6m_asm:movs(Zero, 0),
+    StoreOffset = jit_armv6m_asm:str(Zero, ?BS_OFFSET), % ?BS_OFFSET is reset to 0
+    Code = <<StoreTerm/binary, ClearReg/binary, StoreOffset/binary>>,
+    State0#state{stream = StreamModule:append(Stream0, Code)}.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param SortedLines line information, sorted by offset
+%% @doc Build the labels and lines tables and encode a function that returns
+%% them. In this case, the function returns (in r0) the effective address of
+%% what immediately follows it, i.e. the two tables.
+%% @end
+%% @return New state
+%%-----------------------------------------------------------------------------
+return_labels_and_lines(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        labels = Labels
+    } = State,
+    SortedLines
+) ->
+    % Only labels with an integer key are kept; they are sorted by offset
+    SortedLabels = lists:keysort(2, [
+        {Label, LabelOffset}
+     || {Label, LabelOffset} <- Labels, is_integer(Label)
+    ]),
+
+    % Check if current offset is 4-byte aligned
+    CurrentOffset = StreamModule:offset(Stream0),
+    % The adr and pop below take 2 bytes each; padding keeps the tables 4-byte aligned
+    {I1, Padding} =
+        case CurrentOffset rem 4 of
+            0 ->
+                % Aligned - use offset 4
+                {jit_armv6m_asm:adr(r0, 4), <<>>};
+            _ ->
+                % Unaligned - use offset 8 with 2-byte padding
+                {jit_armv6m_asm:adr(r0, 8), <<0:16>>}
+        end,
+    I2 = jit_armv6m_asm:pop([r1, r4, r5, r6, r7, pc]),
+    LabelsTable = <<<<Label:16, Offset:32>> || {Label, Offset} <- SortedLabels>>, % big-endian
+    LinesTable = <<<<Line:16, Offset:32>> || {Line, Offset} <- SortedLines>>, % big-endian
+    Stream1 = StreamModule:append(
+        Stream0,
+        <<I1/binary, I2/binary, Padding/binary, (length(SortedLabels)):16, LabelsTable/binary,
+            (length(SortedLines)):16, LinesTable/binary>>
+    ),
+    State#state{stream = Stream1}.
+
+%% Emit code storing SrcReg into y register Y; the first temp receives the ?Y_REGS value.
+str_y_reg(SrcReg, Y, BaseReg, _AvailableRegs) when Y * 4 =< 124 ->
+    % Small offset (at most 124): use the immediate form of str
+    LoadBase = jit_armv6m_asm:ldr(BaseReg, ?Y_REGS),
+    Store = jit_armv6m_asm:str(SrcReg, {BaseReg, Y * 4}),
+    <<LoadBase/binary, Store/binary>>;
+str_y_reg(SrcReg, Y, BaseReg, [AddrReg | _]) ->
+    % Larger offset: compute base + offset in a second available register.
+    % NOTE(review): movs presumably takes an 8-bit immediate - confirm Y * 4 > 255
+    LoadBase = jit_armv6m_asm:ldr(BaseReg, ?Y_REGS),
+    MovOffset = jit_armv6m_asm:movs(AddrReg, Y * 4),
+    AddBase = jit_armv6m_asm:add(AddrReg, BaseReg),
+    Store = jit_armv6m_asm:str(SrcReg, {AddrReg, 0}),
+    <<LoadBase/binary, MovOffset/binary, AddBase/binary, Store/binary>>;
+str_y_reg(SrcReg, Y, BaseReg, []) ->
+    % Larger offset and nothing available: park the base in ?IP_REG instead
+    LoadBase = jit_armv6m_asm:ldr(BaseReg, ?Y_REGS),
+    SaveBase = jit_armv6m_asm:mov(?IP_REG, BaseReg),
+    MovOffset = jit_armv6m_asm:movs(BaseReg, Y * 4),
+    AddBase = jit_armv6m_asm:add(BaseReg, ?IP_REG),
+    Store = jit_armv6m_asm:str(SrcReg, {BaseReg, 0}),
+    Code = <<LoadBase/binary, SaveBase/binary, MovOffset/binary, AddBase/binary>>,
+    <<Code/binary, Store/binary>>.
+
+%% Emit code loading y register Y into DstReg, for small and large offsets alike.
+ldr_y_reg(DstReg, Y, [BaseReg | _]) when Y * 4 =< 124 ->
+    % Small offset (at most 124): immediate form of ldr, through an available register
+    LoadBase = jit_armv6m_asm:ldr(BaseReg, ?Y_REGS),
+    Load = jit_armv6m_asm:ldr(DstReg, {BaseReg, Y * 4}),
+    <<LoadBase/binary, Load/binary>>;
+ldr_y_reg(DstReg, Y, [BaseReg | _]) ->
+    % Larger offset: build the address in DstReg itself, then load through it.
+    % NOTE(review): movs presumably takes an 8-bit immediate - confirm Y * 4 > 255
+    LoadBase = jit_armv6m_asm:ldr(BaseReg, ?Y_REGS),
+    MovOffset = jit_armv6m_asm:movs(DstReg, Y * 4),
+    AddBase = jit_armv6m_asm:add(DstReg, BaseReg),
+    Load = jit_armv6m_asm:ldr(DstReg, {DstReg, 0}),
+    <<LoadBase/binary, MovOffset/binary, AddBase/binary, Load/binary>>;
+ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 ->
+    % Small offset, nothing available: DstReg doubles as the base register
+    LoadBase = jit_armv6m_asm:ldr(DstReg, ?Y_REGS),
+    Load = jit_armv6m_asm:ldr(DstReg, {DstReg, Y * 4}),
+    <<LoadBase/binary, Load/binary>>;
+ldr_y_reg(DstReg, Y, []) ->
+    % Larger offset, nothing available: park the base in ?IP_REG.
+    % Note: IP_REG (r12) can only be used with mov, not ldr directly
+    LoadBase = jit_armv6m_asm:ldr(DstReg, ?Y_REGS),
+    SaveBase = jit_armv6m_asm:mov(?IP_REG, DstReg),
+    MovOffset = jit_armv6m_asm:movs(DstReg, Y * 4),
+    AddBase = jit_armv6m_asm:add(DstReg, ?IP_REG),
+    Load = jit_armv6m_asm:ldr(DstReg, {DstReg, 0}),
+    Code = <<LoadBase/binary, SaveBase/binary, MovOffset/binary, AddBase/binary>>,
+    <<Code/binary, Load/binary>>.
+
+free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) ->
+    Available = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []),
+    % Freeing a register that is not currently used is a bug: crash with badmatch
+    true = lists:member(Reg, UsedRegs0),
+    {Available, lists:delete(Reg, UsedRegs0)}.
+
+free_reg0([Reg | _], Available, Reg, Acc) ->
+    lists:reverse(Acc, [Reg | Available]); % reached Reg's slot in the reference order
+free_reg0([Next | Order], [Next | Available], Reg, Acc) ->
+    free_reg0(Order, Available, Reg, [Next | Acc]); % Next is available and comes before Reg
+free_reg0([_Absent | Order], Available, Reg, Acc) ->
+    free_reg0(Order, Available, Reg, Acc). % not at the head of the available list: skip
+
+args_regs(Args) ->
+    % Map each call argument to a register name: values read through the
+    % context map to ?CTX_REG and integers, offset and avm_int64_t to imm.
+    ArgReg = fun
+        ({free, {ptr, Reg}}) -> Reg;
+        ({free, Reg}) when is_atom(Reg) -> Reg;
+        ({free, Imm}) when is_integer(Imm) -> imm;
+        (offset) -> imm;
+        (ctx) -> ?CTX_REG;
+        (jit_state) -> jit_state;
+        (jit_state_tail_call) -> jit_state;
+        (stack) -> stack;
+        (Reg) when is_atom(Reg) -> Reg;
+        (Imm) when is_integer(Imm) -> imm;
+        ({ptr, Reg}) -> Reg;
+        ({x_reg, _}) -> ?CTX_REG;
+        ({y_reg, _}) -> ?CTX_REG;
+        ({fp_reg, _}) -> ?CTX_REG;
+        ({free, {x_reg, _}}) -> ?CTX_REG;
+        ({free, {y_reg, _}}) -> ?CTX_REG;
+        ({free, {fp_reg, _}}) -> ?CTX_REG;
+        ({avm_int64_t, _}) -> imm
+    end,
+    [ArgReg(Arg) || Arg <- Args].
+
+%%-----------------------------------------------------------------------------
+%% @doc Add a label at the current offset, padded with a nop to be 4-byte aligned.
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference()) -> state().
+add_label(#state{stream_module = StreamModule, stream = Stream0} = State0, Label) ->
+    % Labels are kept 4-byte aligned.
+    case StreamModule:offset(Stream0) of
+        Aligned when Aligned rem 4 =:= 0 ->
+            add_label(State0, Label, Aligned);
+        Unaligned ->
+            % Not aligned: pad with a nop, which advances the offset by 2,
+            % and put the label right after it.
+            Padded = StreamModule:append(Stream0, jit_armv6m_asm:nop()),
+            add_label(State0#state{stream = Padded}, Label, Unaligned + 2)
+    end.
+
+%%-----------------------------------------------------------------------------
+%% @doc Record a label at an explicit offset, without emitting any code.
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @param Offset the offset to associate with this label
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(#state{labels = Known} = State, Label, Offset) ->
+    State#state{labels = [{Label, Offset} | Known]}.
+
+-ifdef(JIT_DWARF).
+%%-----------------------------------------------------------------------------
+%% @doc Return the DWARF register number for the ctx parameter.
+%% @returns The DWARF register number where ctx is passed (?DWARF_R0_REG_ARMV6M, i.e. r0)
+%% @end
+%%-----------------------------------------------------------------------------
+-spec dwarf_ctx_register() -> non_neg_integer().
+dwarf_ctx_register() ->
+    ?DWARF_R0_REG_ARMV6M.
+-endif.
diff --git a/libs/jit/src/jit_armv6m_asm.erl b/libs/jit/src/jit_armv6m_asm.erl
new file mode 100644
index 0000000000..6410e03952
--- /dev/null
+++ b/libs/jit/src/jit_armv6m_asm.erl
@@ -0,0 +1,668 @@
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+
+-module(jit_armv6m_asm).
+
+-export([
+    add/2,
+    add/3,
+    adds/2,
+    adds/3,
+    sub/2,
+    sub/3,
+    subs/2,
+    subs/3,
+    muls/2,
+    b/1,
+    bcc/2,
+    bkpt/1,
+    blx/1,
+    bx/1,
+    cmp/2,
+    ands/2,
+    bics/2,
+    negs/2,
+    rsbs/3,
+    orrs/2,
+    ldr/2,
+    lsls/2,
+    lsls/3,
+    lsrs/2,
+    lsrs/3,
+    mov/2,
+    movs/2,
+    mvns/2,
+    nop/0,
+    str/2,
+    tst/2,
+    adr/2,
+    push/1,
+    pop/1,
+    reg_to_num/1
+]).
+
+-export_type([
+    cc/0
+]).
+
+-type arm_gpr_register() ::
+    r0
+    | r1
+    | r2
+    | r3
+    | r4
+    | r5
+    | r6
+    | r7
+    | r8
+    | r9
+    | r10
+    | r11
+    | r12
+    | r13
+    | r14
+    | r15
+    | sp
+    | lr
+    | pc.
+
+-type cc() :: eq | ne | cs | cc | mi | pl | vs | vc | hi | ls | ge | lt | gt | le | al.
+
+%%-----------------------------------------------------------------------------
+%% Helper functions
+%%-----------------------------------------------------------------------------
+
+%% Convert register atoms (r0-r15 and the sp/lr/pc aliases) to register numbers for assembly generation
+-spec reg_to_num(arm_gpr_register()) -> 0..15.
+reg_to_num(r0) -> 0;
+reg_to_num(r1) -> 1;
+reg_to_num(r2) -> 2;
+reg_to_num(r3) -> 3;
+reg_to_num(r4) -> 4;
+reg_to_num(r5) -> 5;
+reg_to_num(r6) -> 6;
+reg_to_num(r7) -> 7;
+reg_to_num(r8) -> 8;
+reg_to_num(r9) -> 9;
+reg_to_num(r10) -> 10;
+reg_to_num(r11) -> 11;
+reg_to_num(r12) -> 12;
+reg_to_num(r13) -> 13;
+reg_to_num(r14) -> 14;
+reg_to_num(r15) -> 15;
+%% Stack pointer (SP) is r13
+reg_to_num(sp) -> 13;
+%% Link register (LR) is r14
+reg_to_num(lr) -> 14;
+%% Program counter (PC) is r15
+reg_to_num(pc) -> 15.
+
+%% Convert condition atom to condition code number
+-spec cond_to_num(atom()) -> 0..15.
+% Equal (Z set)
+cond_to_num(eq) -> 0;
+% Not equal (Z clear)
+cond_to_num(ne) -> 1;
+% Carry set
+cond_to_num(cs) -> 2;
+% Carry clear
+cond_to_num(cc) -> 3;
+% Minus (N set)
+cond_to_num(mi) -> 4;
+% Plus (N clear)
+cond_to_num(pl) -> 5;
+% Overflow set
+cond_to_num(vs) -> 6;
+% Overflow clear
+cond_to_num(vc) -> 7;
+% Higher (unsigned)
+cond_to_num(hi) -> 8;
+% Lower or same (unsigned)
+cond_to_num(ls) -> 9;
+% Greater than or equal (signed)
+cond_to_num(ge) -> 10;
+% Less than (signed)
+cond_to_num(lt) -> 11;
+% Greater than (signed)
+cond_to_num(gt) -> 12;
+% Less than or equal (signed)
+cond_to_num(le) -> 13;
+% Always
+cond_to_num(al) -> 14;
+% Never (A32 numbering); in a Thumb B<cond> slot 2#1111 selects SVC, so nv must not be passed to bcc/2
+cond_to_num(nv) -> 15.
+
+-define(IS_LOW_REGISTER(Reg),
+    (Reg =:= r0 orelse Reg =:= r1 orelse Reg =:= r2 orelse Reg =:= r3 orelse Reg =:= r4 orelse
+        Reg =:= r5 orelse Reg =:= r6 orelse Reg =:= r7)
+).
+
+%% Emit an ADD instruction (Thumb encoding, high register form)
+%% ADD Rd, Rm - adds register value to register (supports high registers including PC)
+%% Encoding: 01000100 DN RmNum[3:0] RdLow3[2:0]
+%% ADD SP, #imm - adds immediate value to stack pointer
+-spec add
+    (arm_gpr_register(), arm_gpr_register()) -> binary();
+    (sp, integer()) -> binary().
+add(Rd, Rm) when is_atom(Rd), is_atom(Rm) ->
+    RdNum = reg_to_num(Rd),
+    RmNum = reg_to_num(Rm),
+    % Extract bit 3 of Rd
+    DN = (RdNum bsr 3) band 1,
+    RdLow3 = RdNum band 7,
+    % Build 16-bit instruction: 01000100 DN RmNum[3:0] RdLow3[2:0]
+    Instr = (2#01000100 bsl 8) bor (DN bsl 7) bor (RmNum bsl 3) bor RdLow3,
+    <<Instr:16/little>>;
+add(sp, Imm) when is_integer(Imm), Imm >= 0, Imm =< 508, (Imm rem 4) =:= 0 ->
+    %% Thumb ADD SP, SP, #imm7*4 encoding: 10110000 0iiiiiii
+    Imm7 = Imm div 4,
+    <<(16#B000 bor (Imm7 band 127)):16/little>>;
+add(sp, Imm) when is_integer(Imm) ->
+    error({unencodable_immediate, Imm}).
+
+%% ADD SP, SP, #imm - adds immediate value to stack pointer (3-operand form)
+-spec add(sp, sp, integer()) -> binary().
+add(sp, sp, Imm) ->
+    add(sp, Imm).
+
+%% Emit an ADDS instruction (Thumb encoding)
+%% ADDS Rd, #imm - adds immediate value to register and sets flags (2-operand form)
+-spec adds(arm_gpr_register(), integer()) -> binary().
+adds(Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 ->
+    adds(Rd, Rd, Imm);
+adds(Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm) ->
+    error({unencodable_immediate, Imm}).
+
+%% ADDS Rd, Rn, #imm - adds immediate value to register and sets flags (3-operand form)
+-spec adds(arm_gpr_register(), arm_gpr_register(), integer() | arm_gpr_register()) -> binary().
+%% The third operand may also be a low register: ADDS Rd, Rn, Rm (last clause)
+adds(Rd, Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 ->
+    %% Thumb ADDS (immediate, 8-bit) encoding: 00110dddiiiiiiii (Rd = Rn)
+    RdNum = reg_to_num(Rd),
+    <<(16#3000 bor ((RdNum band 7) bsl 8) bor (Imm band 255)):16/little>>;
+adds(Rd, Rn, Imm) when
+    ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(Imm), Imm >= 0, Imm =< 7
+->
+    %% Thumb ADDS (immediate, 3-bit) encoding: 0001110iiinnnddd
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    <<(16#1C00 bor ((Imm band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little>>;
+adds(Rd, Rn, Imm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(Imm) ->
+    error({unencodable_immediate, Imm});
+adds(Rd, Rn, Rm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm) ->
+    %% Thumb ADDS (register) encoding: 0001100mmmnnnddd
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    RmNum = reg_to_num(Rm),
+    <<
+        (16#1800 bor ((RmNum band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little
+    >>.
+
+%% Emit an unconditional branch (B) instruction (Thumb encoding, ARMv6-M)
+%% Offset is in bytes, relative to the address of this instruction (the PC+4 bias is applied below)
+%% ARMv6-M only supports 16-bit Thumb B with ±2KB range
+-spec b(integer()) -> binary().
+b(Offset) when is_integer(Offset), Offset >= -2044, Offset =< 2050, (Offset rem 2) =:= 0 ->
+    %% Thumb B (unconditional) encoding: 11100iiiiiiiiiii
+    %% imm11 is ((Offset - 4) / 2) signed, fits in 11 bits
+    %% Adjust offset by -4 to match assembler behavior (PC+4 relative)
+    AdjustedOffset = Offset - 4,
+    Offset11 = AdjustedOffset div 2,
+    <<(16#E000 bor (Offset11 band 16#7FF)):16/little>>;
+b(Offset) when is_integer(Offset) ->
+    error({unencodable_offset, Offset}).
+
+%% Emit a branch with link and exchange (BLX, register form) instruction (Thumb encoding)
+%% Register is the register atom (r0-r15)
+-spec blx(arm_gpr_register()) -> binary().
+blx(Reg) when is_atom(Reg) ->
+    RegNum = reg_to_num(Reg),
+    %% Thumb BLX (register) encoding: 010001111mmmm000
+    %% This branches to register and sets LR
+    <<(16#4780 bor (RegNum bsl 3)):16/little>>.
+
+%% Emit a branch and exchange (BX) instruction (Thumb encoding)
+%% Register is the register atom (r0-r15)
+-spec bx(arm_gpr_register()) -> binary().
+bx(Reg) when is_atom(Reg) ->
+    RegNum = reg_to_num(Reg),
+    %% Thumb BX (branch exchange) encoding: 010001110mmmm000
+    %% This branches to register without setting LR
+    <<(16#4700 bor (RegNum bsl 3)):16/little>>.
+
+%% Emit a BKPT (breakpoint) instruction
+-spec bkpt(byte()) -> binary().
+bkpt(Imm) when is_integer(Imm), Imm >= 0, Imm =< 16#FF ->
+    %% ARM Thumb BKPT encoding: 1011 1110 iiii iiii
+    %% where iiii iiii is the 8-bit immediate value
+    <<(16#BE00 bor (Imm band 16#FF)):16/little>>.
+
+%% Emit a load register (LDR) instruction
+-spec ldr(arm_gpr_register(), {arm_gpr_register(), integer() | arm_gpr_register()}) -> binary().
+%% LDR Rt, [Rn, #imm5*4] - 16-bit immediate offset (0-124, multiple of 4)
+ldr(Rt, {Rn, Imm}) when
+    ?IS_LOW_REGISTER(Rt),
+    ?IS_LOW_REGISTER(Rn),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 124,
+    (Imm rem 4) =:= 0
+->
+    RtNum = reg_to_num(Rt),
+    RnNum = reg_to_num(Rn),
+    Imm5 = Imm div 4,
+    %% Thumb LDR immediate: 01101iiiiinnnttt
+    <<(16#6800 bor (Imm5 bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>;
+%% LDR Rt, [SP, #imm8*4] - SP-relative load (0-1020, multiple of 4)
+ldr(Rt, {sp, Imm}) when
+    ?IS_LOW_REGISTER(Rt),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 1020,
+    (Imm rem 4) =:= 0
+->
+    RtNum = reg_to_num(Rt),
+    Imm8 = Imm div 4,
+    %% Thumb LDR SP-relative: 10011tttiiiiiiii
+    <<(16#9800 bor (RtNum bsl 8) bor Imm8):16/little>>;
+%% LDR Rt, [PC, #imm8*4] - PC-relative load (0-1020, multiple of 4)
+ldr(Rt, {pc, Imm}) when
+    ?IS_LOW_REGISTER(Rt),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 1020,
+    (Imm rem 4) =:= 0
+->
+    RtNum = reg_to_num(Rt),
+    Imm8 = Imm div 4,
+    %% Thumb LDR PC-relative: 01001tttiiiiiiii
+    <<(16#4800 bor (RtNum bsl 8) bor Imm8):16/little>>;
+%% LDR Rt, [Rn, Rm] - register offset
+ldr(Rt, {Rn, Rm}) when
+    ?IS_LOW_REGISTER(Rt),
+    ?IS_LOW_REGISTER(Rn),
+    ?IS_LOW_REGISTER(Rm)
+->
+    RtNum = reg_to_num(Rt),
+    RnNum = reg_to_num(Rn),
+    RmNum = reg_to_num(Rm),
+    %% Thumb LDR register: 0101100mmmnnnttt
+    <<(16#5800 bor (RmNum bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>.
+
+%% ARMv6-M Thumb MOVS instruction (sets flags)
+-spec movs(arm_gpr_register(), integer() | arm_gpr_register()) -> binary().
+%% MOVS immediate - 8-bit immediates only (0-255)
+movs(Rd, Imm) when
+    ?IS_LOW_REGISTER(Rd),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 255
+->
+    RdNum = reg_to_num(Rd),
+    %% Thumb MOVS immediate: 00100dddiiiiiiii
+    <<(16#2000 bor (RdNum bsl 8) bor Imm):16/little>>;
+%% MOVS register - low registers only (both must be r0-r7), encoded as LSLS Rd, Rm, #0
+movs(Rd, Rm) when
+    ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm)
+->
+    RdNum = reg_to_num(Rd),
+    RmNum = reg_to_num(Rm),
+    <<(16#0000 bor (RmNum bsl 3) bor RdNum):16/little>>.
+
+%% MVNS bitwise NOT (low registers only)
+-spec mvns(arm_gpr_register(), arm_gpr_register()) -> binary().
+mvns(Rd, Rm) when
+    ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm)
+->
+    RdNum = reg_to_num(Rd),
+    RmNum = reg_to_num(Rm),
+    %% Thumb MVNS register: 0100001111mmmddd
+    <<(16#43C0 bor (RmNum bsl 3) bor RdNum):16/little>>.
+
+%% ARMv6-M Thumb MOV instruction - register-to-register only (high registers allowed); see movs/2 for immediates
+-spec mov(arm_gpr_register(), arm_gpr_register()) -> binary().
+mov(Rd, Rm) when is_atom(Rd), is_atom(Rm) ->
+    RdNum = reg_to_num(Rd),
+    RmNum = reg_to_num(Rm),
+    D =
+        if
+            RdNum >= 8 -> 1;
+            true -> 0
+        end,
+    M =
+        if
+            RmNum >= 8 -> 1;
+            true -> 0
+        end,
+    RdLow = RdNum band 7,
+    RmLow = RmNum band 7,
+    <<(16#4600 bor (D bsl 7) bor (M bsl 6) bor (RmLow bsl 3) bor RdLow):16/little>>.
+
+%% Emit a store register (STR) instruction
+-spec str(arm_gpr_register(), {arm_gpr_register(), integer() | arm_gpr_register()}) -> binary().
+%% STR Rt, [Rn, #imm5*4] - immediate offset (0-124, multiple of 4)
+str(Rt, {Rn, Imm}) when
+    ?IS_LOW_REGISTER(Rt),
+    ?IS_LOW_REGISTER(Rn),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 124,
+    (Imm rem 4) =:= 0
+->
+    RtNum = reg_to_num(Rt),
+    RnNum = reg_to_num(Rn),
+    Imm5 = Imm div 4,
+    %% Thumb STR immediate: 01100iiiiinnnttt
+    <<(16#6000 bor (Imm5 bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>;
+%% STR Rt, [SP, #imm8*4] - SP-relative store (0-1020, multiple of 4)
+str(Rt, {sp, Imm}) when
+    ?IS_LOW_REGISTER(Rt),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 1020,
+    (Imm rem 4) =:= 0
+->
+    RtNum = reg_to_num(Rt),
+    Imm8 = Imm div 4,
+    %% Thumb STR SP-relative: 10010tttiiiiiiii
+    <<(16#9000 bor (RtNum bsl 8) bor Imm8):16/little>>;
+%% STR Rt, [Rn, Rm] - register offset
+str(Rt, {Rn, Rm}) when
+    ?IS_LOW_REGISTER(Rt), ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm)
+->
+    RtNum = reg_to_num(Rt),
+    RnNum = reg_to_num(Rn),
+    RmNum = reg_to_num(Rm),
+    %% Thumb STR register: 0101000mmmnnnttt
+    <<(16#5000 bor (RmNum bsl 6) bor (RnNum bsl 3) bor RtNum):16/little>>.
+
+%% Emit a conditional branch instruction (Offset is in bytes, relative to this instruction's address)
+-spec bcc(cc(), integer()) -> binary().
+%% Special case: 'al' (always) is emitted as an unconditional B, which also has a wider range
+bcc(al, Offset) when is_integer(Offset) ->
+    b(Offset);
+bcc(Cond, Offset) when
+    is_atom(Cond), is_integer(Offset), Offset >= -252, Offset =< 258, (Offset rem 2) =:= 0
+->
+    CondNum = cond_to_num(Cond),
+    %% Thumb conditional branch encoding (ARMv6-M): 1101cccciiiiiiii
+    %% imm8 is ((Offset - 4) / 2) signed, fits in 8 bits
+    %% ARMv6-M only supports 16-bit Thumb conditional branches with ±256B range
+    %% Adjust offset by -4 to match assembler behavior (PC+4 relative)
+    AdjustedOffset = Offset - 4,
+    Offset8 = AdjustedOffset div 2,
+    <<(16#D000 bor (CondNum bsl 8) bor (Offset8 band 16#FF)):16/little>>;
+bcc(Cond, Offset) when is_atom(Cond), is_integer(Offset) ->
+    error({unencodable_offset, Offset}).
+
+%% ARMv6-M Thumb CMP instruction
+-spec cmp(arm_gpr_register(), arm_gpr_register() | integer()) -> binary().
+%% CMP register-register form (low registers only)
+cmp(Rn, Rm) when
+    ?IS_LOW_REGISTER(Rn),
+    ?IS_LOW_REGISTER(Rm)
+->
+    RnNum = reg_to_num(Rn),
+    RmNum = reg_to_num(Rm),
+    %% Thumb CMP register: 0100001010mmmnnn
+    <<(16#4280 bor (RmNum bsl 3) bor RnNum):16/little>>;
+%% CMP register-immediate form (8-bit immediate 0-255)
+cmp(Rn, Imm) when
+    ?IS_LOW_REGISTER(Rn),
+    is_integer(Imm),
+    Imm >= 0,
+    Imm =< 255
+->
+    RnNum = reg_to_num(Rn),
+    %% Thumb CMP immediate: 00101nnniiiiiiii
+    <<(16#2800 bor (RnNum bsl 8) bor Imm):16/little>>;
+cmp(Rn, Imm) when ?IS_LOW_REGISTER(Rn), is_integer(Imm) ->
+    error({unencodable_immediate, Imm}).
+
+%% Emit an AND instruction (bitwise AND)
+%% ARMv6-M Thumb ANDS instruction (register only - no immediate support)
+-spec ands(arm_gpr_register(), arm_gpr_register()) -> binary().
+ands(Rd, Rm) when
+    ?IS_LOW_REGISTER(Rd),
+    ?IS_LOW_REGISTER(Rm)
+->
+    RdNum = reg_to_num(Rd),
+    RmNum = reg_to_num(Rm),
+    %% Thumb ANDS (2-operand): 0100000000mmmddd
+    <<(16#4000 bor (RmNum bsl 3) bor RdNum):16/little>>.
+
+%% Emit a BICS instruction (bit clear: Rd = Rd AND NOT Rm, low registers only)
+-spec bics(arm_gpr_register(), arm_gpr_register()) -> binary().
+bics(Rd, Rm) when
+    ?IS_LOW_REGISTER(Rd),
+    ?IS_LOW_REGISTER(Rm)
+->
+    RdNum = reg_to_num(Rd),
+    RmNum = reg_to_num(Rm),
+    %% Thumb BICS (2-operand): 0100001110mmmddd
+    <<(16#4380 bor (RmNum bsl 3) bor RdNum):16/little>>.
+
+%% Emit a NEGS instruction (arithmetic negation, emitted as RSBS Rd, Rm, #0)
+-spec negs(arm_gpr_register(), arm_gpr_register()) -> binary().
+negs(Rd, Rm) ->
+    rsbs(Rd, Rm, 0).
+
+-spec rsbs(arm_gpr_register(), arm_gpr_register(), 0) -> binary().
+rsbs(Rd, Rn, 0) when
+    ?IS_LOW_REGISTER(Rd),
+    ?IS_LOW_REGISTER(Rn)
+->
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    %% Thumb RSBS Rd, Rn, #0 (only the zero immediate is supported): 0100001001nnnddd
+    <<(16#4240 bor (RnNum bsl 3) bor RdNum):16/little>>.
+
+%% ARMv6-M Thumb ORRS instruction (register only - sets flags)
+-spec orrs(arm_gpr_register(), arm_gpr_register()) -> binary().
+orrs(Rd, Rm) when
+    ?IS_LOW_REGISTER(Rd),
+    ?IS_LOW_REGISTER(Rm)
+->
+    RdNum = reg_to_num(Rd),
+    RmNum = reg_to_num(Rm),
+    %% Thumb ORRS (2-operand): 0100001100mmmddd
+    <<(16#4300 bor (RmNum bsl 3) bor RdNum):16/little>>.
+
+%% ARMv6-M Thumb logical shift left (LSLS) instructions
+-spec lsls(arm_gpr_register(), arm_gpr_register(), integer()) -> binary().
+%% LSLS Rd, Rm, #imm5 - immediate shift (1-31)
+lsls(Rd, Rm, Imm) when
+    ?IS_LOW_REGISTER(Rd),
+    ?IS_LOW_REGISTER(Rm),
+    is_integer(Imm),
+    Imm >= 1,
+    Imm =< 31
+->
+    RdNum = reg_to_num(Rd),
+    RmNum = reg_to_num(Rm),
+    %% Thumb LSLS immediate: 00000iiiiimmmddd
+    <<(16#0000 bor (Imm bsl 6) bor (RmNum bsl 3) bor RdNum):16/little>>.
+
+-spec lsls(arm_gpr_register(), arm_gpr_register()) -> binary().
+%% LSLS Rdn, Rm - register shift (Rdn = Rdn << Rm)
+lsls(Rdn, Rm) when
+    ?IS_LOW_REGISTER(Rdn),
+    ?IS_LOW_REGISTER(Rm)
+->
+    RdnNum = reg_to_num(Rdn),
+    RmNum = reg_to_num(Rm),
+    %% Thumb LSLS register: 0100000010mmmddd
+    <<(16#4080 bor (RmNum bsl 3) bor RdnNum):16/little>>.
+
+%% ARMv6-M Thumb logical shift right (LSRS) instructions
+-spec lsrs(arm_gpr_register(), arm_gpr_register(), integer()) -> binary().
+%% LSRS Rd, Rm, #imm5 - immediate shift (1-32)
+lsrs(Rd, Rm, Imm) when
+    ?IS_LOW_REGISTER(Rd),
+    ?IS_LOW_REGISTER(Rm),
+    is_integer(Imm),
+    Imm >= 1,
+    Imm =< 32
+->
+    RdNum = reg_to_num(Rd),
+    RmNum = reg_to_num(Rm),
+    %% Thumb LSRS immediate: 00001iiiiimmmddd (imm5=0 means shift by 32)
+    Imm5 =
+        if
+            Imm =:= 32 -> 0;
+            true -> Imm
+        end,
+    <<(16#0800 bor (Imm5 bsl 6) bor (RmNum bsl 3) bor RdNum):16/little>>.
+
+-spec lsrs(arm_gpr_register(), arm_gpr_register()) -> binary().
+%% LSRS Rdn, Rm - register shift (Rdn = Rdn >> Rm)
+lsrs(Rdn, Rm) when
+    ?IS_LOW_REGISTER(Rdn),
+    ?IS_LOW_REGISTER(Rm)
+->
+    RdnNum = reg_to_num(Rdn),
+    RmNum = reg_to_num(Rm),
+    %% Thumb LSRS register: 0100000011mmmddd
+    <<(16#40C0 bor (RmNum bsl 3) bor RdnNum):16/little>>.
+
+%% ARMv6-M Thumb TST instruction (register only)
+-spec tst(arm_gpr_register(), arm_gpr_register()) -> binary().
+%% TST Rn, Rm - test bits (performs Rn & Rm, updates flags, low registers only): 0100001000mmmnnn
+tst(Rn, Rm) when ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm) ->
+    RnNum = reg_to_num(Rn),
+    RmNum = reg_to_num(Rm),
+    <<(16#4200 bor (RmNum bsl 3) bor RnNum):16/little>>.
+
+%% Emit a SUBS instruction (Thumb encoding)
+%% SUBS Rd, #imm - subtracts immediate value from register and sets flags (2-operand form)
+-spec subs(arm_gpr_register(), integer()) -> binary().
+subs(Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 ->
+    subs(Rd, Rd, Imm);
+subs(Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm) ->
+    error({unencodable_immediate, Imm}).
+
+%% SUBS Rd, Rn, #imm | Rm - subtracts an immediate or a low register and sets flags (3-operand form)
+-spec subs(arm_gpr_register(), arm_gpr_register(), integer() | arm_gpr_register()) -> binary().
+subs(Rd, Rd, Imm) when ?IS_LOW_REGISTER(Rd), is_integer(Imm), Imm >= 0, Imm =< 255 ->
+    %% Thumb SUBS (immediate, 8-bit) encoding: 00111dddiiiiiiii (Rd = Rn)
+    RdNum = reg_to_num(Rd),
+    <<(16#3800 bor ((RdNum band 7) bsl 8) bor (Imm band 255)):16/little>>;
+subs(Rd, Rn, Imm) when
+    ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(Imm), Imm >= 0, Imm =< 7
+->
+    %% Thumb SUBS (immediate, 3-bit) encoding: 0001111iiinnnddd
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    <<(16#1E00 bor ((Imm band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little>>;
+subs(Rd, Rn, Imm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), is_integer(Imm) ->
+    error({unencodable_immediate, Imm});
+subs(Rd, Rn, Rm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rn), ?IS_LOW_REGISTER(Rm) ->
+    %% Thumb SUBS (register) encoding: 0001101mmmnnnddd
+    RdNum = reg_to_num(Rd),
+    RnNum = reg_to_num(Rn),
+    RmNum = reg_to_num(Rm),
+    <<
+        (16#1A00 bor ((RmNum band 7) bsl 6) bor ((RnNum band 7) bsl 3) bor (RdNum band 7)):16/little
+    >>.
+
+%% SUB SP, #imm - subtracts immediate value from stack pointer (2-operand form)
+-spec sub(sp, integer()) -> binary().
+sub(sp, Imm) when is_integer(Imm), Imm >= 0, Imm =< 508, (Imm rem 4) =:= 0 ->
+    %% Thumb SUB SP, SP, #imm7*4 encoding: 10110000 1iiiiiii
+    Imm7 = Imm div 4,
+    <<(16#B080 bor (Imm7 band 127)):16/little>>;
+sub(sp, Imm) when is_integer(Imm) ->
+    error({unencodable_immediate, Imm}).
+
+%% SUB SP, SP, #imm - subtracts immediate value from stack pointer (3-operand form)
+-spec sub(sp, sp, integer()) -> binary().
+sub(sp, sp, Imm) ->
+    sub(sp, Imm).
+
+%% ARMv6-M Thumb address calculation (ADR) instruction
+%% ADR is implemented as ADD Rd, PC, #imm8*4 in Thumb
+%% In Thumb, PC reads as instruction_address + 4, so adr(Rd, N) means:
+%% Rd = (instruction_address + 4) + (N - 4) = instruction_address + N
+-spec adr(arm_gpr_register(), integer()) -> binary().
+adr(Rd, Offset) when
+    ?IS_LOW_REGISTER(Rd),
+    is_integer(Offset),
+    Offset >= 4,
+    Offset =< 1024,
+    (Offset rem 4) =:= 0
+->
+    RdNum = reg_to_num(Rd),
+    %% NOTE(review): the base is presumably Align(PC, 4) - confirm callers emit adr at 4-byte-aligned offsets
+    %% So for adr(Rd, N): immediate = N - 4
+    Immediate = Offset - 4,
+    Imm8 = Immediate div 4,
+    %% Thumb ADR (ADD PC-relative): 10100dddiiiiiiii
+    <<(16#A000 bor (RdNum bsl 8) bor Imm8):16/little>>.
+
+%% Emit a MULS instruction (Thumb encoding)
+%% MULS Rd, Rm - multiply Rd by Rm, store result in Rd (sets flags)
+-spec muls(arm_gpr_register(), arm_gpr_register()) -> binary().
+muls(Rd, Rm) when ?IS_LOW_REGISTER(Rd), ?IS_LOW_REGISTER(Rm) ->
+    %% Thumb MULS encoding: 0100001101mmmrrr (Rd is both source and destination)
+    RdNum = reg_to_num(Rd),
+    RmNum = reg_to_num(Rm),
+    <<(16#4340 bor (RmNum bsl 3) bor RdNum):16/little>>.
+
+%% ARMv6-M Thumb PUSH instruction
+%% PUSH {register_list} - push registers to stack (low registers + optional LR)
+-spec push([arm_gpr_register()]) -> binary().
+push(RegList) when is_list(RegList) ->
+    %% Process register list and build bitmask (errors on any register other than r0-r7 or lr)
+    {LowRegMask, LRBit} = process_reglist(RegList, lr),
+    %% Thumb PUSH encoding: 1011010Rllllllll where R=LR bit, llllllll=low register mask (r7..r0)
+    <<(16#B400 bor (LRBit bsl 8) bor LowRegMask):16/little>>.
+
+%% ARMv6-M Thumb POP instruction
+%% POP {register_list} - pop registers from stack (low registers + optional PC)
+-spec pop([arm_gpr_register()]) -> binary().
+pop(RegList) when is_list(RegList) ->
+    %% Process register list and build bitmask (errors on any register other than r0-r7 or pc)
+    {LowRegMask, PCBit} = process_reglist(RegList, pc),
+    %% Thumb POP encoding: 1011110Pllllllll where P=PC bit, llllllll=low register mask (r7..r0)
+    <<(16#BC00 bor (PCBit bsl 8) bor LowRegMask):16/little>>.
+
+%% ARMv6-M Thumb NOP instruction
+%% NOP - no operation (encoded as mov r8, r8)
+-spec nop() -> binary().
+nop() ->
+    <<16#46c0:16/little>>.
+
+%% Build the PUSH/POP register bitmask; bits are OR-ed so duplicated or aliased registers cannot carry
+process_reglist(RegList, SpecialReg) ->
+    RegBits = lists:foldl(
+        fun(Reg, Acc) ->
+            Acc bor (1 bsl reg_to_num(Reg))
+        end,
+        0,
+        RegList
+    ),
+    LowRegsBits = RegBits band 2#11111111,
+    SpecialRegBit = RegBits band (1 bsl reg_to_num(SpecialReg)),
+    if
+        RegBits =/= LowRegsBits + SpecialRegBit ->
+            error({invalid_register, RegBits - LowRegsBits - SpecialRegBit});
+        SpecialRegBit =/= 0 ->
+            {LowRegsBits, 1};
+        true ->
+            {LowRegsBits, 0}
+    end.
diff --git a/libs/jit/src/jit_backend_dwarf_impl.hrl b/libs/jit/src/jit_backend_dwarf_impl.hrl
new file mode 100644
index 0000000000..cfba532531
--- /dev/null
+++ b/libs/jit/src/jit_backend_dwarf_impl.hrl
@@ -0,0 +1,39 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-ifdef(JIT_DWARF).
+
+dwarf_opcode(#state{stream = Stream0} = State, OpCode) ->
+    Stream1 = jit_dwarf:opcode(Stream0, OpCode),
+    State#state{stream = Stream1}.
+
+dwarf_label(#state{stream = Stream0} = State, Label) ->
+    Stream1 = jit_dwarf:label(Stream0, Label),
+    State#state{stream = Stream1}.
+
+dwarf_line(#state{stream = Stream0} = State, Line) ->
+    Stream1 = jit_dwarf:line(Stream0, Line),
+    State#state{stream = Stream1}.
+
+dwarf_function(#state{stream = Stream0} = State, FunctionName, Arity) ->
+    Stream1 = jit_dwarf:function(Stream0, FunctionName, Arity),
+    State#state{stream = Stream1}.
+
+-endif.
diff --git a/libs/jit/src/jit_dwarf.erl b/libs/jit/src/jit_dwarf.erl
new file mode 100644
index 0000000000..b68d20f681
--- /dev/null
+++ b/libs/jit/src/jit_dwarf.erl
@@ -0,0 +1,1766 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_dwarf).
+
+-include("jit_dwarf.hrl").
+
+-record(dwarf, {
+    % Backend module (jit_armv6m, etc.)
+    backend :: module(),
+    % Current module being compiled
+    module_name :: module(),
+    opcodes = [] :: [{Offset :: non_neg_integer(), Opcode :: atom(), Size :: non_neg_integer()}],
+    labels = [] :: [{Offset :: non_neg_integer(), Label :: non_neg_integer()}],
+    functions = [] :: [
+        {Offset :: non_neg_integer(), FunctionName :: atom(), Arity :: non_neg_integer()}
+    ],
+    lines = [] :: [
+        {Offset :: non_neg_integer(), Filename :: binary(), LineNumber :: pos_integer()}
+    ],
+    stream_module :: module(),
+    stream :: any(),
+    line_resolver :: fun((non_neg_integer()) -> false | {ok, binary(), pos_integer()})
+}).
+
+-type state() :: #dwarf{}.
+
+-export([
+    new/5,
+    opcode/2,
+    label/2,
+    function/3,
+    line/2,
+    stream/1,
+    elf/2
+]).
+
+% jit_stream interface
+-export([
+    offset/1,
+    append/2,
+    replace/3,
+    map/4
+]).
+
+%%-----------------------------------------------------------------------------
+%% @returns A new state
+%% @doc     Create a new state with the proxied stream.
+%% @end
+%%-----------------------------------------------------------------------------
+-spec new(module(), module(), module(), pos_integer(), fun(
+    (non_neg_integer()) -> false | {ok, binary(), pos_integer()}
+)) -> state().
+new(Backend, ModuleName, StreamModule, MaxSize, LineResolver) ->
+    Stream = StreamModule:new(MaxSize),
+    #dwarf{
+        backend = Backend,
+        module_name = ModuleName,
+        stream_module = StreamModule,
+        stream = Stream,
+        line_resolver = LineResolver,
+        % Add jump table symbol at offset 0, size will be calculated
+        opcodes = [{0, jump_table, 0}]
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @param Stream    stream to get the offset from
+%% @returns The current offset
+%% @doc     Get the current offset in the stream
+%% @end
+%%-----------------------------------------------------------------------------
+-spec offset(state()) -> non_neg_integer().
+offset(#dwarf{stream_module = StreamModule, stream = Stream}) ->
+    StreamModule:offset(Stream).
+
+%%-----------------------------------------------------------------------------
+%% @param Stream    stream to append to
+%% @param Binary    binary to append to the stream
+%% @returns The updated stream
+%% @doc     Append a binary to the stream
+%% @end
+%%-----------------------------------------------------------------------------
+-spec append(state(), binary()) -> state().
+append(#dwarf{stream_module = StreamModule, stream = Stream0} = State, Binary) ->
+    Stream1 = StreamModule:append(Stream0, Binary),
+    State#dwarf{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @param Stream        stream to update
+%% @param Offset        offset to update from
+%% @param Replacement   binary to write at offset
+%% @returns The updated stream
+%% @doc     Replace bytes at a given offset
+%% @end
+%%-----------------------------------------------------------------------------
+-spec replace(state(), non_neg_integer(), binary()) -> state().
+replace(#dwarf{stream_module = StreamModule, stream = Stream0} = State, Offset, Replacement) ->
+    Stream1 = StreamModule:replace(Stream0, Offset, Replacement),
+    State#dwarf{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @param Stream        stream to update
+%% @param Offset        offset to update from
+%% @param Length        length of the section to update
+%% @param MapFunction   function that updates the binary
+%% @returns The updated stream
+%% @doc     Replace bytes at a given offset calling a map function
+%% @end
+%%-----------------------------------------------------------------------------
+-spec map(state(), non_neg_integer(), pos_integer(), fun((binary()) -> binary())) -> state().
+map(#dwarf{stream_module = StreamModule, stream = Stream0} = State, Offset, Length, MapFunction) ->
+    Stream1 = StreamModule:map(Stream0, Offset, Length, MapFunction),
+    State#dwarf{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @param State    current state (any non-dwarf state is returned unchanged)
+%% @param Opcode   the opcode atom to record
+%% @returns The updated state with opcode recorded at current offset
+%% @doc     Record an opcode at the current stream offset
+%% @end
+%%-----------------------------------------------------------------------------
+-spec opcode(state(), atom()) -> state();
+            (any(), atom()) -> any().
+opcode(#dwarf{stream_module = StreamModule, stream = Stream, opcodes = Opcodes0} = State, Opcode) ->
+    Offset = StreamModule:offset(Stream),
+    % Update size of previous opcode and add new opcode
+    Opcodes1 = update_previous_opcode_size(Opcodes0, Offset),
+    % Size will be calculated later
+    Opcodes2 = [{Offset, Opcode, 0} | Opcodes1],
+    State#dwarf{opcodes = Opcodes2};
+opcode(BackendStateDwarfDisabled, _Opcode) ->
+    BackendStateDwarfDisabled.
+
+%%-----------------------------------------------------------------------------
+%% @param State    current state
+%% @param Label    the label number to record
+%% @returns The updated state with label recorded at current offset
+%% @doc     Record a label at the current stream offset
+%% @end
+%%-----------------------------------------------------------------------------
+-spec label(state(), non_neg_integer()) -> state();
+           (any(), non_neg_integer()) -> any().
+label(
+    #dwarf{stream_module = StreamModule, stream = Stream, labels = Labels0, opcodes = Opcodes0} =
+        State,
+    Label
+) ->
+    Offset = StreamModule:offset(Stream),
+    % Update size of previous opcode before adding label
+    Opcodes1 = update_previous_opcode_size(Opcodes0, Offset),
+    Labels1 = [{Offset, Label} | Labels0],
+    State#dwarf{labels = Labels1, opcodes = Opcodes1};
+label(BackendStateDwarfDisabled, _Label) ->
+    BackendStateDwarfDisabled.
+
+%%-----------------------------------------------------------------------------
+%% @param State         current state
+%% @param FunctionName  the function name atom to record
+%% @param Arity         the function arity
+%% @returns The updated state with the function recorded at the current offset
+%% @doc     Record a function at the current stream offset. Entries are
+%%          prepended, so the newest function is first in the list.
+%% @end
+%%-----------------------------------------------------------------------------
+-spec function(state(), atom(), non_neg_integer()) -> state();
+              (any(), atom(), non_neg_integer()) -> any().
+function(#dwarf{} = Dwarf, FunctionName, Arity) ->
+    #dwarf{stream_module = Mod, stream = Stream, functions = Known} = Dwarf,
+    Entry = {Mod:offset(Stream), FunctionName, Arity},
+    Dwarf#dwarf{functions = [Entry | Known]};
+function(NotDwarfState, _FunctionName, _Arity) ->
+    % Any value that is not a #dwarf{} record is handed back unchanged
+    NotDwarfState.
+
+%%-----------------------------------------------------------------------------
+%% @param State    current state
+%% @param LineRef  the line reference to resolve and record
+%% @returns The updated state with the line recorded at the current offset, or
+%%          the unchanged state when the reference cannot be resolved
+%% @doc     Record a line number at the current stream offset. The reference
+%%          is passed to the state's line resolver, which yields either
+%%          `{ok, Filename, LineNumber}' or `false'.
+%% @end
+%%-----------------------------------------------------------------------------
+-spec line(state(), pos_integer()) -> state();
+          (any(), pos_integer()) -> any().
+line(#dwarf{} = Dwarf, LineRef) ->
+    #dwarf{stream_module = Mod, stream = Stream, line_resolver = Resolve} = Dwarf,
+    Here = Mod:offset(Stream),
+    case Resolve(LineRef) of
+        false ->
+            % Nothing known about this reference: keep the state as it is
+            Dwarf;
+        {ok, Filename, LineNumber} ->
+            % The first line seen in the module's own file also gets a
+            % synthetic "line 1 at offset 0" entry
+            Seeded = maybe_add_initial_line(
+                Dwarf#dwarf.lines, Dwarf#dwarf.module_name, Filename
+            ),
+            Dwarf#dwarf{lines = [{Here, Filename, LineNumber} | Seeded]}
+    end;
+line(NotDwarfState, _LineRef) ->
+    % Any value that is not a #dwarf{} record is handed back unchanged
+    NotDwarfState.
+
+%% Prepend a `{0, Filename, 1}' entry when Filename's basename is
+%% "<ModuleName>.erl" and no recorded line sits at offset 0 yet; otherwise
+%% return Lines unchanged. The original comments describe offset 0 as the
+%% jump table.
+maybe_add_initial_line(Lines, ModuleName, Filename) ->
+    ModuleFile = <<(atom_to_binary(ModuleName, utf8))/binary, ".erl">>,
+    % Basename is whatever follows the last "/" in the path
+    IsModuleFile = lists:last(binary:split(Filename, <<"/">>, [global])) =:= ModuleFile,
+    % Only scan the recorded lines when the file is the module's own source
+    HasOffsetZero =
+        IsModuleFile andalso
+            lists:any(fun({Offset, _, _}) -> Offset =:= 0 end, Lines),
+    if
+        IsModuleFile, not HasOffsetZero ->
+            [{0, Filename, 1} | Lines];
+        true ->
+            Lines
+    end.
+
+%% Close the most recent opcode. When the head entry still carries the
+%% placeholder size 0, its size becomes the distance from its own offset to
+%% NewOffset. In every other case (empty list, size already set, unexpected
+%% shape) the list is returned unchanged.
+update_previous_opcode_size([{Offset, Opcode, 0} | Older], NewOffset) ->
+    [{Offset, Opcode, NewOffset - Offset} | Older];
+update_previous_opcode_size(Opcodes, _NewOffset) ->
+    Opcodes.
+
+%% Return the stream stored in the DWARF state.
+-spec stream(state()) -> any().
+stream(#dwarf{} = Dwarf) ->
+    Dwarf#dwarf.stream.
+
+%%-----------------------------------------------------------------------------
+%% @param State       DWARF state containing debug information
+%% @param NativeCode  native code handed to the ELF builder together with the
+%%                    debug sections
+%% @returns `{ok, TextSectionOffset, ELF}' where `ELF' is the combined ELF
+%%          binary and `TextSectionOffset' is the text section offset reported
+%%          by `create_elf_with_text_and_debug_sections/3', or `false' if not
+%%          compiled with JIT_DWARF
+%% @doc     Generate an ELF binary with DWARF debug sections
+%% @end
+%%-----------------------------------------------------------------------------
+-spec elf(state(), binary()) -> {ok, non_neg_integer(), binary()} | false.
+-ifdef(JIT_DWARF).
+elf(#dwarf{module_name = ModuleName, backend = Backend} = State, NativeCode) ->
+    SourceFile = <<(atom_to_binary(ModuleName, utf8))/binary, ".erl">>,
+
+    % Generate DWARF sections
+    DebugInfoSection = generate_debug_info_section_with_opcodes(State, SourceFile),
+    DebugLineSection = generate_debug_line_section(State, SourceFile),
+    DebugAbbrevSection = generate_debug_abbrev_section_with_opcodes(),
+    DebugStrSection = generate_debug_str_section(State, SourceFile),
+    DebugArangesSection = generate_debug_aranges_section(State),
+
+    % Generate symbol table sections for function names
+    {SymtabSection, StrtabSection} = generate_symbol_table(State, Backend),
+
+    % Sections emitted for every backend, in this order
+    BaseSections = [
+        {<<".debug_info">>, DebugInfoSection},
+        {<<".debug_line">>, DebugLineSection},
+        {<<".debug_abbrev">>, DebugAbbrevSection},
+        {<<".debug_str">>, DebugStrSection},
+        {<<".debug_aranges">>, DebugArangesSection},
+        {<<".symtab">>, SymtabSection},
+        {<<".strtab">>, StrtabSection}
+    ],
+
+    % The armv6m backend additionally carries an .ARM.attributes section
+    Sections =
+        case Backend of
+            jit_armv6m ->
+                ArmAttributesSection = generate_arm_attributes_section(),
+                BaseSections ++ [{<<".ARM.attributes">>, ArmAttributesSection}];
+            _ ->
+                BaseSections
+        end,
+
+    % Create complete ELF with text section and debug sections
+    {CombinedELF, TextSectionOffset} = create_elf_with_text_and_debug_sections(
+        Backend, Sections, NativeCode
+    ),
+    {ok, TextSectionOffset, CombinedELF}.
+-else.
+elf(_State, _NativeCode) ->
+    false.
+-endif.
+
+-ifdef(JIT_DWARF).
+
+%% Map JIT backend to ELF machine type.
+%% Only the four backend atoms listed below are handled; any other value
+%% fails with a function_clause error.
+backend_to_machine_type(jit_x86_64) -> ?EM_X86_64;
+backend_to_machine_type(jit_aarch64) -> ?EM_AARCH64;
+backend_to_machine_type(jit_armv6m) -> ?EM_ARM;
+backend_to_machine_type(jit_riscv32) -> ?EM_RISCV.
+
+%% Map JIT backend to ELF flags.
+%% The armv6m backend combines the EABI version 5, soft-float ABI and
+%% ARMv6-M architecture flag macros; every other backend uses 0.
+backend_to_elf_flags(jit_armv6m) ->
+    ?EF_ARM_EABI_VER5 bor ?EF_ARM_ABI_FLOAT_SOFT bor ?EF_ARM_ARCH_V6M;
+backend_to_elf_flags(_) ->
+    0.
+
+%% Return the 0-based position of SectionName in SectionNames.
+%% Raises error({section_not_found}) when the name is not in the list.
+find_section_index(SectionName, SectionNames) ->
+    find_section_index_helper(SectionName, SectionNames, 0).
+
+%% Walk the list, counting how many names were skipped before the match.
+find_section_index_helper(Wanted, Names, Position) ->
+    case Names of
+        [Wanted | _] ->
+            Position;
+        [_ | Remaining] ->
+            find_section_index_helper(Wanted, Remaining, Position + 1);
+        [] ->
+            error({section_not_found})
+    end.
+
+%% Find .symtab section index in section headers
+
+%% Generate ARM attributes section for ARMv6-M
+%%
+%% Layout (ARM ABI addenda, build attributes):
+%%   'A'                                   format version
+%%   <subsection length:32> "aeabi" 0      vendor subsection
+%%     <1> <length:32> <tag, value>*       file-scope attributes
+%% Both length fields count themselves and everything that follows inside
+%% their (sub)subsection; neither counts the leading 'A' byte.
+generate_arm_attributes_section() ->
+    % Build the tag-value pairs for file attributes
+    TagValuePairs = <<
+        % CPU_arch attribute: ARMv6-M (value 11)
+        6,
+        11,
+        % CPU_arch_profile attribute: 'M' profile (value 77 = 'M')
+        7,
+        77,
+        % ARM_ISA_use attribute: No ARM ISA (value 0)
+        8,
+        0,
+        % THUMB_ISA_use attribute: Thumb-1 only (value 1)
+        9,
+        1,
+        % FP_arch attribute: No FP (value 0)
+        10,
+        0,
+        % ABI_PCS_wchar_t attribute: the value is the wchar_t size in bytes,
+        % so 2 declares a 2-byte wchar_t.
+        % NOTE(review): the previous comment said "4 bytes"; use 4 here if a
+        % 4-byte wchar_t is what was intended.
+        18,
+        2,
+        % ABI_enum_size attribute: int-sized (value 2)
+        26,
+        2,
+        % ABI_align_needed attribute: 8-byte alignment (value 1)
+        24,
+        1,
+        % ABI_align_preserved attribute: 8-byte alignment (value 1)
+        25,
+        1
+    >>,
+
+    % File attributes length covers its own tag byte and 4-byte length field
+    FileAttributesLength = 1 + 4 + byte_size(TagValuePairs),
+    FileAttributes = <<
+        % File attributes tag
+        1,
+        % Length of this file attributes subsection
+        FileAttributesLength:32/little,
+        % The tag-value pairs
+        TagValuePairs/binary
+    >>,
+
+    % Vendor subsection body: "aeabi" + null + file attributes
+    VendorContent = <<"aeabi", 0, FileAttributes/binary>>,
+
+    % Subsection length covers the 4-byte length field and the vendor content.
+    % It must NOT include the format-version byte, otherwise the declared
+    % length runs one byte past the end of the section.
+    SubsectionLength = 4 + byte_size(VendorContent),
+
+    <<
+        % Format version 'A'
+        $A,
+        % Vendor subsection length (4 bytes, little-endian)
+        SubsectionLength:32/little,
+        % Vendor subsection content
+        VendorContent/binary
+    >>.
+
+%% Build the .debug_str contents: a leading empty string (a single NUL)
+%% followed by the source file name, the producer string, the compilation
+%% directory and the module name, each terminated by a NUL byte.
+generate_debug_str_section(#dwarf{module_name = ModuleName}, SourceFile) ->
+    Entries = [
+        SourceFile,
+        <<"AtomVM JIT Compiler v0.7.0">>,
+        <<"/tmp">>,
+        atom_to_binary(ModuleName, utf8)
+    ],
+    % The first 0 is the empty string; every entry carries its own terminator
+    iolist_to_binary([0, [[Entry, 0] || Entry <- Entries]]).
+
+%% Generate the .debug_aranges section: one address-range set that covers
+%% the range returned by calculate_address_range/1 and points at the first
+%% compile unit (offset 0 in .debug_info).
+generate_debug_aranges_section(#dwarf{backend = Backend} = State) ->
+    % Get word size and calculate address range
+    WordSize = Backend:word_size(),
+    WordSizeInBits = WordSize * 8,
+    {LowPC, HighPC} = calculate_address_range(State),
+    Length = HighPC - LowPC,
+
+    % Header fields that follow the 4-byte unit_length field
+    Header = <<
+        % DWARF version
+        2:16/little,
+        % Debug info offset (always 0 - first compile unit)
+        0:32/little,
+        % Address size
+        WordSize,
+        % Segment size (0 for flat address space)
+        0
+    >>,
+
+    % The first tuple must start at an offset, measured from the beginning of
+    % the set (i.e. including the unit_length field), that is a multiple of
+    % the tuple size (2 * address size). Consumers skip padding using that
+    % rule, so the 4-byte length field has to be part of the computation.
+    UnitLengthFieldSize = 4,
+    HeaderSize = UnitLengthFieldSize + byte_size(Header),
+    TupleSize = 2 * WordSize,
+    PaddingSize = (TupleSize - (HeaderSize rem TupleSize)) rem TupleSize,
+    Padding = <<0:(PaddingSize * 8)>>,
+
+    % Address descriptors
+    Descriptors = <<
+        % Start address
+        LowPC:WordSizeInBits/little,
+        % Length
+        Length:WordSizeInBits/little,
+        % Terminating entry (two zero words)
+        0:WordSizeInBits/little,
+        0:WordSizeInBits/little
+    >>,
+
+    % Everything after the unit_length field
+    HeaderAndTable = <<Header/binary, Padding/binary, Descriptors/binary>>,
+
+    % unit_length does not count the length field itself
+    TotalLength = byte_size(HeaderAndTable),
+
+    <<TotalLength:32/little, HeaderAndTable/binary>>.
+
+%% Build the .debug_abbrev table used by the DIEs of the .debug_info section.
+%% Each abbreviation is described as {Code, Tag, HasChildren, Attributes}
+%% with Attributes a list of {Attribute, Form} pairs, and is encoded as
+%% Code, Tag, HasChildren, the attribute/form bytes and a 0,0 terminator.
+%% A final 0 byte ends the table.
+generate_debug_abbrev_section_with_opcodes() ->
+    Abbrevs = [
+        % 1: compile unit (has children)
+        {1, ?DW_TAG_compile_unit, 1, [
+            {?DW_AT_name, ?DW_FORM_string},
+            {?DW_AT_comp_dir, ?DW_FORM_string},
+            {?DW_AT_producer, ?DW_FORM_string},
+            {?DW_AT_language, ?DW_FORM_data4},
+            {?DW_AT_low_pc, ?DW_FORM_addr},
+            {?DW_AT_high_pc, ?DW_FORM_addr},
+            {?DW_AT_stmt_list, ?DW_FORM_sec_offset}
+        ]},
+        % 2: lexical block, used for opcodes (name = opcode name)
+        {2, ?DW_TAG_lexical_block, 0, [
+            {?DW_AT_name, ?DW_FORM_string},
+            {?DW_AT_low_pc, ?DW_FORM_addr}
+        ]},
+        % 3: label
+        {3, ?DW_TAG_label, 0, [
+            {?DW_AT_name, ?DW_FORM_string},
+            {?DW_AT_low_pc, ?DW_FORM_addr}
+        ]},
+        % 4: subprogram, used for functions (has children: ctx parameter)
+        {4, ?DW_TAG_subprogram, 1, [
+            {?DW_AT_name, ?DW_FORM_string},
+            {?DW_AT_low_pc, ?DW_FORM_addr},
+            {?DW_AT_high_pc, ?DW_FORM_addr}
+        ]},
+        % 5: formal parameter (ctx parameter with type and register location)
+        {5, ?DW_TAG_formal_parameter, 0, [
+            {?DW_AT_name, ?DW_FORM_string},
+            {?DW_AT_type, ?DW_FORM_ref4},
+            {?DW_AT_location, ?DW_FORM_exprloc}
+        ]},
+        % 6: base type (term/uintptr_t)
+        {6, ?DW_TAG_base_type, 0, [
+            {?DW_AT_name, ?DW_FORM_string},
+            {?DW_AT_byte_size, ?DW_FORM_data1},
+            {?DW_AT_encoding, ?DW_FORM_data1}
+        ]},
+        % 7: pointer type (Context*)
+        {7, ?DW_TAG_pointer_type, 0, [
+            {?DW_AT_byte_size, ?DW_FORM_data1},
+            {?DW_AT_type, ?DW_FORM_ref4}
+        ]},
+        % 8: structure type (Context, has member children)
+        {8, ?DW_TAG_structure_type, 1, [
+            {?DW_AT_name, ?DW_FORM_string},
+            {?DW_AT_byte_size, ?DW_FORM_data4}
+        ]},
+        % 9: structure member
+        {9, ?DW_TAG_member, 0, [
+            {?DW_AT_name, ?DW_FORM_string},
+            {?DW_AT_type, ?DW_FORM_ref4},
+            {?DW_AT_data_member_location, ?DW_FORM_data4}
+        ]},
+        % 10: array type (term x[MAX_REG+1], has a subrange child)
+        {10, ?DW_TAG_array_type, 1, [
+            {?DW_AT_type, ?DW_FORM_ref4}
+        ]},
+        % 11: subrange type (array bounds)
+        {11, ?DW_TAG_subrange_type, 0, [
+            {?DW_AT_upper_bound, ?DW_FORM_data1}
+        ]}
+    ],
+    EncodeAbbrev = fun({Code, Tag, HasChildren, Attributes}) ->
+        AttributeSpecs = <<<<Attribute, Form>> || {Attribute, Form} <- Attributes>>,
+        % 0, 0 terminates the attribute list of one abbreviation
+        <<Code, Tag, HasChildren, AttributeSpecs/binary, 0, 0>>
+    end,
+    Table = <<<<(EncodeAbbrev(Abbrev))/binary>> || Abbrev <- Abbrevs>>,
+    % A single 0 ends the abbreviation table
+    <<Table/binary, 0>>.
+
+%% Generate the .debug_info section: a single DWARF 4 compile unit whose
+%% children are the type DIEs followed by the function, opcode and label
+%% DIEs. The attribute order of the compile unit DIE must match
+%% abbreviation 1 of the .debug_abbrev table.
+generate_debug_info_section_with_opcodes(
+    #dwarf{
+        functions = Functions,
+        opcodes = Opcodes,
+        labels = Labels,
+        module_name = ModuleName,
+        backend = Backend
+    } = State,
+    SourceFile
+) ->
+    % Calculate address ranges
+    {LowPC, HighPC} = calculate_address_range(State),
+
+    % Get word size from backend and convert to bits
+    WordSize = Backend:word_size(),
+    WordSizeInBits = WordSize * 8,
+
+    % Compile unit header (without unit_length) plus the compile unit DIE
+    CompileUnitContent = <<
+        % DWARF version
+        4:16/little,
+        % Abbreviation offset
+        0:32/little,
+        % Address size
+        WordSize,
+        % Compilation unit DIE (abbreviation 1)
+        1,
+        % DW_AT_name
+        SourceFile/binary,
+        0,
+        % DW_AT_comp_dir
+        "/tmp",
+        0,
+        % DW_AT_producer
+        "AtomVM JIT Compiler v0.7.0",
+        0,
+        % DW_AT_language (for now, we always say Erlang)
+        ?DW_LANG_Erlang:32/little,
+        % DW_AT_low_pc
+        LowPC:WordSizeInBits/little,
+        % DW_AT_high_pc
+        HighPC:WordSizeInBits/little,
+        % DW_AT_stmt_list (offset into .debug_line)
+        0:32/little
+    >>,
+
+    % DW_FORM_ref4 offsets are relative to the start of the compile unit,
+    % i.e. the unit_length field, so the 4 bytes of that field are added to
+    % the size of what precedes the type DIEs.
+    TypeDIEsBaseOffset = 4 + byte_size(CompileUnitContent),
+
+    % Generate type DIEs and get the Context* type offset
+    {TypeDIEs, ContextPtrTypeOffset} = generate_type_dies(State, TypeDIEsBaseOffset),
+
+    % Generate DIEs for functions, opcodes and labels
+    FunctionDIEs = generate_function_dies_with_module(
+        Functions, ModuleName, State, ContextPtrTypeOffset, HighPC
+    ),
+    OpcodeDIEs = generate_opcode_dies(Opcodes, Backend),
+    LabelDIEs = generate_label_dies(Labels, Backend),
+
+    % End of children marker for the compile unit DIE
+    EndMarker = <<0>>,
+
+    % unit_length counts everything after the length field
+    Content =
+        <<CompileUnitContent/binary, TypeDIEs/binary, FunctionDIEs/binary,
+            OpcodeDIEs/binary, LabelDIEs/binary, EndMarker/binary>>,
+    UnitLength = byte_size(Content),
+
+    <<UnitLength:32/little, Content/binary>>.
+
+%% Generate the .debug_line section (DWARF 4 line number program).
+%%
+%% The include-directory and file tables are derived from the file names
+%% recorded in the state's lines (or from SourceFile when no line was
+%% recorded); the program itself comes from generate_line_program/2, which
+%% receives the mapping from full path to 1-based file index.
+generate_debug_line_section(#dwarf{lines = Lines}, SourceFile) ->
+    % Header fields located after the header_length field
+    LineProgramParams = <<
+        % Minimum instruction length. The line program advances the address
+        % with DW_LNS_advance_pc operands expressed in bytes, and consumers
+        % multiply that operand by this field, so it has to be 1 for the
+        % byte offsets to be taken as they are.
+        1,
+        % Maximum operations per instruction
+        1,
+        % Default is_stmt
+        1,
+        % Line base
+        (-5):8/signed,
+        % Line range
+        14,
+        % Opcode base
+        13
+    >>,
+
+    % Standard opcode lengths (for opcodes 1-12, opcode_base-1 entries)
+    % DW_LNS_copy(1)=0, DW_LNS_advance_pc(2)=1, DW_LNS_advance_line(3)=1, etc.
+    StdOpcodeLengths = <<0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1>>,
+
+    % File names come from the line data; SourceFile is only used when there
+    % is none, as it may duplicate one of the recorded names
+    UniqueFullPaths =
+        case Lines of
+            [] -> [SourceFile];
+            _ -> lists:usort([Filename || {_Offset, Filename, _LineNum} <- Lines])
+        end,
+    {Directories, FileEntries, FileMapping} = line_table_files(UniqueFullPaths),
+
+    % Directory table: NUL-terminated names
+    DirectoryTable = [[Dir, 0] || Dir <- Directories],
+    % File table: name, NUL, directory index (ULEB128), mtime 0, length 0
+    FileTableEntries = [
+        [Basename, 0, encode_uleb128(DirIndex), 0, 0]
+     || {Basename, DirIndex} <- FileEntries
+    ],
+    % Each table is closed by a single 0 byte
+    FileTable = iolist_to_binary([DirectoryTable, 0, FileTableEntries, 0]),
+
+    % Line number program - using actual line data with file mapping
+    Program = generate_line_program(Lines, FileMapping),
+
+    % header_length counts everything after the header_length field up to
+    % the end of the file table
+    HeaderLength =
+        byte_size(LineProgramParams) + byte_size(StdOpcodeLengths) + byte_size(FileTable),
+
+    % unit_length counts everything after the unit_length field
+    ContentAfterLength = <<
+        % DWARF version
+        4:16/little,
+        HeaderLength:32/little,
+        LineProgramParams/binary,
+        StdOpcodeLengths/binary,
+        FileTable/binary,
+        Program/binary
+    >>,
+    UnitLength = byte_size(ContentAfterLength),
+
+    <<UnitLength:32/little, ContentAfterLength/binary>>.
+
+%% Split full paths (binaries) into the include-directory list, the list of
+%% {Basename, DirIndex} file entries (DirIndex 0 = no directory) and a
+%% [{FullPath, FileIndex}] mapping with 1-based file indices. Paths that
+%% resolve to an already known file entry share its index, so the mapping
+%% always covers every path.
+line_table_files(FullPaths) ->
+    {Dirs, Files, MappingRev} = lists:foldl(
+        fun(FullPath, {DirAcc, FileAcc, MapAcc}) ->
+            {Basename, DirIndex, NewDirAcc} =
+                case filename:split(binary_to_list(FullPath)) of
+                    [Name] ->
+                        % Just a filename, no directory
+                        {Name, 0, DirAcc};
+                    PathParts ->
+                        Dir = filename:join(lists:droplast(PathParts)),
+                        {Index, DirAcc1} = line_table_index(Dir, DirAcc),
+                        {lists:last(PathParts), Index, DirAcc1}
+                end,
+            {FileIndex, NewFileAcc} = line_table_index({Basename, DirIndex}, FileAcc),
+            {NewDirAcc, NewFileAcc, [{FullPath, FileIndex} | MapAcc]}
+        end,
+        {[], [], []},
+        FullPaths
+    ),
+    {Dirs, Files, lists:reverse(MappingRev)}.
+
+%% Return {Index, Items1}: the 1-based position of Item in Items, appending
+%% it first when it is not present yet. The tables are small, so the linear
+%% scan and append are not a concern.
+line_table_index(Item, Items) ->
+    Preceding = length(lists:takewhile(fun(Other) -> Other =/= Item end, Items)),
+    case Preceding < length(Items) of
+        true -> {Preceding + 1, Items};
+        false -> {Preceding + 1, Items ++ [Item]}
+    end.
+
+%% Assemble an ELF image made of the ELF header, the section data and the
+%% section header table, in that order. The ELF class and the header sizes
+%% follow the backend word size (8 -> ELF64, 4 -> ELF32); the section list is
+%% the null section, the given sections and a trailing .shstrtab.
+create_elf_header_and_sections(Backend, Sections) ->
+    WordSize = Backend:word_size(),
+    % Width of the address/offset fields of the header: 32 or 64 bits
+    AddrBits = WordSize * 8,
+    {ElfClass, ElfHeaderSize, SectionHeaderSize} =
+        case WordSize of
+            8 -> {?ELFCLASS64, 64, 64};
+            4 -> {?ELFCLASS32, 52, 40}
+        end,
+
+    % Section names: null section, payload sections, section name table
+    PayloadNames = [Name || {Name, _Data} <- Sections],
+    SectionNames = [<<>> | PayloadNames] ++ [<<".shstrtab">>],
+    ShStrTab = create_string_table(SectionNames),
+    SectionCount = length(SectionNames),
+
+    % Section data starts right after the ELF header; the section header
+    % table follows the data
+    {SectionData, SectionOffsets} = layout_sections(Sections, ShStrTab, ElfHeaderSize),
+    SectionHeaderOffset = ElfHeaderSize + byte_size(SectionData),
+
+    MachineType = backend_to_machine_type(Backend),
+    ElfFlags = backend_to_elf_flags(Backend),
+
+    % e_ident: magic, class, little-endian data, version, OS ABI 0,
+    % ABI version 0 and 7 bytes of padding
+    Ident = <<
+        ?EI_MAG0,
+        ?EI_MAG1,
+        ?EI_MAG2,
+        ?EI_MAG3,
+        ElfClass,
+        ?ELFDATA2LSB,
+        ?EV_CURRENT,
+        0,
+        0,
+        0:56
+    >>,
+    Fields = <<
+        % Object file type and architecture from backend
+        ?ET_REL:16/little,
+        MachineType:16/little,
+        % Version
+        1:32/little,
+        % Entry point and program header offset: none
+        0:AddrBits/little,
+        0:AddrBits/little,
+        SectionHeaderOffset:AddrBits/little,
+        ElfFlags:32/little,
+        ElfHeaderSize:16/little,
+        % Program header entry size and count: no program headers
+        0:16/little,
+        0:16/little,
+        SectionHeaderSize:16/little,
+        SectionCount:16/little,
+        % .shstrtab is the last section
+        (SectionCount - 1):16/little
+    >>,
+
+    SectionHeaders = create_section_headers_proper(
+        SectionNames, Sections, SectionOffsets, ShStrTab, Backend, AddrBits
+    ),
+
+    <<Ident/binary, Fields/binary, SectionData/binary, SectionHeaders/binary>>.
+
+%% Helper functions
+%% Return {LowPC, HighPC} covering all recorded opcodes, or {0, 0} when there
+%% are none. LowPC is the smallest opcode offset; HighPC is the largest
+%% opcode end, where an opcode whose size is still the placeholder 0 is
+%% assumed to be 4 bytes long.
+calculate_address_range(#dwarf{opcodes = []}) ->
+    {0, 0};
+calculate_address_range(#dwarf{opcodes = Opcodes}) ->
+    Starts = [Offset || {Offset, _, _} <- Opcodes],
+    UnsizedEnds = [Offset + 4 || {Offset, _, 0} <- Opcodes],
+    SizedEnds = [Offset + Size || {Offset, _, Size} <- Opcodes, Size =/= 0],
+    {lists:min(Starts), lists:max(UnsizedEnds ++ SizedEnds)}.
+
+%% Build the line number program. Without line data the program only selects
+%% file 1 and ends the sequence; otherwise the lines are emitted in
+%% ascending offset order starting from offset 0, line 1 and no file.
+generate_line_program([], _FileMapping) ->
+    % DW_LNS_set_file (4) with file index 1, then the extended opcode
+    % (prefix 0, length 1) DW_LNE_end_sequence (1)
+    <<4, 1, 0, 1, 1>>;
+generate_line_program(Lines, FileMapping) ->
+    ByOffset = lists:sort(
+        fun({Left, _, _}, {Right, _, _}) -> Left =< Right end,
+        Lines
+    ),
+    generate_line_program_entries(ByOffset, FileMapping, 0, 1, 0).
+
+%% Emit the line program rows for lines sorted by offset.
+%%
+%% Each entry produces an optional DW_LNS_set_file, a DW_LNS_advance_pc with
+%% the byte distance from the previous entry, a DW_LNS_advance_line with the
+%% line distance and a DW_LNS_copy that emits the row. LastOffset, LastLine
+%% and LastFileIndex carry the values of the previous entry. An empty list
+%% ends the sequence.
+generate_line_program_entries([], _FileMapping, _LastOffset, _LastLine, _LastFileIndex) ->
+    % Extended opcode prefix (0), length (1), DW_LNE_end_sequence (1)
+    <<0, 1, 1>>;
+generate_line_program_entries(
+    [{Offset, Filename, LineNumber} | Rest], FileMapping, LastOffset, LastLine, LastFileIndex
+) ->
+    % Find file index from mapping, defaulting to the first file
+    FileIndex =
+        case lists:keyfind(Filename, 1, FileMapping) of
+            {Filename, Index} -> Index;
+            false -> 1
+        end,
+
+    % The file is selected when it changes and, as before, whenever the
+    % previous offset is 0. The operand of DW_LNS_set_file (opcode 4) is a
+    % ULEB128 value, so it must be encoded rather than written as a raw byte:
+    % indices of 128 and above need more than one byte.
+    SetFile =
+        case LastOffset == 0 orelse FileIndex =/= LastFileIndex of
+            true -> <<4, (encode_uleb128(FileIndex))/binary>>;
+            false -> <<>>
+        end,
+
+    Row = <<
+        SetFile/binary,
+        % DW_LNS_advance_pc (opcode 2) with ULEB128 address delta
+        2,
+        (encode_uleb128(Offset - LastOffset))/binary,
+        % DW_LNS_advance_line (opcode 3) with SLEB128 line delta
+        3,
+        (encode_sleb128(LineNumber - LastLine))/binary,
+        % DW_LNS_copy (opcode 1) - emit a new row
+        1
+    >>,
+
+    Following = generate_line_program_entries(Rest, FileMapping, Offset, LineNumber, FileIndex),
+    <<Row/binary, Following/binary>>.
+
+% Encode unsigned LEB128: 7 value bits per byte, least significant group
+% first, with the top bit set on every byte except the last one.
+encode_uleb128(Value) when Value < 128 ->
+    <<Value>>;
+encode_uleb128(Value) ->
+    Low7 = Value band 16#7F,
+    Higher = encode_uleb128(Value bsr 7),
+    % Continuation bit followed by the low 7 bits
+    <<1:1, Low7:7, Higher/binary>>.
+
+% Encode signed LEB128.
+%
+% Values in -64..63 fit in a single byte. Larger magnitudes are emitted 7
+% bits at a time, least significant group first, until the remaining value
+% is pure sign extension AND the sign bit (bit 6) of the last byte emitted
+% agrees with the sign of the value. Positive values cannot reuse the
+% unsigned encoding: whenever the last unsigned byte has bit 6 set (64..127,
+% 8192..16383, ...) a decoder would read the result as a negative number.
+encode_sleb128(Value) when Value >= -64, Value < 64 ->
+    <<(Value band 16#7F)>>;
+encode_sleb128(Value) when Value >= 0 ->
+    encode_sleb128_positive(Value);
+encode_sleb128(Value) ->
+    encode_sleb128_negative(Value).
+
+% Non-negative values: stop once nothing is left and bit 6 of the byte is
+% clear, otherwise emit a continuation byte (possibly ending with a 0 byte).
+encode_sleb128_positive(Value) ->
+    Byte = Value band 16#7F,
+    NewValue = Value bsr 7,
+    if
+        NewValue =:= 0, (Byte band 16#40) =:= 0 ->
+            <<Byte>>;
+        true ->
+            ByteWithCont = Byte bor 16#80,
+            Rest = encode_sleb128_positive(NewValue),
+            <<ByteWithCont, Rest/binary>>
+    end.
+
+% Negative values: bsr is an arithmetic shift, so the remaining value
+% becomes -1 once only sign bits are left; stop when bit 6 of the byte is
+% set as well.
+encode_sleb128_negative(Value) ->
+    Byte = Value band 16#7F,
+    NewValue = Value bsr 7,
+    if
+        NewValue == -1, (Byte band 16#40) =/= 0 ->
+            <<Byte>>;
+        true ->
+            ByteWithCont = Byte bor 16#80,
+            Rest = encode_sleb128_negative(NewValue),
+            <<ByteWithCont, Rest/binary>>
+    end.
+
+%% Generate type DIEs for Context structure and return the Context* type offset.
+%%
+%% Emits, in this order: the `term' base type, the `term x[17]' array type (with
+%% its subrange child), the `Context' structure (only the `x' member) and the
+%% `Context *' pointer type. References between DIEs are 32-bit little-endian
+%% offsets computed as BaseOffset plus the byte sizes of the DIEs emitted before.
+%%
+%% Returns {AllTypes, ContextPtrTypeOffset} where AllTypes is the concatenated
+%% binary and ContextPtrTypeOffset is the offset of the pointer type DIE.
+generate_type_dies(#dwarf{backend = Backend}, BaseOffset) ->
+    % Get word size from backend
+    WordSize = Backend:word_size(),
+
+    % Abbrev 6: term base type (uintptr_t)
+    TermTypeDIE = <<
+        6,  % Abbreviation code
+        "term", 0,  % Name
+        WordSize,  % Byte size
+        ?DW_ATE_unsigned  % Encoding (unsigned)
+    >>,
+    % The term type is the first DIE emitted, so it sits at BaseOffset
+    TermTypeOffset = BaseOffset,
+
+    % Abbrev 10: Array type for x[MAX_REG+1] (term x[17])
+    % Abbrev 11: Subrange type
+    XArraySubrangeDIE = <<
+        11,  % Abbreviation code
+        16  % Upper bound (MAX_REG = 16, so array is [0..16])
+    >>,
+    XArrayTypeDIE = <<
+        10,  % Abbreviation code
+        TermTypeOffset:32/little,  % Type (term)
+        XArraySubrangeDIE/binary,
+        0  % End of children
+    >>,
+    XArrayTypeOffset = BaseOffset + byte_size(TermTypeDIE),
+
+    % Abbrev 8: Context structure type
+    % Only include the x array member for now (most important for debugging)
+    % Byte offset of the x member inside Context, selected per backend
+    XOffset = case Backend of
+        jit_x86_64 -> 16#30;
+        jit_aarch64 -> 16#30;
+        _ -> 16#18  % riscv32 and armv6m
+    end,
+    XMemberDIE = <<
+        9,  % Abbreviation code
+        "x", 0,  % Name
+        XArrayTypeOffset:32/little,  % Type (term array)
+        XOffset:32/little  % Data member location
+    >>,
+    % Estimate Context size (actual size varies, but this is good enough)
+    ContextSize = 512,
+    ContextStructDIE = <<
+        8,  % Abbreviation code
+        "Context", 0,  % Name
+        ContextSize:32/little,  % Byte size
+        XMemberDIE/binary,
+        0  % End of children
+    >>,
+    ContextStructOffset = BaseOffset + byte_size(TermTypeDIE) + byte_size(XArrayTypeDIE),
+
+    % Abbrev 7: Context* pointer type
+    ContextPtrTypeDIE = <<
+        7,  % Abbreviation code
+        WordSize,  % Byte size
+        ContextStructOffset:32/little  % Type (Context)
+    >>,
+    ContextPtrTypeOffset = BaseOffset + byte_size(TermTypeDIE) + byte_size(XArrayTypeDIE) + byte_size(ContextStructDIE),
+
+    % Combine all type DIEs
+    AllTypes = <<TermTypeDIE/binary, XArrayTypeDIE/binary, ContextStructDIE/binary, ContextPtrTypeDIE/binary>>,
+
+    {AllTypes, ContextPtrTypeOffset}.
+
+%% Build the concatenated DW_TAG_subprogram DIEs (named module:func/arity) for
+%% every function with a non-negative offset, ordered by offset.
+%%
+%% A function ends where the next one starts; the last one ends at CodeSize.
+generate_function_dies_with_module(Functions, ModuleName, #dwarf{backend = Backend}, ContextPtrTypeOffset, CodeSize) ->
+    % Keep {Offset, Name, Arity} entries with Offset >= 0, sorted by address
+    Sorted = lists:sort([Entry || {Start, _Name, _Arity} = Entry <- Functions, Start >= 0]),
+
+    % End address of each function, in the same order as Sorted
+    Ends =
+        case Sorted of
+            [] ->
+                [];
+            [_First | Following] ->
+                [NextStart || {NextStart, _, _} <- Following] ++ [CodeSize]
+        end,
+
+    iolist_to_binary([
+        generate_function_die_with_module(
+            Start, Name, Arity, End - Start, ModuleName, Backend, ContextPtrTypeOffset
+        )
+     || {{Start, Name, Arity}, End} <- lists:zip(Sorted, Ends)
+    ]).
+
+%% Encode one DW_TAG_subprogram DIE (abbreviation 4) with a single `ctx'
+%% formal parameter child.
+%%
+%% The name is "Module:Function/Arity"; low_pc is Offset and high_pc is
+%% Offset + FunctionSize, both written little-endian on the backend word size.
+generate_function_die_with_module(Offset, FunctionName, Arity, FunctionSize, ModuleName, Backend, ContextPtrTypeOffset) ->
+    Name = list_to_binary(io_lib:format("~s:~s/~B", [ModuleName, FunctionName, Arity])),
+
+    % ctx lives in the register reported by the backend
+    CtxParam = generate_ctx_parameter_die(Backend:dwarf_ctx_register(), ContextPtrTypeOffset),
+
+    % Addresses are as wide as a machine word
+    AddrBits = Backend:word_size() * 8,
+    HighPc = Offset + FunctionSize,
+
+    <<
+        % Abbreviation code, then null-terminated DW_AT_name
+        4,
+        Name/binary,
+        0,
+        % DW_AT_low_pc / DW_AT_high_pc
+        Offset:AddrBits/little,
+        HighPc:AddrBits/little,
+        % Child DIE followed by the end-of-children marker
+        CtxParam/binary,
+        0
+    >>.
+
+%% Encode the DW_TAG_formal_parameter DIE (abbreviation 5) describing `ctx'.
+%%
+%% The location is a one-byte expression, ?DW_OP_reg0 + CtxRegNum, preceded by
+%% its ULEB128-encoded length; the type is a 32-bit little-endian reference.
+generate_ctx_parameter_die(CtxRegNum, ContextPtrTypeOffset) ->
+    % "value is in register CtxRegNum"
+    Expr = <<(?DW_OP_reg0 + CtxRegNum)>>,
+    ExprLen = encode_uleb128(byte_size(Expr)),
+    <<
+        5,
+        % Null-terminated DW_AT_name
+        "ctx",
+        0,
+        % DW_AT_type (reference to Context* type)
+        ContextPtrTypeOffset:32/little,
+        % DW_AT_location: length, then expression bytes
+        ExprLen/binary,
+        Expr/binary
+    >>.
+
+%% Build the concatenated DW_TAG_lexical_block DIEs for opcodes.
+%%
+%% Only {Offset, Opcode} pairs with Offset >= 0 are used (anything else in the
+%% list is ignored); DIEs are emitted in sorted order.
+generate_opcode_dies(Opcodes, Backend) ->
+    Kept = lists:filter(
+        fun
+            ({Offset, _Opcode}) -> Offset >= 0;
+            (_Other) -> false
+        end,
+        Opcodes
+    ),
+    iolist_to_binary([
+        generate_opcode_die(Offset, Opcode, Backend)
+     || {Offset, Opcode} <- lists:sort(Kept)
+    ]).
+
+%% Encode one DW_TAG_lexical_block DIE (abbreviation 2) named "Opcode@Offset",
+%% with low_pc = Offset written little-endian on the backend word size.
+generate_opcode_die(Offset, Opcode, Backend) ->
+    Name = list_to_binary(io_lib:format("~s@~B", [Opcode, Offset])),
+    AddrBits = Backend:word_size() * 8,
+    % Abbreviation code, null-terminated name, DW_AT_low_pc
+    <<2, Name/binary, 0, Offset:AddrBits/little>>.
+
+%% Build the concatenated DW_TAG_label DIEs for labels.
+%%
+%% Only {Offset, Label} pairs with Offset >= 0 are used (anything else in the
+%% list is ignored); DIEs are emitted in sorted order.
+generate_label_dies(Labels, Backend) ->
+    Kept = lists:filter(
+        fun
+            ({Offset, _Label}) -> Offset >= 0;
+            (_Other) -> false
+        end,
+        Labels
+    ),
+    iolist_to_binary([
+        generate_label_die(Offset, Label, Backend)
+     || {Offset, Label} <- lists:sort(Kept)
+    ]).
+
+%% Encode one DW_TAG_label DIE (abbreviation 3) named "label_N", with
+%% low_pc = Offset written little-endian on the backend word size.
+generate_label_die(Offset, Label, Backend) ->
+    Name = list_to_binary(io_lib:format("label_~B", [Label])),
+    AddrBits = Backend:word_size() * 8,
+    % Abbreviation code, null-terminated name, DW_AT_low_pc
+    <<3, Name/binary, 0, Offset:AddrBits/little>>.
+
+%% Generate the ELF symbol table and its string table.
+%%
+%% Symbols are created for every function ("Module:Function/Arity"), opcode
+%% ("Module:op_Opcode@Offset") and label ("Module:label_N@Offset"), plus a local
+%% "$t" mapping symbol at address 0 for the jit_armv6m backend (Thumb code).
+%% The entry layout (ELF32: 16 bytes, ELF64: 24 bytes) follows the backend word
+%% size; all symbols refer to section index 1 (.text).
+%%
+%% Returns {SymtabContent, StrtabContent}. The string table starts with the
+%% empty string and holds names in the order functions, opcodes, labels,
+%% mapping symbol. The symbol table holds the null symbol, then local symbols
+%% (mapping symbol), then global symbols (functions, opcodes, labels).
+generate_symbol_table(
+    #dwarf{functions = Functions, opcodes = Opcodes, labels = Labels, module_name = ModuleName},
+    Backend
+) ->
+    % Determine ELF format based on backend word size
+    WordSize = Backend:word_size(),
+
+    % Every entry is {Name, st_info, st_value, st_size}; raw offsets are used as
+    % addresses (no Thumb bit).
+    % Functions: STB_GLOBAL << 4 | STT_FUNC. The size is a fixed estimate (100),
+    % not the actual function size.
+    FunctionEntries = [
+        {symtab_name("~s:~s/~B", [ModuleName, FunctionName, Arity]), 16#12, Offset, 100}
+     || {Offset, FunctionName, Arity} <- Functions
+    ],
+    % Opcodes: STB_GLOBAL << 4 | STT_NOTYPE, with the recorded opcode size
+    OpcodeEntries = [
+        {symtab_name("~s:op_~s@~w", [ModuleName, Opcode, Offset]), 16#10, Offset, Size}
+     || {Offset, Opcode, Size} <- Opcodes
+    ],
+    % Labels: STB_GLOBAL << 4 | STT_NOTYPE, size 0 (point labels)
+    LabelEntries = [
+        {symtab_name("~s:label_~w@~w", [ModuleName, LabelNum, Offset]), 16#10, Offset, 0}
+     || {Offset, LabelNum} <- Labels
+    ],
+    % ARM mapping symbol: STB_LOCAL << 4 | STT_NOTYPE at the start of .text
+    MappingEntries =
+        case Backend of
+            jit_armv6m -> [{<<"$t">>, 16#00, 0, 0}];
+            _ -> []
+        end,
+    GlobalEntries = FunctionEntries ++ OpcodeEntries ++ LabelEntries,
+
+    % Single pass: append each name to the string table (which starts with the
+    % null string at offset 0) and encode the symbol with the offset at which
+    % its name was stored.
+    {StrtabContent, ReversedSymbols} = lists:foldl(
+        fun({Name, Info, Value, Size}, {Strtab, Symbols}) ->
+            NameOffset = byte_size(Strtab),
+            Symbol = symtab_entry(WordSize, NameOffset, Info, Value, Size),
+            {<<Strtab/binary, Name/binary, 0>>, [Symbol | Symbols]}
+        end,
+        {<<0>>, []},
+        GlobalEntries ++ MappingEntries
+    ),
+    {GlobalSymbols, LocalSymbols} = lists:split(
+        length(GlobalEntries), lists:reverse(ReversedSymbols)
+    ),
+
+    % Symbol table must have local symbols first, then global symbols
+    SymtabContent = iolist_to_binary([
+        symtab_null_entry(WordSize), LocalSymbols, GlobalSymbols
+    ]),
+
+    {SymtabContent, StrtabContent}.
+
+%% Format a symbol name as a binary.
+symtab_name(Format, Args) ->
+    list_to_binary(io_lib:format(Format, Args)).
+
+%% First symbol table entry: the all-zero null symbol.
+symtab_null_entry(8) -> <<0:(24 * 8)>>;
+symtab_null_entry(4) -> <<0:(16 * 8)>>.
+
+%% Encode one symbol table entry bound to section index 1 (.text).
+%% ELF64: st_name(4) + st_info(1) + st_other(1) + st_shndx(2) + st_value(8) + st_size(8)
+%% ELF32: st_name(4) + st_value(4) + st_size(4) + st_info(1) + st_other(1) + st_shndx(2)
+symtab_entry(8, NameOffset, Info, Value, Size) ->
+    <<NameOffset:32/little, Info, 0, 1:16/little, Value:64/little, Size:64/little>>;
+symtab_entry(4, NameOffset, Info, Value, Size) ->
+    <<NameOffset:32/little, Value:32/little, Size:32/little, Info, 0, 1:16/little>>.
+
+%% Concatenate the given binaries, appending a null terminator to each one.
+create_string_table(Binaries) ->
+    lists:foldl(
+        fun(Entry, Table) -> <<Table/binary, Entry/binary, 0>> end,
+        <<>>,
+        Binaries
+    ).
+
+%% Lay the sections out back to back starting at BaseOffset, followed by the
+%% section header string table.
+%%
+%% Returns {Data, Offsets}: Data is every section's bytes then ShStrTab, and
+%% Offsets is [ShStrTabOffset | SectionOffsets] with SectionOffsets in the same
+%% order as Sections.
+layout_sections(Sections, ShStrTab, BaseOffset) ->
+    {SectionOffsets, Payload} = lists:mapfoldl(
+        fun({_Name, Bytes}, Laid) ->
+            % This section starts right after everything laid out so far
+            {BaseOffset + byte_size(Laid), <<Laid/binary, Bytes/binary>>}
+        end,
+        <<>>,
+        Sections
+    ),
+    % The string table comes last in the data but first in the offset list
+    ShStrTabOffset = BaseOffset + byte_size(Payload),
+    {<<Payload/binary, ShStrTab/binary>>, [ShStrTabOffset | SectionOffsets]}.
+
+%% Create the ELF section header table.
+%%
+%% The table holds the null header (index 0), one header per entry of Sections
+%% (in order) and finally the header of the section header string table.
+%% SectionOffsets is the list returned by layout_sections/3: the string table
+%% file offset first, then the file offset of each section in order.
+%% Section names are located in ShStrTab by searching for the null-terminated
+%% name. WordSizeInBits (32 or 64) selects the ELF32 or ELF64 header layout.
+create_section_headers_proper(
+    SectionNames, Sections, SectionOffsets, ShStrTab, Backend, WordSizeInBits
+) ->
+    % Create null section header (index 0)
+    % Size depends on ELF format: 40 bytes (ELF32) or 64 bytes (ELF64)
+    SectionHeaderSizeBits =
+        case WordSizeInBits of
+            % 64 bytes * 8 bits
+            64 -> 512;
+            % 40 bytes * 8 bits
+            32 -> 320
+        end,
+    NullHeader = <<0:SectionHeaderSizeBits/little>>,
+
+    % SectionOffsets from layout_sections: [ShStrTabOffset, ...SectionOffsets in order...]
+    [ShStrTabFileOffset | SectionOffsetsInOrder] = SectionOffsets,
+
+    SectionHeaders = lists:map(
+        fun({{SectionName, SectionData}, FileOffset}) ->
+            % Calculate name offset in string table by finding the null-terminated section name
+            {NameOffset, _} = binary:match(ShStrTab, <<SectionName/binary, 0>>),
+
+            % Determine section type, properties, and flags
+            {SectionType, SectionFlags, Link, Info, EntrySize} =
+                case SectionName of
+                    <<".symtab">> ->
+                        % Find .strtab index dynamically
+                        StrtabIndex = find_section_index(<<".strtab">>, SectionNames),
+                        % Local symbols: null symbol + mapping symbol (for armv6m)
+                        NumLocalSymbols =
+                            case Backend of
+                                % null + $t mapping symbol
+                                jit_armv6m -> 2;
+                                % only null symbol
+                                _ -> 1
+                            end,
+                        % Entry size of one symbol: 16 bytes (ELF32) or 24 bytes (ELF64)
+                        SymTabEntrySize =
+                            case WordSizeInBits of
+                                32 -> 16;
+                                64 -> 24
+                            end,
+                        % link = .strtab index, info = index of first non-local symbol
+                        {?SHT_SYMTAB, 0, StrtabIndex, NumLocalSymbols, SymTabEntrySize};
+                    <<".strtab">> ->
+                        {?SHT_STRTAB, 0, 0, 0, 0};
+                    % ARM attributes
+                    <<".ARM.attributes">> ->
+                        {?SHT_ARM_ATTRIBUTES, 0, 0, 0, 0};
+                    % .text section - executable code
+                    <<".text">> ->
+                        {?SHT_PROGBITS, ?SHF_ALLOC bor ?SHF_EXECINSTR, 0, 0, 0};
+                    % Debug sections and other progbits
+                    _ ->
+                        {?SHT_PROGBITS, 0, 0, 0, 0}
+                end,
+
+            encode_section_header(
+                WordSizeInBits,
+                NameOffset,
+                SectionType,
+                SectionFlags,
+                FileOffset,
+                byte_size(SectionData),
+                Link,
+                Info,
+                EntrySize
+            )
+        end,
+        lists:zip(Sections, SectionOffsetsInOrder)
+    ),
+
+    % Section header string table header (the last section)
+    {ShStrTabNameOffset, _} = binary:match(ShStrTab, <<".shstrtab", 0>>),
+    ShStrTabHeader = encode_section_header(
+        WordSizeInBits,
+        ShStrTabNameOffset,
+        ?SHT_STRTAB,
+        0,
+        ShStrTabFileOffset,
+        byte_size(ShStrTab),
+        0,
+        0,
+        0
+    ),
+
+    iolist_to_binary([NullHeader, SectionHeaders, ShStrTabHeader]).
+
+%% Encode a single section header. Name, type, link and info are always 32-bit;
+%% flags, address, file offset, size, alignment and entry size are 32 or 64 bits
+%% wide depending on WordSizeInBits. Address is always 0 and alignment always 1.
+encode_section_header(
+    WordSizeInBits, NameOffset, Type, Flags, FileOffset, Size, Link, Info, EntrySize
+) ->
+    <<
+        NameOffset:32/little,
+        Type:32/little,
+        Flags:WordSizeInBits/little,
+        % Address
+        0:WordSizeInBits/little,
+        FileOffset:WordSizeInBits/little,
+        Size:WordSizeInBits/little,
+        Link:32/little,
+        Info:32/little,
+        % Address align
+        1:WordSizeInBits/little,
+        EntrySize:WordSizeInBits/little
+    >>.
+
+%% @doc Create a complete ELF whose first section is .text (the native code),
+%% followed by the given debug sections.
+%% Returns {ElfBinary, TextSectionOffset}; the offset is the ELF header size for
+%% the backend word size (64 bytes for ELF64, 52 bytes for ELF32), since .text
+%% is placed right after the ELF header.
+create_elf_with_text_and_debug_sections(Backend, DebugSections, NativeCode) ->
+    HeaderSize =
+        case Backend:word_size() of
+            8 -> 64;
+            4 -> 52
+        end,
+    Elf = create_elf_header_and_sections(Backend, [{<<".text">>, NativeCode} | DebugSections]),
+    {Elf, HeaderSize}.
+
+-endif.
diff --git a/libs/jit/src/jit_dwarf.hrl b/libs/jit/src/jit_dwarf.hrl
new file mode 100644
index 0000000000..4c071d5273
--- /dev/null
+++ b/libs/jit/src/jit_dwarf.hrl
@@ -0,0 +1,104 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+%% Numeric constants used to encode DWARF debug information and the ELF
+%% container that carries it.
+
+%% DWARF Tag constants
+-define(DW_TAG_compile_unit, 16#11).
+-define(DW_TAG_subprogram, 16#2e).
+-define(DW_TAG_lexical_block, 16#0b).
+-define(DW_TAG_label, 16#0a).
+-define(DW_TAG_formal_parameter, 16#05).
+-define(DW_TAG_pointer_type, 16#0f).
+-define(DW_TAG_structure_type, 16#13).
+-define(DW_TAG_member, 16#0d).
+-define(DW_TAG_array_type, 16#01).
+-define(DW_TAG_subrange_type, 16#21).
+-define(DW_TAG_base_type, 16#24).
+
+%% DWARF Attribute constants
+-define(DW_AT_name, 16#03).
+-define(DW_AT_comp_dir, 16#1b).
+-define(DW_AT_producer, 16#25).
+-define(DW_AT_language, 16#13).
+-define(DW_AT_low_pc, 16#11).
+-define(DW_AT_high_pc, 16#12).
+-define(DW_AT_stmt_list, 16#10).
+-define(DW_AT_type, 16#49).
+-define(DW_AT_data_member_location, 16#38).
+-define(DW_AT_byte_size, 16#0b).
+-define(DW_AT_encoding, 16#3e).
+-define(DW_AT_location, 16#02).
+-define(DW_AT_upper_bound, 16#2f).
+
+%% DWARF Form constants
+-define(DW_FORM_string, 16#08).
+-define(DW_FORM_addr, 16#01).
+-define(DW_FORM_data4, 16#06).
+-define(DW_FORM_data1, 16#0b).
+-define(DW_FORM_udata, 16#0f).
+-define(DW_FORM_ref4, 16#13).
+-define(DW_FORM_sec_offset, 16#17).
+-define(DW_FORM_exprloc, 16#18).
+
+%% DWARF Encoding constants
+-define(DW_ATE_unsigned, 16#07).
+-define(DW_ATE_signed, 16#05).
+
+%% DWARF Location expression opcodes
+%% DW_OP_reg0 is a base value: the opcode for register N is ?DW_OP_reg0 + N
+-define(DW_OP_reg0, 16#50).
+-define(DW_OP_fbreg, 16#91).
+
+%% DWARF Language constants
+-define(DW_LANG_C, 16#02).
+-define(DW_LANG_Erlang, 16#46).
+-define(DW_LANG_Elixir, 16#47).
+-define(DW_LANG_Gleam, 16#48).
+
+%% ELF constants
+%% EI_MAG0..EI_MAG3 are the four magic bytes: 16#7f followed by "ELF"
+-define(EI_MAG0, 16#7f).
+-define(EI_MAG1, $E).
+-define(EI_MAG2, $L).
+-define(EI_MAG3, $F).
+-define(ELFCLASS32, 1).
+-define(ELFCLASS64, 2).
+-define(ELFDATA2LSB, 1).
+-define(EV_CURRENT, 1).
+-define(ET_REL, 1).
+-define(EM_ARM, 40).
+-define(EM_X86_64, 62).
+-define(EM_AARCH64, 183).
+-define(EM_RISCV, 243).
+%% SHT_* are section header types, SHF_* are section header flags
+-define(SHT_PROGBITS, 1).
+-define(SHT_SYMTAB, 2).
+-define(SHT_STRTAB, 3).
+-define(SHT_ARM_ATTRIBUTES, 16#70000003).
+-define(SHF_ALLOC, 2).
+-define(SHF_EXECINSTR, 4).
+
+%% ARM EABI flags
+-define(EF_ARM_EABI_VER5, 16#05000000).
+-define(EF_ARM_ABI_FLOAT_SOFT, 16#00000200).
+-define(EF_ARM_ARCH_V6M, 16#00000009).
+
+%% DWARF register numbers
+%% These follow the DWARF register numbering conventions for each architecture
+-define(DWARF_RDI_REG_X86_64, 5).   % rdi register in x86_64
+-define(DWARF_X0_REG_AARCH64, 0).   % x0 register in aarch64
+-define(DWARF_A0_REG_RISCV32, 10).  % a0 register in RISC-V
+-define(DWARF_R0_REG_ARMV6M, 0).    % r0 register in ARM
diff --git a/libs/jit/src/jit_precompile.erl b/libs/jit/src/jit_precompile.erl
index 151e470c54..bd0476f68d 100644
--- a/libs/jit/src/jit_precompile.erl
+++ b/libs/jit/src/jit_precompile.erl
@@ -19,19 +19,47 @@
 %
 -module(jit_precompile).
 
--export([start/0, compile/3, atom_resolver/1, type_resolver/1]).
+-export([start/0, compile/4, atom_resolver/1, type_resolver/1]).
 
 -include_lib("jit.hrl").
 
+-include("compact_term.hrl").
+
 %% @doc Precompile BEAM files on command line
 start() ->
-    [Target, Dir | Files] = init:get_plain_arguments(),
-    lists:foreach(fun(File) -> compile(Target, Dir, File) end, Files).
+    % Plain arguments: Target, output Dir, an optional "-g" flag, then the
+    % BEAM files to precompile.
+    [Target, Dir | Files0] = init:get_plain_arguments(),
+    % DWARF debug information is only generated when "-g" is given
+    {Files, Dwarf} = case Files0 of
+        ["-g" | FilesT] -> {FilesT, true};
+        _ -> {Files0, false}
+    end,
+    lists:foreach(fun(File) -> compile(Target, Dir, Dwarf, File) end, Files).
 
-compile(Target, Dir, Path) ->
+%% @doc Parse target string to extract base architecture and requested variant
+%% The target is split on "+": the first part is the base architecture and
+%% every following part names a variant flag that is OR-ed into
+%% ?JIT_VARIANT_PIC, so repeating a variant does not change the result.
+%% An unknown variant raises error({unsupported_variant, Variant}).
+%% Examples:
+%%   "armv6m" -> {"armv6m", ?JIT_VARIANT_PIC}
+%%   "armv6m+float32" -> {"armv6m", ?JIT_VARIANT_PIC bor ?JIT_VARIANT_FLOAT32}
+%%   "x86_64" -> {"x86_64", ?JIT_VARIANT_PIC}
+parse_target(Target) ->
+    % string:split/3 with `all' always returns at least one element
+    [BaseTarget | Variants] = string:split(Target, "+", all),
+    RequestedVariant = lists:foldl(
+        fun
+            ("float32", Acc) -> Acc bor ?JIT_VARIANT_FLOAT32;
+            (Variant, _Acc) -> error({unsupported_variant, Variant})
+        end,
+        ?JIT_VARIANT_PIC,
+        Variants
+    ),
+    {BaseTarget, RequestedVariant}.
+
+compile(Target, Dir, Dwarf, Path) ->
     try
         {ok, InitialBinary} = file:read_file(Path),
-        {ok, _Module, InitialChunks} = beam_lib:all_chunks(InitialBinary),
+        {ok, Module, InitialChunks} = beam_lib:all_chunks(InitialBinary),
         FilteredChunks0 = lists:keydelete("avmN", 1, InitialChunks),
         FilteredChunks = lists:keydelete("Code", 1, FilteredChunks0),
         {"Code", CodeChunk} = lists:keyfind("Code", 1, InitialChunks),
@@ -62,27 +90,66 @@ compile(Target, Dir, Path) ->
             end,
         TypeResolver = type_resolver(TypesChunk),
 
-        Stream0 = jit_stream_binary:new(0),
-        <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> =
-            CodeChunk,
+        % Parse line table (Line chunk) for DWARF line information
+        LineResolver =
+            case lists:keyfind("Line", 1, InitialChunks) of
+                {"Line", LineTable} ->
+                    fun(LineRef) -> resolve_line_info(Module, LineTable, LineRef) end;
+                false ->
+                    io:format("LineResolver -- Line chunk not found\n"),
+                    % No line table - return false
+                    fun(_LineRef) -> false end
+            end,
+
+        % Parse target to extract arch and variant
+        {BaseTarget, RequestedVariant} = parse_target(Target),
+        Backend = list_to_atom("jit_" ++ BaseTarget),
 
         Arch =
-            case Target of
+            case BaseTarget of
                 "x86_64" -> ?JIT_ARCH_X86_64;
                 "aarch64" -> ?JIT_ARCH_AARCH64;
+                "armv6m" -> ?JIT_ARCH_ARMV6M;
+                "riscv32" -> ?JIT_ARCH_RISCV32;
                 _ -> error({unsupported_target, Target})
             end,
 
-        Stream1 = jit_stream_binary:append(
-            Stream0, jit:beam_chunk_header(LabelsCount, Arch, ?JIT_VARIANT_PIC)
-        ),
-        Backend = list_to_atom("jit_" ++ Target),
-        Stream2 = Backend:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1),
+        <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> =
+            CodeChunk,
+        <<InfoSize:32, Info:InfoSize/binary>> = jit:beam_chunk_header(LabelsCount, Arch, RequestedVariant),
+
+        Stream2 = case Dwarf of
+            true ->
+                Stream0 = jit_dwarf:new(Backend, Module, jit_stream_binary, 0, LineResolver),
+                Backend:new(RequestedVariant, jit_dwarf, Stream0);
+            false ->
+                Backend:new(RequestedVariant, jit_stream_binary, <<InfoSize:32, Info:InfoSize/binary>>)
+        end,
+
         {LabelsCount, Stream3} = jit:compile(
             CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2
         ),
-        NativeCode = Backend:stream(Stream3),
-        UpdatedChunks = FilteredChunks ++ [{"avmN", NativeCode}],
+
+        NewChunks =
+            case Dwarf of
+                true ->
+                    DwarfStream = Backend:stream(Stream3),
+                    NativeCode = jit_dwarf:stream(DwarfStream),
+
+                    case jit_dwarf:elf(DwarfStream, NativeCode) of
+                        false ->
+                            % No debug info - just store native code with info header
+                            [{"avmN", <<InfoSize:32, Info:InfoSize/binary, NativeCode/binary>>}];
+                        {ok, TextSectionOffset, ELF} ->
+                            % Update BEAM chunk header structure and combine with ELF.
+                            EmbeddedElfChunk = update_avmn_chunk_with_elf(Info, ELF, TextSectionOffset),
+                            [{"avmN", EmbeddedElfChunk}]
+                    end;
+                false ->
+                    [{"avmN", Backend:stream(Stream3)}]
+        end,
+
+        UpdatedChunks = FilteredChunks ++ NewChunks,
         {ok, Binary} = beam_lib:build_module(UpdatedChunks),
         Basename = filename:basename(Path),
         UpdatedFile = filename:join(Dir, Basename),
@@ -224,3 +291,133 @@ parse_extra(0, 0, 1, <<Value:8/unsigned, Rest/binary>>, LowerBound, UpperBound,
     parse_extra(0, 0, 0, Rest, LowerBound, UpperBound, Value + 1);
 parse_extra(0, 0, 0, Rest, LowerBound, UpperBound, Unit) ->
     {Rest, LowerBound, UpperBound, Unit}.
+
+%% @doc Rebuild the avmN chunk: Info with its offset field replaced, then ELF
+update_avmn_chunk_with_elf(Info, ElfBinary, TextSectionOffset) ->
+    % Parse Info: LabelsCount, Version, ArchCount, Arch, Variant and the old offset
+    <<LabelsCount:32, Version:16, ArchCount:16, Arch:16, Variant:16, _OldOffset:32>> = Info,
+
+    % New offset is the position of the .text section from the start of the ELF
+    NewOffset = TextSectionOffset,
+
+    % Re-encode Info with the same fields and the new offset
+    UpdatedInfo = <<LabelsCount:32, Version:16, ArchCount:16, Arch:16, Variant:16, NewOffset:32>>,
+
+    % Build updated chunk: 32-bit size of UpdatedInfo, UpdatedInfo, then the ELF binary
+    <<(byte_size(UpdatedInfo)):32, UpdatedInfo/binary, ElfBinary/binary>>.
+
+%% @doc Resolve a line reference to filename and line number
+resolve_line_info(
+    Module,
+    <<Version:32, _Flags:32, _NumInstr:32, NumRefs:32, _NumFilenames:32, Rest/binary>>,
+    LineRef
+) when Version =:= 0, LineRef > 0, LineRef =< NumRefs ->
+    resolve_line_info0(Module, 1, 0, LineRef, NumRefs, Rest, false);
+resolve_line_info(_Module, <<Version:32, _/binary>>, _) when Version =/= 0 ->
+    io:format("resolve_line_info -- unknown Line table version (~p)\n", [Version]),
+    false;
+resolve_line_info(
+    _Module,
+    <<_Version:32, _Flags:32, _NumInstr:32, _NumRefs:32, _NumFilenames:32, _Rest/binary>>,
+    0
+) ->
+    false;
+resolve_line_info(
+    _Module,
+    <<_Version:32, _Flags:32, _NumInstr:32, NumRefs:32, _NumFilenames:32, _Rest/binary>>,
+    LineRef
+) ->
+    io:format("resolve_line_info -- invalid lineref (~p) (NumRefs = ~p)\n", [LineRef, NumRefs]),
+    false.
+
+resolve_line_info0(
+    Module, CurrentLineRef, _CurrentLocationIx, _LineRef, NumRefs, LocationData, {Line, LocationIx}
+) when CurrentLineRef > NumRefs ->
+    resolve_line_info1(Module, LocationIx, LocationData, Line);
+resolve_line_info0(
+    Module,
+    LineRef,
+    CurrentLocationIx,
+    LineRef,
+    NumRefs,
+    <<_:4, ?COMPACT_INTEGER:4, _/binary>> = Bin,
+    false
+) ->
+    {Line, Rest} = jit:decode_value64(Bin),
+    resolve_line_info0(
+        Module, LineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, {Line, CurrentLocationIx}
+    );
+resolve_line_info0(
+    Module,
+    CurrentLineRef,
+    CurrentLocationIx,
+    LineRef,
+    NumRefs,
+    <<_:4, ?COMPACT_INTEGER:4, _/binary>> = Bin,
+    Acc
+) ->
+    {_Line, Rest} = jit:decode_value64(Bin),
+    resolve_line_info0(Module, CurrentLineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, Acc);
+resolve_line_info0(
+    Module,
+    LineRef,
+    CurrentLocationIx,
+    LineRef,
+    NumRefs,
+    <<Val:3, ?COMPACT_LARGE_INTEGER_11BITS:5, NextByte, Rest/binary>>,
+    false
+) ->
+    Line = (Val bsl 8) bor NextByte,
+    resolve_line_info0(
+        Module, LineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, {Line, CurrentLocationIx}
+    );
+resolve_line_info0(
+    Module,
+    CurrentLineRef,
+    CurrentLocationIx,
+    LineRef,
+    NumRefs,
+    <<_Val:3, ?COMPACT_LARGE_INTEGER_11BITS:5, _NextByte, Rest/binary>>,
+    Acc
+) ->
+    resolve_line_info0(Module, CurrentLineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, Acc);
+resolve_line_info0(
+    Module,
+    LineRef,
+    CurrentLocationIx,
+    LineRef,
+    NumRefs,
+    <<Size0:3, ?COMPACT_LARGE_INTEGER_NBITS:5, Line:(8 * (Size0 + 2))/signed, Rest/binary>>,
+    false
+) ->
+    resolve_line_info0(
+        Module, LineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, {Line, CurrentLocationIx}
+    );
+resolve_line_info0(
+    Module,
+    CurrentLineRef,
+    CurrentLocationIx,
+    LineRef,
+    NumRefs,
+    <<Size0:3, ?COMPACT_LARGE_INTEGER_NBITS:5, _:(8 * (Size0 + 2))/signed, Rest/binary>>,
+    Acc
+) ->
+    resolve_line_info0(Module, CurrentLineRef + 1, CurrentLocationIx, LineRef, NumRefs, Rest, Acc);
+resolve_line_info0(
+    Module,
+    CurrentLineRef,
+    _CurrentLocationIx,
+    LineRef,
+    NumRefs,
+    <<_:4, AtomTag:4, _/binary>> = Bin,
+    Acc
+) when AtomTag =:= ?COMPACT_LARGE_ATOM; AtomTag =:= ?COMPACT_ATOM ->
+    {NewLocationIx, Rest} = jit:decode_value64(Bin),
+    resolve_line_info0(Module, CurrentLineRef, NewLocationIx, LineRef, NumRefs, Rest, Acc).
+
+resolve_line_info1(Module, 0, _LocationData, Line) ->
+    {ok, <<(atom_to_binary(Module, utf8))/binary, ".erl">>, Line};
+resolve_line_info1(_Module, 1, <<Size:16, Filename:Size/binary, _/binary>>, Line) ->
+    {ok, Filename, Line};
+resolve_line_info1(Module, N, <<Size:16, _:Size/binary, Rest/binary>>, Line) ->
+    resolve_line_info1(Module, N - 1, Rest, Line).
diff --git a/libs/jit/src/jit_riscv32.erl b/libs/jit/src/jit_riscv32.erl
new file mode 100644
index 0000000000..f27bc35e40
--- /dev/null
+++ b/libs/jit/src/jit_riscv32.erl
@@ -0,0 +1,3075 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32).
+
+-export([
+    word_size/0,
+    new/3,
+    stream/1,
+    offset/1,
+    flush/1,
+    debugger/1,
+    used_regs/1,
+    available_regs/1,
+    free_native_registers/2,
+    assert_all_native_free/1,
+    jump_table/2,
+    update_branches/1,
+    call_primitive/3,
+    call_primitive_last/3,
+    call_primitive_with_cp/3,
+    return_if_not_equal_to_ctx/2,
+    jump_to_label/2,
+    jump_to_continuation/2,
+    jump_to_offset/2,
+    if_block/3,
+    if_else_block/4,
+    shift_right/3,
+    shift_left/3,
+    move_to_vm_register/3,
+    move_to_native_register/2,
+    move_to_native_register/3,
+    move_to_cp/2,
+    move_array_element/4,
+    move_to_array_element/4,
+    move_to_array_element/5,
+    set_bs/2,
+    copy_to_native_register/2,
+    get_array_element/3,
+    increment_sp/2,
+    set_continuation_to_label/2,
+    set_continuation_to_offset/1,
+    continuation_entry_point/1,
+    get_module_index/1,
+    and_/3,
+    or_/3,
+    add/3,
+    sub/3,
+    mul/3,
+    decrement_reductions_and_maybe_schedule_next/1,
+    call_or_schedule_next/2,
+    call_only_or_schedule_next/2,
+    call_func_ptr/3,
+    return_labels_and_lines/2,
+    add_label/2,
+    add_label/3
+]).
+
+-ifdef(JIT_DWARF).
+-export([
+    dwarf_opcode/2,
+    dwarf_label/2,
+    dwarf_function/3,
+    dwarf_line/2,
+    dwarf_ctx_register/0
+]).
+-endif.
+
+-compile([warnings_as_errors]).
+
+-include_lib("jit.hrl").
+
+-include("primitives.hrl").
+
+-ifdef(JIT_DWARF).
+-include("jit_dwarf.hrl").
+-endif.
+
+-define(ASSERT(Expr), true = Expr).
+
+%% RISC-V32 ILP32 ABI: a0-a7 are used for argument passing (8 registers).
+%% a0-a1 are used for return values (a0 for 32-bit, a0-a1 for 64-bit returns).
+%% s0-s11 are callee-saved registers (must be preserved across calls).
+%% t0-t6 are caller-saved temporary registers.
+%% sp is the stack pointer.
+%% ra is the return address register.
+%% zero (x0) is hardwired to constant 0.
+%% This implementation uses RV32IMC (base + multiply/compressed extensions).
+%%
+%% See: RISC-V Calling Convention
+%% https://riscv.org/wp-content/uploads/2024/12/riscv-calling.pdf
+%%
+%% Registers used by the JIT backend (RISC-V32):
+%%   - Argument/return: a0-a7 (up to 8 args in registers)
+%%   - Callee-saved: s0-s11 (must preserve)
+%%   - Temporaries: t0-t6 (caller-saved)
+%%   - Stack pointer: sp
+%%   - Return address: ra
+%%   - Zero register: zero (always 0)
+%%   - Available for JIT scratch: t0-t6 (7 temp registers)
+%%
+%% Note: base RV32I instructions are 32-bit and can address all 32 registers;
+%% the C extension adds 16-bit encodings, so code is only 2-byte aligned.
+%%
+%% For more details, refer to the RISC-V ILP32 Procedure Call Standard.
+
+-type riscv32_register() ::
+    a0
+    | a1
+    | a2
+    | a3
+    | a4
+    | a5
+    | a6
+    | a7
+    | t0
+    | t1
+    | t2
+    | t3
+    | t4
+    | t5
+    | t6
+    | s0
+    | s1
+    | s2
+    | s3
+    | s4
+    | s5
+    | s6
+    | s7
+    | s8
+    | s9
+    | s10
+    | s11
+    | sp
+    | ra.
+
+-define(IS_GPR(Reg),
+    (Reg =:= a0 orelse Reg =:= a1 orelse Reg =:= a2 orelse Reg =:= a3 orelse Reg =:= a4 orelse
+        Reg =:= a5 orelse Reg =:= a6 orelse Reg =:= a7 orelse Reg =:= t0 orelse Reg =:= t1 orelse
+        Reg =:= t2 orelse Reg =:= t3 orelse Reg =:= t4 orelse Reg =:= t5 orelse Reg =:= t6 orelse
+        Reg =:= s0 orelse Reg =:= s1 orelse Reg =:= s2 orelse Reg =:= s3 orelse Reg =:= s4 orelse
+        Reg =:= s5 orelse Reg =:= s6 orelse Reg =:= s7 orelse Reg =:= s8 orelse Reg =:= s9 orelse
+        Reg =:= s10 orelse Reg =:= s11 orelse Reg =:= sp orelse Reg =:= ra)
+).
+
+-type stream() :: any().
+
+-record(state, {
+    stream_module :: module(),
+    stream :: stream(),
+    offset :: non_neg_integer(),
+    branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}],
+    available_regs :: [riscv32_register()],
+    used_regs :: [riscv32_register()],
+    labels :: [{integer() | reference(), integer()}],
+    variant :: non_neg_integer()
+}).
+
+-type state() :: #state{}.
+-type immediate() :: non_neg_integer().
+-type vm_register() ::
+    {x_reg, non_neg_integer()} | {y_reg, non_neg_integer()} | {ptr, riscv32_register()}.
+-type value() :: immediate() | vm_register() | riscv32_register() | {ptr, riscv32_register()}.
+-type arg() :: ctx | jit_state | offset | value() | {free, value()} | {avm_int64_t, integer()}.
+
+-type maybe_free_riscv32_register() ::
+    {free, riscv32_register()} | riscv32_register().
+
+-type condition() ::
+    {riscv32_register(), '<', integer()}
+    | {maybe_free_riscv32_register(), '<', riscv32_register()}
+    | {maybe_free_riscv32_register(), '==', integer()}
+    | {maybe_free_riscv32_register(), '!=', riscv32_register() | integer()}
+    | {'(int)', maybe_free_riscv32_register(), '==', integer()}
+    | {'(int)', maybe_free_riscv32_register(), '!=', riscv32_register() | integer()}
+    | {'(bool)', maybe_free_riscv32_register(), '==', false}
+    | {'(bool)', maybe_free_riscv32_register(), '!=', false}
+    | {maybe_free_riscv32_register(), '&', non_neg_integer(), '!=', integer()}
+    | {{free, riscv32_register()}, '==', {free, riscv32_register()}}.
+
+% Context offsets (32-bit architecture)
+% ctx->e is 0x14
+% ctx->x is 0x18
+-define(CTX_REG, a0).
+-define(NATIVE_INTERFACE_REG, a2).
+-define(Y_REGS, {?CTX_REG, 16#14}).
+-define(X_REG(N), {?CTX_REG, 16#18 + (N * 4)}).
+-define(CP, {?CTX_REG, 16#5C}).
+-define(FP_REGS, {?CTX_REG, 16#60}).
+-define(BS, {?CTX_REG, 16#64}).
+-define(BS_OFFSET, {?CTX_REG, 16#68}).
+% JITSTATE is in a1 register (no prolog, following aarch64 model)
+-define(JITSTATE_REG, a1).
+% Return address register (like LR in AArch64)
+-define(RA_REG, ra).
+-define(JITSTATE_MODULE_OFFSET, 0).
+-define(JITSTATE_CONTINUATION_OFFSET, 16#4).
+-define(JITSTATE_REDUCTIONCOUNT_OFFSET, 16#8).
+-define(PRIMITIVE(N), {?NATIVE_INTERFACE_REG, N * 4}).
+-define(MODULE_INDEX(ModuleReg), {ModuleReg, 0}).
+
+-define(JUMP_TABLE_ENTRY_SIZE, 8).
+
+%% RISC-V32 register mappings
+
+%% Use t3 as temporary for some operations
+-define(IP_REG, t3).
+
+-define(IS_SINT8_T(X), is_integer(X) andalso X >= -128 andalso X =< 127).
+-define(IS_SINT32_T(X), is_integer(X) andalso X >= -16#80000000 andalso X < 16#80000000).
+-define(IS_UINT8_T(X), is_integer(X) andalso X >= 0 andalso X =< 255).
+-define(IS_UINT32_T(X), is_integer(X) andalso X >= 0 andalso X < 16#100000000).
+-define(IS_SIGNED_OR_UNSIGNED_INT32_T(X),
+    is_integer(X) andalso X >= -16#80000000 andalso X < 16#100000000
+).
+
+%% RISC-V32 ILP32 ABI register allocation:
+%% - a0: context pointer (reserved, passed as first parameter)
+%% - a1: jit state pointer; a0-a7 carry parameters to native functions
+%% - a2: native interface pointer (reserved)
+%% - t0-t6: temporaries, caller-saved, available for JIT use
+%% - s0-s11: callee-saved (would need to be saved/restored)
+-define(AVAILABLE_REGS, [t6, t5, t4, t3, t2, t1, t0]).
+-define(PARAMETER_REGS, [a0, a1, a2, a3, a4, a5, a6, a7]).
+-define(SCRATCH_REGS, [t6, t5, t4, t2, t1, t0]).
+
+-include("jit_backend_dwarf_impl.hrl").
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the word size in bytes, i.e. the sizeof(term) i.e.
+%% sizeof(uintptr_t)
+%%
+%% C code equivalent is:
+%% #if UINTPTR_MAX == UINT32_MAX
+%%    #define TERM_BYTES 4
+%% #elif UINTPTR_MAX == UINT64_MAX
+%%    #define TERM_BYTES 8
+%% #else
+%%    #error "Term size must be either 32 bit or 64 bit."
+%% #endif
+%%
+%% @end
+%% @return Word size in bytes
+%%-----------------------------------------------------------------------------
+-spec word_size() -> 4 | 8.
+word_size() -> 4.
+
+%%-----------------------------------------------------------------------------
+%% @doc Create a new backend state for provided variant, module and stream.
+%% @end
+%% @param Variant JIT variant to use (currently ?JIT_VARIANT_PIC)
+%% @param StreamModule module to stream instructions
+%% @param Stream stream state
+%% @return New backend state
+%%-----------------------------------------------------------------------------
+-spec new(any(), module(), stream()) -> state().
+new(Variant, StreamModule, Stream) ->
+    #state{
+        stream_module = StreamModule,
+        stream = Stream,
+        branches = [],
+        offset = StreamModule:offset(Stream),
+        available_regs = ?AVAILABLE_REGS,
+        used_regs = [],
+        labels = [],
+        variant = Variant
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @doc Access the stream object.
+%% @end
+%% @param State current backend state
+%% @return The stream object
+%%-----------------------------------------------------------------------------
+-spec stream(state()) -> stream().
+stream(#state{stream = Stream}) ->
+    Stream.
+
+%%-----------------------------------------------------------------------------
+%% @doc Get the current offset in the stream
+%% @end
+%% @param State current backend state
+%% @return The current offset
+%%-----------------------------------------------------------------------------
+-spec offset(state()) -> non_neg_integer().
+offset(#state{stream_module = StreamModule, stream = Stream}) ->
+    StreamModule:offset(Stream).
+
+%%-----------------------------------------------------------------------------
+%% @doc Flush the stream through the stream module.
+%% @end
+%% @param State current backend state
+%% @return The backend state holding the flushed stream
+%%-----------------------------------------------------------------------------
+-spec flush(state()) -> state().
+flush(#state{stream_module = StreamModule, stream = Stream0} = State) ->
+    Stream1 = StreamModule:flush(Stream0),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a debugger or breakpoint instruction. This is used for debugging
+%% and not in production.
+%% @end
+%% @param State current backend state
+%% @return The updated backend state
+%%-----------------------------------------------------------------------------
+-spec debugger(state()) -> state().
+debugger(#state{stream_module = StreamModule, stream = Stream0} = State) ->
+    Stream1 = StreamModule:append(Stream0, jit_riscv32_asm:c_ebreak()),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the list of currently used native registers. This is used for
+%% debugging and not in production.
+%% @end
+%% @param State current backend state
+%% @return The list of used registers
+%%-----------------------------------------------------------------------------
+-spec used_regs(state()) -> [riscv32_register()].
+used_regs(#state{used_regs = Used}) -> Used.
+
+%%-----------------------------------------------------------------------------
+%% @doc Return the list of currently available native scratch registers. This
+%% is used for debugging and not in production.
+%% @end
+%% @param State current backend state
+%% @return The list of available registers
+%%-----------------------------------------------------------------------------
+-spec available_regs(state()) -> [riscv32_register()].
+available_regs(#state{available_regs = Available}) -> Available.
+
+%%-----------------------------------------------------------------------------
+%% @doc Free native registers. The passed list of registers can contain
+%% registers, pointer to registers or other values that are ignored.
+%% @end
+%% @param State current backend state
+%% @param Regs list of registers or other values
+%% @return The updated backend state
+%%-----------------------------------------------------------------------------
+-spec free_native_registers(state(), [value()]) -> state().
+free_native_registers(State, []) ->
+    State;
+free_native_registers(State, [Reg | Rest]) ->
+    State1 = free_native_register(State, Reg),
+    free_native_registers(State1, Rest).
+
+-spec free_native_register(state(), value()) -> state().
+free_native_register(
+    #state{available_regs = Available0, used_regs = Used0} = State,
+    Reg
+) when
+    is_atom(Reg)
+->
+    {Available1, Used1} = free_reg(Available0, Used0, Reg),
+    State#state{available_regs = Available1, used_regs = Used1};
+free_native_register(State, {ptr, Reg}) ->
+    free_native_register(State, Reg);
+free_native_register(State, _Other) ->
+    State.
+
+%%-----------------------------------------------------------------------------
+%% @doc Assert that all native scratch registers are available. This is used
+%% for debugging and not in production.
+%% @end
+%% @param State current backend state
+%% @return ok
+%%-----------------------------------------------------------------------------
+-spec assert_all_native_free(state()) -> ok.
+assert_all_native_free(#state{
+    available_regs = ?AVAILABLE_REGS, used_regs = []
+}) ->
+    ok.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit the jump table at the beginning of the module. Branches will be
+%% updated afterwards with update_branches/1. Emit branches for labels from
+%% 0 (special entry for lines and labels information) to LabelsCount included
+%% (special entry for OP_INT_CALL_END).
+%%
+%% On this platform, each jump table entry is 8 bytes
+%% (?JUMP_TABLE_ENTRY_SIZE). Entries are emitted as placeholders and are
+%% rewritten by update_branches/1 to a PC-relative jump through a3, padded
+%% with a compressed nop if the pair encodes in fewer than 8 bytes:
+%% ```
+%% auipc a3, %pcrel_hi(label)
+%% jalr  zero, a3, %pcrel_lo(label)
+%% ```
+%%
+%% @end
+%% @param State current backend state
+%% @param LabelsCount number of labels in the module.
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec jump_table(state(), pos_integer()) -> state().
+jump_table(State, LabelsCount) ->
+    jump_table0(State, 0, LabelsCount).
+
+jump_table0(State, N, LabelsCount) when N > LabelsCount ->
+    State;
+jump_table0(
+    #state{stream_module = StreamModule, stream = Stream0, branches = Branches} = State,
+    N,
+    LabelsCount
+) ->
+    % Reserve an 8-byte placeholder for this entry (AUIPC + JALR once patched)
+    % This will be patched later in update_branches/1
+    Offset = StreamModule:offset(Stream0),
+    JumpEntry = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>,
+    Stream1 = StreamModule:append(Stream0, JumpEntry),
+
+    % Record the entry offset so both AUIPC and JALR can be patched for label N
+    Reloc = {N, Offset, jump_table_auipc_jalr},
+    UpdatedState = State#state{stream = Stream1, branches = [Reloc | Branches]},
+
+    jump_table0(UpdatedState, N + 1, LabelsCount).
+
+%%-----------------------------------------------------------------------------
+%% @doc Rewrite stream to update all branches for labels.
+%% @end
+%% @param State current backend state
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec update_branches(state()) -> state().
+update_branches(#state{branches = []} = State) ->
+    State;
+update_branches(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        branches = [{Label, Offset, Type} | BranchesT],
+        labels = Labels
+    } = State
+) ->
+    {Label, LabelOffset} = lists:keyfind(Label, 1, Labels),
+    Rel = LabelOffset - Offset,
+    NewInstr =
+        case Type of
+            {adr, Reg} when Rel rem 4 =:= 0 ->
+                % Generate pc_relative_address and pad to 8 bytes with NOP
+                I = pc_relative_address(Reg, Rel),
+                case byte_size(I) of
+                    4 -> <<I/binary, (jit_riscv32_asm:nop())/binary>>;
+                    6 -> <<I/binary, (jit_riscv32_asm:c_nop())/binary>>;
+                    8 -> I
+                end;
+            {adr, Reg} when Rel rem 4 =:= 2; Rel rem 4 =:= -2 ->
+                % Handle 2-byte aligned offsets and pad to 8 bytes
+                % Handle both positive and negative offsets (Erlang rem can be negative)
+                I = pc_relative_address(Reg, Rel),
+                case byte_size(I) of
+                    4 -> <<I/binary, (jit_riscv32_asm:nop())/binary>>;
+                    6 -> <<I/binary, (jit_riscv32_asm:c_nop())/binary>>;
+                    8 -> I
+                end;
+            {far_branch, TempReg} ->
+                % Check if branch can now be optimized to near branch
+                if
+                    Rel >= -1048576 andalso Rel =< 1048574 andalso (Rel rem 2) =:= 0 ->
+                        % RISC-V jal has ±1MB range
+                        % Optimize to near branch: jal + nops to fill original size
+                        DirectBranch = jit_riscv32_asm:jal(zero, Rel),
+                        case byte_size(DirectBranch) of
+                            2 ->
+                                <<DirectBranch/binary, (jit_riscv32_asm:c_nop())/binary,
+                                    (jit_riscv32_asm:nop())/binary>>;
+                            4 ->
+                                <<DirectBranch/binary, (jit_riscv32_asm:nop())/binary>>
+                        end;
+                    true ->
+                        % Keep far branch sequence: auipc + jalr (PC-relative, 8 bytes)
+                        % Split the relative offset into upper 20 bits and lower 12 bits
+                        Hi20 = (Rel + 16#800) bsr 12,
+                        Lo12 = Rel - (Hi20 bsl 12),
+                        I1 = jit_riscv32_asm:auipc(TempReg, Hi20),
+                        I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12),
+                        Entry = <<I1/binary, I2/binary>>,
+                        case byte_size(Entry) of
+                            6 -> <<Entry/binary, (jit_riscv32_asm:c_nop())/binary>>;
+                            8 -> Entry
+                        end
+                end;
+            jump_table_auipc_jalr ->
+                % Calculate PC-relative offset from AUIPC instruction to target
+                % AUIPC is at Offset, JALR is at Offset+4
+                % Target is at LabelOffset
+                % Offset from AUIPC PC to target
+                PCRelOffset = LabelOffset - Offset,
+
+                % Split into upper 20 bits and lower 12 bits
+                % RISC-V encodes: target = PC + (upper20 << 12) + sign_ext(lower12)
+                % If lower12 >= 0x800, it's negative when sign-extended, so add 1 to upper
+                Upper20 = (PCRelOffset + 16#800) bsr 12,
+                Lower12 = PCRelOffset band 16#FFF,
+                % Sign-extend lower 12 bits for JALR immediate
+                Lower12Signed =
+                    if
+                        Lower12 >= 16#800 -> Lower12 - 16#1000;
+                        true -> Lower12
+                    end,
+
+                % Encode AUIPC and JALR with computed offsets
+                I1 = jit_riscv32_asm:auipc(a3, Upper20),
+                I2 = jit_riscv32_asm:jalr(zero, a3, Lower12Signed),
+                % Map to 8 bytes
+                JumpTableEntry = <<I1/binary, I2/binary>>,
+                case byte_size(JumpTableEntry) of
+                    6 -> <<JumpTableEntry/binary, (jit_riscv32_asm:c_nop())/binary>>;
+                    8 -> JumpTableEntry
+                end
+        end,
+    Stream1 = StreamModule:replace(Stream0, Offset, NewInstr),
+    update_branches(State#state{stream = Stream1, branches = BranchesT}).
+
+%%-----------------------------------------------------------------------------
+%% @doc Generate code to load a primitive function pointer into a register
+%% @param Primitive index to the primitive to call
+%% @param TargetReg register to load the function pointer into
+%% @return Binary instruction sequence
+%%-----------------------------------------------------------------------------
+-spec load_primitive_ptr(non_neg_integer(), riscv32_register()) -> binary().
+load_primitive_ptr(Primitive, TargetReg) ->
+    case Primitive of
+        0 ->
+            jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, 0);
+        N when N * 4 =< 124 ->
+            jit_riscv32_asm:lw(TargetReg, ?NATIVE_INTERFACE_REG, N * 4);
+        N when N * 4 < 256 ->
+            % Can encode N * 4 directly in li instruction
+            I1 = jit_riscv32_asm:li(TargetReg, N * 4),
+            I2 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG),
+            I3 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0),
+            <<I1/binary, I2/binary, I3/binary>>;
+        N ->
+            % For very large primitive numbers, load N and shift left by 2 (multiply by 4)
+            I1 = jit_riscv32_asm:li(TargetReg, N),
+            I2 = jit_riscv32_asm:slli(TargetReg, TargetReg, 2),
+            I3 = jit_riscv32_asm:add(TargetReg, TargetReg, ?NATIVE_INTERFACE_REG),
+            I4 = jit_riscv32_asm:lw(TargetReg, TargetReg, 0),
+            <<I1/binary, I2/binary, I3/binary, I4/binary>>
+    end.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a call (call with return) to a primitive with arguments. This
+%% function converts arguments and passes them following the backend ABI
+%% convention. It also saves scratch registers we need to preserve.
+%% @end
+%% @param State current backend state
+%% @param Primitive index to the primitive to call
+%% @param Args arguments to pass to the primitive
+%% @return Updated backend state and a native register, from call_func_ptr/3
+%%-----------------------------------------------------------------------------
+-spec call_primitive(state(), non_neg_integer(), [arg()]) -> {state(), riscv32_register()}.
+call_primitive(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [TempReg | RestRegs],
+        used_regs = UsedRegs
+    } = State,
+    Primitive,
+    Args
+) ->
+    % Load the primitive's function pointer into the first available scratch register
+    PrepCall = load_primitive_ptr(Primitive, TempReg),
+    Stream1 = StreamModule:append(Stream0, PrepCall),
+    StateCall = State#state{
+        stream = Stream1,
+        available_regs = RestRegs,
+        used_regs = [TempReg | UsedRegs]
+    },
+    call_func_ptr(StateCall, {free, TempReg}, Args);
+call_primitive(
+    #state{available_regs = []} = State,
+    Primitive,
+    Args
+) ->
+    call_func_ptr(State, {primitive, Primitive}, Args).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a jump (call without return) to a primitive with arguments. This
+%% function converts arguments and pass them following the backend ABI
+%% convention.
+%% @end
+%% @param State current backend state
+%% @param Primitive index to the primitive to call
+%% @param Args arguments to pass to the primitive
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+call_primitive_last(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State0,
+    Primitive,
+    Args
+) ->
+    % We need a register for the function pointer that should not be used as a parameter
+    % Since we're not returning, we can use all scratch registers except
+    % registers used for parameters
+    ParamRegs = lists:sublist(?PARAMETER_REGS, length(Args)),
+    ArgsRegs = args_regs(Args),
+    ScratchRegs = ?AVAILABLE_REGS -- ArgsRegs -- ParamRegs,
+    [Temp | AvailableRegs1] = ScratchRegs,
+    UsedRegs = ?AVAILABLE_REGS -- AvailableRegs1,
+    PrepCall = load_primitive_ptr(Primitive, Temp),
+    Stream1 = StreamModule:append(Stream0, PrepCall),
+
+    State1 = State0#state{
+        stream = Stream1, available_regs = AvailableRegs1, used_regs = UsedRegs
+    },
+
+    % Preprocess offset special arg
+    Args1 = lists:map(
+        fun(Arg) ->
+            case Arg of
+                offset -> StreamModule:offset(Stream1);
+                _ -> Arg
+            end
+        end,
+        Args
+    ),
+
+    % In RISC-V, all up to 8 arguments fit in registers (a0-a7)
+    % Always use tail call when calling primitives in tail position
+    State4 =
+        case Args1 of
+            [FirstArg, jit_state | ArgsT] ->
+                % Use tail call
+                ArgsForTailCall = [FirstArg, jit_state_tail_call | ArgsT],
+                State2 = set_registers_args(State1, ArgsForTailCall, 0),
+                tail_call_with_jit_state_registers_only(State2, Temp)
+        end,
+    State4#state{available_regs = ?AVAILABLE_REGS, used_regs = []}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Tail call to address in register.
+%% RA is preserved across regular calls (call_func_ptr saves/restores it),
+%% so when the called C primitive returns, it returns to opcodesswitch.h.
+%% @end
+%% @param State current backend state
+%% @param Reg register containing the target address
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+tail_call_with_jit_state_registers_only(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State,
+    Reg
+) ->
+    % Jump to address in register (tail call)
+    I1 = jit_riscv32_asm:jr(Reg),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a return of a register's value unless that value equals ctx.
+%% This logic is used to break out to the scheduler, typically after signal
+%% messages have been processed.
+%% @end
+%% @param State current backend state
+%% @param Reg register to compare to ctx, given as {free, Reg} as it's always freed
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+return_if_not_equal_to_ctx(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    {free, Reg}
+) ->
+    % No separate compare is emitted: beq tests Reg against the ctx register itself
+    I2 =
+        case Reg of
+            % Return value is already in a0
+            a0 -> <<>>;
+            % Move to a0 (return register)
+            _ -> jit_riscv32_asm:mv(a0, Reg)
+        end,
+    I3 = jit_riscv32_asm:ret(),
+    % When Reg equals ctx, branch over the mv/ret pair (I2, I3) so nothing is returned
+    % The offset counts from the beq: its own 4 bytes plus the sizes of I2 and I3
+    I1 = jit_riscv32_asm:beq(Reg, ?CTX_REG, 4 + byte_size(I2) + byte_size(I3)),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    {AvailableRegs1, UsedRegs1} = free_reg(
+        AvailableRegs0, UsedRegs0, Reg
+    ),
+    State#state{
+        stream = Stream1,
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1
+    }.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a jump to a label. When the label is not known yet, a relocation
+%% is recorded so the jump can be fixed up later with `update_branches/2'.
+%% @end
+%% @param State current backend state
+%% @param Label to jump to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+jump_to_label(
+    #state{labels = KnownLabels, stream = StreamIn, stream_module = StreamModule} = StateIn,
+    Label
+) ->
+    Lookup = lists:keyfind(Label, 1, KnownLabels),
+    Here = StreamModule:offset(StreamIn),
+    {StateOut, Code} = branch_to_label_code(StateIn, Here, Label, Lookup),
+    StateOut#state{stream = StreamModule:append(StreamIn, Code)}.
+
+jump_to_offset(#state{stream = StreamIn, stream_module = StreamModule} = State, TargetOffset) ->
+    % Branch from the current end of the stream to the already known TargetOffset
+    Here = StreamModule:offset(StreamIn),
+    Code = branch_to_offset_code(State, Here, TargetOffset),
+    State#state{stream = StreamModule:append(StreamIn, Code)}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Jump to a continuation given as an offset held in a register.
+%% The offset is added to a PC-relative base address and the sum is jumped to.
+%% @end
+%% @param State current backend state
+%% @param {free, OffsetReg} register containing the offset value
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+jump_to_continuation(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _], % scratch for the address; never marked as used
+        offset = BaseOffset
+    } = State0,
+    {free, OffsetReg}
+) ->
+    % Calculate absolute address: native_code_base + target_offset
+    % where native_code_base = current_pc + (BaseOffset - CurrentStreamOffset)
+    CurrentStreamOffset = StreamModule:offset(Stream0),
+    NetOffset = BaseOffset - CurrentStreamOffset,
+
+    % Get native code base address into temporary register
+    I1 = pc_relative_address(Temp, NetOffset),
+    % Add target offset to get final absolute address
+    I2 = jit_riscv32_asm:add(Temp, Temp, OffsetReg),
+    % Indirect branch to the calculated absolute address
+    I3 = jit_riscv32_asm:jr(Temp),
+
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    % This is a tail jump, so the whole register allocation is reset (OffsetReg included)
+    State0#state{stream = Stream1, available_regs = ?AVAILABLE_REGS, used_regs = []}.
+
+branch_to_offset_code(_State, Offset, TargetOffset) when
+    TargetOffset - Offset =< 2050, TargetOffset - Offset >= -2044
+->
+    % Near branch: a single J instruction. NOTE(review): confirm bounds against j/1's range
+    Rel = TargetOffset - Offset,
+    jit_riscv32_asm:j(Rel);
+branch_to_offset_code(
+    #state{available_regs = [TempReg | _]}, Offset, TargetOffset
+) ->
+    % Far branch: use auipc + jalr sequence for PC-relative addressing
+    % This computes PC + Rel in two steps and jumps to it; TempReg is only a scratch
+
+    Rel = TargetOffset - Offset,
+    % Split the relative offset into upper 20 bits and lower 12 bits
+    % RISC-V PC-relative addressing: target = PC + (imm20 << 12) + sign_extend(imm12)
+    % Since jalr's imm12 is sign-extended, if bit 11 of Rel is set,
+    % we need to add 0x800 before splitting to compensate
+    Hi20 = (Rel + 16#800) bsr 12,
+    Lo12Unsigned = Rel band 16#FFF,
+    % Convert to signed 12-bit value: if bit 11 is set, subtract 4096
+    Lo12 =
+        if
+            Lo12Unsigned >= 16#800 -> Lo12Unsigned - 16#1000;
+            true -> Lo12Unsigned
+        end,
+
+    % TempReg = PC + (Hi20 << 12)
+    I1 = jit_riscv32_asm:auipc(TempReg, Hi20),
+    % Jump to TempReg + sign_extend(Lo12); rd is zero, so no return address is kept
+    I2 = jit_riscv32_asm:jalr(zero, TempReg, Lo12),
+    <<I1/binary, I2/binary>>.
+
+branch_to_label_code(State, Offset, Label, {Label, LabelOffset}) ->
+    CodeBlock = branch_to_offset_code(State, Offset, LabelOffset), % label offset already known
+    {State, CodeBlock};
+branch_to_label_code(
+    #state{available_regs = [TempReg | _], branches = Branches} = State0, Offset, Label, false
+) ->
+    % Label not known yet: reserve 8 bytes for a far branch (PC-relative auipc + jalr)
+    % and record a relocation carrying the scratch register for the later fix-up
+    % Placeholder: auipc TempReg, 0
+    % Placeholder: jalr zero, TempReg, 0
+    CodeBlock = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>,
+    % Add relocation entry: {Label, offset of the placeholder, kind of branch}
+    Reloc = {Label, Offset, {far_branch, TempReg}},
+    State1 = State0#state{branches = [Reloc | Branches]},
+    {State1, CodeBlock};
+branch_to_label_code(
+    #state{available_regs = [], branches = Branches} = State0, Offset, Label, false
+) ->
+    % RISC-V: Use t6 as scratch (caller-saved, safe to clobber)
+    % Far branch sequence using PC-relative auipc + jalr (8 bytes)
+    % NOTE(review): confirm t6 can never be a live register handed out by the allocator
+    % Placeholder: auipc t6, 0
+    % Placeholder: jalr zero, t6, 0
+    CodeBlock = <<16#FFFFFFFF:32, 16#FFFFFFFF:32>>,
+    % Add relocation entry
+    Reloc = {Label, Offset, {far_branch, t6}},
+    State1 = State0#state{branches = [Reloc | Branches]},
+    {State1, CodeBlock};
+branch_to_label_code(#state{available_regs = []}, _Offset, _Label, LabelLookup) ->
+    error({no_available_registers, LabelLookup}). % lookup was neither {Label, Offset} nor false
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit an if block, i.e. emit a test of a condition and conditionally
+%% execute a block.
+%% @end
+%% @param State current backend state
+%% @param Cond condition to test, or `{'and', [Cond]}' when several must all hold
+%% @param BlockFn function to emit the block that may be executed
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec if_block(state(), condition() | {'and', [condition()]}, fun((state()) -> state())) -> state().
+if_block(
+    #state{stream_module = StreamModule} = State0,
+    {'and', CondList},
+    BlockFn
+) ->
+    {Replacements, State1} = lists:foldl( % one placeholder branch per condition
+        fun(Cond, {AccReplacements, AccState}) ->
+            Offset = StreamModule:offset(AccState#state.stream),
+            {NewAccState, BranchInfo, ReplaceDelta} = if_block_cond(AccState, Cond),
+            {[{Offset + ReplaceDelta, BranchInfo} | AccReplacements], NewAccState}
+        end,
+        {[], State0},
+        CondList
+    ),
+    State2 = BlockFn(State1), % emit the guarded block
+    Stream2 = State2#state.stream,
+    OffsetAfter = StreamModule:offset(Stream2), % first offset past the block
+    Stream3 = lists:foldl( % re-encode every placeholder so it lands past the block
+        fun({ReplacementOffset, {BranchFunc, Reg, Operand}}, AccStream) ->
+            BranchOffset = OffsetAfter - ReplacementOffset, % relative to the branch itself
+            NewBranchInstr = apply(jit_riscv32_asm, BranchFunc, [Reg, Operand, BranchOffset]),
+            StreamModule:replace(AccStream, ReplacementOffset, NewBranchInstr)
+        end,
+        Stream2,
+        Replacements
+    ),
+    merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs);
+if_block(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    Cond,
+    BlockFn
+) ->
+    Offset = StreamModule:offset(Stream0),
+    {State1, {BranchFunc, Reg, Operand}, BranchInstrDelta} = if_block_cond(State0, Cond),
+    State2 = BlockFn(State1), % emit the guarded block
+    Stream2 = State2#state.stream,
+    OffsetAfter = StreamModule:offset(Stream2),
+    %% Patch the placeholder branch emitted by if_block_cond to jump to the end of the block
+    BranchInstrOffset = Offset + BranchInstrDelta,
+    BranchOffset = OffsetAfter - BranchInstrOffset,
+    NewBranchInstr = apply(jit_riscv32_asm, BranchFunc, [Reg, Operand, BranchOffset]),
+    Stream3 = StreamModule:replace(Stream2, BranchInstrOffset, NewBranchInstr),
+    merge_used_regs(State2#state{stream = Stream3}, State1#state.used_regs).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit an if else block, i.e. emit a test of a condition and
+%% conditionally execute a block or another block.
+%% @end
+%% @param State current backend state
+%% @param Cond condition to test
+%% @param BlockTrueFn function to emit the block that is executed if condition is true
+%% @param BlockFalseFn function to emit the block that is executed if condition is false
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec if_else_block(state(), condition(), fun((state()) -> state()), fun((state()) -> state())) ->
+    state().
+if_else_block(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    Cond,
+    BlockTrueFn,
+    BlockFalseFn
+) ->
+    Offset = StreamModule:offset(Stream0),
+    {State1, {BranchFunc, Reg, Operand}, BranchInstrDelta} = if_block_cond(State0, Cond),
+    BranchInstrOffset = Offset + BranchInstrDelta,
+    State2 = BlockTrueFn(State1),
+    Stream2 = State2#state.stream,
+    %% Emit unconditional branch to skip the else block (offset 0 for now, replaced below)
+    ElseJumpOffset = StreamModule:offset(Stream2),
+    ElseJumpInstr = jit_riscv32_asm:j(0),
+    Stream3 = StreamModule:append(Stream2, ElseJumpInstr),
+    %% Else block starts here.
+    OffsetAfter = StreamModule:offset(Stream3),
+    %% Patch the conditional branch to jump to the else block
+    ElseBranchOffset = OffsetAfter - BranchInstrOffset,
+    NewBranchInstr = apply(jit_riscv32_asm, BranchFunc, [Reg, Operand, ElseBranchOffset]),
+    Stream4 = StreamModule:replace(Stream3, BranchInstrOffset, NewBranchInstr),
+    %% Build the else block
+    StateElse = State2#state{ % else starts from the register allocation left by the condition
+        stream = Stream4,
+        used_regs = State1#state.used_regs,
+        available_regs = State1#state.available_regs
+    },
+    State3 = BlockFalseFn(StateElse),
+    Stream5 = State3#state.stream,
+    OffsetFinal = StreamModule:offset(Stream5),
+    %% Patch the unconditional branch to jump to the end (past the else block)
+    FinalJumpOffset = OffsetFinal - ElseJumpOffset,
+    NewElseJumpInstr = jit_riscv32_asm:j(FinalJumpOffset),
+    Stream6 = StreamModule:replace(Stream5, ElseJumpOffset, NewElseJumpInstr),
+    merge_used_regs(State3#state{stream = Stream6}, State2#state.used_regs).
+
+-spec if_block_cond(state(), condition()) ->
+    {
+        state(), % state once the test and its placeholder branch are emitted
+        {beq | bne | blt | bge, atom(), atom() | integer()}, % {BranchFunc, Reg, Operand}
+        non_neg_integer() % bytes emitted ahead of the placeholder branch
+    }.
+if_block_cond(#state{stream_module = StreamModule, stream = Stream0} = State0, {Reg, '<', 0}) ->
+    %% RISC-V: bge Reg, zero, offset (branch if Reg >= 0, i.e., NOT negative/NOT less than 0)
+    BranchInstr = jit_riscv32_asm:bge(Reg, zero, 0),
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = State0#state{stream = Stream1},
+    {State1, {bge, Reg, zero}, 0};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {Reg, '<', Val}
+) when is_atom(Reg), is_integer(Val), Val >= 0, Val =< 255 ->
+    % RISC-V: bge Reg, Temp, offset (branch if Reg >= Val, i.e., NOT less than)
+    % Val is first loaded into Temp; NOTE(review): same code as the next clause emits
+    [Temp | _] = State0#state.available_regs,
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    BranchInstr = jit_riscv32_asm:bge(Reg, Temp, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = State1#state{stream = Stream2},
+    {State2, {bge, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {Reg, '<', Val}
+) when is_atom(Reg), is_integer(Val) ->
+    % RISC-V: bge Reg, Temp, offset (branch if Reg >= Temp, i.e., NOT less than)
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    BranchInstr = jit_riscv32_asm:bge(Reg, Temp, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = State1#state{stream = Stream2},
+    {State2, {bge, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '<', RegB}
+) when is_atom(RegB) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    % RISC-V: bge Reg, RegB, offset (branch if Reg >= RegB, i.e., NOT less than)
+    BranchInstr = jit_riscv32_asm:bge(Reg, RegB, 0),
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, {bge, Reg, RegB}, 0};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0, {RegOrTuple, '==', 0}
+) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: bne Reg, zero, offset (branch if Reg != 0, i.e., NOT equal to 0)
+    BranchInstr = jit_riscv32_asm:bne(Reg, zero, 0),
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, {bne, Reg, zero}, 0};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '==', RegB}
+) when is_atom(RegB) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: bne Reg, RegB, offset (branch if Reg != RegB, i.e., NOT equal)
+    BranchInstr = jit_riscv32_asm:bne(Reg, RegB, 0),
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, {bne, Reg, RegB}, 0};
+%% Delegate (int) forms to regular forms since we only have 32-bit words
+if_block_cond(State, {'(int)', RegOrTuple, '==', 0}) ->
+    if_block_cond(State, {RegOrTuple, '==', 0});
+if_block_cond(State, {'(int)', RegOrTuple, '==', Val}) when is_integer(Val) ->
+    if_block_cond(State, {RegOrTuple, '==', Val});
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '!=', Val}
+) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: Load immediate into temp, then beq Reg, Temp, offset (skip block when equal)
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    BranchInstr = jit_riscv32_asm:beq(Reg, Temp, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {beq, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {RegOrTuple, '!=', Val}
+) when ?IS_GPR(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: beq Reg, Val, offset (branch if Reg == Val, i.e., NOT not-equal)
+    BranchInstr = jit_riscv32_asm:beq(Reg, Val, 0),
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream1},
+    {State2, {beq, Reg, Val}, 0};
+if_block_cond(State, {'(int)', RegOrTuple, '!=', Val}) when is_integer(Val) ->
+    if_block_cond(State, {RegOrTuple, '!=', Val});
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '==', Val}
+) when is_integer(Val) andalso Val >= 0 andalso Val =< 255 ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: Load immediate into temp, then bne Reg, Temp, offset (skip block when different)
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    BranchInstr = jit_riscv32_asm:bne(Reg, Temp, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {bne, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0} = State0,
+    {{free, RegA}, '==', {free, RegB}}
+) ->
+    %% RISC-V: bne RegA, RegB, offset (branch if RegA != RegB, i.e., NOT equal)
+    BranchInstr = jit_riscv32_asm:bne(RegA, RegB, 0),
+    Stream1 = StreamModule:append(Stream0, BranchInstr),
+    State1 = State0#state{stream = Stream1},
+    State2 = if_block_free_reg({free, RegA}, State1),
+    State3 = if_block_free_reg({free, RegB}, State2),
+    {State3, {bne, RegA, RegB}, 0};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '==', Val}
+) when is_integer(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    %% RISC-V: bne Reg, Temp, offset (branch if Reg != Temp, i.e., NOT equal)
+    BranchInstr = jit_riscv32_asm:bne(Reg, Temp, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {bne, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0,
+    {RegOrTuple, '!=', Val}
+) when is_integer(Val) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = mov_immediate(State0, Temp, Val),
+    Stream1 = State1#state.stream,
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    %% RISC-V: beq Reg, Temp, offset (branch if Reg == Temp, i.e., NOT not-equal)
+    BranchInstr = jit_riscv32_asm:beq(Reg, Temp, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State2 = if_block_free_reg(RegOrTuple, State1),
+    State3 = State2#state{stream = Stream2},
+    {State3, {beq, Reg, Temp}, BranchDelta};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {'(bool)', RegOrTuple, '==', false}
+) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: Test bit 0 by shifting to MSB, then branch if negative (bit was 1, NOT false)
+    I1 = jit_riscv32_asm:slli(Temp, Reg, 31),
+    Stream1 = StreamModule:append(Stream0, I1),
+    BranchInstr = jit_riscv32_asm:blt(Temp, zero, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream2},
+    {State2, {blt, Temp, zero}, byte_size(I1)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {'(bool)', RegOrTuple, '!=', false}
+) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: Test bit 0 by shifting to MSB, then branch if non-negative (bit was 0, NOT true)
+    I1 = jit_riscv32_asm:slli(Temp, Reg, 31),
+    Stream1 = StreamModule:append(Stream0, I1),
+    BranchInstr = jit_riscv32_asm:bge(Temp, zero, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream2},
+    {State2, {bge, Temp, zero}, byte_size(I1)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {RegOrTuple, '&', Val, '!=', 0}
+) ->
+    Reg =
+        case RegOrTuple of
+            {free, Reg0} -> Reg0;
+            RegOrTuple -> RegOrTuple
+        end,
+    %% RISC-V: Test bits using ANDI or li+and; the masked value always ends up in Temp
+    TestCode =
+        if
+            Val >= -2048 andalso Val =< 2047 ->
+                %% Can use ANDI instruction directly
+                jit_riscv32_asm:andi(Temp, Reg, Val);
+            true ->
+                %% Need to load immediate into temp register first
+                TestCode0 = jit_riscv32_asm:li(Temp, Val),
+                TestCode1 = jit_riscv32_asm:and_(Temp, Reg, Temp),
+                <<TestCode0/binary, TestCode1/binary>>
+        end,
+    OffsetBefore = StreamModule:offset(Stream0),
+    Stream1 = StreamModule:append(Stream0, TestCode),
+    BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+    %% Branch if result is zero (no bits set, NOT != 0)
+    BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State1 = if_block_free_reg(RegOrTuple, State0),
+    State2 = State1#state{stream = Stream2},
+    {State2, {beq, Temp, zero}, BranchDelta};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | _]
+    } = State0,
+    {Reg, '&', 16#F, '!=', 16#F}
+) when ?IS_GPR(Reg) ->
+    %% Reg & ?TERM_IMMED_TAG_MASK != ?TERM_INTEGER_TAG: low 4 bits all set iff (~Reg) << 28 == 0
+    I1 = jit_riscv32_asm:not_(Temp, Reg),
+    I2 = jit_riscv32_asm:slli(Temp, Temp, 28),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State1 = State0#state{stream = Stream2},
+    {State1, {beq, Temp, zero}, byte_size(I1) + byte_size(I2)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0
+    } = State0,
+    {{free, Reg} = RegTuple, '&', 16#F, '!=', 16#F}
+) when ?IS_GPR(Reg) ->
+    %% Same test as the clause above, but computed in place since Reg is being freed
+    I1 = jit_riscv32_asm:not_(Reg, Reg),
+    I2 = jit_riscv32_asm:slli(Reg, Reg, 28),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    BranchInstr = jit_riscv32_asm:beq(Reg, zero, 0),
+    Stream2 = StreamModule:append(Stream1, BranchInstr),
+    State1 = State0#state{stream = Stream2},
+    State2 = if_block_free_reg(RegTuple, State1),
+    {State2, {beq, Reg, zero}, byte_size(I1) + byte_size(I2)};
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | AT]
+    } = State0,
+    {Reg, '&', Mask, '!=', Val}
+) when ?IS_GPR(Reg) ->
+    %% RISC-V: AND with mask, then compare with value; Reg is kept, a copy in Temp is masked
+    OffsetBefore = StreamModule:offset(Stream0),
+    I1 = jit_riscv32_asm:mv(Temp, Reg),
+    Stream1 = StreamModule:append(Stream0, I1),
+    State1 = State0#state{stream = Stream1},
+    State2 = and_(State1#state{available_regs = AT}, Temp, Mask),
+    Stream2 = State2#state.stream,
+    %% Compare Temp with Val and branch if equal (NOT != Val)
+    case Val of
+        0 ->
+            %% Optimize comparison with zero
+            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
+            BranchInstr = jit_riscv32_asm:beq(Temp, zero, 0),
+            Stream3 = StreamModule:append(Stream2, BranchInstr),
+            State3 = State2#state{
+                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
+            },
+            {State3, {beq, Temp, zero}, BranchDelta};
+        _ when ?IS_GPR(Val) ->
+            %% Val is a register
+            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
+            BranchInstr = jit_riscv32_asm:beq(Temp, Val, 0),
+            Stream3 = StreamModule:append(Stream2, BranchInstr),
+            State3 = State2#state{
+                stream = Stream3, available_regs = [Temp | State2#state.available_regs]
+            },
+            {State3, {beq, Temp, Val}, BranchDelta};
+        _ ->
+            %% Val is an immediate - need second temp register
+            %% MaskReg is the next available register (presumably and_/3's mask scratch)
+            [MaskReg | AT2] = AT,
+            State3 = mov_immediate(State2#state{available_regs = AT2}, MaskReg, Val),
+            Stream3 = State3#state.stream,
+            BranchDelta = StreamModule:offset(Stream3) - OffsetBefore,
+            BranchInstr = jit_riscv32_asm:beq(Temp, MaskReg, 0),
+            Stream4 = StreamModule:append(Stream3, BranchInstr),
+            State4 = State3#state{
+                stream = Stream4, available_regs = [Temp, MaskReg | State3#state.available_regs]
+            },
+            {State4, {beq, Temp, MaskReg}, BranchDelta}
+    end;
+if_block_cond(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailRegs
+    } = State0,
+    {{free, Reg} = RegTuple, '&', Mask, '!=', Val}
+) when ?IS_GPR(Reg) ->
+    %% RISC-V: AND with mask, then compare with value; Reg is masked in place as it is freed
+    OffsetBefore = StreamModule:offset(Stream0),
+    State1 = and_(State0, Reg, Mask),
+    Stream1 = State1#state.stream,
+    %% Compare Reg with Val and branch if equal (NOT != Val)
+    case Val of
+        0 ->
+            %% Optimize comparison with zero
+            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+            BranchInstr = jit_riscv32_asm:beq(Reg, zero, 0),
+            Stream2 = StreamModule:append(Stream1, BranchInstr),
+            State2 = State1#state{stream = Stream2},
+            State3 = if_block_free_reg(RegTuple, State2),
+            {State3, {beq, Reg, zero}, BranchDelta};
+        _ when ?IS_GPR(Val) ->
+            %% Val is a register
+            BranchDelta = StreamModule:offset(Stream1) - OffsetBefore,
+            BranchInstr = jit_riscv32_asm:beq(Reg, Val, 0),
+            Stream2 = StreamModule:append(Stream1, BranchInstr),
+            State2 = State1#state{stream = Stream2},
+            State3 = if_block_free_reg(RegTuple, State2),
+            {State3, {beq, Reg, Val}, BranchDelta};
+        _ ->
+            %% Val is an immediate - need temp register
+            %% MaskReg is the first available register (presumably and_/3's mask scratch)
+            [MaskReg | AT] = State1#state.available_regs,
+            State2 = mov_immediate(State1#state{available_regs = AT}, MaskReg, Val),
+            Stream2 = State2#state.stream,
+            BranchDelta = StreamModule:offset(Stream2) - OffsetBefore,
+            BranchInstr = jit_riscv32_asm:beq(Reg, MaskReg, 0),
+            Stream3 = StreamModule:append(Stream2, BranchInstr),
+            State3 = State2#state{stream = Stream3, available_regs = AvailRegs},
+            State4 = if_block_free_reg(RegTuple, State3),
+            {State4, {beq, Reg, MaskReg}, BranchDelta}
+    end.
+
+%% Give back the register of a `{free, Reg}' operand to the allocator. A plain
+%% register operand stays allocated, so the state comes back unchanged.
+-spec if_block_free_reg(riscv32_register() | {free, riscv32_register()}, state()) -> state().
+if_block_free_reg({free, Reg}, State) ->
+    #state{used_regs = Used, available_regs = Available} = State,
+    {NewAvailable, NewUsed} = free_reg(Available, Used, Reg),
+    State#state{available_regs = NewAvailable, used_regs = NewUsed};
+if_block_free_reg(Reg, State) when ?IS_GPR(Reg) ->
+    % Plain register: nothing to release
+    State.
+
+%% Mark every listed register as used, pulling it out of the available ones.
+-spec merge_used_regs(state(), [riscv32_register()]) -> state().
+merge_used_regs(State, []) ->
+    State;
+merge_used_regs(#state{used_regs = Used, available_regs = Available} = State, [Reg | Rest]) ->
+    Merged =
+        case lists:member(Reg, Used) of
+            true ->
+                State;
+            false ->
+                State#state{
+                    used_regs = [Reg | Used],
+                    available_regs = lists:delete(Reg, Available)
+                }
+        end,
+    merge_used_regs(Merged, Rest).
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a right shift by a fixed number of bits (a division by 2^Shift).
+%% `{free, Reg}' is shifted in place; otherwise a new register gets the result.
+%% @param State current state
+%% @param Reg register to shift, possibly wrapped as `{free, Reg}'
+%% @param Shift number of bits to shift
+%% @return tuple of the new state and the register holding the result
+%%-----------------------------------------------------------------------------
+-spec shift_right(#state{}, maybe_free_riscv32_register(), non_neg_integer()) ->
+    {#state{}, riscv32_register()}.
+shift_right(#state{stream = Stream0, stream_module = StreamModule} = State, {free, Reg}, Shift) when
+    ?IS_GPR(Reg) andalso is_integer(Shift)
+->
+    % In-place variant: the caller gives up Reg, so it doubles as destination
+    ShiftInstr = jit_riscv32_asm:srli(Reg, Reg, Shift),
+    {State#state{stream = StreamModule:append(Stream0, ShiftInstr)}, Reg};
+shift_right(
+    #state{
+        used_regs = Used,
+        available_regs = [ResultReg | RestAvailable],
+        stream = Stream0,
+        stream_module = StreamModule
+    } = State,
+    Reg,
+    Shift
+) when ?IS_GPR(Reg) andalso is_integer(Shift) ->
+    % Reg stays live: the shifted value lands in the first available register
+    ShiftInstr = jit_riscv32_asm:srli(ResultReg, Reg, Shift),
+    Allocated = State#state{available_regs = RestAvailable, used_regs = [ResultReg | Used]},
+    Stream1 = StreamModule:append(Stream0, ShiftInstr),
+    {Allocated#state{stream = Stream1}, ResultReg}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit an in-place left shift of a register by a fixed number of bits,
+%% effectively multiplying it by 2^Shift.
+%% @param State current state
+%% @param Reg register to shift; it also receives the result
+%% @param Shift number of bits to shift
+%% @return new state
+%%-----------------------------------------------------------------------------
+shift_left(#state{stream = StreamIn, stream_module = StreamModule} = State, Reg, Shift) when
+    is_atom(Reg)
+->
+    % Reg is both the source and the destination of the slli
+    ShiftInstr = jit_riscv32_asm:slli(Reg, Reg, Shift),
+    State#state{stream = StreamModule:append(StreamIn, ShiftInstr)}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a call to a function pointer: spill the live registers, marshal
+%% the arguments per the backend ABI, `jalr', fetch the result and restore.
+%% @end
+%% @param State current backend state
+%% @param FuncPtrTuple either {free, Reg} or {primitive, PrimitiveIndex}
+%% @param Args arguments to pass to the function
+%% @return Updated backend state and return register
+%%-----------------------------------------------------------------------------
+-spec call_func_ptr(state(), {free, riscv32_register()} | {primitive, non_neg_integer()}, [arg()]) ->
+    {state(), riscv32_register()}.
+call_func_ptr(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State0,
+    FuncPtrTuple,
+    Args
+) ->
+    FreeRegs = lists:flatmap(
+        fun
+            ({free, {ptr, Reg}}) -> [Reg];
+            ({free, Reg}) when is_atom(Reg) -> [Reg];
+            (_) -> []
+        end,
+        [FuncPtrTuple | Args]
+    ),
+    UsedRegs1 = UsedRegs0 -- FreeRegs,
+    % ra is saved as well: the jalr emitted below links through it
+    SavedRegs = [?RA_REG, ?CTX_REG, ?JITSTATE_REG, ?NATIVE_INTERFACE_REG | UsedRegs1],
+
+    % Freed registers that belong to ?AVAILABLE_REGS go back to the pool
+    FreeGPRegs = FreeRegs -- (FreeRegs -- ?AVAILABLE_REGS),
+    AvailableRegs1 = FreeGPRegs ++ AvailableRegs0,
+
+    % Stack frame: one 4-byte slot per saved register, rounded up to 16 bytes
+    NumRegs = length(SavedRegs),
+    StackBytes = NumRegs * 4,
+    AlignedStackBytes = ((StackBytes + 15) div 16) * 16,
+
+    Stream1 = push_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream0),
+
+    % Set up arguments following RISC-V ILP32 calling convention
+    % Arguments are passed in a0-a7 (up to 8 register arguments)
+    Args1 = lists:map(
+        fun(Arg) ->
+            case Arg of
+                offset -> StreamModule:offset(Stream1); % stream offset after the spill code
+                _ -> Arg
+            end
+        end,
+        Args
+    ),
+
+    RegArgs0 = Args1,
+    RegArgsRegs = lists:flatmap(fun arg_to_reg_list/1, RegArgs0),
+
+    % The used registers were pushed to the stack, so those that do not hold an
+    % argument can serve as scratch, together with the currently available ones
+    SetArgsRegsOnlyAvailableArgs = (UsedRegs1 -- RegArgsRegs) ++ AvailableRegs0,
+    State1 = State0#state{
+        available_regs = SetArgsRegsOnlyAvailableArgs,
+        used_regs = ?AVAILABLE_REGS -- SetArgsRegsOnlyAvailableArgs,
+        stream = Stream1
+    },
+
+    ParameterRegs = parameter_regs(RegArgs0),
+    {Stream3, SetArgsAvailableRegs, FuncPtrReg, RegArgs} =
+        case FuncPtrTuple of
+            {free, FuncPtrReg0} ->
+                % If FuncPtrReg is a parameter register, move the pointer elsewhere first
+                case lists:member(FuncPtrReg0, ParameterRegs) of
+                    true ->
+                        case SetArgsRegsOnlyAvailableArgs -- ParameterRegs of
+                            [] ->
+                                % No scratch outside ParameterRegs: take over an argument
+                                % register outside ParameterRegs and move its value away
+                                [NewArgReg | _] = SetArgsRegsOnlyAvailableArgs,
+                                [FuncPtrReg1 | _] = RegArgsRegs -- ParameterRegs,
+                                MovInstr1 = jit_riscv32_asm:mv(NewArgReg, FuncPtrReg1),
+                                MovInstr2 = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0),
+                                SetArgsAvailableArgs1 =
+                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
+                                        [FuncPtrReg0],
+                                RegArgs1 = replace_reg(RegArgs0, FuncPtrReg1, NewArgReg),
+                                {
+                                    StreamModule:append(
+                                        State1#state.stream, <<MovInstr1/binary, MovInstr2/binary>>
+                                    ),
+                                    SetArgsAvailableArgs1,
+                                    FuncPtrReg1,
+                                    RegArgs1
+                                };
+                            [FuncPtrReg1 | _] ->
+                                MovInstr = jit_riscv32_asm:mv(FuncPtrReg1, FuncPtrReg0),
+                                SetArgsAvailableArgs1 =
+                                    (SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg1]) ++
+                                        [FuncPtrReg0],
+                                {
+                                    StreamModule:append(State1#state.stream, MovInstr),
+                                    SetArgsAvailableArgs1,
+                                    FuncPtrReg1,
+                                    RegArgs0
+                                }
+                        end;
+                    false ->
+                        SetArgsAvailableArgs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
+                        {State1#state.stream, SetArgsAvailableArgs1, FuncPtrReg0, RegArgs0}
+                end;
+            {primitive, Primitive} ->
+                [FuncPtrReg0 | _] = SetArgsRegsOnlyAvailableArgs -- ParameterRegs, % not a parameter register
+                SetArgsAvailableRegs1 = SetArgsRegsOnlyAvailableArgs -- [FuncPtrReg0],
+                PrepCall = load_primitive_ptr(Primitive, FuncPtrReg0),
+                Stream2 = StreamModule:append(State1#state.stream, PrepCall),
+                {Stream2, SetArgsAvailableRegs1, FuncPtrReg0, RegArgs0}
+        end,
+
+    State3 = State1#state{
+        available_regs = SetArgsAvailableRegs,
+        used_regs = ?AVAILABLE_REGS -- SetArgsAvailableRegs,
+        stream = Stream3
+    },
+
+    StackOffset = AlignedStackBytes,
+    State4 = set_registers_args(State3, RegArgs, ParameterRegs, StackOffset),
+    Stream4 = State4#state.stream,
+
+    % Call the function pointer (using JALR for call with return)
+    Call = jit_riscv32_asm:jalr(ra, FuncPtrReg, 0),
+    Stream5 = StreamModule:append(Stream4, Call),
+
+    % The result comes back in a0 and must end up in a register that is not
+    % reloaded by pop_registers. With 7 or more saved registers and a {free, _}
+    % function pointer, the saved slot of that register may be patched instead
+    {Stream6, UsedRegs2} =
+        case length(SavedRegs) of
+            N when N >= 7 andalso element(1, FuncPtrTuple) =:= free ->
+                % Overwrite the stack slot of the original FuncPtrReg with a0, so that
+                % pop_registers reloads the result into it (slot = index in SavedRegs * 4)
+                ResultReg = element(2, FuncPtrTuple),
+                RegIndex = index_of(ResultReg, SavedRegs),
+                case RegIndex >= 0 of
+                    true ->
+                        StoreResultStackOffset = RegIndex * 4,
+                        StoreResult = jit_riscv32_asm:sw(sp, a0, StoreResultStackOffset),
+                        {StreamModule:append(Stream5, StoreResult), [ResultReg | UsedRegs1]};
+                    false ->
+                        % FuncPtrReg was not in SavedRegs, use an available register
+                        [ResultReg1 | _] = AvailableRegs1 -- SavedRegs,
+                        MoveResult = jit_riscv32_asm:mv(ResultReg1, a0),
+                        {StreamModule:append(Stream5, MoveResult), [ResultReg1 | UsedRegs1]}
+                end;
+            _ ->
+                % Use any free that is not in SavedRegs
+                [ResultReg | _] = AvailableRegs1 -- SavedRegs,
+                MoveResult = jit_riscv32_asm:mv(ResultReg, a0),
+                {StreamModule:append(Stream5, MoveResult), [ResultReg | UsedRegs1]}
+        end,
+
+    Stream8 = pop_registers(SavedRegs, AlignedStackBytes, StreamModule, Stream6),
+    % NOTE(review): ResultReg is returned even when a0 went to ResultReg1 above; confirm they coincide
+    AvailableRegs2 = lists:delete(ResultReg, AvailableRegs1),
+    AvailableRegs3 = ?AVAILABLE_REGS -- (?AVAILABLE_REGS -- AvailableRegs2),
+    {
+        State4#state{
+            stream = Stream8,
+            available_regs = AvailableRegs3,
+            used_regs = UsedRegs2
+        },
+        ResultReg
+    }.
+
+% Native registers named by a call argument: bare atoms and {free, _} wrappers.
+arg_to_reg_list({free, {ptr, PtrReg}}) -> [PtrReg];
+arg_to_reg_list({free, Name}) when is_atom(Name) -> [Name];
+arg_to_reg_list(Other) -> [Other || is_atom(Other)].
+
+% 0-based position of the first occurrence of Item in List, or -1 when absent.
+index_of(Item, List) -> index_of(Item, List, 0).
+index_of(Item, [Item | _], Pos) -> Pos;
+index_of(Item, [_ | Tail], Pos) -> index_of(Item, Tail, Pos + 1);
+index_of(_, [], _) -> -1.
+
+% Reserve an AlignedStackBytes frame and spill SavedRegs into consecutive
+% 4-byte slots starting at 0(sp). An empty list emits no code at all.
+push_registers([], _AlignedStackBytes, _StreamModule, Stream0) ->
+    % Nothing to save, so no frame is created
+    Stream0;
+push_registers([_ | _] = SavedRegs, AlignedStackBytes, StreamModule, Stream0) ->
+    % addi sp, sp, -AlignedStackBytes
+    StackAdjust = jit_riscv32_asm:addi(sp, sp, -AlignedStackBytes),
+    Reserved = StreamModule:append(Stream0, StackAdjust),
+    % sw reg, slot(sp) for each register, in list order
+    Spill = fun(Reg, {Acc, Slot}) ->
+        {StreamModule:append(Acc, jit_riscv32_asm:sw(sp, Reg, Slot)), Slot + 4}
+    end,
+    {Spilled, _NextSlot} = lists:foldl(Spill, {Reserved, 0}, SavedRegs),
+    Spilled.
+
+% Reload SavedRegs from consecutive 4-byte slots starting at 0(sp), then
+% release the AlignedStackBytes frame. An empty list emits no code at all.
+pop_registers([], _AlignedStackBytes, _StreamModule, Stream0) ->
+    % Nothing was saved, so there is no frame to release
+    Stream0;
+pop_registers([_ | _] = SavedRegs, AlignedStackBytes, StreamModule, Stream0) ->
+    % lw reg, slot(sp) for each register, in list order
+    Reload = fun(Reg, {Acc, Slot}) ->
+        {StreamModule:append(Acc, jit_riscv32_asm:lw(Reg, sp, Slot)), Slot + 4}
+    end,
+    {Reloaded, _NextSlot} = lists:foldl(Reload, {Stream0, 0}, SavedRegs),
+    % addi sp, sp, AlignedStackBytes
+    StackAdjust = jit_riscv32_asm:addi(sp, sp, AlignedStackBytes),
+    StreamModule:append(Reloaded, StackAdjust).
+
+% Arity-3 convenience: derive the parameter registers from Args, then delegate.
+set_registers_args(State, Args, StackOffset) ->
+    set_registers_args(State, Args, parameter_regs(Args), StackOffset).
+
+% Load Args into ParamRegs and update the bookkeeping ({free, _} args are released).
+set_registers_args(
+    #state{used_regs = UsedRegs} = State0,
+    Args,
+    ParamRegs,
+    StackOffset
+) ->
+    ArgsRegs = args_regs(Args),
+    AvailableScratchGP = ((?SCRATCH_REGS -- ParamRegs) -- ArgsRegs) -- UsedRegs,
+    State1 = set_registers_args0(
+        State0, Args, ArgsRegs, ParamRegs, AvailableScratchGP, StackOffset
+    ),
+    NewUsedRegs = lists:foldl(
+        fun
+            ({free, {ptr, Reg}}, AccUsed) -> lists:delete(Reg, AccUsed);
+            ({free, Reg}, AccUsed) -> lists:delete(Reg, AccUsed);
+            (_, AccUsed) -> AccUsed
+        end,
+        UsedRegs,
+        Args
+    ),
+    % `--` is right-associative: group explicitly so that both lists are removed
+    State1#state{
+        available_regs = (?AVAILABLE_REGS -- ParamRegs) -- NewUsedRegs,
+        used_regs = ParamRegs ++ (NewUsedRegs -- ParamRegs)
+    }.
+
+% Registers used to pass Args, in argument order; a 64-bit argument
+% contributes two entries (low word register first).
+parameter_regs(Args) ->
+    parameter_regs0(Args, ?PARAMETER_REGS, []).
+
+% ILP32: a 64-bit argument takes the next two registers, low word first; the
+% RISC-V psABI requires an aligned (even) pair only for variadic arguments.
+% NOTE(review): a 64-bit argument with one register left is not split here.
+parameter_regs0([], _, Acc) ->
+    lists:reverse(Acc);
+parameter_regs0([{avm_int64_t, _} | T], [Lo, Hi | Rest], Acc) ->
+    parameter_regs0(T, Rest, [Hi, Lo | Acc]);
+parameter_regs0([_Other | T], [Reg | Rest], Acc) ->
+    parameter_regs0(T, Rest, [Reg | Acc]).
+
+% Replace the first argument equal to Reg1 or {free, Reg1} with Reg2.
+replace_reg(Args, Reg1, Reg2) ->
+    replace_reg0(Args, Reg1, Reg2, []).
+
+% Seen holds the already visited arguments, reversed; no clause handles [].
+replace_reg0([Head | Tail], Reg, New, Seen) when Head =:= Reg; Head =:= {free, Reg} ->
+    lists:reverse(Seen, [New | Tail]);
+replace_reg0([Head | Tail], Reg, New, Seen) ->
+    replace_reg0(Tail, Reg, New, [Head | Seen]).
+
+set_registers_args0(State, [], [], [], _AvailGP, _StackOffset) -> % every argument has been placed
+    State;
+set_registers_args0(State, [{free, FreeVal} | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset) ->
+    set_registers_args0(State, [FreeVal | ArgsT], ArgsRegs, ParamRegs, AvailGP, StackOffset); % placed like FreeVal
+set_registers_args0(
+    State, [ctx | ArgsT], [?CTX_REG | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset
+) -> % ctx is already in its parameter register: nothing to emit
+    set_registers_args0(State, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset);
+% A 64-bit immediate is split into two 32-bit immediates, low word first
+set_registers_args0(
+    State,
+    [{avm_int64_t, Value} | ArgsT],
+    ArgsRegs,
+    ParamRegs,
+    AvailGP,
+    StackOffset
+) when is_integer(Value) ->
+    LowPartUnsigned = Value band 16#FFFFFFFF,
+    HighPartUnsigned = (Value bsr 32) band 16#FFFFFFFF,
+    % Convert to signed 32-bit values for RISC-V li instruction
+    LowPart =
+        if
+            LowPartUnsigned > 16#7FFFFFFF -> LowPartUnsigned - 16#100000000;
+            true -> LowPartUnsigned
+        end,
+    HighPart =
+        if
+            HighPartUnsigned > 16#7FFFFFFF -> HighPartUnsigned - 16#100000000;
+            true -> HighPartUnsigned
+        end,
+    set_registers_args0(
+        State, [LowPart, HighPart | ArgsT], [imm | ArgsRegs], ParamRegs, AvailGP, StackOffset
+    );
+% The target is ?CTX_REG: ctx is needed to access x_reg/y_reg/fp_reg, so assert
+% that no later argument still reads it before it is overwritten
+set_registers_args0(
+    State, [Arg | ArgsT], [_ArgReg | ArgsRegs], [?CTX_REG | ParamRegs], AvailGP, StackOffset
+) ->
+    false = lists:member(?CTX_REG, ArgsRegs),
+    State1 = set_registers_args1(State, Arg, ?CTX_REG, StackOffset),
+    set_registers_args0(State1, ArgsT, ArgsRegs, ParamRegs, AvailGP, StackOffset);
+set_registers_args0(
+    #state{stream_module = StreamModule} = State0,
+    [Arg | ArgsT],
+    [_ArgReg | ArgsRegsT],
+    [ParamReg | ParamRegsT],
+    AvailGP,
+    StackOffset
+) ->
+    case lists:member(ParamReg, ArgsRegsT) of % is ParamReg still the source of a later argument?
+        false ->
+            State1 = set_registers_args1(State0, Arg, ParamReg, StackOffset),
+            set_registers_args0(State1, ArgsT, ArgsRegsT, ParamRegsT, AvailGP, StackOffset);
+        true ->
+            [Avail | AvailGPT] = AvailGP, % yes: copy its value to a scratch register first
+            I = jit_riscv32_asm:mv(Avail, ParamReg),
+            Stream1 = StreamModule:append(State0#state.stream, I),
+            State1 = set_registers_args1(
+                State0#state{stream = Stream1}, Arg, ParamReg, StackOffset
+            ),
+            NewArgsT = replace_reg(ArgsT, ParamReg, Avail), % the later argument now reads the copy
+            set_registers_args0(
+                State1, NewArgsT, ArgsRegsT, ParamRegsT, AvailGPT, StackOffset
+            )
+    end.
+
+set_registers_args1(State, Reg, Reg, _Offset) -> % the argument is already in its parameter register
+    State;
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0} = State,
+    jit_state,
+    ParamReg,
+    _StackOffset
+) ->
+    % jit_state is read from a1 here, so a move is only needed for another param reg
+    case ParamReg of
+        a1 ->
+            State;
+        _ ->
+            I = jit_riscv32_asm:mv(ParamReg, a1),
+            Stream1 = StreamModule:append(Stream0, I),
+            State#state{stream = Stream1}
+    end;
+% For tail calls, jit_state is already in a1 (only a1 is accepted as target)
+set_registers_args1(State, jit_state_tail_call, a1, _StackOffset) ->
+    State;
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0} = State,
+    {x_reg, extra},
+    Reg,
+    _StackOffset
+) ->
+    {BaseReg, Off} = ?X_REG(?MAX_REG), % the extra x register is slot ?MAX_REG
+    I = jit_riscv32_asm:lw(Reg, BaseReg, Off),
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {x_reg, X}, Reg, _StackOffset
+) ->
+    {XReg, X_REGOffset} = ?X_REG(X),
+    I = jit_riscv32_asm:lw(Reg, XReg, X_REGOffset),
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Source}, Reg, _StackOffset
+) ->
+    I = jit_riscv32_asm:lw(Reg, Source, 0), % pass the word Source points to
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = AvailRegs} = State,
+    {y_reg, X},
+    Reg,
+    _StackOffset
+) ->
+    Code = ldr_y_reg(Reg, X, AvailRegs),
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+set_registers_args1(
+    #state{stream_module = StreamModule, stream = Stream0} = State, ArgReg, Reg, _StackOffset
+) when
+    ?IS_GPR(ArgReg)
+->
+    I = jit_riscv32_asm:mv(Reg, ArgReg), % register to register copy
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1};
+set_registers_args1(State, Value, Reg, _StackOffset) when ?IS_SIGNED_OR_UNSIGNED_INT32_T(Value) ->
+    mov_immediate(State, Reg, Value). % 32-bit immediate argument
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move to a vm register (x_reg, y_reg, fpreg or a pointer on x_reg)
+%% from an immediate, a native register or another vm register.
+%% @end
+%% @param State current backend state
+%% @param Src value to move to vm register
+%% @param Dest vm register to move to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec move_to_vm_register(state(), Src :: value() | vm_register(), Dest :: vm_register()) ->
+    state().
+% Native register to VM register: a single store, except for y_reg
+move_to_vm_register(State0, Src, {x_reg, extra}) when is_atom(Src) ->
+    {BaseReg, Off} = ?X_REG(?MAX_REG),
+    I1 = jit_riscv32_asm:sw(BaseReg, Src, Off),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(State0, Src, {x_reg, X}) when is_atom(Src) ->
+    {BaseReg, Off} = ?X_REG(X),
+    I1 = jit_riscv32_asm:sw(BaseReg, Src, Off),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(State0, Src, {ptr, Reg}) when is_atom(Src) ->
+    I1 = jit_riscv32_asm:sw(Reg, Src, 0),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State0#state{stream = Stream1};
+move_to_vm_register(#state{available_regs = [Temp1 | AT]} = State0, Src, {y_reg, Y}) when
+    is_atom(Src)
+->
+    Code = str_y_reg(Src, Y, Temp1, AT),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
+    State0#state{stream = Stream1};
+% Integer in 0..255 to y_reg: li into Temp2; Temp1 and the rest go to str_y_reg
+move_to_vm_register(#state{available_regs = [Temp1, Temp2 | AT]} = State0, N, {y_reg, Y}) when
+    is_integer(N), N >= 0, N =< 255
+->
+    I1 = jit_riscv32_asm:li(Temp2, N),
+    YCode = str_y_reg(Temp2, Y, Temp1, AT),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, <<I1/binary, YCode/binary>>),
+    State0#state{stream = Stream1};
+% Integer in 0..255 to any other destination: li into a temp, then store the temp
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
+    is_integer(N), N >= 0, N =< 255
+->
+    I1 = jit_riscv32_asm:li(Temp, N),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+%% Any other integer: materialize it with mov_immediate/3, then store the temp
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, N, Dest) when
+    is_integer(N)
+->
+    State1 = mov_immediate(State0#state{available_regs = AT}, Temp, N),
+    State2 = move_to_vm_register(State1, Temp, Dest),
+    State2#state{available_regs = AR0};
+% Source is a VM register: load it into a temp, then store the temp as above
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, extra}, Dest) ->
+    {BaseReg, Off} = ?X_REG(?MAX_REG),
+    I1 = jit_riscv32_asm:lw(Temp, BaseReg, Off),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {x_reg, X}, Dest) ->
+    {XReg, X_REGOffset} = ?X_REG(X),
+    I1 = jit_riscv32_asm:lw(Temp, XReg, X_REGOffset),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {ptr, Reg}, Dest) ->
+    I1 = jit_riscv32_asm:lw(Temp, Reg, 0),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, I1),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+move_to_vm_register(#state{available_regs = [Temp | AT] = AR0} = State0, {y_reg, Y}, Dest) ->
+    Code = ldr_y_reg(Temp, Y, AT),
+    Stream1 = (State0#state.stream_module):append(State0#state.stream, Code),
+    State1 = move_to_vm_register(State0#state{stream = Stream1, available_regs = AT}, Temp, Dest),
+    State1#state{available_regs = AR0};
+% term_to_float: copy the word(s) at Reg + 4 (and Reg + 8) into fp register F
+move_to_vm_register(
+    #state{
+        stream_module = StreamModule,
+        available_regs = [Temp1, Temp2 | _],
+        stream = Stream0,
+        variant = Variant
+    } =
+        State0,
+    {free, {ptr, Reg, 1}},
+    {fp_reg, F}
+) ->
+    {BaseReg, Off} = ?FP_REGS,
+    I1 = jit_riscv32_asm:lw(Temp1, BaseReg, Off), % Temp1 = base of the fp registers
+    I2 = jit_riscv32_asm:lw(Temp2, Reg, 4),
+    case Variant band ?JIT_VARIANT_FLOAT32 of
+        0 ->
+            % Double precision: write both 32-bit parts
+            I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8),
+            I4 = jit_riscv32_asm:lw(Temp2, Reg, 8),
+            I5 = jit_riscv32_asm:sw(Temp1, Temp2, F * 8 + 4),
+            Code = <<I1/binary, I2/binary, I3/binary, I4/binary, I5/binary>>;
+        _ ->
+            % Single precision: write only first 32-bit part
+            I3 = jit_riscv32_asm:sw(Temp1, Temp2, F * 4),
+            Code = <<I1/binary, I2/binary, I3/binary>>
+    end,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State1 = free_native_register(State0, Reg), % the source was passed as {free, _}
+    State1#state{stream = Stream1}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a move of the 32-bit array element Reg[Index] to a vm or a native register.
+%% @end
+%% @param State current backend state
+%% @param Reg base register of the array
+%% @param Index index in the array, as an integer or a native register
+%% @param Dest vm or native register to move to
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec move_array_element(
+    state(),
+    riscv32_register(),
+    non_neg_integer() | riscv32_register(),
+    vm_register() | riscv32_register()
+) -> state().
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Index,
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4), % constant index: byte offset is Index * 4
+    {BaseReg, Off} = ?X_REG(X),
+    I2 = jit_riscv32_asm:sw(BaseReg, Temp, Off),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1}; % Temp was only borrowed: the register lists are unchanged
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    Reg,
+    Index,
+    {ptr, Dest}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_riscv32_asm:lw(Temp, Reg, Index * 4),
+    I2 = jit_riscv32_asm:sw(Dest, Temp, 0), % store to the address held in Dest
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp1, Temp2 | AT]} =
+        State,
+    Reg,
+    Index,
+    {y_reg, Y}
+) when is_atom(Reg) andalso is_integer(Index) ->
+    I1 = jit_riscv32_asm:lw(Temp2, Reg, Index * 4), % element in Temp2; Temp1 is handed to str_y_reg
+    YCode = str_y_reg(Temp2, Y, Temp1, AT),
+    Code = <<I1/binary, YCode/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | AT]} =
+        State,
+    {free, Reg},
+    Index,
+    {y_reg, Y}
+) when is_integer(Index) ->
+    I1 = jit_riscv32_asm:lw(Reg, Reg, Index * 4), % the base register is overwritten by the element
+    YCode = str_y_reg(Reg, Y, Temp, AT),
+    Code = <<I1/binary, YCode/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    State#state{stream = Stream1}; % NOTE(review): Reg is not returned to available_regs here
+move_array_element(
+    #state{stream_module = StreamModule, stream = Stream0} = State, Reg, Index, Dest
+) when is_atom(Dest) andalso is_integer(Index) ->
+    I1 = jit_riscv32_asm:lw(Dest, Reg, Index * 4), % native register destination: a single load
+    Stream1 = StreamModule:append(Stream0, I1),
+    State#state{stream = Stream1};
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {x_reg, X}
+) when X < ?MAX_REG andalso is_atom(IndexReg) ->
+    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2), % index -> byte offset
+    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg), % element address
+    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0), % element value, reusing IndexReg
+    {BaseReg, Off} = ?X_REG(X),
+    I4 = jit_riscv32_asm:sw(BaseReg, IndexReg, Off),
+    {AvailableRegs1, UsedRegs1} = free_reg(AvailableRegs0, UsedRegs0, IndexReg),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    };
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {ptr, PtrReg}
+) when is_atom(IndexReg) ->
+    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2), % same address computation as above
+    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
+    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
+    I4 = jit_riscv32_asm:sw(PtrReg, IndexReg, 0),
+    {AvailableRegs1, UsedRegs1} = free_reg(
+        AvailableRegs0, UsedRegs0, IndexReg
+    ),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    };
+move_array_element(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Temp | AT] = AvailableRegs0,
+        used_regs = UsedRegs0
+    } = State,
+    Reg,
+    {free, IndexReg},
+    {y_reg, Y}
+) when is_atom(IndexReg) ->
+    I1 = jit_riscv32_asm:slli(IndexReg, IndexReg, 2), % same address computation as above
+    I2 = jit_riscv32_asm:add(IndexReg, Reg, IndexReg),
+    I3 = jit_riscv32_asm:lw(IndexReg, IndexReg, 0),
+    Code = str_y_reg(IndexReg, Y, Temp, AT),
+    I4 = Code,
+    {AvailableRegs1, UsedRegs1} = free_reg(
+        AvailableRegs0, UsedRegs0, IndexReg
+    ),
+    Stream1 = StreamModule:append(
+        Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>
+    ),
+    State#state{
+        available_regs = AvailableRegs1,
+        used_regs = UsedRegs1,
+        stream = Stream1
+    }.
+
+%% @doc Load the 32-bit array element `Reg[Index]' into a native register and
+%% return that register. A `{free, Reg}' base is overwritten with the element;
+%% otherwise the first available register is taken and marked as used.
+-spec get_array_element(
+    state(), riscv32_register() | {free, riscv32_register()}, non_neg_integer()
+) ->
+    {state(), riscv32_register()}.
+get_array_element(
+    #state{stream_module = Mod, stream = Code0} = State,
+    {free, Reg},
+    Index
+) ->
+    % The base register may be clobbered: reuse it as destination
+    Load = jit_riscv32_asm:lw(Reg, Reg, Index * 4),
+    Code1 = Mod:append(Code0, Load),
+    {State#state{stream = Code1}, Reg};
+get_array_element(
+    #state{
+        stream_module = Mod,
+        stream = Code0,
+        available_regs = [Dest | Remaining],
+        used_regs = Used
+    } = State,
+    Base,
+    Index
+) ->
+    % The base register must be preserved: load into a freshly allocated one
+    Load = jit_riscv32_asm:lw(Dest, Base, Index * 4),
+    Code1 = Mod:append(Code0, Load),
+    % Dest moves from the available list to the used list
+    State1 = State#state{
+        stream = Code1, available_regs = Remaining, used_regs = [Dest | Used]
+    },
+    {State1, Dest}.
+
+%% @doc Store an integer, a vm register or a native register into the 32-bit
+%% array slot `Reg[Index]', `Index' being an integer or a native register.
+-spec move_to_array_element(
+    state(), integer() | vm_register() | riscv32_register(), riscv32_register(), non_neg_integer()
+) -> state().
+move_to_array_element(
+    #state{stream_module = Mod, stream = Code0} = State,
+    ValueReg,
+    Reg,
+    Index
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso is_integer(Index) ->
+    % Constant index: one store with an immediate byte offset
+    Store = jit_riscv32_asm:sw(Reg, ValueReg, Index * 4),
+    State#state{stream = Mod:append(Code0, Store)};
+move_to_array_element(
+    #state{stream_module = Mod, stream = Code0, available_regs = [Addr | _]} = State,
+    ValueReg,
+    Reg,
+    IndexReg
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(Reg) andalso ?IS_GPR(IndexReg) ->
+    % Addr = Reg + IndexReg * 4; Addr is borrowed from the pool, not reserved
+    Code = <<
+        (jit_riscv32_asm:mv(Addr, IndexReg))/binary,
+        (jit_riscv32_asm:slli(Addr, Addr, 2))/binary,
+        (jit_riscv32_asm:add(Addr, Reg, Addr))/binary,
+        (jit_riscv32_asm:sw(Addr, ValueReg, 0))/binary
+    >>,
+    State#state{stream = Mod:append(Code0, Code)};
+move_to_array_element(State0, Value, Reg, Index) ->
+    % Value is not in a native register yet: copy it into one, retry with that
+    % register, then hand the register back
+    {State1, Temp} = copy_to_native_register(State0, Value),
+    State2 = move_to_array_element(State1, Temp, Reg, Index),
+    free_native_register(State2, Temp).
+
+%% @doc Store a value into the 32-bit array slot `BaseReg[IndexReg + Offset]';
+%% `Offset' counts elements (not bytes), as in the register clauses below.
+move_to_array_element(
+    State,
+    Value,
+    BaseReg,
+    IndexReg,
+    Offset
+) when is_integer(IndexReg) andalso is_integer(Offset) ->
+    % Constant index: fold the element offset into it and reuse the arity-4
+    % path, which scales the resulting index by 4
+    move_to_array_element(State, Value, BaseReg, IndexReg + Offset);
+move_to_array_element(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State,
+    ValueReg,
+    BaseReg,
+    IndexReg,
+    Offset
+) when ?IS_GPR(ValueReg) andalso ?IS_GPR(IndexReg) andalso is_integer(Offset) ->
+    % Temp = BaseReg + (IndexReg + Offset) * 4; Temp is borrowed, not reserved
+    I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset),
+    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
+    I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp),
+    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary, I4/binary>>),
+    State#state{stream = Stream1};
+move_to_array_element(State0, Value, BaseReg, IndexReg, Offset) ->
+    % Value is not in a native register: copy it into one, store, then release it
+    {State1, ValueReg} = copy_to_native_register(State0, Value),
+    [Temp | _] = State1#state.available_regs,
+    I1 = jit_riscv32_asm:addi(Temp, IndexReg, Offset),
+    I2 = jit_riscv32_asm:slli(Temp, Temp, 2),
+    I3 = jit_riscv32_asm:add(Temp, BaseReg, Temp),
+    I4 = jit_riscv32_asm:sw(Temp, ValueReg, 0),
+    Stream1 = (State1#state.stream_module):append(
+        State1#state.stream, <<I1/binary, I2/binary, I3/binary, I4/binary>>
+    ),
+    State2 = State1#state{stream = Stream1},
+    free_native_register(State2, ValueReg).
+
+-spec move_to_native_register(state(), value() | cp) -> {state(), riscv32_register()}.
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    cp
+) ->
+    {BaseReg, Off} = ?CP, % cp is loaded from the location given by ?CP
+    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+move_to_native_register(State, Reg) when is_atom(Reg) -> % already a native register: no code
+    {State, Reg};
+move_to_native_register(
+    #state{stream_module = StreamModule, stream = Stream0} = State, {ptr, Reg}
+) when is_atom(Reg) ->
+    I1 = jit_riscv32_asm:lw(Reg, Reg, 0), % dereference in place: Reg now holds the pointed-to word
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1}, Reg};
+move_to_native_register(
+    #state{
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State0,
+    Imm
+) when
+    is_integer(Imm)
+->
+    State1 = State0#state{used_regs = [Reg | Used], available_regs = AvailT}, % reserve Reg first
+    {move_to_native_register(State1, Imm, Reg), Reg}; % the arity-3 variant emits the immediate load
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {x_reg, extra}
+) ->
+    {BaseReg, Off} = ?X_REG(?MAX_REG), % the extra x register is slot ?MAX_REG
+    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Off),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {x_reg, X}
+) when
+    X < ?MAX_REG
+->
+    {BaseReg, Offset} = ?X_REG(X),
+    I1 = jit_riscv32_asm:lw(Reg, BaseReg, Offset),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    {y_reg, Y}
+) ->
+    Code = ldr_y_reg(Reg, Y, AvailT), % the remaining available registers are passed to ldr_y_reg
+    Stream1 = StreamModule:append(Stream0, Code),
+    {State#state{stream = Stream1, available_regs = AvailT, used_regs = [Reg | Used]}, Reg};
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [RegA, RegB | AvailT],
+        used_regs = Used
+    } = State,
+    {fp_reg, F}
+) ->
+    {BaseReg, Off} = ?FP_REGS,
+    I1 = jit_riscv32_asm:lw(RegB, BaseReg, Off), % RegB = base of the fp registers
+    I2 = jit_riscv32_asm:lw(RegA, RegB, F * 8), % word at F * 8
+    I3 = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4), % word at F * 8 + 4, replacing the base
+    Code = <<I1/binary, I2/binary, I3/binary>>,
+    Stream1 = StreamModule:append(Stream0, Code),
+    {
+        State#state{stream = Stream1, available_regs = AvailT, used_regs = [RegB, RegA | Used]},
+        {fp, RegA, RegB} % NOTE(review): a register pair, not the single register of the -spec
+    }.
+
+%% @doc Emit code loading a value into a destination chosen by the caller.
+%% The source may be a native register, an integer immediate, a pointer held
+%% in a register, an x register (including the extra one), a y register, or a
+%% floating point register, in which case the destination is the register
+%% pair {fp, RegA, RegB}. Only the stream of the state is extended; the lists
+%% of available and used registers are returned as they were given.
+-spec move_to_native_register(state(), value(), riscv32_register()) -> state().
+move_to_native_register(
+    #state{stream_module = Mod, stream = Stream} = State, SrcReg, DstReg
+) when is_atom(SrcReg) ->
+    % Plain register to register copy.
+    Move = jit_riscv32_asm:mv(DstReg, SrcReg),
+    State#state{stream = Mod:append(Stream, Move)};
+move_to_native_register(State, Imm, DstReg) when is_integer(Imm) ->
+    mov_immediate(State, DstReg, Imm);
+move_to_native_register(
+    #state{stream_module = Mod, stream = Stream} = State, {ptr, AddrReg}, DstReg
+) when ?IS_GPR(AddrReg) ->
+    % Dereference the address held in AddrReg.
+    Load = jit_riscv32_asm:lw(DstReg, AddrReg, 0),
+    State#state{stream = Mod:append(Stream, Load)};
+move_to_native_register(
+    #state{stream_module = Mod, stream = Stream} = State, {x_reg, extra}, DstReg
+) ->
+    % The extra x register is the slot indexed by ?MAX_REG.
+    {Base, Disp} = ?X_REG(?MAX_REG),
+    Load = jit_riscv32_asm:lw(DstReg, Base, Disp),
+    State#state{stream = Mod:append(Stream, Load)};
+move_to_native_register(
+    #state{stream_module = Mod, stream = Stream} = State, {x_reg, X}, DstReg
+) when X < ?MAX_REG ->
+    {Base, Disp} = ?X_REG(X),
+    Load = jit_riscv32_asm:lw(DstReg, Base, Disp),
+    State#state{stream = Mod:append(Stream, Load)};
+move_to_native_register(
+    #state{stream_module = Mod, stream = Stream, available_regs = Free} = State,
+    {y_reg, Y},
+    DstReg
+) ->
+    % ldr_y_reg/3 picks its own scratch register out of the free ones.
+    State#state{stream = Mod:append(Stream, ldr_y_reg(DstReg, Y, Free))};
+move_to_native_register(
+    #state{stream_module = Mod, stream = Stream} = State,
+    {fp_reg, F},
+    {fp, RegA, RegB}
+) ->
+    {Base, Disp} = ?FP_REGS,
+    % RegB first holds the pointer read from ?FP_REGS; the two 32-bit words of
+    % entry F (8 bytes each) are then read through it, RegB being loaded last
+    % because the load overwrites the pointer.
+    LoadPtr = jit_riscv32_asm:lw(RegB, Base, Disp),
+    LoadFirst = jit_riscv32_asm:lw(RegA, RegB, F * 8),
+    LoadSecond = jit_riscv32_asm:lw(RegB, RegB, F * 8 + 4),
+    Loads = <<LoadPtr/binary, LoadFirst/binary, LoadSecond/binary>>,
+    State#state{stream = Mod:append(Stream, Loads)}.
+
+%% @doc Emit code copying a value into a newly reserved native register.
+%% A register source, or a pointer held in a register, is copied into the
+%% first available register so that the source register is left untouched.
+%% Every other case, including an empty list of available registers, is
+%% delegated to move_to_native_register/2.
+-spec copy_to_native_register(state(), value()) -> {state(), riscv32_register()}.
+copy_to_native_register(
+    #state{
+        stream_module = Mod,
+        stream = Stream,
+        available_regs = [Copy | Free],
+        used_regs = Used
+    } = State,
+    Reg
+) when is_atom(Reg) ->
+    Emitted = Mod:append(Stream, jit_riscv32_asm:mv(Copy, Reg)),
+    {State#state{stream = Emitted, available_regs = Free, used_regs = [Copy | Used]}, Copy};
+copy_to_native_register(
+    #state{
+        stream_module = Mod,
+        stream = Stream,
+        available_regs = [Copy | Free],
+        used_regs = Used
+    } = State,
+    {ptr, Reg}
+) when is_atom(Reg) ->
+    Emitted = Mod:append(Stream, jit_riscv32_asm:lw(Copy, Reg, 0)),
+    {State#state{stream = Emitted, available_regs = Free, used_regs = [Copy | Used]}, Copy};
+copy_to_native_register(State, Value) ->
+    move_to_native_register(State, Value).
+
+%% @doc Emit code copying y register Y into the cp slot (?CP).
+%% The first available register carries the value; it is not reserved.
+move_to_cp(
+    #state{stream_module = Mod, stream = Stream, available_regs = [Scratch | Others]} = State,
+    {y_reg, Y}
+) ->
+    {CpBase, CpOff} = ?CP,
+    Load = ldr_y_reg(Scratch, Y, Others),
+    Store = jit_riscv32_asm:sw(CpBase, Scratch, CpOff),
+    State#state{stream = Mod:append(Stream, <<Load/binary, Store/binary>>)}.
+
+%% @doc Emit code adding Offset words (Offset * 4 bytes) to the value stored
+%% at ?Y_REGS. The first available register is used as scratch, not reserved.
+increment_sp(
+    #state{stream_module = Mod, stream = Stream, available_regs = [Scratch | _]} = State,
+    Offset
+) ->
+    {Base, Disp} = ?Y_REGS,
+    % Read, adjust, write back through the same slot.
+    Code = <<
+        (jit_riscv32_asm:lw(Scratch, Base, Disp))/binary,
+        (jit_riscv32_asm:addi(Scratch, Scratch, Offset * 4))/binary,
+        (jit_riscv32_asm:sw(Base, Scratch, Disp))/binary
+    >>,
+    State#state{stream = Mod:append(Stream, Code)}.
+
+%% @doc Emit code making the continuation of the jit state point at a label.
+%% As on AArch64, the address is computed relative to pc: an 8-byte
+%% placeholder is emitted and an {adr, Temp} relocation is recorded in
+%% branches, to be resolved to the actual address of the label (not its jump
+%% table entry). The result is stored at ?JITSTATE_CONTINUATION_OFFSET.
+set_continuation_to_label(
+    #state{
+        stream_module = Mod,
+        stream = Stream,
+        available_regs = [Scratch | _],
+        branches = Pending
+    } = State,
+    Label
+) ->
+    PatchAt = Mod:offset(Stream),
+    % Two instruction slots (auipc + addi) left as all ones so that they can
+    % still be programmed in flash once the relocation is resolved.
+    Placeholder = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
+    % jit_state is in a1
+    StoreCont = jit_riscv32_asm:sw(?JITSTATE_REG, Scratch, ?JITSTATE_CONTINUATION_OFFSET),
+    Emitted = Mod:append(Stream, <<Placeholder/binary, StoreCont/binary>>),
+    State#state{stream = Emitted, branches = [{Label, PatchAt, {adr, Scratch}} | Pending]}.
+
+%% @doc Set the continuation to an offset that is not known yet.
+%% Returns the new state together with a fresh reference. The reference is
+%% recorded in branches with an {adr, Temp} relocation, so the 8-byte
+%% placeholder emitted here is filled in by update_branches once the offset
+%% is known. This is only used with OP_WAIT_TIMEOUT, where the target lies
+%% shortly after the current code.
+set_continuation_to_offset(
+    #state{
+        stream_module = Mod,
+        stream = Stream,
+        available_regs = [Scratch | _],
+        branches = Pending
+    } = State
+) ->
+    TargetRef = make_ref(),
+    PatchAt = Mod:offset(Stream),
+    % All-ones placeholder (two instruction slots) for flash programming.
+    Placeholder = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
+    % jit_state is in a1
+    StoreCont = jit_riscv32_asm:sw(?JITSTATE_REG, Scratch, ?JITSTATE_CONTINUATION_OFFSET),
+    Emitted = Mod:append(Stream, <<Placeholder/binary, StoreCont/binary>>),
+    Reloc = {TargetRef, PatchAt, {adr, Scratch}},
+    {State#state{stream = Emitted, branches = [Reloc | Pending]}, TargetRef}.
+
+%% @doc Implement a continuation entry point.
+%% This backend emits nothing for it: the state is returned unchanged.
+-spec continuation_entry_point(#state{}) -> #state{}.
+continuation_entry_point(Unchanged) ->
+    Unchanged.
+
+%% @doc Emit code fetching the module index into a newly reserved register.
+%% The register is taken from the head of the available registers and added
+%% to the used ones; it is returned along with the new state.
+get_module_index(
+    #state{
+        stream_module = Mod,
+        stream = Stream,
+        available_regs = [Dst | Free],
+        used_regs = Used
+    } = State
+) ->
+    % Read the module pointer from jit_state (in a1), then the word at offset
+    % 0 of that module.
+    LoadModule = jit_riscv32_asm:lw(Dst, ?JITSTATE_REG, ?JITSTATE_MODULE_OFFSET),
+    LoadIndex = jit_riscv32_asm:lw(Dst, Dst, 0),
+    Emitted = Mod:append(Stream, <<LoadModule/binary, LoadIndex/binary>>),
+    {State#state{stream = Emitted, available_regs = Free, used_regs = [Dst | Used]}, Dst}.
+
+%% @doc Perform an AND of a register with an immediate, in place.
+%% The mask 16#FFFFFF is applied with a shift left / shift right pair. Values
+%% from -256 to -1 are loaded as their bitwise complement and inverted again
+%% with not before the AND; any other value is loaded unchanged. The immediate
+%% goes through the first available register, which is still available on
+%% return. With no available register, a0 serves as scratch and is saved to
+%% and restored from ?IP_REG around the sequence.
+and_(#state{stream_module = Mod, stream = Stream} = State, Reg, 16#FFFFFF) ->
+    % Shifting left then right by 8 drops the top byte of the register.
+    Up = jit_riscv32_asm:slli(Reg, Reg, 8),
+    Down = jit_riscv32_asm:srli(Reg, Reg, 8),
+    State#state{stream = Mod:append(Stream, <<Up/binary, Down/binary>>)};
+and_(#state{stream_module = Mod, available_regs = [Scratch | Rest]} = State0, Reg, Val) when
+    Val < 0 andalso Val >= -256
+->
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Scratch, bnot (Val)),
+    % No bic on RISC-V: invert the complement back, then and.
+    Invert = jit_riscv32_asm:not_(Scratch, Scratch),
+    Mask = jit_riscv32_asm:and_(Reg, Reg, Scratch),
+    State1#state{
+        available_regs = [Scratch | Rest],
+        stream = Mod:append(State1#state.stream, <<Invert/binary, Mask/binary>>)
+    };
+and_(#state{stream_module = Mod, available_regs = [Scratch | Rest]} = State0, Reg, Val) ->
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Scratch, Val),
+    Mask = jit_riscv32_asm:and_(Reg, Reg, Scratch),
+    State1#state{
+        available_regs = [Scratch | Rest],
+        stream = Mod:append(State1#state.stream, Mask)
+    };
+and_(#state{stream_module = Mod, stream = Stream0, available_regs = []} = State0, Reg, Val) when
+    Val < 0 andalso Val >= -256
+->
+    % Nothing free: park a0 in ?IP_REG and borrow it.
+    Spill = jit_riscv32_asm:mv(?IP_REG, a0),
+    State1 = mov_immediate(State0#state{stream = Mod:append(Stream0, Spill)}, a0, bnot (Val)),
+    Invert = jit_riscv32_asm:not_(a0, a0),
+    Mask = jit_riscv32_asm:and_(Reg, Reg, a0),
+    Stream1 = Mod:append(State1#state.stream, <<Invert/binary, Mask/binary>>),
+    % Give a0 its value back.
+    Unspill = jit_riscv32_asm:mv(a0, ?IP_REG),
+    State0#state{stream = Mod:append(Stream1, Unspill)};
+and_(#state{stream_module = Mod, stream = Stream0, available_regs = []} = State0, Reg, Val) ->
+    % Nothing free: park a0 in ?IP_REG and borrow it.
+    Spill = jit_riscv32_asm:mv(?IP_REG, a0),
+    State1 = mov_immediate(State0#state{stream = Mod:append(Stream0, Spill)}, a0, Val),
+    Mask = jit_riscv32_asm:and_(Reg, Reg, a0),
+    Stream1 = Mod:append(State1#state.stream, Mask),
+    % Give a0 its value back.
+    Unspill = jit_riscv32_asm:mv(a0, ?IP_REG),
+    State0#state{stream = Mod:append(Stream1, Unspill)}.
+
+%% @doc Perform an OR of a register with an immediate, in place.
+%% The immediate is loaded into the first available register, which is still
+%% available on return.
+or_(#state{stream_module = Mod, available_regs = [Scratch | Rest]} = State0, Reg, Val) ->
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Scratch, Val),
+    Or = jit_riscv32_asm:or_(Reg, Scratch),
+    State1#state{
+        available_regs = [Scratch | Rest],
+        stream = Mod:append(State1#state.stream, Or)
+    }.
+
+%% @doc Emit code adding Val to Reg, in place.
+%% Val is a register or an integer. Integers from 0 to 255 are folded into a
+%% single addi; any other value is first loaded into the first available
+%% register, which is still available on return.
+add(#state{stream_module = Mod, stream = Stream} = State, Reg, Imm) when Imm >= 0, Imm =< 255 ->
+    State#state{stream = Mod:append(Stream, jit_riscv32_asm:addi(Reg, Reg, Imm))};
+add(#state{stream_module = Mod, stream = Stream} = State, Reg, OtherReg) when
+    is_atom(OtherReg)
+->
+    State#state{stream = Mod:append(Stream, jit_riscv32_asm:add(Reg, Reg, OtherReg))};
+add(#state{stream_module = Mod, available_regs = [Scratch | Rest]} = State0, Reg, Val) ->
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Scratch, Val),
+    Sum = jit_riscv32_asm:add(Reg, Reg, Scratch),
+    State1#state{
+        available_regs = [Scratch | Rest],
+        stream = Mod:append(State1#state.stream, Sum)
+    }.
+
+%% @doc Emit code loading the immediate Val into Reg.
+%% The encoding is chosen by the li pseudo-instruction of the assembler: per
+%% the notes that accompanied this code, a 12-bit signed value takes a single
+%% instruction and a wider one takes lui + addi, so no literal pool is needed.
+%% Only the stream of the state changes.
+mov_immediate(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Val) ->
+    % A single clause is enough: the range of Val never changed what was
+    % emitted here, li makes that decision.
+    I = jit_riscv32_asm:li(Reg, Val),
+    Stream1 = StreamModule:append(Stream0, I),
+    State#state{stream = Stream1}.
+
+%% @doc Emit code subtracting Val from Reg, in place.
+%% Val is a register or an integer. Integers from 0 to 255 become an addi of
+%% the negated value; any other value is first loaded into the first
+%% available register, which is still available on return.
+sub(#state{stream_module = Mod, stream = Stream} = State, Reg, Imm) when Imm >= 0, Imm =< 255 ->
+    State#state{stream = Mod:append(Stream, jit_riscv32_asm:addi(Reg, Reg, -Imm))};
+sub(#state{stream_module = Mod, stream = Stream} = State, Reg, OtherReg) when
+    is_atom(OtherReg)
+->
+    State#state{stream = Mod:append(Stream, jit_riscv32_asm:sub(Reg, Reg, OtherReg))};
+sub(#state{stream_module = Mod, available_regs = [Scratch | Rest]} = State0, Reg, Val) ->
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Scratch, Val),
+    Diff = jit_riscv32_asm:sub(Reg, Reg, Scratch),
+    State1#state{
+        available_regs = [Scratch | Rest],
+        stream = Mod:append(State1#state.stream, Diff)
+    }.
+
+%% @doc Emit code multiplying Reg by the constant Val, in place.
+%% Powers of two up to 64 become a single shift; 3, 5, 7, 9 and 15 a shift
+%% into a scratch register combined with an add or a sub; 6 and 10 chain two
+%% of the previous cases; any other constant is loaded into a scratch
+%% register and multiplied with the mul instruction. The scratch register is
+%% the first available one and is still available on return.
+mul(State, _Reg, 1) ->
+    State;
+mul(State, Reg, 2) ->
+    shift_left(State, Reg, 1);
+mul(#state{stream_module = Mod, stream = Stream, available_regs = [Tmp | _]} = State, Reg, 3) ->
+    % Reg * 3 = Reg * 2 + Reg
+    Shift = jit_riscv32_asm:slli(Tmp, Reg, 1),
+    Combine = jit_riscv32_asm:add(Reg, Tmp, Reg),
+    State#state{stream = Mod:append(Stream, <<Shift/binary, Combine/binary>>)};
+mul(State, Reg, 4) ->
+    shift_left(State, Reg, 2);
+mul(#state{stream_module = Mod, stream = Stream, available_regs = [Tmp | _]} = State, Reg, 5) ->
+    % Reg * 5 = Reg * 4 + Reg
+    Shift = jit_riscv32_asm:slli(Tmp, Reg, 2),
+    Combine = jit_riscv32_asm:add(Reg, Tmp, Reg),
+    State#state{stream = Mod:append(Stream, <<Shift/binary, Combine/binary>>)};
+mul(State, Reg, 6) ->
+    mul(mul(State, Reg, 3), Reg, 2);
+mul(#state{stream_module = Mod, stream = Stream, available_regs = [Tmp | _]} = State, Reg, 7) ->
+    % Reg * 7 = Reg * 8 - Reg
+    Shift = jit_riscv32_asm:slli(Tmp, Reg, 3),
+    Combine = jit_riscv32_asm:sub(Reg, Tmp, Reg),
+    State#state{stream = Mod:append(Stream, <<Shift/binary, Combine/binary>>)};
+mul(State, Reg, 8) ->
+    shift_left(State, Reg, 3);
+mul(#state{stream_module = Mod, stream = Stream, available_regs = [Tmp | _]} = State, Reg, 9) ->
+    % Reg * 9 = Reg * 8 + Reg
+    Shift = jit_riscv32_asm:slli(Tmp, Reg, 3),
+    Combine = jit_riscv32_asm:add(Reg, Tmp, Reg),
+    State#state{stream = Mod:append(Stream, <<Shift/binary, Combine/binary>>)};
+mul(State, Reg, 10) ->
+    mul(mul(State, Reg, 5), Reg, 2);
+mul(#state{stream_module = Mod, stream = Stream, available_regs = [Tmp | _]} = State, Reg, 15) ->
+    % Reg * 15 = Reg * 16 - Reg
+    Shift = jit_riscv32_asm:slli(Tmp, Reg, 4),
+    Combine = jit_riscv32_asm:sub(Reg, Tmp, Reg),
+    State#state{stream = Mod:append(Stream, <<Shift/binary, Combine/binary>>)};
+mul(State, Reg, 16) ->
+    shift_left(State, Reg, 4);
+mul(State, Reg, 32) ->
+    shift_left(State, Reg, 5);
+mul(State, Reg, 64) ->
+    shift_left(State, Reg, 6);
+mul(#state{stream_module = Mod, available_regs = [Tmp | Rest]} = State0, Reg, Val) ->
+    % General case: load the constant and use the mul instruction.
+    State1 = mov_immediate(State0#state{available_regs = Rest}, Tmp, Val),
+    Product = jit_riscv32_asm:mul(Reg, Reg, Tmp),
+    State1#state{
+        stream = Mod:append(State1#state.stream, Product),
+        available_regs = [Tmp | State1#state.available_regs]
+    }.
+
+%%
+%% Emit the reduction check: decrement the reduction count kept in the jit
+%% state and, when it reaches zero, record the address that follows the
+%% scheduling sequence as the continuation and call ?PRIM_SCHEDULE_NEXT_CP.
+%% When the count is not zero, a bne jumps over the scheduling sequence.
+%%
+%% Analysis of AArch64 pattern and RISC-V32 implementation:
+%%
+%% AArch64 layout (from call_ext_only_test):
+%%   0x0-0x8:  Decrement reductions, store back
+%%   0xc:      b.ne 0x20   ; Branch if reductions != 0 to continuation
+%%   0x10-0x1c: adr/str/ldr/br sequence for scheduling next process
+%%   0x20:     [CONTINUATION POINT] - Actual function starts here
+%%
+%% RISC-V32 implementation (no prolog/epilog needed due to 32 registers):
+%%   0x0-0x8:  Decrement reductions, store back
+%%   0xc:      bne continuation ; Branch if reductions != 0 to continuation
+%%   0x10-0x?:  adr/sw/ldr/jalr sequence for scheduling next process
+%%   continuation: [actual function body]
+%%
+%% Key insight: With 32 registers, RISC-V32 doesn't need prolog/epilog like ARM Thumb.
+%% When reductions != 0, we branch directly to continue execution.
+%% When reductions == 0, we schedule the next process, and resume at the continuation point.
+%%
+%% The bne and the address computation are first emitted as placeholders,
+%% because their target is only known once the scheduling call has been
+%% emitted; both are then rewritten in place with StreamModule:replace/3.
+%%
+-spec decrement_reductions_and_maybe_schedule_next(state()) -> state().
+decrement_reductions_and_maybe_schedule_next(
+    #state{stream_module = StreamModule, stream = Stream0, available_regs = [Temp | _]} = State0
+) ->
+    % Temp is the first available register; it is used without being reserved
+    % Load reduction count
+    I1 = jit_riscv32_asm:lw(Temp, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    % Decrement reduction count
+    I2 = jit_riscv32_asm:addi(Temp, Temp, -1),
+    % Store back the decremented value
+    I3 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_REDUCTIONCOUNT_OFFSET),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary, I3/binary>>),
+    % Remember where the branch sits so that it can be rewritten below
+    BNEOffset = StreamModule:offset(Stream1),
+    % Branch if reduction count is not zero
+    % (offset 0 is a placeholder, the real distance is patched in as NewI4)
+    I4 = jit_riscv32_asm:bne(Temp, zero, 0),
+    % Set continuation to the next instruction
+    % The address computation starts right after the branch
+    ADROffset = BNEOffset + byte_size(I4),
+    % Use 8-byte placeholder (2 words of 0xFFFFFFFF) for pc_relative_address
+    % This ensures we can always rewrite with either auipc alone (4 bytes) or auipc+addi (8 bytes)
+    I5 = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
+    % Store the computed address as the continuation in the jit state
+    I6 = jit_riscv32_asm:sw(?JITSTATE_REG, Temp, ?JITSTATE_CONTINUATION_OFFSET),
+    % Append the instructions to the stream
+    Stream2 = StreamModule:append(Stream1, <<I4/binary, I5/binary, I6/binary>>),
+    State1 = State0#state{stream = Stream2},
+    State2 = call_primitive_last(State1, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]),
+    % Rewrite the branch and adr instructions
+    #state{stream = Stream3} = State2,
+    % NewOffset is the end of the scheduling sequence: both the branch target
+    % and the continuation address
+    NewOffset = StreamModule:offset(Stream3),
+    NewI4 = jit_riscv32_asm:bne(Temp, zero, NewOffset - BNEOffset),
+    % The address is relative to the first placeholder word, at ADROffset
+    NewI5Offset = NewOffset - ADROffset,
+    % Generate the new pc_relative_address instruction, padding with NOP if needed
+    NewI5 =
+        case pc_relative_address(Temp, NewI5Offset) of
+            I when byte_size(I) =:= 4 ->
+                % Only auipc, pad with NOP (4 bytes)
+                <<I/binary, (jit_riscv32_asm:nop())/binary>>;
+            I when byte_size(I) =:= 6 ->
+                % auipc + c.addi, pad with c.nop (2 bytes)
+                <<I/binary, (jit_riscv32_asm:c_nop())/binary>>;
+            I when byte_size(I) =:= 8 ->
+                % auipc + addi, no padding needed
+                I
+        end,
+    % The branch and the 8 bytes that follow it are replaced in one go
+    Stream4 = StreamModule:replace(
+        Stream3, BNEOffset, <<NewI4/binary, NewI5/binary>>
+    ),
+    % NOTE(review): presumably reconciles the used registers with those in use
+    % before the primitive call - confirm against merge_used_regs/2
+    merge_used_regs(State2#state{stream = Stream4}, State1#state.used_regs).
+
+%% @doc Emit a call to Label that returns to the code following it.
+%% cp is set first with a placeholder offset, the reduction check and jump are
+%% emitted by call_only_or_schedule_next/2, and the placeholder is finally
+%% patched with the offset reached once that code is in the stream.
+-spec call_or_schedule_next(state(), non_neg_integer()) -> state().
+call_or_schedule_next(State, Label) ->
+    {WithCp, PatchAt, PatchReg} = set_cp(State),
+    Called = call_only_or_schedule_next(WithCp, Label),
+    rewrite_cp_offset(Called, PatchAt, PatchReg).
+
+%% @doc Emit a jump to Label guarded by the reduction counter.
+%% The counter in the jit state is decremented and stored back. While it is
+%% not zero, execution branches to Label: directly when the label is already
+%% known and within conditional branch range, otherwise through the sequence
+%% built by branch_to_label_code/4, which a beq skips when the counter has
+%% reached zero. In that last case the continuation is set to Label and
+%% ?PRIM_SCHEDULE_NEXT_CP is called.
+call_only_or_schedule_next(
+    #state{stream_module = Mod, stream = Stream0, available_regs = [Count | _]} = State0,
+    Label
+) ->
+    % Load (jit_state is in a1), decrement and store back the reduction count.
+    Decrement = <<
+        (jit_riscv32_asm:lw(Count, ?JITSTATE_REG, ?JITSTATE_REDUCTIONCOUNT_OFFSET))/binary,
+        (jit_riscv32_asm:addi(Count, Count, -1))/binary,
+        (jit_riscv32_asm:sw(?JITSTATE_REG, Count, ?JITSTATE_REDUCTIONCOUNT_OFFSET))/binary
+    >>,
+    Stream1 = Mod:append(Stream0, Decrement),
+    % A single lookup, reused by branch_to_label_code/4 on the far path.
+    Known = lists:keyfind(Label, 1, State0#state.labels),
+    BranchAt = Mod:offset(Stream1),
+    Route =
+        case Known of
+            {Label, LabelOffset} ->
+                Rel = LabelOffset - BranchAt,
+                if
+                    % Conditional branches reach -4096..4094, even offsets only.
+                    Rel >= -4096 andalso Rel =< 4094 andalso (Rel rem 2) =:= 0 -> {near, Rel};
+                    true -> far
+                end;
+            false ->
+                far
+        end,
+    State2 =
+        case Route of
+            {near, Distance} ->
+                % Count not zero: go straight to the label.
+                Bne = jit_riscv32_asm:bne(Count, zero, Distance),
+                State0#state{stream = Mod:append(Stream1, Bne)};
+            far ->
+                % Trampoline: the far sequence starts right after the 4-byte beq,
+                % and the beq jumps over it when the count is zero.
+                {State1, FarCode} = branch_to_label_code(State0, BranchAt + 4, Label, Known),
+                Beq = jit_riscv32_asm:beq(Count, zero, byte_size(FarCode) + 4),
+                Stream2 = Mod:append(Stream1, Beq),
+                State1#state{stream = Mod:append(Stream2, FarCode)}
+        end,
+    State3 = set_continuation_to_label(State2, Label),
+    call_primitive_last(State3, ?PRIM_SCHEDULE_NEXT_CP, [ctx, jit_state]).
+
+%% @doc Emit a call to a primitive after setting cp to the code that follows.
+%% cp is stored with a placeholder offset before the call is emitted with
+%% call_primitive_last/3; the placeholder is patched afterwards.
+call_primitive_with_cp(State, Primitive, Args) ->
+    {WithCp, PatchAt, PatchReg} = set_cp(State),
+    Called = call_primitive_last(WithCp, Primitive, Args),
+    rewrite_cp_offset(Called, PatchAt, PatchReg).
+
+%% @doc Emit code storing cp, leaving the offset part to be patched later.
+%% Returns the new state, the stream offset of the 8-byte placeholder and the
+%% register that the patched instructions must load; rewrite_cp_offset/3
+%% fills the placeholder in. Both registers used here are released before
+%% returning.
+-spec set_cp(state()) -> {state(), non_neg_integer(), riscv32_register()}.
+set_cp(#state{available_regs = [OffsetReg | Free], used_regs = Used} = State0) ->
+    % Claim the offset register before get_module_index/1 takes one, so that
+    % the two do not collide and registers do not run out.
+    Reserved = State0#state{available_regs = Free, used_regs = [OffsetReg | Used]},
+    % The module index is fetched dynamically.
+    {#state{stream_module = Mod, stream = Stream} = State1, IndexReg} =
+        get_module_index(Reserved),
+    % cp starts as the module index shifted left by 24 bits.
+    Shift = jit_riscv32_asm:slli(IndexReg, IndexReg, 24),
+    % li takes one instruction for small immediates and two for large ones.
+    % The value loaded depends on code that is not emitted yet, so room for
+    % two is always reserved, as all ones for flash compatibility (bits can
+    % only be flipped from 1 to 0).
+    Placeholder = <<16#FFFFFFFF:32/little, 16#FFFFFFFF:32/little>>,
+    PatchAt = Mod:offset(Stream) + byte_size(Shift),
+    % Combine the shifted index with the offset loaded into OffsetReg.
+    Merge = jit_riscv32_asm:or_(IndexReg, OffsetReg),
+    {CpBase, CpOff} = ?CP,
+    Store = jit_riscv32_asm:sw(CpBase, IndexReg, CpOff),
+    Emitted = Mod:append(
+        Stream, <<Shift/binary, Placeholder/binary, Merge/binary, Store/binary>>
+    ),
+    FreedIndex = free_native_register(State1#state{stream = Emitted}, IndexReg),
+    FreedBoth = free_native_register(FreedIndex, OffsetReg),
+    {FreedBoth, PatchAt, OffsetReg}.
+
+%% @doc Patch the placeholder left by set_cp/1.
+%% The value loaded into TempReg is the current stream offset, relative to
+%% the offset recorded in the state, shifted left by 2. The li sequence is
+%% padded with a nop so that it always fills the 8 reserved bytes.
+-spec rewrite_cp_offset(state(), non_neg_integer(), riscv32_register()) -> state().
+rewrite_cp_offset(
+    #state{stream_module = Mod, stream = Stream, offset = CodeOffset} = State,
+    RewriteOffset,
+    TempReg
+) ->
+    CPValue = (Mod:offset(Stream) - CodeOffset) bsl 2,
+    Load = jit_riscv32_asm:li(TempReg, CPValue),
+    % 4 and 6 byte sequences are completed with a nop of the missing size.
+    Filler =
+        case byte_size(Load) of
+            4 -> jit_riscv32_asm:nop();
+            6 -> jit_riscv32_asm:c_nop();
+            8 -> <<>>
+        end,
+    State#state{stream = Mod:replace(Stream, RewriteOffset, <<Load/binary, Filler/binary>>)}.
+
+%% @doc Emit code storing TermReg into the ?BS slot and 0 into ?BS_OFFSET.
+%% The zero goes through the first available register, which is not reserved.
+set_bs(
+    #state{stream_module = Mod, stream = Stream, available_regs = [Scratch | _]} = State,
+    TermReg
+) ->
+    {BsBase, BsOff} = ?BS,
+    {OffsetBase, OffsetOff} = ?BS_OFFSET,
+    Code = <<
+        (jit_riscv32_asm:sw(BsBase, TermReg, BsOff))/binary,
+        (jit_riscv32_asm:li(Scratch, 0))/binary,
+        (jit_riscv32_asm:sw(OffsetBase, Scratch, OffsetOff))/binary
+    >>,
+    State#state{stream = Mod:append(Stream, Code)}.
+
+%%-----------------------------------------------------------------------------
+%% @param State current state
+%% @param SortedLines line information, sorted by offset
+%% @doc Append the labels and lines tables behind a small native function.
+%% That function leaves in a0 the address of what immediately follows it and
+%% returns. What follows is a 16-bit count and the 16-bit label / 32-bit
+%% offset entries sorted by offset (only integer labels are kept), then a
+%% 16-bit count and the 16-bit line / 32-bit offset entries.
+%% @end
+%% @return New state
+%%-----------------------------------------------------------------------------
+return_labels_and_lines(
+    #state{stream_module = Mod, stream = Stream, labels = Labels} = State,
+    SortedLines
+) ->
+    NumericLabels = [Entry || {Label, _Offset} = Entry <- Labels, is_integer(Label)],
+    SortedLabels = lists:keysort(2, NumericLabels),
+    Ret = jit_riscv32_asm:ret(),
+    % The tables must start 10 bytes after the address computation begins:
+    % either 8 bytes of address computation and a 2-byte ret, or 6 + 2 bytes
+    % completed with 2 bytes of padding.
+    Address = pc_relative_address(a0, 10),
+    Prologue = <<Address/binary, Ret/binary>>,
+    Padding =
+        case byte_size(Prologue) of
+            10 -> <<>>;
+            8 -> <<16#FFFF:16>>
+        end,
+    LabelsTable = <<<<Label:16, Offset:32>> || {Label, Offset} <- SortedLabels>>,
+    LinesTable = <<<<Line:16, Offset:32>> || {Line, Offset} <- SortedLines>>,
+    Tables = <<
+        (length(SortedLabels)):16,
+        LabelsTable/binary,
+        (length(SortedLines)):16,
+        LinesTable/binary
+    >>,
+    State#state{
+        stream = Mod:append(Stream, <<Prologue/binary, Padding/binary, Tables/binary>>)
+    }.
+
+%% @doc Generate PC-relative address calculation using AUIPC + ADDI
+%% This replaces the ARM-style 'adr' pseudo-instruction with native RISC-V
+%% instructions. Rd receives the address of the auipc instruction plus
+%% Offset. The result is auipc alone when the low 12 bits of the offset are
+%% zero, and auipc followed by addi otherwise. Fails with
+%% {offset_out_of_range, Offset, Upper, Min, Max} when the upper part does
+%% not fit the 20-bit signed immediate of auipc.
+-spec pc_relative_address(riscv32_register(), integer()) -> binary().
+pc_relative_address(Rd, 0) ->
+    % Simple case: just get current PC
+    jit_riscv32_asm:auipc(Rd, 0);
+pc_relative_address(Rd, Offset) ->
+    % Split the offset into a 12-bit signed low part for addi and an upper
+    % part for auipc, so that Offset = (Upper bsl 12) + LowerSigned.
+    LowerSigned =
+        case Offset band 16#FFF of
+            Lower when Lower >= 16#800 -> Lower - 16#1000;
+            Lower -> Lower
+        end,
+    % Removing the signed low part first accounts for the carry into the
+    % upper part when the low part is negative; bsr keeps the sign.
+    Upper = (Offset - LowerSigned) bsr 12,
+    % Validate that Upper is in valid range for AUIPC
+    if
+        Upper < -16#80000; Upper > 16#7FFFF ->
+            error({offset_out_of_range, Offset, Upper, -16#80000, 16#7FFFF});
+        true ->
+            ok
+    end,
+    AuipcInstr = jit_riscv32_asm:auipc(Rd, Upper),
+    % Offset is not 0 here, so Upper and LowerSigned cannot both be 0: the
+    % only decision left is whether an addi is needed for the low part.
+    case LowerSigned of
+        0 ->
+            AuipcInstr;
+        _ ->
+            AddiInstr = jit_riscv32_asm:addi(Rd, Rd, LowerSigned),
+            <<AuipcInstr/binary, AddiInstr/binary>>
+    end.
+
+%% Helper building the code that stores SrcReg into y register Y.
+%% The pointer read from ?Y_REGS goes into the temporary register. Offsets up
+%% to 124 bytes are encoded in the store itself; larger ones are added to the
+%% pointer first, using a second register taken from the available ones or,
+%% failing that, ?IP_REG.
+str_y_reg(SrcReg, Y, TempReg, _AvailableRegs) when Y * 4 =< 124 ->
+    {YBase, YDisp} = ?Y_REGS,
+    <<
+        (jit_riscv32_asm:lw(TempReg, YBase, YDisp))/binary,
+        (jit_riscv32_asm:sw(TempReg, SrcReg, Y * 4))/binary
+    >>;
+str_y_reg(SrcReg, Y, PtrReg, [AddrReg | _]) ->
+    % Large offset: compute the slot address in a second available register.
+    {YBase, YDisp} = ?Y_REGS,
+    <<
+        (jit_riscv32_asm:lw(PtrReg, YBase, YDisp))/binary,
+        (jit_riscv32_asm:li(AddrReg, Y * 4))/binary,
+        (jit_riscv32_asm:add(AddrReg, AddrReg, PtrReg))/binary,
+        (jit_riscv32_asm:sw(AddrReg, SrcReg, 0))/binary
+    >>;
+str_y_reg(SrcReg, Y, PtrReg, []) ->
+    % Large offset and nothing else free: keep the pointer in ?IP_REG while
+    % PtrReg is reused for the offset and then for the slot address.
+    {YBase, YDisp} = ?Y_REGS,
+    <<
+        (jit_riscv32_asm:lw(PtrReg, YBase, YDisp))/binary,
+        (jit_riscv32_asm:mv(?IP_REG, PtrReg))/binary,
+        (jit_riscv32_asm:li(PtrReg, Y * 4))/binary,
+        (jit_riscv32_asm:add(PtrReg, PtrReg, ?IP_REG))/binary,
+        (jit_riscv32_asm:sw(PtrReg, SrcReg, 0))/binary
+    >>.
+
+%% Helper building the code that loads y register Y into DstReg.
+%% The pointer read from ?Y_REGS goes into the first available register, or
+%% into DstReg itself when none is available. Offsets up to 124 bytes are
+%% encoded in the load; larger ones are added to the pointer first, DstReg
+%% holding the offset and then the slot address.
+ldr_y_reg(DstReg, Y, [PtrReg | _]) when Y * 4 =< 124 ->
+    {YBase, YDisp} = ?Y_REGS,
+    <<
+        (jit_riscv32_asm:lw(PtrReg, YBase, YDisp))/binary,
+        (jit_riscv32_asm:lw(DstReg, PtrReg, Y * 4))/binary
+    >>;
+ldr_y_reg(DstReg, Y, [PtrReg | _]) ->
+    % Large offset: DstReg doubles as the register for the address arithmetic.
+    {YBase, YDisp} = ?Y_REGS,
+    <<
+        (jit_riscv32_asm:lw(PtrReg, YBase, YDisp))/binary,
+        (jit_riscv32_asm:li(DstReg, Y * 4))/binary,
+        (jit_riscv32_asm:add(DstReg, DstReg, PtrReg))/binary,
+        (jit_riscv32_asm:lw(DstReg, DstReg, 0))/binary
+    >>;
+ldr_y_reg(DstReg, Y, []) when Y * 4 =< 124 ->
+    % Nothing free: DstReg holds the pointer before receiving the value.
+    {YBase, YDisp} = ?Y_REGS,
+    <<
+        (jit_riscv32_asm:lw(DstReg, YBase, YDisp))/binary,
+        (jit_riscv32_asm:lw(DstReg, DstReg, Y * 4))/binary
+    >>;
+ldr_y_reg(DstReg, Y, []) ->
+    % Large offset and nothing free: the pointer is moved to ?IP_REG so that
+    % DstReg can hold the offset, then the slot address, then the value.
+    {YBase, YDisp} = ?Y_REGS,
+    <<
+        (jit_riscv32_asm:lw(DstReg, YBase, YDisp))/binary,
+        (jit_riscv32_asm:mv(?IP_REG, DstReg))/binary,
+        (jit_riscv32_asm:li(DstReg, Y * 4))/binary,
+        (jit_riscv32_asm:add(DstReg, DstReg, ?IP_REG))/binary,
+        (jit_riscv32_asm:lw(DstReg, DstReg, 0))/binary
+    >>.
+
+free_reg(AvailableRegs0, UsedRegs0, Reg) when ?IS_GPR(Reg) ->
+    % Put Reg back in the available list, then assert it was in use before dropping it
+    Available = free_reg0(?AVAILABLE_REGS, AvailableRegs0, Reg, []),
+    true = lists:member(Reg, UsedRegs0),
+    {Available, lists:delete(Reg, UsedRegs0)}.
+
+% Walk the reference order (first argument) to find where Reg belongs in the
+% available list (second argument); available registers met before Reg are
+% collected, reversed, in the accumulator.
+free_reg0([Reg | _], Avail, Reg, Seen) -> lists:reverse(Seen) ++ [Reg | Avail];
+free_reg0([R | Order], [R | Avail], Reg, Seen) -> free_reg0(Order, Avail, Reg, [R | Seen]);
+free_reg0([_ | Order], Avail, Reg, Seen) -> free_reg0(Order, Avail, Reg, Seen).
+
+args_regs(Args) ->
+    % Classify every argument: the register it names, ?CTX_REG for ctx and for
+    % x/y/fp register references, or a marker atom (imm, jit_state, stack)
+    RegOf = fun
+        (ctx) -> ?CTX_REG;
+        (jit_state) -> jit_state;
+        (jit_state_tail_call) -> jit_state;
+        (stack) -> stack;
+        (offset) -> imm;
+        (Reg) when is_atom(Reg) -> Reg;
+        (Imm) when is_integer(Imm) -> imm;
+        ({avm_int64_t, _}) -> imm;
+        ({ptr, Reg}) -> Reg;
+        ({x_reg, _}) -> ?CTX_REG;
+        ({y_reg, _}) -> ?CTX_REG;
+        ({fp_reg, _}) -> ?CTX_REG;
+        ({free, Imm}) when is_integer(Imm) -> imm;
+        ({free, Reg}) when is_atom(Reg) -> Reg;
+        ({free, {ptr, Reg}}) -> Reg;
+        ({free, {x_reg, _}}) -> ?CTX_REG;
+        ({free, {y_reg, _}}) -> ?CTX_REG;
+        ({free, {fp_reg, _}}) -> ?CTX_REG
+    end,
+    [RegOf(Arg) || Arg <- Args].
+
+%%-----------------------------------------------------------------------------
+%% @doc Record a label at the offset currently reported by the stream.
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference()) -> state().
+add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label) ->
+    % The stream module reports the current offset; add_label/3 stores the pair
+    add_label(State, Label, StreamModule:offset(Stream)).
+
+%%-----------------------------------------------------------------------------
+%% @doc Record a label at an explicit offset by prepending it to the label list
+%% @end
+%% @param State current backend state
+%% @param Label the label number or reference
+%% @param Offset the offset to associate with this label
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+-spec add_label(state(), integer() | reference(), integer()) -> state().
+add_label(#state{labels = KnownLabels} = State0, Label, Offset) ->
+    State0#state{labels = [{Label, Offset} | KnownLabels]}.
+
+-ifdef(JIT_DWARF).
+%%-----------------------------------------------------------------------------
+%% @doc DWARF register number of the register in which ctx is passed
+%% @returns The value of ?DWARF_A0_REG_RISCV32 (ctx is passed in a0 on RISC-V)
+%% @end
+%%-----------------------------------------------------------------------------
+-spec dwarf_ctx_register() -> non_neg_integer().
+% Constant accessor: the macro is defined outside this function
+dwarf_ctx_register() -> ?DWARF_A0_REG_RISCV32.
+-endif.
diff --git a/libs/jit/src/jit_riscv32_asm.erl b/libs/jit/src/jit_riscv32_asm.erl
new file mode 100644
index 0000000000..25bf1ff574
--- /dev/null
+++ b/libs/jit/src/jit_riscv32_asm.erl
@@ -0,0 +1,1802 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32_asm).
+
+-export([
+    % R-type arithmetic and logical instructions
+    add/3,
+    sub/3,
+    and_/3,
+    or_/2,
+    or_/3,
+    xor_/3,
+    sll/3,
+    srl/3,
+    sra/3,
+    slt/3,
+    sltu/3,
+    % I-type immediate instructions
+    addi/3,
+    andi/3,
+    ori/3,
+    xori/3,
+    slli/3,
+    srli/3,
+    srai/3,
+    slti/3,
+    sltiu/3,
+    % Load instructions
+    lw/2,
+    lw/3,
+    lh/2,
+    lh/3,
+    lhu/2,
+    lhu/3,
+    lb/2,
+    lb/3,
+    lbu/2,
+    lbu/3,
+    % Store instructions
+    sw/2,
+    sw/3,
+    sh/2,
+    sh/3,
+    sb/2,
+    sb/3,
+    % Branch instructions
+    beq/3,
+    bne/3,
+    blt/3,
+    bge/3,
+    bltu/3,
+    bgeu/3,
+    % Jump instructions
+    jal/2,
+    jalr/3,
+    jalr/2,
+    % Upper immediate instructions
+    lui/2,
+    auipc/2,
+    % Pseudo-instructions
+    nop/0,
+    li/2,
+    mv/2,
+    not_/2,
+    neg/2,
+    j/1,
+    jr/1,
+    ret/0,
+    call/2,
+    % M extension (multiply/divide)
+    mul/3,
+    % C extension (compressed) - arithmetic/logical
+    c_add/2,
+    c_sub/2,
+    c_and/2,
+    c_or/2,
+    c_xor/2,
+    c_mv/2,
+    % C extension - immediate instructions
+    c_addi/2,
+    c_andi/2,
+    c_li/2,
+    c_lui/2,
+    c_addi16sp/1,
+    c_addi4spn/2,
+    % C extension - shift instructions
+    c_slli/2,
+    c_srli/2,
+    c_srai/2,
+    % C extension - load/store
+    c_lw/2,
+    c_sw/2,
+    c_lwsp/2,
+    c_swsp/2,
+    % C extension - branches and jumps
+    c_beqz/2,
+    c_bnez/2,
+    c_j/1,
+    c_jal/1,
+    c_jr/1,
+    c_jalr/1,
+    % C extension - system instructions
+    c_ebreak/0,
+    % C extension - pseudo-instructions
+    c_nop/0
+]).
+
+-export_type([
+    riscv_register/0
+]).
+
+%% RISC-V 32-bit (RV32I) Assembler
+%%
+%% This module provides an assembler for the RISC-V 32-bit instruction set.
+%% It generates binary machine code for RISC-V instructions following the
+%% RV32I base integer instruction set architecture.
+%%
+%% RISC-V Register Set (32 registers):
+%%   x0  (zero) - Hardwired zero (reads as 0, writes ignored)
+%%   x1  (ra)   - Return address
+%%   x2  (sp)   - Stack pointer
+%%   x3  (gp)   - Global pointer
+%%   x4  (tp)   - Thread pointer
+%%   x5  (t0)   - Temporary register 0
+%%   x6  (t1)   - Temporary register 1
+%%   x7  (t2)   - Temporary register 2
+%%   x8  (s0/fp)- Saved register 0 / Frame pointer
+%%   x9  (s1)   - Saved register 1
+%%   x10 (a0)   - Function argument 0 / Return value 0
+%%   x11 (a1)   - Function argument 1 / Return value 1
+%%   x12 (a2)   - Function argument 2
+%%   x13 (a3)   - Function argument 3
+%%   x14 (a4)   - Function argument 4
+%%   x15 (a5)   - Function argument 5
+%%   x16 (a6)   - Function argument 6
+%%   x17 (a7)   - Function argument 7
+%%   x18 (s2)   - Saved register 2
+%%   x19 (s3)   - Saved register 3
+%%   x20 (s4)   - Saved register 4
+%%   x21 (s5)   - Saved register 5
+%%   x22 (s6)   - Saved register 6
+%%   x23 (s7)   - Saved register 7
+%%   x24 (s8)   - Saved register 8
+%%   x25 (s9)   - Saved register 9
+%%   x26 (s10)  - Saved register 10
+%%   x27 (s11)  - Saved register 11
+%%   x28 (t3)   - Temporary register 3
+%%   x29 (t4)   - Temporary register 4
+%%   x30 (t5)   - Temporary register 5
+%%   x31 (t6)   - Temporary register 6
+%%
+%% RISC-V Calling Convention (ILP32):
+%%   - Arguments: a0-a7 (x10-x17)
+%%   - Return values: a0-a1 (x10-x11)
+%%   - Caller-saved: ra, t0-t6, a0-a7
+%%   - Callee-saved: s0-s11, sp
+%%   - Stack grows downward
+%%   - Stack must be 16-byte aligned at function call boundaries
+%%
+%% Instruction Encoding:
+%%   All RV32I instructions are 32 bits (4 bytes); C extension encodings are 16 bits.
+%%   Each instruction is emitted in little-endian byte order.
+%%
+%% See: RISC-V Instruction Set Manual, Volume I: User-Level ISA
+%% https://riscv.org/technical/specifications/
+%% https://github.com/riscv/riscv-isa-manual/
+
+-type riscv_register() ::
+    zero % x0
+    | ra % x1
+    | sp % x2
+    | gp % x3
+    | tp % x4
+    | t0 % x5
+    | t1 % x6
+    | t2 % x7
+    | s0 % x8
+    | fp % x8, second name for s0
+    | s1 % x9
+    | a0 % x10
+    | a1 % x11
+    | a2 % x12
+    | a3 % x13
+    | a4 % x14
+    | a5 % x15
+    | a6 % x16
+    | a7 % x17
+    | s2 % x18
+    | s3 % x19
+    | s4 % x20
+    | s5 % x21
+    | s6 % x22
+    | s7 % x23
+    | s8 % x24
+    | s9 % x25
+    | s10 % x26
+    | s11 % x27
+    | t3 % x28
+    | t4 % x29
+    | t5 % x30
+    | t6. % x31
+
+%%-----------------------------------------------------------------------------
+%% Helper functions
+%%-----------------------------------------------------------------------------
+
+%% Map an ABI register name to its x-register index (0-31); fp and s0 both map to 8
+-spec reg_to_num(riscv_register()) -> 0..31.
+% Clauses are grouped by role: special registers, arguments, temporaries, saved
+reg_to_num(zero) -> 0;
+reg_to_num(ra) -> 1;
+reg_to_num(sp) -> 2;
+reg_to_num(gp) -> 3;
+reg_to_num(tp) -> 4;
+reg_to_num(a0) -> 10;
+reg_to_num(a1) -> 11;
+reg_to_num(a2) -> 12;
+reg_to_num(a3) -> 13;
+reg_to_num(a4) -> 14;
+reg_to_num(a5) -> 15;
+reg_to_num(a6) -> 16;
+reg_to_num(a7) -> 17;
+reg_to_num(t0) -> 5;
+reg_to_num(t1) -> 6;
+reg_to_num(t2) -> 7;
+reg_to_num(t3) -> 28;
+reg_to_num(t4) -> 29;
+reg_to_num(t5) -> 30;
+reg_to_num(t6) -> 31;
+reg_to_num(s0) -> 8;
+reg_to_num(fp) -> 8;
+reg_to_num(s1) -> 9;
+reg_to_num(s2) -> 18;
+reg_to_num(s3) -> 19;
+reg_to_num(s4) -> 20;
+reg_to_num(s5) -> 21;
+reg_to_num(s6) -> 22;
+reg_to_num(s7) -> 23;
+reg_to_num(s8) -> 24;
+reg_to_num(s9) -> 25;
+reg_to_num(s10) -> 26;
+reg_to_num(s11) -> 27.
+
+%%-----------------------------------------------------------------------------
+%% R-type instruction encoding
+%%-----------------------------------------------------------------------------
+
+%% R-type layout, most significant field first:
+%% funct7 (7) | rs2 (5) | rs1 (5) | funct3 (3) | rd (5) | opcode (7)
+%% Bits:  31-25     24-20     19-15     14-12      11-7      6-0
+
+-spec encode_r_type(
+    Opcode :: 0..127,
+    Rd :: riscv_register(),
+    Funct3 :: 0..7,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Funct7 :: 0..127
+) -> binary().
+encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7) ->
+    DestBits = reg_to_num(Rd),
+    Src1Bits = reg_to_num(Rs1),
+    Src2Bits = reg_to_num(Rs2),
+    % Append fields from bit 31 down, shifting by the width of each added field
+    Acc1 = (Funct7 bsl 5) bor Src2Bits,
+    Acc2 = (Acc1 bsl 5) bor Src1Bits,
+    Acc3 = (Acc2 bsl 3) bor Funct3,
+    Acc4 = (Acc3 bsl 5) bor DestBits,
+    Word = (Acc4 bsl 7) bor Opcode,
+    % Emit the 32-bit word least-significant byte first
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% R-type arithmetic and logical instructions
+%%-----------------------------------------------------------------------------
+
+%% ADD - Register addition
+%% rd = rs1 + rs2; the compressed c.add is used when rd is rs1 and no operand is zero
+-spec add(riscv_register(), riscv_register(), riscv_register()) -> binary().
+add(Rd, Rd, Rs2) when Rd =/= zero, Rs2 =/= zero ->
+    % In-place form: hand over to the compressed encoder
+    c_add(Rd, Rs2);
+add(Rd, Rs1, Rs2) ->
+    % R-type: opcode 0110011 (0x33), funct3 000, funct7 0000000
+    encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#00).
+
+%% SUB - Register subtraction
+%% rd = rs1 - rs2; c.sub is used when rd is rs1 and both registers are compressible
+-spec sub(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sub(Rd, Rs1, Rs2) ->
+    % The register checks are only evaluated for the in-place form (rd == rs1)
+    InPlace = Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2),
+    case InPlace of
+        true -> c_sub(Rd, Rs2);
+        % R-type: opcode 0110011 (0x33), funct3 000, funct7 0100000
+        false -> encode_r_type(16#33, Rd, 16#0, Rs1, Rs2, 16#20)
+    end.
+
+%% AND - Register bitwise AND
+%% rd = rs1 & rs2; c.and is used when rd is rs1 and both registers are compressible
+-spec and_(riscv_register(), riscv_register(), riscv_register()) -> binary().
+and_(Rd, Rs1, Rs2) ->
+    % The register checks are only evaluated for the in-place form (rd == rs1)
+    InPlace = Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2),
+    case InPlace of
+        true -> c_and(Rd, Rs2);
+        % R-type: opcode 0110011 (0x33), funct3 111, funct7 0000000
+        false -> encode_r_type(16#33, Rd, 16#7, Rs1, Rs2, 16#00)
+    end.
+
+%% OR - Register bitwise OR
+%% rd = rs1 | rs2; c.or is used when rd is rs1 and both registers are compressible
+-spec or_(riscv_register(), riscv_register(), riscv_register()) -> binary().
+or_(Rd, Rs1, Rs2) ->
+    % The register checks are only evaluated for the in-place form (rd == rs1)
+    InPlace = Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2),
+    case InPlace of
+        true -> c_or(Rd, Rs2);
+        % R-type: opcode 0110011 (0x33), funct3 110, funct7 0000000
+        false -> encode_r_type(16#33, Rd, 16#6, Rs1, Rs2, 16#00)
+    end.
+
+%% OR - Bitwise OR, two-operand shorthand
+%% rd = rd | rs (forwards to or_/3 with rd as the first source)
+-spec or_(riscv_register(), riscv_register()) -> binary().
+or_(Rd, Src) ->
+    or_(Rd, Rd, Src).
+
+%% XOR - Register bitwise XOR
+%% rd = rs1 ^ rs2; c.xor is used when rd is rs1 and both registers are compressible
+-spec xor_(riscv_register(), riscv_register(), riscv_register()) -> binary().
+xor_(Rd, Rs1, Rs2) ->
+    % The register checks are only evaluated for the in-place form (rd == rs1)
+    InPlace = Rd =:= Rs1 andalso is_compressed_reg(Rd) andalso is_compressed_reg(Rs2),
+    case InPlace of
+        true -> c_xor(Rd, Rs2);
+        % R-type: opcode 0110011 (0x33), funct3 100, funct7 0000000
+        false -> encode_r_type(16#33, Rd, 16#4, Rs1, Rs2, 16#00)
+    end.
+
+%% SLL - Shift Left Logical (register shift amount)
+%% rd = rs1 << rs2[4:0]
+-spec sll(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sll(Rd, Rs1, Rs2) ->
+    % R-type fields as binary literals: opcode 0x33, funct3 001, funct7 0000000
+    encode_r_type(2#0110011, Rd, 2#001, Rs1, Rs2, 2#0000000).
+
+%% SRL - Shift Right Logical (register shift amount)
+%% rd = rs1 >> rs2[4:0] (zero-extend)
+-spec srl(riscv_register(), riscv_register(), riscv_register()) -> binary().
+srl(Rd, Rs1, Rs2) ->
+    % R-type fields as binary literals: opcode 0x33, funct3 101, funct7 0000000
+    encode_r_type(2#0110011, Rd, 2#101, Rs1, Rs2, 2#0000000).
+
+%% SRA - Shift Right Arithmetic (register shift amount)
+%% rd = rs1 >> rs2[4:0] (sign-extend)
+-spec sra(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sra(Rd, Rs1, Rs2) ->
+    % R-type fields as binary literals: opcode 0x33, funct3 101, funct7 0100000
+    encode_r_type(2#0110011, Rd, 2#101, Rs1, Rs2, 2#0100000).
+
+%% SLT - Set Less Than (signed comparison)
+%% rd = (rs1 < rs2) ? 1 : 0
+-spec slt(riscv_register(), riscv_register(), riscv_register()) -> binary().
+slt(Rd, Rs1, Rs2) ->
+    % R-type fields as binary literals: opcode 0x33, funct3 010, funct7 0000000
+    encode_r_type(2#0110011, Rd, 2#010, Rs1, Rs2, 2#0000000).
+
+%% SLTU - Set Less Than (unsigned comparison)
+%% rd = (rs1 < rs2) ? 1 : 0
+-spec sltu(riscv_register(), riscv_register(), riscv_register()) -> binary().
+sltu(Rd, Rs1, Rs2) ->
+    % R-type fields as binary literals: opcode 0x33, funct3 011, funct7 0000000
+    encode_r_type(2#0110011, Rd, 2#011, Rs1, Rs2, 2#0000000).
+
+%%-----------------------------------------------------------------------------
+%% I-type instruction encoding
+%%-----------------------------------------------------------------------------
+
+%% I-type layout, most significant field first:
+%% imm[11:0] (12) | rs1 (5) | funct3 (3) | rd (5) | opcode (7)
+%% Bits:  31-20       19-15     14-12      11-7      6-0
+
+-spec encode_i_type(
+    Opcode :: 0..127,
+    Rd :: riscv_register(),
+    Funct3 :: 0..7,
+    Rs1 :: riscv_register(),
+    Imm :: integer()
+) -> binary().
+encode_i_type(Opcode, Rd, Funct3, Rs1, Imm) ->
+    DestBits = reg_to_num(Rd),
+    BaseBits = reg_to_num(Rs1),
+    % Keep the low 12 bits, i.e. the two's complement field of a negative immediate
+    Imm12 = Imm band 16#FFF,
+    % Append fields from bit 31 down, shifting by the width of each added field
+    Acc1 = (Imm12 bsl 5) bor BaseBits,
+    Acc2 = (Acc1 bsl 3) bor Funct3,
+    Acc3 = (Acc2 bsl 5) bor DestBits,
+    Word = (Acc3 bsl 7) bor Opcode,
+    % Emit the 32-bit word least-significant byte first
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% I-type immediate arithmetic and logical instructions
+%%-----------------------------------------------------------------------------
+
+%% ADDI - Add Immediate
+%% rd = rs1 + imm, with imm a 12-bit signed value
+-spec addi(riscv_register(), riscv_register(), integer()) -> binary().
+addi(Rd, Rd, Imm) when Rd =/= zero, -32 =< Imm, Imm =< 31 ->
+    % In-place on a non-zero register with a 6-bit signed immediate: c.addi
+    c_addi(Rd, Imm);
+addi(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
+    % I-type: opcode 0010011 (0x13), funct3 000
+    encode_i_type(16#13, Rd, 16#0, Rs1, Imm);
+addi(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% ANDI - AND Immediate
+%% rd = rs1 & imm (I-type: opcode 0010011 (0x13), funct3 111)
+-spec andi(riscv_register(), riscv_register(), integer()) -> binary().
+andi(Rd, Rd, Imm) when -32 =< Imm, Imm =< 31 ->
+    % In-place with a 6-bit signed immediate: c.andi if rd is in the compressed set
+    case is_compressed_reg(Rd) of
+        false -> encode_i_type(16#13, Rd, 16#7, Rd, Imm);
+        true -> c_andi(Rd, Imm)
+    end;
+andi(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
+    encode_i_type(16#13, Rd, 16#7, Rs1, Imm);
+andi(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% ORI - OR Immediate
+%% rd = rs1 | imm, with imm a 12-bit signed value
+-spec ori(riscv_register(), riscv_register(), integer()) -> binary().
+ori(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
+    % I-type fields as binary literals: opcode 0x13, funct3 110
+    encode_i_type(2#0010011, Rd, 2#110, Rs1, Imm);
+ori(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% XORI - XOR Immediate
+%% rd = rs1 ^ imm, with imm a 12-bit signed value
+-spec xori(riscv_register(), riscv_register(), integer()) -> binary().
+xori(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
+    % I-type fields as binary literals: opcode 0x13, funct3 100
+    encode_i_type(2#0010011, Rd, 2#100, Rs1, Imm);
+xori(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% SLTI - Set Less Than Immediate (signed comparison)
+%% rd = (rs1 < imm) ? 1 : 0
+-spec slti(riscv_register(), riscv_register(), integer()) -> binary().
+slti(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
+    % I-type fields as binary literals: opcode 0x13, funct3 010
+    encode_i_type(2#0010011, Rd, 2#010, Rs1, Imm);
+slti(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%% SLTIU - Set Less Than Immediate (unsigned comparison)
+%% rd = (rs1 < imm) ? 1 : 0
+-spec sltiu(riscv_register(), riscv_register(), integer()) -> binary().
+sltiu(Rd, Rs1, Imm) when -2048 =< Imm, Imm =< 2047 ->
+    % I-type fields as binary literals: opcode 0x13, funct3 011
+    encode_i_type(2#0010011, Rd, 2#011, Rs1, Imm);
+sltiu(_Rd, _Rs1, Imm) ->
+    error({immediate_out_of_range, Imm, -2048, 2047}).
+
+%%-----------------------------------------------------------------------------
+%% I-type immediate shift instructions
+%%-----------------------------------------------------------------------------
+
+%% SLLI - Shift Left Logical Immediate
+%% rd = rs1 << shamt, with shamt in 0..31
+-spec slli(riscv_register(), riscv_register(), 0..31) -> binary().
+slli(Rd, Rd, Shamt) when Rd =/= zero, 1 =< Shamt, Shamt =< 31 ->
+    % In-place, non-zero register and non-zero shift: the compressed c.slli applies
+    c_slli(Rd, Shamt);
+slli(Rd, Rs1, Shamt) when 0 =< Shamt, Shamt =< 31 ->
+    % I-type: opcode 0010011 (0x13), funct3 001, imm[11:5] = 0000000
+    encode_i_type(16#13, Rd, 16#1, Rs1, Shamt);
+slli(_Rd, _Rs1, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 31}).
+
+%% SRLI - Shift Right Logical Immediate
+%% rd = rs1 >> shamt (zero-extend); I-type opcode 0x13, funct3 101, imm[11:5] = 0000000
+-spec srli(riscv_register(), riscv_register(), 0..31) -> binary().
+srli(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 1, Shamt =< 31 ->
+    % c.srli needs rd == rs1 in the compressed set; shamt = 0 is excluded as in slli/3
+    case is_compressed_reg(Rd) of
+        true -> c_srli(Rd, Shamt);
+        false -> encode_i_type(16#13, Rd, 16#5, Rs1, Shamt)
+    end;
+srli(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
+    encode_i_type(16#13, Rd, 16#5, Rs1, Shamt);
+srli(_Rd, _Rs1, Shamt) ->
+    error({shift_amount_out_of_range, Shamt, 0, 31}).
+
+%% SRAI - Shift Right Arithmetic Immediate
+%% rd = rs1 >> shamt (sign-extend)
+-spec srai(riscv_register(), riscv_register(), 0..31) -> binary().
+srai(Rd, Rs1, Shamt) when Rd =:= Rs1, Shamt >= 1, Shamt =< 31 ->
+    % c.srai needs rd == rs1 in the compressed set. A zero shift is left to the
+    % 32-bit clause below, mirroring slli/3 which also excludes shamt = 0.
+    case is_compressed_reg(Rd) of
+        true -> c_srai(Rd, Shamt);
+        false -> encode_i_type(16#13, Rd, 16#5, Rs1, Shamt bor (1 bsl 10))
+    end;
+srai(Rd, Rs1, Shamt) when Shamt >= 0, Shamt =< 31 ->
+    % Opcode: 0010011 (0x13), Funct3: 101, Imm[11:5] = 0100000
+    % Imm[10] (instruction bit 30) distinguishes SRAI from SRLI
+    ImmWithBit30 = Shamt bor (1 bsl 10),
+    encode_i_type(16#13, Rd, 16#5, Rs1, ImmWithBit30);
+srai(_Rd, _Rs1, Shamt) ->
+    % Anything else is reported with the accepted range
+    error({shift_amount_out_of_range, Shamt, 0, 31}).
+
+%%-----------------------------------------------------------------------------
+%% Load instructions (I-type)
+%%-----------------------------------------------------------------------------
+
+%% LW - Load Word
+%% rd = mem[rs1 + offset] (32-bit)
+%% lw/2 takes the address as {Base, Offset} or as a bare base register (offset 0)
+-spec lw(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+lw(Rd, {Rs1, Offset}) ->
+    lw(Rd, Rs1, Offset);
+lw(Rd, Rs1) when is_atom(Rs1) ->
+    lw(Rd, Rs1, 0).
+
+-spec lw(riscv_register(), riscv_register(), integer()) -> binary().
+lw(Rd, sp, Offset) when Rd =/= zero, Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 ->
+    % Use c.lwsp for loads from sp with a word-aligned offset in 0..252
+    c_lwsp(Rd, Offset);
+lw(Rd, Rs1, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 ->
+    % Use c.lw when both registers are in the compressed set and the offset is aligned
+    case is_compressed_reg(Rd) andalso is_compressed_reg(Rs1) of
+        true -> c_lw(Rd, {Rs1, Offset});
+        false -> encode_i_type(16#03, Rd, 16#2, Rs1, Offset)
+    end;
+lw(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 010
+    encode_i_type(16#03, Rd, 16#2, Rs1, Offset);
+lw(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LH - Load Halfword (sign-extended)
+%% rd = sign_extend(mem[rs1 + offset][15:0])
+%% lh/2 takes the address as {Base, Offset} or as a bare base register (offset 0)
+-spec lh(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+lh(Rd, {Rs1, Offset}) ->
+    lh(Rd, Rs1, Offset);
+lh(Rd, Rs1) when is_atom(Rs1) ->
+    lh(Rd, Rs1, 0).
+
+-spec lh(riscv_register(), riscv_register(), integer()) -> binary().
+lh(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 001
+    encode_i_type(16#03, Rd, 16#1, Rs1, Offset);
+lh(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LHU - Load Halfword Unsigned (zero-extended)
+%% rd = zero_extend(mem[rs1 + offset][15:0])
+%% lhu/2 takes the address as {Base, Offset} or as a bare base register (offset 0)
+-spec lhu(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+lhu(Rd, {Rs1, Offset}) ->
+    lhu(Rd, Rs1, Offset);
+lhu(Rd, Rs1) when is_atom(Rs1) ->
+    lhu(Rd, Rs1, 0).
+
+-spec lhu(riscv_register(), riscv_register(), integer()) -> binary().
+lhu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 101
+    encode_i_type(16#03, Rd, 16#5, Rs1, Offset);
+lhu(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LB - Load Byte (sign-extended)
+%% rd = sign_extend(mem[rs1 + offset][7:0])
+%% lb/2 takes the address as {Base, Offset} or as a bare base register (offset 0)
+-spec lb(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+lb(Rd, {Rs1, Offset}) ->
+    lb(Rd, Rs1, Offset);
+lb(Rd, Rs1) when is_atom(Rs1) ->
+    lb(Rd, Rs1, 0).
+
+-spec lb(riscv_register(), riscv_register(), integer()) -> binary().
+lb(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 000
+    encode_i_type(16#03, Rd, 16#0, Rs1, Offset);
+lb(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% LBU - Load Byte Unsigned (zero-extended)
+%% rd = zero_extend(mem[rs1 + offset][7:0])
+%% lbu/2 takes the address as {Base, Offset} or as a bare base register (offset 0)
+-spec lbu(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+lbu(Rd, {Rs1, Offset}) ->
+    lbu(Rd, Rs1, Offset);
+lbu(Rd, Rs1) when is_atom(Rs1) ->
+    lbu(Rd, Rs1, 0).
+
+-spec lbu(riscv_register(), riscv_register(), integer()) -> binary().
+lbu(Rd, Rs1, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0000011 (0x03), Funct3: 100
+    encode_i_type(16#03, Rd, 16#4, Rs1, Offset);
+lbu(_Rd, _Rs1, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%%-----------------------------------------------------------------------------
+%% S-type instruction encoding (for stores)
+%%-----------------------------------------------------------------------------
+
+%% S-type layout, most significant field first:
+%% imm[11:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:0] (5) | opcode (7)
+%% Bits:  31-25      24-20     19-15     14-12      11-7          6-0
+
+-spec encode_s_type(
+    Opcode :: 0..127,
+    Funct3 :: 0..7,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Imm :: integer()
+) -> binary().
+encode_s_type(Opcode, Funct3, Rs1, Rs2, Imm) ->
+    BaseBits = reg_to_num(Rs1),
+    SrcBits = reg_to_num(Rs2),
+    % The 12-bit immediate straddles the register fields: upper 7 bits, lower 5 bits
+    Imm12 = Imm band 16#FFF,
+    ImmHigh = Imm12 bsr 5,
+    ImmLow = Imm12 band 16#1F,
+    % Append fields from bit 31 down, shifting by the width of each added field
+    Acc1 = (ImmHigh bsl 5) bor SrcBits,
+    Acc2 = (Acc1 bsl 5) bor BaseBits,
+    Acc3 = (Acc2 bsl 3) bor Funct3,
+    Acc4 = (Acc3 bsl 5) bor ImmLow,
+    Word = (Acc4 bsl 7) bor Opcode,
+    % Emit the 32-bit word least-significant byte first
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% Store instructions (S-type)
+%%-----------------------------------------------------------------------------
+
+%% SW - Store Word
+%% mem[rs1 + offset] = rs2[31:0]
+%% sw/2 takes the source first, then {Base, Offset} or a bare base register (offset 0)
+-spec sw(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+sw(Rs2, {Rs1, Offset}) ->
+    sw(Rs1, Rs2, Offset);
+sw(Rs2, Rs1) when is_atom(Rs1) ->
+    sw(Rs1, Rs2, 0).
+
+-spec sw(riscv_register(), riscv_register(), integer()) -> binary().
+sw(sp, Rs2, Offset) when Offset >= 0, Offset =< 252, Offset rem 4 =:= 0 ->
+    % Use c.swsp for stores to sp with a word-aligned offset in 0..252
+    c_swsp(Rs2, Offset);
+sw(Rs1, Rs2, Offset) when Offset >= 0, Offset =< 124, Offset rem 4 =:= 0 ->
+    % Use c.sw when both registers are in the compressed set and the offset is aligned
+    case is_compressed_reg(Rs1) andalso is_compressed_reg(Rs2) of
+        true -> c_sw(Rs2, {Rs1, Offset});
+        false -> encode_s_type(16#23, 16#2, Rs1, Rs2, Offset)
+    end;
+sw(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0100011 (0x23), Funct3: 010
+    encode_s_type(16#23, 16#2, Rs1, Rs2, Offset);
+sw(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% SH - Store Halfword
+%% mem[rs1 + offset][15:0] = rs2[15:0]
+%% sh/2 takes the source first, then {Base, Offset} or a bare base register (offset 0)
+-spec sh(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+sh(Rs2, {Rs1, Offset}) ->
+    sh(Rs1, Rs2, Offset);
+sh(Rs2, Rs1) when is_atom(Rs1) ->
+    sh(Rs1, Rs2, 0).
+
+-spec sh(riscv_register(), riscv_register(), integer()) -> binary().
+sh(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0100011 (0x23), Funct3: 001
+    encode_s_type(16#23, 16#1, Rs1, Rs2, Offset);
+sh(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%% SB - Store Byte
+%% mem[rs1 + offset][7:0] = rs2[7:0]
+%% sb/2 takes the source first, then {Base, Offset} or a bare base register (offset 0)
+-spec sb(riscv_register(), {riscv_register(), integer()} | riscv_register()) -> binary().
+sb(Rs2, {Rs1, Offset}) ->
+    sb(Rs1, Rs2, Offset);
+sb(Rs2, Rs1) when is_atom(Rs1) ->
+    sb(Rs1, Rs2, 0).
+
+-spec sb(riscv_register(), riscv_register(), integer()) -> binary().
+sb(Rs1, Rs2, Offset) when Offset >= -2048, Offset =< 2047 ->
+    % Opcode: 0100011 (0x23), Funct3: 000
+    encode_s_type(16#23, 16#0, Rs1, Rs2, Offset);
+sb(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -2048, 2047}).
+
+%%-----------------------------------------------------------------------------
+%% B-type instruction encoding (for branches)
+%%-----------------------------------------------------------------------------
+
+%% B-type layout, most significant field first:
+%% imm[12|10:5] (7) | rs2 (5) | rs1 (5) | funct3 (3) | imm[4:1|11] (5) | opcode (7)
+%% Bits:  31-25         24-20     19-15     14-12      11-7              6-0
+%%
+%% The immediate is a signed byte offset whose bit 0 is not stored, so branch
+%% targets must be 2-byte aligned.
+%% Range: -4096 to +4094 bytes
+
+-spec encode_b_type(
+    Opcode :: 0..127,
+    Funct3 :: 0..7,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Offset :: integer()
+) -> binary().
+encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset) ->
+    Src1Bits = reg_to_num(Rs1),
+    Src2Bits = reg_to_num(Rs2),
+    % Only the low 13 bits of Offset are used, and bit 0 is never encoded
+    Imm13 = Offset band 16#1FFF,
+    % imm[12] -> instruction bit 31
+    Bit12 = Imm13 bsr 12,
+    % imm[10:5] -> instruction bits 30-25
+    Bits10_5 = (Imm13 bsr 5) band 16#3F,
+    % imm[4:1] -> instruction bits 11-8
+    Bits4_1 = (Imm13 bsr 1) band 16#F,
+    % imm[11] -> instruction bit 7
+    Bit11 = (Imm13 bsr 11) band 1,
+    % Append fields from bit 31 down; each step shifts the accumulated value
+    % left by the width of the field being added
+    Acc1 = (Bit12 bsl 6) bor Bits10_5,
+    Acc2 = (Acc1 bsl 5) bor Src2Bits,
+    Acc3 = (Acc2 bsl 5) bor Src1Bits,
+    Acc4 = (Acc3 bsl 3) bor Funct3,
+    Acc5 = (Acc4 bsl 4) bor Bits4_1,
+    Acc6 = (Acc5 bsl 1) bor Bit11,
+    Word = (Acc6 bsl 7) bor Opcode,
+    % Emit the 32-bit word least-significant byte first
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% Branch instructions (B-type)
+%%-----------------------------------------------------------------------------
+
+%% BEQ - Branch if Equal
+%% if (rs1 == rs2) pc += offset
+-spec beq(riscv_register(), riscv_register(), integer()) -> binary().
+beq(Rs1, zero, Offset) when -256 =< Offset, Offset =< 254, Offset band 1 =:= 0 ->
+    % Short even offset compared against zero: c.beqz if rs1 is in the compressed set
+    case is_compressed_reg(Rs1) of
+        false -> encode_b_type(16#63, 16#0, Rs1, zero, Offset);
+        true -> c_beqz(Rs1, Offset)
+    end;
+beq(Rs1, Rs2, Offset) when -4096 =< Offset, Offset =< 4094, Offset band 1 =:= 0 ->
+    % B-type: opcode 1100011 (0x63), funct3 000
+    encode_b_type(16#63, 16#0, Rs1, Rs2, Offset);
+beq(_Rs1, _Rs2, Offset) when Offset band 1 =:= 1 ->
+    % Odd offsets are rejected before the range is considered
+    error({offset_not_aligned, Offset, 2});
+beq(_Rs1, _Rs2, Offset) ->
+    % Anything else is reported as out of range
+    error({offset_out_of_range, Offset, -4096, 4094}).
+
+%% BNE - Branch if Not Equal
+%% if (rs1 != rs2) pc += offset
+-spec bne(riscv_register(), riscv_register(), integer()) -> binary().
+bne(Rs1, zero, Offset) when -256 =< Offset, Offset =< 254, Offset band 1 =:= 0 ->
+    % Short even offset compared against zero: c.bnez if rs1 is in the compressed set
+    case is_compressed_reg(Rs1) of
+        false -> encode_b_type(16#63, 16#1, Rs1, zero, Offset);
+        true -> c_bnez(Rs1, Offset)
+    end;
+bne(Rs1, Rs2, Offset) when -4096 =< Offset, Offset =< 4094, Offset band 1 =:= 0 ->
+    % B-type: opcode 1100011 (0x63), funct3 001
+    encode_b_type(16#63, 16#1, Rs1, Rs2, Offset);
+bne(_Rs1, _Rs2, Offset) when Offset band 1 =:= 1 ->
+    % Odd offsets are rejected before the range is considered
+    error({offset_not_aligned, Offset, 2});
+bne(_Rs1, _Rs2, Offset) ->
+    % Anything else is reported as out of range
+    error({offset_out_of_range, Offset, -4096, 4094}).
+
+%% BLT - Branch if Less Than (signed)
+%% if (rs1 < rs2) pc += offset
+-spec blt(riscv_register(), riscv_register(), integer()) -> binary().
+blt(Rs1, Rs2, Offset) when -4096 =< Offset, Offset =< 4094, Offset band 1 =:= 0 ->
+    % B-type: opcode 1100011 (0x63), funct3 100
+    encode_b_type(16#63, 16#4, Rs1, Rs2, Offset);
+blt(_Rs1, _Rs2, Offset) when Offset band 1 =:= 1 ->
+    % Odd offsets are rejected before the range is considered
+    error({offset_not_aligned, Offset, 2});
+blt(_Rs1, _Rs2, Offset) ->
+    % Anything else is reported as out of range
+    error({offset_out_of_range, Offset, -4096, 4094}).
+
+%% BGE - Branch if Greater or Equal (signed)
+%% if (rs1 >= rs2) pc += offset
+%% B-type encoding with opcode 0x63 and funct3 101.
+%% The offset must be even and within -4096..4094; otherwise
+%% {offset_not_aligned, ...} or {offset_out_of_range, ...} is raised.
+-spec bge(riscv_register(), riscv_register(), integer()) -> binary().
+bge(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+bge(Rs1, Rs2, Offset) when (Offset rem 2) =:= 0, Offset >= -4096, Offset =< 4094 ->
+    Opcode = 16#63,
+    Funct3 = 16#5,
+    encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset);
+bge(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -4096, 4094}).
+
+%% BLTU - Branch if Less Than Unsigned
+%% if (rs1 < rs2) pc += offset (unsigned comparison)
+%% B-type encoding with opcode 0x63 and funct3 110.
+%% The offset must be even and within -4096..4094; otherwise
+%% {offset_not_aligned, ...} or {offset_out_of_range, ...} is raised.
+-spec bltu(riscv_register(), riscv_register(), integer()) -> binary().
+bltu(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+bltu(Rs1, Rs2, Offset) when (Offset rem 2) =:= 0, Offset >= -4096, Offset =< 4094 ->
+    Opcode = 16#63,
+    Funct3 = 16#6,
+    encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset);
+bltu(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -4096, 4094}).
+
+%% BGEU - Branch if Greater or Equal Unsigned
+%% if (rs1 >= rs2) pc += offset (unsigned comparison)
+%% B-type encoding with opcode 0x63 and funct3 111.
+%% The offset must be even and within -4096..4094; otherwise
+%% {offset_not_aligned, ...} or {offset_out_of_range, ...} is raised.
+-spec bgeu(riscv_register(), riscv_register(), integer()) -> binary().
+bgeu(_Rs1, _Rs2, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+bgeu(Rs1, Rs2, Offset) when (Offset rem 2) =:= 0, Offset >= -4096, Offset =< 4094 ->
+    Opcode = 16#63,
+    Funct3 = 16#7,
+    encode_b_type(Opcode, Funct3, Rs1, Rs2, Offset);
+bgeu(_Rs1, _Rs2, Offset) ->
+    error({offset_out_of_range, Offset, -4096, 4094}).
+
+%%-----------------------------------------------------------------------------
+%% J-type instruction encoding (for JAL)
+%%-----------------------------------------------------------------------------
+
+%% J-type instruction format (JAL):
+%% imm[20|10:1|11|19:12] (20) | rd (5) | opcode (7)
+%% Bits:  31-12                  11-7     6-0
+%%
+%% The offset is truncated to 21 bits; bit 0 is not encoded, so only even
+%% byte offsets are representable.
+%% Range: ±1 MiB (±1048576 bytes)
+%% Returns the 32-bit instruction as a little-endian binary.
+
+-spec encode_j_type(
+    Opcode :: 0..127, Rd :: riscv_register(), Offset :: integer()
+) -> binary().
+encode_j_type(Opcode, Rd, Offset) ->
+    RdField = reg_to_num(Rd) bsl 7,
+    Imm = Offset band 16#1FFFFF,
+    % Each offset slice is shifted straight to its final instruction position
+    % imm[20] -> bit 31
+    Bit31 = ((Imm bsr 20) band 1) bsl 31,
+    % imm[10:1] -> bits 30-21
+    Bits30_21 = ((Imm bsr 1) band 16#3FF) bsl 21,
+    % imm[11] -> bit 20
+    Bit20 = ((Imm bsr 11) band 1) bsl 20,
+    % imm[19:12] -> bits 19-12
+    Bits19_12 = ((Imm bsr 12) band 16#FF) bsl 12,
+    Word = Opcode bor RdField bor Bits19_12 bor Bit20 bor Bits30_21 bor Bit31,
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% U-type instruction encoding (for LUI, AUIPC)
+%%-----------------------------------------------------------------------------
+
+%% U-type instruction format:
+%% imm[31:12] (20) | rd (5) | opcode (7)
+%% Bits:  31-12        11-7     6-0
+%%
+%% Imm is the full (already shifted) immediate: only its bits 31-12 are
+%% kept, the low 12 bits are discarded.
+%% Returns the 32-bit instruction as a little-endian binary.
+
+-spec encode_u_type(
+    Opcode :: 0..127, Rd :: riscv_register(), Imm :: integer()
+) -> binary().
+encode_u_type(Opcode, Rd, Imm) ->
+    RdField = reg_to_num(Rd) bsl 7,
+    UpperField = ((Imm bsr 12) band 16#FFFFF) bsl 12,
+    Word = Opcode bor RdField bor UpperField,
+    <<Word:32/little>>.
+
+%%-----------------------------------------------------------------------------
+%% Jump and link instructions
+%%-----------------------------------------------------------------------------
+
+%% JAL - Jump and Link
+%% rd = pc + 4; pc += offset
+%% Offsets in -2048..2046 are emitted as c.j when rd is `zero` and as c.jal
+%% when rd is `ra` (RV32C only); every other valid combination uses the
+%% J-type encoding (opcode 0x6F).
+%% Raises {offset_not_aligned, Offset, 2} for an odd offset and
+%% {offset_out_of_range, Offset, -1048576, 1048574} for anything else that
+%% cannot be encoded.
+-spec jal(riscv_register(), integer()) -> binary().
+jal(_Rd, Offset) when (Offset rem 2) =/= 0 ->
+    error({offset_not_aligned, Offset, 2});
+jal(zero, Offset) when (Offset rem 2) =:= 0, Offset >= -2048, Offset =< 2046 ->
+    % No link register: plain compressed jump
+    c_j(Offset);
+jal(ra, Offset) when (Offset rem 2) =:= 0, Offset >= -2048, Offset =< 2046 ->
+    % Link into ra: compressed jump-and-link
+    c_jal(Offset);
+jal(Rd, Offset) when (Offset rem 2) =:= 0, Offset >= -1048576, Offset =< 1048574 ->
+    encode_j_type(16#6F, Rd, Offset);
+jal(_Rd, Offset) ->
+    error({offset_out_of_range, Offset, -1048576, 1048574}).
+
+%% JALR - Jump and Link Register
+%% rd = pc + 4; pc = (rs1 + offset) & ~1
+%% With a zero offset and rs1 other than `zero`, rd = `zero` is emitted as
+%% c.jr and rd = `ra` as c.jalr; everything else uses the I-type encoding
+%% (opcode 0x67, funct3 000).
+%% Raises {offset_out_of_range, Offset, -2048, 2047} when the offset does
+%% not fit.
+-spec jalr(riscv_register(), riscv_register(), integer()) -> binary().
+jalr(_Rd, _Rs1, Offset) when Offset < -2048; Offset > 2047 ->
+    error({offset_out_of_range, Offset, -2048, 2047});
+jalr(zero, Rs1, 0) when Rs1 =/= zero ->
+    % Jump to register without link
+    c_jr(Rs1);
+jalr(ra, Rs1, 0) when Rs1 =/= zero ->
+    % Jump to register, linking into ra
+    c_jalr(Rs1);
+jalr(Rd, Rs1, Offset) ->
+    encode_i_type(16#67, Rd, 16#0, Rs1, Offset).
+
+%% JALR - Jump and Link Register (no offset)
+%% rd = pc + 4; pc = rs1 & ~1
+%% Convenience arity that forwards to jalr/3 with an offset of 0.
+-spec jalr(riscv_register(), riscv_register()) -> binary().
+jalr(Rd, Rs1) ->
+    NoOffset = 0,
+    jalr(Rd, Rs1, NoOffset).
+
+%%-----------------------------------------------------------------------------
+%% Upper immediate instructions
+%%-----------------------------------------------------------------------------
+
+%% LUI - Load Upper Immediate
+%% rd = imm << 12
+%% Imm is the 20-bit signed upper immediate (-16#80000..16#7FFFF), i.e. the
+%% value before shifting. A non-zero Imm in -32..31 is emitted as c.lui
+%% unless rd is `zero` or `sp`: c_lui/2 rejects both registers (the rd=sp
+%% encoding is used by c.addi16sp), so those fall back to the 32-bit form
+%% (opcode 0x37) instead of raising.
+%% Raises {immediate_out_of_range, Imm, -16#80000, 16#7FFFF} otherwise.
+-spec lui(riscv_register(), integer()) -> binary().
+lui(Rd, Imm) when Rd =/= zero, Rd =/= sp, Imm >= -32, Imm =< 31, Imm =/= 0 ->
+    % Use c.lui when rd is neither zero nor sp and imm is a non-zero 6-bit signed value
+    c_lui(Rd, Imm);
+lui(Rd, Imm) when Imm >= -16#80000, Imm =< 16#7FFFF ->
+    % Opcode: 0110111 (0x37)
+    encode_u_type(16#37, Rd, Imm bsl 12);
+lui(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF}).
+
+%% AUIPC - Add Upper Immediate to PC
+%% rd = pc + (imm << 12)
+%% Imm is the 20-bit signed upper immediate before shifting; it is encoded
+%% as a U-type instruction with opcode 0010111 (0x17).
+%% Raises {immediate_out_of_range, Imm, -16#80000, 16#7FFFF} when Imm does
+%% not fit.
+-spec auipc(riscv_register(), integer()) -> binary().
+auipc(_Rd, Imm) when Imm < -16#80000; Imm > 16#7FFFF ->
+    error({immediate_out_of_range, Imm, -16#80000, 16#7FFFF});
+auipc(Rd, Imm) ->
+    Shifted = Imm bsl 12,
+    encode_u_type(16#17, Rd, Shifted).
+
+%%-----------------------------------------------------------------------------
+%% Pseudo-instructions
+%%-----------------------------------------------------------------------------
+%% These are convenience instructions that map to actual RV32I instructions
+
+%% NOP - No Operation
+%% Expands to: addi x0, x0, 0
+%% Delegates to addi/3, which decides the concrete encoding.
+-spec nop() -> binary().
+nop() ->
+    X0 = zero,
+    addi(X0, X0, 0).
+
+%% LI - Load Immediate
+%% Load a 32-bit immediate value into a register.
+%%   -32..31 with rd other than zero : c.li rd, imm
+%%   -2048..2047                     : addi rd, x0, imm
+%%   any other 32-bit signed value   : lui rd, upper; addi rd, rd, lower
+%% Raises {immediate_out_of_range, Imm, -16#80000000, 16#7FFFFFFF} for
+%% values outside the signed 32-bit range.
+-spec li(riscv_register(), integer()) -> binary().
+li(Rd, Imm) when Rd =/= zero, Imm >= -32, Imm =< 31 ->
+    c_li(Rd, Imm);
+li(Rd, Imm) when Imm >= -2048, Imm =< 2047 ->
+    addi(Rd, zero, Imm);
+li(Rd, Imm) when Imm >= -16#80000000, Imm =< 16#7FFFFFFF ->
+    Low12 = Imm band 16#FFF,
+    % The low part is applied with addi, whose immediate is a signed 12-bit
+    % value: when bit 11 is set it is loaded as a negative number and the
+    % upper part is bumped by one to compensate.
+    {Hi, Lo} =
+        case Low12 >= 16#800 of
+            true -> {(Imm bsr 12) + 1, Low12 - 16#1000};
+            false -> {Imm bsr 12, Low12}
+        end,
+    % Wrap the upper part to 20 bits and reinterpret it as signed, which is
+    % the range lui/2 accepts.
+    Hi20 = Hi band 16#FFFFF,
+    HiSigned =
+        case Hi20 band 16#80000 of
+            0 -> Hi20;
+            _ -> Hi20 - 16#100000
+        end,
+    <<(lui(Rd, HiSigned))/binary, (addi(Rd, Rd, Lo))/binary>>;
+li(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, -16#80000000, 16#7FFFFFFF}).
+
+%% MV - Move (copy register)
+%% Expands to c.mv rd, rs when neither register is `zero`, and to
+%% addi rd, rs, 0 as soon as one of them is.
+-spec mv(riscv_register(), riscv_register()) -> binary().
+mv(zero, Rs) ->
+    addi(zero, Rs, 0);
+mv(Rd, zero) ->
+    addi(Rd, zero, 0);
+mv(Rd, Rs) ->
+    c_mv(Rd, Rs).
+
+%% NOT - Bitwise NOT
+%% Expands to: xori rd, rs, -1
+%% (XOR with an all-ones immediate flips every bit of rs.)
+-spec not_(riscv_register(), riscv_register()) -> binary().
+not_(Rd, Rs) ->
+    AllOnes = -1,
+    xori(Rd, Rs, AllOnes).
+
+%% NEG - Negate (two's complement)
+%% Expands to: sub rd, x0, rs (rd = 0 - rs)
+-spec neg(riscv_register(), riscv_register()) -> binary().
+neg(Rd, Rs) ->
+    X0 = zero,
+    sub(Rd, X0, Rs).
+
+%% J - Unconditional Jump
+%% Expands to: jal x0, offset
+%% Offset validation and encoding selection are those of jal/2.
+-spec j(integer()) -> binary().
+j(Offset) ->
+    NoLink = zero,
+    jal(NoLink, Offset).
+
+%% JR - Jump Register
+%% Expands to: jalr x0, rs, 0
+%% Goes through jalr/2, which supplies the zero offset.
+-spec jr(riscv_register()) -> binary().
+jr(Rs) ->
+    jalr(zero, Rs).
+
+%% RET - Return from subroutine
+%% Expands to: jalr x0, ra, 0 (a register jump through ra, without link)
+-spec ret() -> binary().
+ret() ->
+    jr(ra).
+
+%% CALL - Call function (far call using AUIPC + JALR)
+%% This is a two-instruction sequence for calling functions beyond JAL range.
+%% Expands to: auipc rd, upper20; jalr ra, rd, lower12
+%%
+%% Rd is the scratch register that receives the AUIPC result; the return
+%% address is always linked into ra. Offset is a signed 32-bit byte offset
+%% relative to the AUIPC instruction. It is split so that
+%% (upper20 << 12) + lower12 equals Offset modulo 2^32, where lower12 is the
+%% sign-extended low 12 bits.
+%%
+%% The upper part is sign-extended into the -16#80000..16#7FFFF range that
+%% auipc/2 accepts. A plain 20-bit mask would yield 16#80000..16#FFFFF for
+%% every negative offset, which auipc/2 rejects as out of range.
+%%
+%% Raises {offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF} when
+%% Offset is not a signed 32-bit value.
+-spec call(riscv_register(), integer()) -> binary().
+call(Rd, Offset) when Offset >= -16#80000000, Offset =< 16#7FFFFFFF ->
+    Lower = Offset band 16#FFF,
+    % jalr takes a signed 12-bit offset: a low part with bit 11 set is
+    % applied as a negative value
+    LowerSigned =
+        if
+            Lower >= 16#800 -> Lower - 16#1000;
+            true -> Lower
+        end,
+    % Offset - LowerSigned is a multiple of 4096; this already includes the
+    % +1 carry into the upper part when the low part is negative
+    UpperMasked = ((Offset - LowerSigned) bsr 12) band 16#FFFFF,
+    % Reinterpret the 20-bit field as signed so auipc/2 accepts it
+    Upper =
+        if
+            UpperMasked band 16#80000 =/= 0 -> UpperMasked - 16#100000;
+            true -> UpperMasked
+        end,
+    AuipcInstr = auipc(Rd, Upper),
+    JalrInstr = jalr(ra, Rd, LowerSigned),
+    <<AuipcInstr/binary, JalrInstr/binary>>;
+call(_Rd, Offset) ->
+    error({offset_out_of_range, Offset, -16#80000000, 16#7FFFFFFF}).
+
+%% MUL - Multiply (RV32M extension)
+%% Multiplies rs1 by rs2 and places the lower 32 bits in rd
+%% Format: mul rd, rs1, rs2
+%% Encoding: R-type with opcode=0110011 (0x33), funct3=000, funct7=0000001
+-spec mul(riscv_register(), riscv_register(), riscv_register()) -> binary().
+mul(Rd, Rs1, Rs2) ->
+    Opcode = 16#33,
+    Funct3 = 16#0,
+    Funct7 = 16#01,
+    encode_r_type(Opcode, Rd, Funct3, Rs1, Rs2, Funct7).
+
+%%-----------------------------------------------------------------------------
+%% C Extension (RV32C) - Compressed Instructions
+%%-----------------------------------------------------------------------------
+%% The C extension adds 16-bit compressed instructions to reduce code size.
+%% All compressed instructions are 16 bits (2 bytes) and use a different
+%% encoding format from the base 32-bit instructions.
+%%
+%% Register encoding for compressed instructions:
+%% - Some instructions use the full 5-bit register encoding (x0-x31)
+%% - Others use 3-bit encoding for registers x8-x15 (s0, s1, a0-a5)
+%%   This is called the "compressed register set" or "C register set"
+%%
+%% Instruction formats:
+%% - CR (Register): funct4 | rd/rs1 | rs2 | op
+%% - CI (Immediate): funct3 | imm | rd/rs1 | imm | op
+%% - CSS (Stack Store): funct3 | imm | rs2 | op
+%% - CIW (Wide Immediate): funct3 | imm | rd' | op
+%% - CL (Load): funct3 | imm | rs1' | imm | rd' | op
+%% - CS (Store): funct3 | imm | rs1' | imm | rs2' | op
+%% - CA (Arithmetic): funct6 | rd'/rs1' | funct2 | rs2' | op
+%% - CB (Branch): funct3 | offset | rs1' | offset | op
+%% - CJ (Jump): funct3 | jump target | op
+%%
+%% See: RISC-V Instruction Set Manual, Volume I, Chapter 16
+%%-----------------------------------------------------------------------------
+
+%% Map a register of the compressed register set to its 3-bit encoding.
+%% s0 (alias fp), s1 and a0-a5 map to 0-7; these are x8-x15.
+%% Raises {register_not_in_compressed_set, Reg, ...} for any other register.
+-spec reg_to_c_num(riscv_register()) -> 0..7.
+reg_to_c_num(Reg) ->
+    case Reg of
+        s0 -> 0;
+        fp -> 0;
+        s1 -> 1;
+        a0 -> 2;
+        a1 -> 3;
+        a2 -> 4;
+        a3 -> 5;
+        a4 -> 6;
+        a5 -> 7;
+        _ -> error({register_not_in_compressed_set, Reg, 's0/fp, s1, a0-a5'})
+    end.
+
+%% Tell whether a register belongs to the compressed register set
+%% (s0/fp, s1, a0-a5), i.e. whether reg_to_c_num/1 can encode it.
+-spec is_compressed_reg(riscv_register()) -> boolean().
+is_compressed_reg(Reg) when
+    Reg =:= s0;
+    Reg =:= fp;
+    Reg =:= s1;
+    Reg =:= a0;
+    Reg =:= a1;
+    Reg =:= a2;
+    Reg =:= a3;
+    Reg =:= a4;
+    Reg =:= a5
+->
+    true;
+is_compressed_reg(_Other) ->
+    false.
+
+%%-----------------------------------------------------------------------------
+%% CR-type instruction encoding (Compressed Register format)
+%%-----------------------------------------------------------------------------
+%% CR format: funct4 (4) | rd/rs1 (5) | rs2 (5) | op (2)
+%% Bits:      15-12         11-7          6-2       1-0
+%% Both registers use their full register number (reg_to_num/1).
+%% Returns the 16-bit instruction as a little-endian binary.
+
+-spec encode_cr_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Funct4 :: 0..15
+) -> binary().
+encode_cr_type(Opcode, Rd, Rs2, Funct4) ->
+    RdField = reg_to_num(Rd) bsl 7,
+    Rs2Field = reg_to_num(Rs2) bsl 2,
+    Halfword = Opcode bor Rs2Field bor RdField bor (Funct4 bsl 12),
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CI-type instruction encoding (Compressed Immediate format)
+%%-----------------------------------------------------------------------------
+%% CI format: funct3 (3) | imm[5] (1) | rd/rs1 (5) | imm[4:0] (5) | op (2)
+%% Bits:      15-13        12            11-7          6-2            1-0
+%% The immediate is truncated to 6 bits; callers that need a scrambled
+%% immediate layout pass the already-arranged 6-bit value.
+%% Returns the 16-bit instruction as a little-endian binary.
+
+-spec encode_ci_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_ci_type(Opcode, Rd, Imm, Funct3) ->
+    RdField = reg_to_num(Rd) bsl 7,
+    Imm6 = Imm band 16#3F,
+    % imm[5] -> bit 12
+    HighBit = (Imm6 bsr 5) bsl 12,
+    % imm[4:0] -> bits 6-2
+    LowBits = (Imm6 band 16#1F) bsl 2,
+    Halfword = Opcode bor LowBits bor RdField bor HighBit bor (Funct3 bsl 13),
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CSS-type instruction encoding (Compressed Stack Store format)
+%%-----------------------------------------------------------------------------
+%% CSS format: funct3 (3) | imm[5:0] (6) | rs2 (5) | op (2)
+%% Bits:       15-13        12-7           6-2       1-0
+%% The immediate is truncated to 6 bits and placed as-is; any scaling or
+%% bit rearrangement is the caller's job.
+%% Returns the 16-bit instruction as a little-endian binary.
+
+-spec encode_css_type(
+    Opcode :: 0..3,
+    Rs2 :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_css_type(Opcode, Rs2, Imm, Funct3) ->
+    Rs2Field = reg_to_num(Rs2) bsl 2,
+    ImmField = (Imm band 16#3F) bsl 7,
+    Halfword = Opcode bor Rs2Field bor ImmField bor (Funct3 bsl 13),
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CIW-type instruction encoding (Compressed Wide Immediate format)
+%%-----------------------------------------------------------------------------
+%% CIW format: funct3 (3) | imm[7:0] (8) | rd' (3) | op (2)
+%% Bits:       15-13        12-5           4-2       1-0
+%% rd' uses the 3-bit compressed register encoding (reg_to_c_num/1); the
+%% immediate is truncated to 8 bits and placed as-is.
+%% Returns the 16-bit instruction as a little-endian binary.
+
+-spec encode_ciw_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_ciw_type(Opcode, Rd, Imm, Funct3) ->
+    RdField = reg_to_c_num(Rd) bsl 2,
+    ImmField = (Imm band 16#FF) bsl 5,
+    Halfword = Opcode bor RdField bor ImmField bor (Funct3 bsl 13),
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CL-type instruction encoding (Compressed Load format)
+%%-----------------------------------------------------------------------------
+%% CL format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rd' (3) | op (2)
+%% Bits:      15-13        12-10     9-7        6-5       4-2       1-0
+%% Both registers use the 3-bit compressed encoding (reg_to_c_num/1).
+%% The immediate is laid out as for LW: imm[5:3] in bits 12-10, imm[2] in
+%% bit 6 and imm[6] in bit 5.
+%% Returns the 16-bit instruction as a little-endian binary.
+
+-spec encode_cl_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Rs1 :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cl_type(Opcode, Rd, Rs1, Imm, Funct3) ->
+    RdField = reg_to_c_num(Rd) bsl 2,
+    Rs1Field = reg_to_c_num(Rs1) bsl 7,
+    Uimm = Imm band 16#7F,
+    Bits12_10 = ((Uimm bsr 3) band 7) bsl 10,
+    Bit6 = ((Uimm bsr 2) band 1) bsl 6,
+    Bit5 = ((Uimm bsr 6) band 1) bsl 5,
+    Halfword =
+        Opcode bor RdField bor Bit5 bor Bit6 bor Rs1Field bor Bits12_10 bor (Funct3 bsl 13),
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CS-type instruction encoding (Compressed Store format)
+%%-----------------------------------------------------------------------------
+%% CS format: funct3 (3) | imm (3) | rs1' (3) | imm (2) | rs2' (3) | op (2)
+%% Bits:      15-13        12-10     9-7        6-5       4-2        1-0
+%% Both registers use the 3-bit compressed encoding (reg_to_c_num/1).
+%% The immediate is laid out as for SW: imm[5:3] in bits 12-10, imm[2] in
+%% bit 6 and imm[6] in bit 5.
+%% Returns the 16-bit instruction as a little-endian binary.
+
+-spec encode_cs_type(
+    Opcode :: 0..3,
+    Rs1 :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Imm :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cs_type(Opcode, Rs1, Rs2, Imm, Funct3) ->
+    Rs1Field = reg_to_c_num(Rs1) bsl 7,
+    Rs2Field = reg_to_c_num(Rs2) bsl 2,
+    Uimm = Imm band 16#7F,
+    Bits12_10 = ((Uimm bsr 3) band 7) bsl 10,
+    Bit6 = ((Uimm bsr 2) band 1) bsl 6,
+    Bit5 = ((Uimm bsr 6) band 1) bsl 5,
+    Halfword =
+        Opcode bor Rs2Field bor Bit5 bor Bit6 bor Rs1Field bor Bits12_10 bor (Funct3 bsl 13),
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CA-type instruction encoding (Compressed Arithmetic format)
+%%-----------------------------------------------------------------------------
+%% CA format: funct6 (6) | rd'/rs1' (3) | funct2 (2) | rs2' (3) | op (2)
+%% Bits:      15-10        9-7             6-5          4-2        1-0
+%% Both registers use the 3-bit compressed encoding (reg_to_c_num/1).
+%% Returns the 16-bit instruction as a little-endian binary.
+
+-spec encode_ca_type(
+    Opcode :: 0..3,
+    Rd :: riscv_register(),
+    Rs2 :: riscv_register(),
+    Funct2 :: 0..3,
+    Funct6 :: 0..63
+) -> binary().
+encode_ca_type(Opcode, Rd, Rs2, Funct2, Funct6) ->
+    RdField = reg_to_c_num(Rd) bsl 7,
+    Rs2Field = reg_to_c_num(Rs2) bsl 2,
+    Halfword = Opcode bor Rs2Field bor (Funct2 bsl 5) bor RdField bor (Funct6 bsl 10),
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CB-type instruction encoding (Compressed Branch format)
+%%-----------------------------------------------------------------------------
+%% CB format: funct3 (3) | offset (3) | rs1' (3) | offset (5) | op (2)
+%% Bits:      15-13        12-10        9-7        6-2          1-0
+%% Offset encoding: offset[8|4:3] -> bits [12|11:10],
+%%                  offset[7:6|2:1|5] -> bits [6:5|4:3|2]
+%% rs1' uses the 3-bit compressed encoding (reg_to_c_num/1). The offset is
+%% truncated to 9 bits and bit 0 is not encoded.
+%% Returns the 16-bit instruction as a little-endian binary.
+
+-spec encode_cb_type(
+    Opcode :: 0..3,
+    Rs1 :: riscv_register(),
+    Offset :: integer(),
+    Funct3 :: 0..7
+) -> binary().
+encode_cb_type(Opcode, Rs1, Offset, Funct3) ->
+    Rs1Field = reg_to_c_num(Rs1) bsl 7,
+    Off = Offset band 16#1FF,
+    % Each offset slice is shifted straight to its final instruction position
+    Bit12 = ((Off bsr 8) band 1) bsl 12,
+    Bits11_10 = ((Off bsr 3) band 3) bsl 10,
+    Bits6_5 = ((Off bsr 6) band 3) bsl 5,
+    Bits4_3 = ((Off bsr 1) band 3) bsl 3,
+    Bit2 = ((Off bsr 5) band 1) bsl 2,
+    Halfword =
+        Opcode bor Bit2 bor Bits4_3 bor Bits6_5 bor Rs1Field bor Bits11_10 bor Bit12 bor
+            (Funct3 bsl 13),
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% CJ-type instruction encoding (Compressed Jump format)
+%%-----------------------------------------------------------------------------
+%% CJ format: funct3 (3) | jump target (11) | op (2)
+%% Bits:      15-13        12-2              1-0
+%% Target encoding: target[11|4|9:8|10|6|7|3:1|5] -> bits [12|11|10:9|8|7|6|5:3|2]
+
+%% Encode a CJ-type (compressed jump) instruction.
+%% The offset is truncated to 12 bits, bit 0 is not encoded, and the other
+%% bits are scattered as offset[11|4|9:8|10|6|7|3:1|5] over instruction
+%% bits [12|11|10:9|8|7|6|5:3|2].
+%% Returns the 16-bit instruction as a little-endian binary.
+-spec encode_cj_type(Opcode :: 0..3, Offset :: integer(), Funct3 :: 0..7) -> binary().
+encode_cj_type(Opcode, Offset, Funct3) ->
+    Off = Offset band 16#FFF,
+    % Each offset slice is shifted straight to its final instruction position
+    Bit12 = ((Off bsr 11) band 1) bsl 12,
+    Bit11 = ((Off bsr 4) band 1) bsl 11,
+    Bits10_9 = ((Off bsr 8) band 3) bsl 9,
+    Bit8 = ((Off bsr 10) band 1) bsl 8,
+    Bit7 = ((Off bsr 6) band 1) bsl 7,
+    Bit6 = ((Off bsr 7) band 1) bsl 6,
+    Bits5_3 = ((Off bsr 1) band 7) bsl 3,
+    Bit2 = ((Off bsr 5) band 1) bsl 2,
+    Target =
+        Bit12 bor Bit11 bor Bits10_9 bor Bit8 bor Bit7 bor Bit6 bor Bits5_3 bor Bit2,
+    Halfword = Opcode bor Target bor (Funct3 bsl 13),
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Arithmetic and Logical Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.ADD - Compressed Add
+%% rd = rd + rs2; both operands are encoded as full 5-bit registers.
+%% Format: CR-type
+%% Encoding: funct4=1001 (0x9), op=10 (0x2)
+-spec c_add(riscv_register(), riscv_register()) -> binary().
+c_add(Rd, Rs2) ->
+    Op = 16#2,
+    Funct4 = 16#9,
+    encode_cr_type(Op, Rd, Rs2, Funct4).
+
+%% C.MV - Compressed Move (copy register)
+%% rd = rs2; both operands are encoded as full 5-bit registers.
+%% Format: CR-type
+%% Encoding: funct4=1000 (0x8), op=10 (0x2)
+-spec c_mv(riscv_register(), riscv_register()) -> binary().
+c_mv(Rd, Rs2) ->
+    Op = 16#2,
+    Funct4 = 16#8,
+    encode_cr_type(Op, Rd, Rs2, Funct4).
+
+%% C.SUB - Compressed Subtract
+%% rd' = rd' - rs2'; both operands use the 3-bit compressed register encoding.
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=00, op=01 (0x1)
+-spec c_sub(riscv_register(), riscv_register()) -> binary().
+c_sub(Rd, Rs2) ->
+    Op = 16#1,
+    Funct2 = 16#0,
+    Funct6 = 16#23,
+    encode_ca_type(Op, Rd, Rs2, Funct2, Funct6).
+
+%% C.AND - Compressed Bitwise AND
+%% rd' = rd' & rs2'; both operands use the 3-bit compressed register encoding.
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=11, op=01 (0x1)
+-spec c_and(riscv_register(), riscv_register()) -> binary().
+c_and(Rd, Rs2) ->
+    Op = 16#1,
+    Funct2 = 16#3,
+    Funct6 = 16#23,
+    encode_ca_type(Op, Rd, Rs2, Funct2, Funct6).
+
+%% C.OR - Compressed Bitwise OR
+%% rd' = rd' | rs2'; both operands use the 3-bit compressed register encoding.
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=10, op=01 (0x1)
+-spec c_or(riscv_register(), riscv_register()) -> binary().
+c_or(Rd, Rs2) ->
+    Op = 16#1,
+    Funct2 = 16#2,
+    Funct6 = 16#23,
+    encode_ca_type(Op, Rd, Rs2, Funct2, Funct6).
+
+%% C.XOR - Compressed Bitwise XOR
+%% rd' = rd' ^ rs2'; both operands use the 3-bit compressed register encoding.
+%% Format: CA-type
+%% Encoding: funct6=100011 (0x23), funct2=01, op=01 (0x1)
+-spec c_xor(riscv_register(), riscv_register()) -> binary().
+c_xor(Rd, Rs2) ->
+    Op = 16#1,
+    Funct2 = 16#1,
+    Funct6 = 16#23,
+    encode_ca_type(Op, Rd, Rs2, Funct2, Funct6).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Immediate Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.ADDI - Compressed Add Immediate
+%% rd = rd + imm (rd is a full 5-bit register, imm is 6-bit signed)
+%% Format: CI-type
+%% Encoding: funct3=000, op=01 (0x1)
+%% Raises {invalid_compressed_instruction, c_addi, ...} when rd is `zero`
+%% and {immediate_out_of_range, Imm, -32, 31} when imm does not fit.
+-spec c_addi(riscv_register(), integer()) -> binary().
+c_addi(zero, _Imm) ->
+    error({invalid_compressed_instruction, c_addi, 'rd cannot be zero'});
+c_addi(_Rd, Imm) when Imm < -32; Imm > 31 ->
+    error({immediate_out_of_range, Imm, -32, 31});
+c_addi(Rd, Imm) ->
+    encode_ci_type(16#1, Rd, Imm, 16#0).
+
+%% C.ANDI - Compressed AND Immediate
+%% rd' = rd' & imm (rd' uses the 3-bit encoding, imm is 6-bit signed)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, imm[5]=bit12, funct2=10, imm[4:0]=bits 6:2, op=01
+%% Raises {immediate_out_of_range, Imm, -32, 31} when imm does not fit.
+-spec c_andi(riscv_register(), integer()) -> binary().
+c_andi(_Rd, Imm) when Imm < -32; Imm > 31 ->
+    error({immediate_out_of_range, Imm, -32, 31});
+c_andi(Rd, Imm) ->
+    RdField = reg_to_c_num(Rd) bsl 7,
+    Imm6 = Imm band 16#3F,
+    Funct3 = 16#4 bsl 13,
+    Funct2 = 16#2 bsl 10,
+    HighBit = ((Imm6 bsr 5) band 1) bsl 12,
+    LowBits = (Imm6 band 16#1F) bsl 2,
+    Halfword = Funct3 bor HighBit bor Funct2 bor RdField bor LowBits bor 16#1,
+    <<Halfword:16/little>>.
+
+%% C.LI - Compressed Load Immediate
+%% rd = imm (rd is a full 5-bit register, imm is 6-bit signed)
+%% Format: CI-type
+%% Encoding: funct3=010, op=01 (0x1)
+%% Raises {immediate_out_of_range, Imm, -32, 31} when imm does not fit.
+-spec c_li(riscv_register(), integer()) -> binary().
+c_li(_Rd, Imm) when Imm < -32; Imm > 31 ->
+    error({immediate_out_of_range, Imm, -32, 31});
+c_li(Rd, Imm) ->
+    encode_ci_type(16#1, Rd, Imm, 16#2).
+
+%% C.LUI - Compressed Load Upper Immediate
+%% rd = imm << 12 (rd is a full 5-bit register, imm is 6-bit signed non-zero)
+%% Format: CI-type
+%% Encoding: funct3=011, op=01 (0x1)
+%% Register restrictions are checked before the immediate:
+%%   rd = zero or sp -> {invalid_compressed_instruction, c_lui, ...}
+%%   imm = 0         -> {invalid_compressed_instruction, c_lui, ...}
+%%   imm not in -32..31 -> {immediate_out_of_range, Imm, -32, 31}
+-spec c_lui(riscv_register(), integer()) -> binary().
+c_lui(zero, _Imm) ->
+    error({invalid_compressed_instruction, c_lui, 'rd cannot be zero or sp'});
+c_lui(sp, _Imm) ->
+    error({invalid_compressed_instruction, c_lui, 'rd cannot be zero or sp'});
+c_lui(_Rd, 0) ->
+    error({invalid_compressed_instruction, c_lui, 'immediate cannot be zero'});
+c_lui(_Rd, Imm) when Imm < -32; Imm > 31 ->
+    error({immediate_out_of_range, Imm, -32, 31});
+c_lui(Rd, Imm) ->
+    encode_ci_type(16#1, Rd, Imm, 16#3).
+
+%% C.ADDI16SP - Compressed Add Immediate to SP (scaled by 16)
+%% sp = sp + imm (imm is 10-bit signed, must be a non-zero multiple of 16)
+%% Format: CI-type (special encoding)
+%% Encoding: funct3=011, rd/rs1=sp (x2), op=01
+%% Immediate encoding: nzimm[9|4|6|8:7|5] -> bits [12|6|5|4:3|2]
+%% Raises {invalid_compressed_instruction, c_addi16sp, ...} for 0,
+%% {immediate_not_aligned, Imm, 16} for a value that is not a multiple of
+%% 16 and {immediate_out_of_range, Imm, -512, 496} otherwise.
+-spec c_addi16sp(integer()) -> binary().
+c_addi16sp(0) ->
+    error({invalid_compressed_instruction, c_addi16sp, 'immediate cannot be zero'});
+c_addi16sp(Imm) when (Imm rem 16) =/= 0 ->
+    error({immediate_not_aligned, Imm, 16});
+c_addi16sp(Imm) when (Imm rem 16) =:= 0, Imm =/= 0, Imm >= -512, Imm =< 496 ->
+    Nz = Imm band 16#3FF,
+    % Arrange the bits in the order the CI immediate field expects them:
+    % field bit 5 lands in instruction bit 12, field bits 4:0 in bits 6:2
+    Field =
+        (((Nz bsr 9) band 1) bsl 5) bor
+            (((Nz bsr 4) band 1) bsl 4) bor
+            (((Nz bsr 6) band 1) bsl 3) bor
+            (((Nz bsr 7) band 3) bsl 1) bor
+            ((Nz bsr 5) band 1),
+    encode_ci_type(16#1, sp, Field, 16#3);
+c_addi16sp(Imm) ->
+    error({immediate_out_of_range, Imm, -512, 496}).
+
+%% C.ADDI4SPN - Compressed Add Immediate (scaled by 4) to SP, store in rd'
+%% rd' = sp + imm (imm is 10-bit unsigned, must be a non-zero multiple of 4)
+%% Format: CIW-type
+%% Encoding: funct3=000, op=00 (0x0)
+%% Immediate encoding: nzuimm[5:4|9:6|2|3] -> bits [12:11|10:7|6|5]
+%% Raises {invalid_compressed_instruction, c_addi4spn, ...} for 0,
+%% {immediate_not_aligned, Imm, 4} for a value that is not a multiple of 4
+%% and {immediate_out_of_range, Imm, 4, 1020} otherwise.
+-spec c_addi4spn(riscv_register(), integer()) -> binary().
+c_addi4spn(_Rd, 0) ->
+    error({invalid_compressed_instruction, c_addi4spn, 'immediate cannot be zero'});
+c_addi4spn(_Rd, Imm) when (Imm rem 4) =/= 0 ->
+    error({immediate_not_aligned, Imm, 4});
+c_addi4spn(Rd, Imm) when (Imm rem 4) =:= 0, Imm >= 4, Imm =< 1020 ->
+    U = Imm band 16#3FF,
+    % Arrange the bits in the order the 8-bit CIW immediate field expects
+    Field =
+        (((U bsr 4) band 3) bsl 6) bor
+            (((U bsr 6) band 15) bsl 2) bor
+            (((U bsr 2) band 1) bsl 1) bor
+            ((U bsr 3) band 1),
+    encode_ciw_type(16#0, Rd, Field, 16#0);
+c_addi4spn(_Rd, Imm) ->
+    error({immediate_out_of_range, Imm, 4, 1020}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Shift Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.SLLI - Compressed Shift Left Logical Immediate
+%% rd = rd << shamt (rd is a full 5-bit register, shamt is 6-bit unsigned)
+%% Format: CI-type
+%% Encoding: funct3=000, op=10 (0x2)
+%% Raises {invalid_compressed_instruction, c_slli, ...} when rd is `zero`
+%% and {shift_amount_out_of_range, Shamt, 0, 63} when shamt does not fit.
+-spec c_slli(riscv_register(), 0..63) -> binary().
+c_slli(zero, _Shamt) ->
+    error({invalid_compressed_instruction, c_slli, 'rd cannot be zero'});
+c_slli(_Rd, Shamt) when Shamt < 0; Shamt > 63 ->
+    error({shift_amount_out_of_range, Shamt, 0, 63});
+c_slli(Rd, Shamt) ->
+    encode_ci_type(16#2, Rd, Shamt, 16#0).
+
+%% C.SRLI - Compressed Shift Right Logical Immediate
+%% rd' = rd' >> shamt (rd' uses the 3-bit encoding, shamt is 6-bit unsigned)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, shamt[5]=bit12, funct2=00, shamt[4:0]=bits 6:2, op=01
+%% Raises {shift_amount_out_of_range, Shamt, 0, 63} when shamt does not fit.
+-spec c_srli(riscv_register(), 0..63) -> binary().
+c_srli(_Rd, Shamt) when Shamt < 0; Shamt > 63 ->
+    error({shift_amount_out_of_range, Shamt, 0, 63});
+c_srli(Rd, Shamt) ->
+    RdField = reg_to_c_num(Rd) bsl 7,
+    Funct3 = 16#4 bsl 13,
+    Funct2 = 16#0 bsl 10,
+    HighBit = ((Shamt bsr 5) band 1) bsl 12,
+    LowBits = (Shamt band 16#1F) bsl 2,
+    Halfword = Funct3 bor HighBit bor Funct2 bor RdField bor LowBits bor 16#1,
+    <<Halfword:16/little>>.
+
+%% C.SRAI - Compressed Shift Right Arithmetic Immediate
+%% rd' = rd' >> shamt (sign-extending; rd' uses the 3-bit encoding, shamt is
+%% 6-bit unsigned)
+%% Format: CB-type (with special encoding)
+%% Encoding: funct3=100, shamt[5]=bit12, funct2=01, shamt[4:0]=bits 6:2, op=01
+%% Raises {shift_amount_out_of_range, Shamt, 0, 63} when shamt does not fit.
+-spec c_srai(riscv_register(), 0..63) -> binary().
+c_srai(_Rd, Shamt) when Shamt < 0; Shamt > 63 ->
+    error({shift_amount_out_of_range, Shamt, 0, 63});
+c_srai(Rd, Shamt) ->
+    RdField = reg_to_c_num(Rd) bsl 7,
+    Funct3 = 16#4 bsl 13,
+    Funct2 = 16#1 bsl 10,
+    HighBit = ((Shamt bsr 5) band 1) bsl 12,
+    LowBits = (Shamt band 16#1F) bsl 2,
+    Halfword = Funct3 bor HighBit bor Funct2 bor RdField bor LowBits bor 16#1,
+    <<Halfword:16/little>>.
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Load/Store Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.LW - Compressed Load Word
+%% rd' = mem[rs1' + offset] (both registers use the 3-bit encoding; offset
+%% is 7-bit unsigned and a multiple of 4)
+%% Format: CL-type
+%% Encoding: funct3=010, op=00 (0x0)
+%% Raises {offset_not_aligned, Offset, 4} for an offset that is not a
+%% multiple of 4 and {offset_out_of_range, Offset, 0, 124} otherwise.
+-spec c_lw(riscv_register(), {riscv_register(), integer()}) -> binary().
+c_lw(_Rd, {_Rs1, Offset}) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_lw(Rd, {Rs1, Offset}) when (Offset rem 4) =:= 0, Offset >= 0, Offset =< 124 ->
+    Op = 16#0,
+    Funct3 = 16#2,
+    encode_cl_type(Op, Rd, Rs1, Offset, Funct3);
+c_lw(_Rd, {_Rs1, Offset}) ->
+    error({offset_out_of_range, Offset, 0, 124}).
+
+%% C.SW - Compressed Store Word
+%% mem[rs1' + offset] = rs2' (both registers use the 3-bit encoding; offset
+%% is 7-bit unsigned and a multiple of 4)
+%% Format: CS-type
+%% Encoding: funct3=110, op=00 (0x0)
+%% Raises {offset_not_aligned, Offset, 4} for an offset that is not a
+%% multiple of 4 and {offset_out_of_range, Offset, 0, 124} otherwise.
+-spec c_sw(riscv_register(), {riscv_register(), integer()}) -> binary().
+c_sw(_Rs2, {_Rs1, Offset}) when (Offset rem 4) =/= 0 ->
+    error({offset_not_aligned, Offset, 4});
+c_sw(Rs2, {Rs1, Offset}) when (Offset rem 4) =:= 0, Offset >= 0, Offset =< 124 ->
+    Op = 16#0,
+    Funct3 = 16#6,
+    encode_cs_type(Op, Rs1, Rs2, Offset, Funct3);
+c_sw(_Rs2, {_Rs1, Offset}) ->
+    error({offset_out_of_range, Offset, 0, 124}).
+
+%% C.LWSP - Compressed Load Word from Stack Pointer
+%% rd = mem[sp + offset] (rd is a full 5-bit register other than zero,
+%% offset is 8-bit unsigned, multiple of 4)
+%% Format: CI-type (special encoding), funct3=010, op=10 (0x2)
+-spec c_lwsp(riscv_register(), integer()) -> binary().
+c_lwsp(Rd, Off) when
+    Rd =/= zero andalso Off >= 0 andalso Off =< 252 andalso (Off rem 4) =:= 0
+->
+    % Immediate layout: offset[5|4:2|7:6] -> instruction bits [12|6:4|3:2].
+    % offset[5:2] is already at bits 5:2 of the 6-bit immediate (mask 16#3C);
+    % offset[7:6] moves down to bits 1:0.
+    % The guard bounds Off to 0..252, so no wider masking is required.
+    InPlace = Off band 16#3C,
+    Upper = (Off bsr 6) band 3,
+    encode_ci_type(16#2, Rd, InPlace bor Upper, 16#2);
+c_lwsp(zero, _Off) ->
+    error({invalid_compressed_instruction, c_lwsp, 'rd cannot be zero'});
+c_lwsp(_Rd, Off) when (Off rem 4) =/= 0 ->
+    error({offset_not_aligned, Off, 4});
+c_lwsp(_Rd, Off) ->
+    error({offset_out_of_range, Off, 0, 252}).
+
+%% C.SWSP - Compressed Store Word to Stack Pointer
+%% mem[sp + offset] = rs2 (rs2 is a full 5-bit register,
+%% offset is 8-bit unsigned, multiple of 4)
+%% Format: CSS-type, funct3=110, op=10 (0x2)
+-spec c_swsp(riscv_register(), integer()) -> binary().
+c_swsp(Rs2, Off) when
+    Off >= 0 andalso Off =< 252 andalso (Off rem 4) =:= 0
+->
+    % Immediate layout: offset[5:2|7:6] -> instruction bits [12:9|8:7].
+    % offset[5:2] stays at bits 5:2 (mask 16#3C), offset[7:6] drops to bits 1:0.
+    % The guard bounds Off to 0..252, so no wider masking is required.
+    InPlace = Off band 16#3C,
+    Upper = (Off bsr 6) band 3,
+    encode_css_type(16#2, Rs2, InPlace bor Upper, 16#6);
+c_swsp(_Rs2, Off) when (Off rem 4) =/= 0 ->
+    error({offset_not_aligned, Off, 4});
+c_swsp(_Rs2, Off) ->
+    error({offset_out_of_range, Off, 0, 252}).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Branch and Jump Instructions
+%%-----------------------------------------------------------------------------
+
+%% C.BEQZ - Compressed Branch if Equal to Zero
+%% Branches to pc + offset when rs1' == 0. rs1' uses the 3-bit encoding; the
+%% offset is 9-bit signed and must be a multiple of 2 (-256..254).
+%% Format: CB-type, funct3=110, op=01 (0x1)
+-spec c_beqz(riscv_register(), integer()) -> binary().
+c_beqz(Reg, Off) when
+    Off >= -256 andalso Off =< 254 andalso (Off rem 2) =:= 0
+->
+    encode_cb_type(16#1, Reg, Off, 16#6);
+c_beqz(_Reg, Off) when (Off rem 2) =/= 0 ->
+    error({offset_not_aligned, Off, 2});
+c_beqz(_Reg, Off) ->
+    error({offset_out_of_range, Off, -256, 254}).
+
+%% C.BNEZ - Compressed Branch if Not Equal to Zero
+%% Branches to pc + offset when rs1' != 0. rs1' uses the 3-bit encoding; the
+%% offset is 9-bit signed and must be a multiple of 2 (-256..254).
+%% Format: CB-type, funct3=111, op=01 (0x1)
+-spec c_bnez(riscv_register(), integer()) -> binary().
+c_bnez(Reg, Off) when
+    Off >= -256 andalso Off =< 254 andalso (Off rem 2) =:= 0
+->
+    encode_cb_type(16#1, Reg, Off, 16#7);
+c_bnez(_Reg, Off) when (Off rem 2) =/= 0 ->
+    error({offset_not_aligned, Off, 2});
+c_bnez(_Reg, Off) ->
+    error({offset_out_of_range, Off, -256, 254}).
+
+%% C.J - Compressed Unconditional Jump
+%% Jumps to pc + offset; the offset is 12-bit signed and must be a multiple
+%% of 2 (-2048..2046).
+%% Format: CJ-type, funct3=101, op=01 (0x1)
+-spec c_j(integer()) -> binary().
+c_j(Off) when
+    Off >= -2048 andalso Off =< 2046 andalso (Off rem 2) =:= 0
+->
+    encode_cj_type(16#1, Off, 16#5);
+c_j(Off) when (Off rem 2) =/= 0 ->
+    error({offset_not_aligned, Off, 2});
+c_j(Off) ->
+    error({offset_out_of_range, Off, -2048, 2046}).
+
+%% C.JAL - Compressed Jump and Link (RV32C only, rd is implicitly ra)
+%% ra = pc + 2, then jumps to pc + offset; the offset is 12-bit signed and
+%% must be a multiple of 2 (-2048..2046).
+%% Format: CJ-type, funct3=001 (0x1), op=01 (0x1)
+-spec c_jal(integer()) -> binary().
+c_jal(Off) when
+    Off >= -2048 andalso Off =< 2046 andalso (Off rem 2) =:= 0
+->
+    encode_cj_type(16#1, Off, 16#1);
+c_jal(Off) when (Off rem 2) =/= 0 ->
+    error({offset_not_aligned, Off, 2});
+c_jal(Off) ->
+    error({offset_out_of_range, Off, -2048, 2046}).
+
+%% C.JR - Compressed Jump Register
+%% pc = rs1, where rs1 is a full 5-bit register; zero is rejected with an
+%% invalid_compressed_instruction error.
+%% Format: CR-type, funct4=1000 (0x8), rs2=x0, op=10 (0x2)
+-spec c_jr(riscv_register()) -> binary().
+c_jr(zero) ->
+    error({invalid_compressed_instruction, c_jr, 'rs1 cannot be zero'});
+c_jr(Rs1) ->
+    encode_cr_type(16#2, Rs1, zero, 16#8).
+
+%% C.JALR - Compressed Jump and Link Register
+%% ra = pc + 2; pc = rs1, where rs1 is a full 5-bit register; zero is
+%% rejected with an invalid_compressed_instruction error.
+%% Format: CR-type, funct4=1001 (0x9), rs2=x0, op=10 (0x2)
+-spec c_jalr(riscv_register()) -> binary().
+c_jalr(zero) ->
+    error({invalid_compressed_instruction, c_jalr, 'rs1 cannot be zero'});
+c_jalr(Rs1) ->
+    encode_cr_type(16#2, Rs1, zero, 16#9).
+
+%% C.EBREAK - Compressed Environment Breakpoint
+%% Raises a breakpoint exception.
+%% Format: CR-type, funct4=1001 (0x9), rs1/rd=x0, rs2=x0, op=10 (0x2)
+-spec c_ebreak() -> binary().
+c_ebreak() ->
+    NoReg = zero,
+    encode_cr_type(16#2, NoReg, NoReg, 16#9).
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Pseudo-instructions
+%%-----------------------------------------------------------------------------
+
+%% C.NOP - Compressed No Operation
+%% Encoded as: c.addi x0, 0
+%% Format: CI-type, funct3=000, rd/rs1=x0, imm=0, op=01 (0x1)
+-spec c_nop() -> binary().
+c_nop() ->
+    Imm = 0,
+    encode_ci_type(16#1, zero, Imm, 16#0).
diff --git a/libs/jit/src/jit_x86_64.erl b/libs/jit/src/jit_x86_64.erl
index 5f54e6e512..9ef6c6441f 100644
--- a/libs/jit/src/jit_x86_64.erl
+++ b/libs/jit/src/jit_x86_64.erl
@@ -37,6 +37,8 @@
     call_primitive_with_cp/3,
     return_if_not_equal_to_ctx/2,
     jump_to_label/2,
+    jump_to_continuation/2,
+    jump_to_offset/2,
     if_block/3,
     if_else_block/4,
     shift_right/3,
@@ -70,10 +72,26 @@
     add_label/3
 ]).
 
+-ifdef(JIT_DWARF).
+-export([
+    dwarf_opcode/2,
+    dwarf_label/2,
+    dwarf_function/3,
+    dwarf_line/2,
+    dwarf_ctx_register/0
+]).
+-endif.
+
+-compile([warnings_as_errors]).
+
 -include_lib("jit.hrl").
 
 -include("primitives.hrl").
 
+-ifdef(JIT_DWARF).
+-include("jit_dwarf.hrl").
+-endif.
+
 -define(ASSERT(Expr), true = Expr).
 
 %% System V X86_64 calling conventions which we apply here.
@@ -114,7 +132,8 @@
     branches :: [{non_neg_integer(), non_neg_integer(), non_neg_integer()}],
     available_regs :: [x86_64_register()],
     used_regs :: [x86_64_register()],
-    labels :: [{integer() | reference(), integer()}]
+    labels :: [{integer() | reference(), integer()}],
+    variant :: non_neg_integer()
 }).
 
 -type state() :: #state{}.
@@ -138,7 +157,8 @@
     | {'(int)', maybe_free_x86_64_register(), '!=', x86_64_register() | integer()}
     | {'(bool)', maybe_free_x86_64_register(), '==', false}
     | {'(bool)', maybe_free_x86_64_register(), '!=', false}
-    | {maybe_free_x86_64_register(), '&', non_neg_integer(), '!=', integer()}.
+    | {maybe_free_x86_64_register(), '&', non_neg_integer(), '!=', integer()}
+    | {{free, x86_64_register()}, '==', {free, x86_64_register()}}.
 
 -define(WORD_SIZE, 8).
 
@@ -156,6 +176,13 @@
 -define(X_REG(N), {16#30 + (N * ?WORD_SIZE), ?CTX_REG}).
 -define(CP, {16#B8, ?CTX_REG}).
 -define(FP_REGS, {16#C0, ?CTX_REG}).
+-define(FP_REG_OFFSET(State, F),
+    (F *
+        case (State)#state.variant band ?JIT_VARIANT_FLOAT32 of
+            0 -> 8;
+            _ -> 4
+        end)
+).
 -define(BS, {16#C8, ?CTX_REG}).
 -define(BS_OFFSET, {16#D0, ?CTX_REG}).
 -define(JITSTATE_MODULE, {0, ?JITSTATE_REG}).
@@ -173,6 +200,8 @@
 -define(PARAMETER_REGS, [rdi, rsi, rdx, rcx, r8, r9]).
 -define(SCRATCH_REGS, [rdi, rsi, rdx, rcx, r8, r9, r10, r11]).
 
+-include("jit_backend_dwarf_impl.hrl").
+
 %%-----------------------------------------------------------------------------
 %% @doc Return the word size in bytes, i.e. the sizeof(term) i.e.
 %% sizeof(uintptr_t)
@@ -201,7 +230,7 @@ word_size() -> ?WORD_SIZE.
 %% @return New backend state
 %%-----------------------------------------------------------------------------
 -spec new(any(), module(), stream()) -> state().
-new(_Variant, StreamModule, Stream) ->
+new(Variant, StreamModule, Stream) ->
     #state{
         stream_module = StreamModule,
         stream = Stream,
@@ -209,7 +238,8 @@ new(_Variant, StreamModule, Stream) ->
         offset = StreamModule:offset(Stream),
         available_regs = ?AVAILABLE_REGS,
         used_regs = [],
-        labels = []
+        labels = [],
+        variant = Variant
     }.
 
 %%-----------------------------------------------------------------------------
@@ -513,6 +543,49 @@ jump_to_label(
             State#state{stream = Stream1, branches = [Reloc | AccBranches]}
     end.
 
+% Emit a jump to TargetOffset: jit_x86_64_asm:jmp/1 receives TargetOffset minus
+% the current stream offset (read before the jump is appended).
+jump_to_offset(#state{stream_module = Mod, stream = Stream} = State, TargetOffset) ->
+    Displacement = TargetOffset - Mod:offset(Stream),
+    Jump = jit_x86_64_asm:jmp(Displacement),
+    State#state{stream = Mod:append(Stream, Jump)}.
+
+%%-----------------------------------------------------------------------------
+%% @doc Emit a tail jump to a continuation whose offset is held in a register.
+%% This is used for optimized intra-module returns.
+%% @end
+%% @param State current backend state
+%% @param OffsetReg `{free, Reg}' with Reg holding the continuation offset
+%% @return Updated backend state
+%%-----------------------------------------------------------------------------
+jump_to_continuation(
+    #state{
+        stream_module = Mod,
+        stream = Stream,
+        offset = BaseOffset,
+        available_regs = [Scratch | _]
+    } = State,
+    {free, OffsetReg}
+) ->
+    % The absolute target is native_code_base + continuation offset, where
+    % native_code_base = current_pc + (BaseOffset - current stream offset).
+    % The extra LeaLen accounts for the length of the leaq instruction itself,
+    % which is asserted below.
+    LeaLen = 7,
+    Disp = BaseOffset - Mod:offset(Stream) - LeaLen,
+
+    % leaq Disp(%rip), Scratch: Scratch = native code base address
+    Lea = jit_x86_64_asm:leaq({rip, Disp}, Scratch),
+    LeaLen = byte_size(Lea),
+    % addq OffsetReg, Scratch: Scratch = absolute continuation address
+    Add = jit_x86_64_asm:addq(OffsetReg, Scratch),
+    % jmpq *Scratch: indirect jump to the computed address
+    Jmp = jit_x86_64_asm:jmpq({Scratch}),
+
+    Emitted = Mod:append(Stream, <<Lea/binary, Add/binary, Jmp/binary>>),
+    % This is a tail jump, so every register is handed back to the allocator.
+    State#state{stream = Emitted, available_regs = ?AVAILABLE_REGS, used_regs = []}.
+
 %%-----------------------------------------------------------------------------
 %% @doc Emit an if block, i.e. emit a test of a condition and conditionnally
 %% execute a block.
@@ -722,6 +795,14 @@ if_block_cond0(
     {RelocJZOffset, I3} = jit_x86_64_asm:jnz_rel8(1),
     State1 = if_block_free_reg(RegOrTuple, State0),
     {State1, <<I1/binary, I2/binary, I3/binary>>, byte_size(I1) + byte_size(I2) + RelocJZOffset};
+if_block_cond0(State0, {{free, Reg1}, '==', {free, Reg2}}) ->
+    % Compare two free registers
+    I1 = jit_x86_64_asm:cmpq(Reg2, Reg1),
+    {RelocJNZOffset, I2} = jit_x86_64_asm:jnz_rel8(1),
+    % Free both registers
+    State1 = if_block_free_reg({free, Reg1}, State0),
+    State2 = if_block_free_reg({free, Reg2}, State1),
+    {State2, <<I1/binary, I2/binary>>, byte_size(I1) + RelocJNZOffset};
 if_block_cond0(
     State0,
     {'(int)', RegOrTuple, '==', Val}
@@ -822,12 +903,30 @@ merge_used_regs(State, []) ->
 %% @param Shift number of bits to shift
 %% @return new state
 %%-----------------------------------------------------------------------------
-shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, Reg, Shift) when
+-spec shift_right(#state{}, maybe_free_x86_64_register(), non_neg_integer()) ->
+    {#state{}, x86_64_register()}.
+shift_right(#state{stream_module = StreamModule, stream = Stream0} = State, {free, Reg}, Shift) when
     ?IS_GPR(Reg) andalso is_integer(Shift)
 ->
     I = jit_x86_64_asm:shrq(Shift, Reg),
     Stream1 = StreamModule:append(Stream0, I),
-    State#state{stream = Stream1}.
+    {State#state{stream = Stream1}, Reg};
+shift_right(
+    #state{
+        stream_module = StreamModule,
+        available_regs = [ResultReg | T],
+        used_regs = UR,
+        stream = Stream0
+    } = State,
+    Reg,
+    Shift
+) when
+    ?IS_GPR(Reg) andalso is_integer(Shift)
+->
+    I1 = jit_x86_64_asm:movq(Reg, ResultReg),
+    I2 = jit_x86_64_asm:shrq(Shift, ResultReg),
+    Stream1 = StreamModule:append(Stream0, <<I1/binary, I2/binary>>),
+    {State#state{stream = Stream1, available_regs = T, used_regs = [ResultReg | UR]}, ResultReg}.
 
 %%-----------------------------------------------------------------------------
 %% @doc Emit a shift register left by a fixed number of bits, effectively
@@ -1248,7 +1347,7 @@ move_to_vm_register(
 ) when is_atom(Reg) ->
     I1 = jit_x86_64_asm:movq({8, Reg}, Reg),
     I2 = jit_x86_64_asm:movq(?FP_REGS, Temp),
-    I3 = jit_x86_64_asm:movq(Reg, {F * 8, Temp}),
+    I3 = jit_x86_64_asm:movq(Reg, {?FP_REG_OFFSET(State0, F), Temp}),
     Code = <<I1/binary, I2/binary, I3/binary>>,
     Stream1 = StreamModule:append(Stream0, Code),
     State1 = free_native_register(State0, Reg),
@@ -1562,7 +1661,19 @@ move_to_array_element(
     Stream1 = StreamModule:append(Stream0, I1),
     State#state{stream = Stream1}.
 
--spec move_to_native_register(state(), value()) -> {state(), x86_64_register()}.
+-spec move_to_native_register(state(), value() | cp) -> {state(), x86_64_register()}.
+move_to_native_register(
+    #state{
+        stream_module = StreamModule,
+        stream = Stream0,
+        available_regs = [Reg | AvailT],
+        used_regs = Used
+    } = State,
+    cp
+) ->
+    I1 = jit_x86_64_asm:movq(?CP, Reg),
+    Stream1 = StreamModule:append(Stream0, I1),
+    {State#state{stream = Stream1, used_regs = [Reg | Used], available_regs = AvailT}, Reg};
 move_to_native_register(State, Reg) when is_atom(Reg) ->
     {State, Reg};
 move_to_native_register(
@@ -1961,3 +2072,14 @@ add_label(#state{stream_module = StreamModule, stream = Stream} = State, Label)
 -spec add_label(state(), integer() | reference(), integer()) -> state().
 add_label(#state{labels = Labels} = State, Label, Offset) ->
     State#state{labels = [{Label, Offset} | Labels]}.
+
+-ifdef(JIT_DWARF).
+%%-----------------------------------------------------------------------------
+%% @doc Return the DWARF register number of the register carrying the ctx parameter
+%% @returns `?DWARF_RDI_REG_X86_64', the DWARF number of rdi (where ctx is passed on x86_64)
+%% @end
+%%-----------------------------------------------------------------------------
+-spec dwarf_ctx_register() -> non_neg_integer().
+dwarf_ctx_register() ->
+    ?DWARF_RDI_REG_X86_64.
+-endif.
diff --git a/src/libAtomVM/CMakeLists.txt b/src/libAtomVM/CMakeLists.txt
index 2a5c58bd05..fec49b6bdf 100644
--- a/src/libAtomVM/CMakeLists.txt
+++ b/src/libAtomVM/CMakeLists.txt
@@ -179,6 +179,9 @@ endif()
 if (NOT AVM_DISABLE_JIT AND NOT AVM_ENABLE_PRECOMPILED)
     target_compile_definitions(libAtomVM PUBLIC AVM_NO_EMU)
 endif()
+if (AVM_DISABLE_JIT_DWARF OR AVM_DISABLE_JIT)
+    target_compile_definitions(libAtomVM PUBLIC AVM_NO_JIT_DWARF)
+endif()
 
 if(HAVE_PLATFORM_SMP_H)
     target_compile_definitions(libAtomVM PUBLIC HAVE_PLATFORM_SMP_H)
diff --git a/src/libAtomVM/defaultatoms.def b/src/libAtomVM/defaultatoms.def
index 04aff1f840..db7914438b 100644
--- a/src/libAtomVM/defaultatoms.def
+++ b/src/libAtomVM/defaultatoms.def
@@ -208,3 +208,5 @@ X(CODE_SERVER_ATOM, "\xB", "code_server")
 X(LOAD_ATOM, "\x4", "load")
 X(JIT_X86_64_ATOM, "\xA", "jit_x86_64")
 X(JIT_AARCH64_ATOM, "\xB", "jit_aarch64")
+X(JIT_ARMV6M_ATOM, "\xA", "jit_armv6m")
+X(JIT_RISCV32_ATOM, "\xB", "jit_riscv32")
diff --git a/src/libAtomVM/jit.c b/src/libAtomVM/jit.c
index 1d63f4b836..fb5014df87 100644
--- a/src/libAtomVM/jit.c
+++ b/src/libAtomVM/jit.c
@@ -38,8 +38,139 @@
 
 #include <math.h>
 #include <stddef.h>
+#ifndef AVM_NO_JIT_DWARF
+#include <stdlib.h>
+#include <string.h>
+
+#if TERM_BYTES == 4
+// ELF32 structures (TERM_BYTES == 4): file header, section header, symbol table entry and program header, with 32-bit addresses/offsets
+typedef struct
+{
+    unsigned char e_ident[16];
+    uint16_t e_type;
+    uint16_t e_machine;
+    uint32_t e_version;
+    uint32_t e_entry;
+    uint32_t e_phoff;
+    uint32_t e_shoff;
+    uint32_t e_flags;
+    uint16_t e_ehsize;
+    uint16_t e_phentsize;
+    uint16_t e_phnum;
+    uint16_t e_shentsize;
+    uint16_t e_shnum;
+    uint16_t e_shstrndx;
+} Elf_Ehdr;
+
+typedef struct
+{
+    uint32_t sh_name;
+    uint32_t sh_type;
+    uint32_t sh_flags;
+    uint32_t sh_addr;
+    uint32_t sh_offset;
+    uint32_t sh_size;
+    uint32_t sh_link;
+    uint32_t sh_info;
+    uint32_t sh_addralign;
+    uint32_t sh_entsize;
+} Elf_Shdr;
+
+typedef struct
+{
+    uint32_t st_name;
+    uint32_t st_value;
+    uint32_t st_size;
+    unsigned char st_info;
+    unsigned char st_other;
+    uint16_t st_shndx;
+} Elf_Sym;
+
+typedef struct
+{
+    uint32_t p_type;
+    uint32_t p_offset;
+    uint32_t p_vaddr;
+    uint32_t p_paddr;
+    uint32_t p_filesz;
+    uint32_t p_memsz;
+    uint32_t p_flags;
+    uint32_t p_align;
+} Elf_Phdr;
+#elif TERM_BYTES == 8
+// ELF64 structures (TERM_BYTES == 8): same four records with 64-bit addresses/offsets; note Elf_Sym and Elf_Phdr order their fields differently from the ELF32 variants
+typedef struct
+{
+    unsigned char e_ident[16];
+    uint16_t e_type;
+    uint16_t e_machine;
+    uint32_t e_version;
+    uint64_t e_entry;
+    uint64_t e_phoff;
+    uint64_t e_shoff;
+    uint32_t e_flags;
+    uint16_t e_ehsize;
+    uint16_t e_phentsize;
+    uint16_t e_phnum;
+    uint16_t e_shentsize;
+    uint16_t e_shnum;
+    uint16_t e_shstrndx;
+} Elf_Ehdr;
+
+typedef struct
+{
+    uint32_t sh_name;
+    uint32_t sh_type;
+    uint64_t sh_flags;
+    uint64_t sh_addr;
+    uint64_t sh_offset;
+    uint64_t sh_size;
+    uint32_t sh_link;
+    uint32_t sh_info;
+    uint64_t sh_addralign;
+    uint64_t sh_entsize;
+} Elf_Shdr;
+
+typedef struct
+{
+    uint32_t st_name;
+    unsigned char st_info;
+    unsigned char st_other;
+    uint16_t st_shndx;
+    uint64_t st_value;
+    uint64_t st_size;
+} Elf_Sym;
+
+typedef struct
+{
+    uint32_t p_type;
+    uint32_t p_flags;
+    uint64_t p_offset;
+    uint64_t p_vaddr;
+    uint64_t p_paddr;
+    uint64_t p_filesz;
+    uint64_t p_memsz;
+    uint64_t p_align;
+} Elf_Phdr;
+#else
+#error TERM_BYTES should be 4 or 8
+#endif
+
+// ELF constants (only the values this file needs; numeric values follow the ELF specification)
+#define SHT_SYMTAB 2 // sh_type: symbol table section
+#define SHT_STRTAB 3 // sh_type: string table section
+#define STT_FUNC 2 // symbol type: function
+#define STB_GLOBAL 1 // symbol binding: global
+#define PT_LOAD 1 // p_type: loadable segment
+#define PF_X 1 // p_flags: Execute
+#define PF_R 4 // p_flags: Read
+
+// ELF symbol type extraction: the type is the low 4 bits of st_info
+#define ELF_ST_TYPE(info) ((info) & 0xf)
+
+#endif
 
-// #define ENABLE_TRACE
+#define ENABLE_TRACE
 #include "trace.h"
 
 // Verify matching atom index in default_atoms.hrl
@@ -83,10 +214,44 @@ _Static_assert(offsetof(Context, bs_offset) == 0xD0, "ctx->bs_offset is 0xD0 in
 _Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_aarch64.erl");
 _Static_assert(offsetof(JITState, continuation) == 0x8, "jit_state->continuation is 0x8 in jit/src/jit_aarch64.erl");
 _Static_assert(offsetof(JITState, remaining_reductions) == 0x10, "jit_state->remaining_reductions is 0x10 in jit/src/jit_aarch64.erl");
+#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M
+_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_armv6m.erl");
+_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in jit/src/jit_armv6m.erl");
+_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_armv6m.erl");
+_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_armv6m.erl");
+_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_armv6m.erl");
+_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in jit/src/jit_armv6m.erl");
+
+_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_armv6m.erl");
+_Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_armv6m.erl");
+_Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_armv6m.erl");
+
+_Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits");
+
+#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32
+_Static_assert(offsetof(Context, e) == 0x14, "ctx->e is 0x14 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(Context, x) == 0x18, "ctx->x is 0x18 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(Context, cp) == 0x5C, "ctx->cp is 0x5C in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(Context, fr) == 0x60, "ctx->fr is 0x60 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(Context, bs) == 0x64, "ctx->bs is 0x64 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(Context, bs_offset) == 0x68, "ctx->bs_offset is 0x68 in jit/src/jit_riscv32.erl");
+
+_Static_assert(offsetof(JITState, module) == 0x0, "jit_state->module is 0x0 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(JITState, continuation) == 0x4, "jit_state->continuation is 0x4 in jit/src/jit_riscv32.erl");
+_Static_assert(offsetof(JITState, remaining_reductions) == 0x8, "jit_state->remaining_reductions is 0x8 in jit/src/jit_riscv32.erl");
+
+_Static_assert(sizeof(size_t) == 4, "size_t is expected to be 32 bits");
+
 #else
 #error Unknown jit target
 #endif
 
+#ifdef AVM_USE_SINGLE_PRECISION
+_Static_assert(sizeof(avm_float_t) == 0x4, "sizeof(avm_float_t) is 0x4 for single precision");
+#else
+_Static_assert(sizeof(avm_float_t) == 0x8, "sizeof(avm_float_t) is 0x8 for double precision");
+#endif
+
 #define PROCESS_MAYBE_TRAP_RETURN_VALUE(return_value, offset) \
     if (term_is_invalid_term(return_value)) {                 \
         if (UNLIKELY(!context_get_flags(ctx, Trap))) {        \
@@ -124,7 +289,7 @@ static void destroy_extended_registers(Context *ctx, unsigned int live)
 
 static void jit_trim_live_regs(Context *ctx, uint32_t live)
 {
-    TRACE("jit_trim_live_regs: ctx->process_id = %d, live = %d\n", ctx->process_id, live);
+    TRACE("jit_trim_live_regs: ctx->process_id = %" PRId32 ", live = %" PRIu32 "\n", ctx->process_id, live);
     if (UNLIKELY(!list_is_empty(&ctx->extended_x_regs))) {
         destroy_extended_registers(ctx, live);
     }
@@ -164,8 +329,8 @@ static Context *jit_return(Context *ctx, JITState *jit_state)
 
 static Context *jit_terminate_context(Context *ctx, JITState *jit_state)
 {
-    TRACE("jit_terminate_context: ctx->process_id = %d\n", ctx->process_id);
-    TRACE("-- Code execution finished for %i--\n", ctx->process_id);
+    TRACE("jit_terminate_context: ctx->process_id = %" PRId32 "\n", ctx->process_id);
+    TRACE("-- Code execution finished for %" PRId32 "--\n", ctx->process_id);
     GlobalContext *global = ctx->global;
     if (ctx->leader) {
         scheduler_stop_all(global);
@@ -177,7 +342,7 @@ static Context *jit_terminate_context(Context *ctx, JITState *jit_state)
 
 static Context *jit_handle_error(Context *ctx, JITState *jit_state, int offset)
 {
-    TRACE("jit_terminate_context: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset);
+    TRACE("jit_handle_error: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset);
     if (offset || term_is_invalid_term(ctx->x[2])) {
         ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, ctx->x[0]);
     }
@@ -244,14 +409,14 @@ static void set_error(Context *ctx, JITState *jit_state, int offset, term error_
 
 static Context *jit_raise_error(Context *ctx, JITState *jit_state, int offset, term error_type_atom)
 {
-    TRACE("jit_raise_error: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset);
+    TRACE("jit_raise_error: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset);
     set_error(ctx, jit_state, offset, error_type_atom);
     return jit_handle_error(ctx, jit_state, 0);
 }
 
 static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int offset, term error_atom, term arg1)
 {
-    TRACE("jit_raise_error_tuple: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset);
+    TRACE("jit_raise_error_tuple: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset);
     // We can gc as we are raising
     if (UNLIKELY(memory_ensure_free_with_roots(ctx, TUPLE_SIZE(2), 1, &arg1, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
         set_error(ctx, jit_state, offset, OUT_OF_MEMORY_ATOM);
@@ -268,7 +433,7 @@ static Context *jit_raise_error_tuple(Context *ctx, JITState *jit_state, int off
 
 static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term stacktrace, term exc_value)
 {
-    TRACE("jit_raise: ctx->process_id = %d, offset = %d\n", ctx->process_id, offset);
+    TRACE("jit_raise: ctx->process_id = %" PRId32 ", offset = %d\n", ctx->process_id, offset);
     ctx->x[0] = stacktrace_exception_class(stacktrace);
     ctx->x[1] = exc_value;
     ctx->x[2] = stacktrace_create_raw(ctx, jit_state->module, offset, stacktrace);
@@ -277,7 +442,7 @@ static Context *jit_raise(Context *ctx, JITState *jit_state, int offset, term st
 
 static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state)
 {
-    TRACE("jit_schedule_next_cp: ctx->process_id = %d\n", ctx->process_id);
+    TRACE("jit_schedule_next_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id);
     ctx->saved_function_ptr = jit_state->continuation;
     ctx->saved_module = jit_state->module;
     jit_state->remaining_reductions = 0;
@@ -286,7 +451,7 @@ static Context *jit_schedule_next_cp(Context *ctx, JITState *jit_state)
 
 static Context *jit_schedule_wait_cp(Context *ctx, JITState *jit_state)
 {
-    TRACE("jit_schedule_wait_cp: ctx->process_id = %d\n", ctx->process_id);
+    TRACE("jit_schedule_wait_cp: ctx->process_id = %" PRId32 "\n", ctx->process_id);
     ctx->saved_function_ptr = jit_state->continuation;
     ctx->saved_module = jit_state->module;
     jit_state->remaining_reductions = 0;
@@ -424,7 +589,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int
                     return_value = bif->bif2_ptr(ctx, 0, ctx->x[0], ctx->x[1]);
                     break;
                 default:
-                    fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity);
+                    fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity);
                     AVM_ABORT();
             }
             PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset);
@@ -454,7 +619,7 @@ static Context *jit_call_ext(Context *ctx, JITState *jit_state, int offset, int
                     return_value = gcbif->gcbif2_ptr(ctx, 0, 0, ctx->x[0], ctx->x[1]);
                     break;
                 default:
-                    fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", arity);
+                    fprintf(stderr, "Invalid arity %" PRIu32 " for bif\n", (uint32_t) arity);
                     AVM_ABORT();
             }
             PROCESS_MAYBE_TRAP_RETURN_VALUE_LAST(return_value, offset);
@@ -477,7 +642,7 @@ static term jit_module_get_atom_term_by_id(JITState *jit_state, int atom_index)
 
 static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need, uint32_t heap_need, uint32_t live)
 {
-    TRACE("jit_allocate: stack_need=%u heap_need=%u live=%u\n", stack_need, heap_need, live);
+    TRACE("jit_allocate: ENTRY ctx=%p jit_state=%p stack_need=%" PRIu32 " heap_need=%" PRIu32 " live=%" PRIu32 "\n", (void*)ctx, (void*)jit_state, stack_need, heap_need, live);
     if (ctx->heap.root->next || ((ctx->heap.heap_ptr + heap_need > ctx->e - (stack_need + 1)))) {
         TRIM_LIVE_REGS(live);
         if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need + stack_need + 1, live, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
@@ -492,7 +657,7 @@ static bool jit_allocate(Context *ctx, JITState *jit_state, uint32_t stack_need,
 
 static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif)
 {
-    TRACE("jit_get_imported_bif: bif=%u\n", bif);
+    TRACE("jit_get_imported_bif: bif=%" PRIu32 "\n", bif);
     const struct ExportedFunction *exported_bif = jit_state->module->imported_funcs[bif];
     const BifImpl0 result = EXPORTED_FUNCTION_TO_BIF(exported_bif)->bif0_ptr;
     return result;
@@ -500,7 +665,7 @@ static BifImpl0 jit_get_imported_bif(JITState *jit_state, uint32_t bif)
 
 static bool jit_deallocate(Context *ctx, JITState *jit_state, uint32_t n_words)
 {
-    TRACE("jit_deallocate: n_words=%u\n", n_words);
+    TRACE("jit_deallocate: n_words=%" PRIu32 "\n", n_words);
     ctx->cp = ctx->e[n_words];
     ctx->e += n_words + 1;
     // Hopefully, we only need x[0]
@@ -525,7 +690,7 @@ static TermCompareResult jit_term_compare(Context *ctx, JITState *jit_state, ter
 
 static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need, uint32_t live_registers)
 {
-    TRACE("jit_test_heap: heap_need=%u live_registers=%u\n", heap_need, live_registers);
+    TRACE("jit_test_heap: heap_need=%" PRIu32 " live_registers=%" PRIu32 "\n", heap_need, live_registers);
     size_t heap_free = context_avail_free_memory(ctx);
     // if we need more heap space than is currently free, then try to GC the needed space
     if (heap_free < heap_need) {
@@ -539,7 +704,7 @@ static bool jit_test_heap(Context *ctx, JITState *jit_state, uint32_t heap_need,
     } else if (heap_free > heap_need * HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF) {
         TRIM_LIVE_REGS(live_registers);
         if (UNLIKELY(memory_ensure_free_with_roots(ctx, heap_need * (HEAP_NEED_GC_SHRINK_THRESHOLD_COEFF / 2), live_registers, ctx->x, MEMORY_CAN_SHRINK) != MEMORY_GC_OK)) {
-            TRACE("Unable to ensure free memory.  heap_need=%i\n", heap_need);
+            TRACE("Unable to ensure free memory.  heap_need=%" PRIu32 "\n", heap_need);
             set_error(ctx, jit_state, 0, OUT_OF_MEMORY_ATOM);
             return false;
         }
@@ -620,13 +785,13 @@ static term maybe_alloc_boxed_integer_fragment(Context *ctx, avm_int64_t value)
 
 static term jit_term_alloc_tuple(Context *ctx, uint32_t size)
 {
-    TRACE("jit_term_alloc_tuple: size=%u\n", size);
+    TRACE("jit_term_alloc_tuple: size=%" PRIu32 "\n", size);
     return term_alloc_tuple(size, &ctx->heap);
 }
 
 static term jit_term_alloc_fun(Context *ctx, JITState *jit_state, uint32_t fun_index, uint32_t numfree)
 {
-    TRACE("jit_term_alloc_fun: fun_index=%u numfree=%u\n", fun_index, numfree);
+    TRACE("jit_term_alloc_fun: fun_index=%" PRIu32 " numfree=%" PRIu32 "\n", fun_index, numfree);
     size_t size = numfree + BOXED_FUN_SIZE;
     term *boxed_func = memory_heap_alloc(&ctx->heap, size);
 
@@ -832,7 +997,7 @@ static Context *jit_process_signal_messages(Context *ctx, JITState *jit_state)
 
 static term jit_mailbox_peek(Context *ctx)
 {
-    TRACE("jit_mailbox_peek: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_mailbox_peek: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     term out = term_invalid_term();
     mailbox_peek(ctx, &out);
     return out;
@@ -840,26 +1005,26 @@ static term jit_mailbox_peek(Context *ctx)
 
 static void jit_mailbox_remove_message(Context *ctx)
 {
-    TRACE("jit_mailbox_remove_message: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_mailbox_remove_message: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     mailbox_remove_message(&ctx->mailbox, &ctx->heap);
 }
 
 static void jit_timeout(Context *ctx)
 {
-    TRACE("jit_timeout: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags);
     mailbox_reset(&ctx->mailbox);
 }
 
 static void jit_mailbox_next(Context *ctx)
 {
-    TRACE("jit_mailbox_next: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_mailbox_next: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     mailbox_next(&ctx->mailbox);
 }
 
 static void jit_cancel_timeout(Context *ctx)
 {
-    TRACE("jit_cancel_timeout: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_cancel_timeout: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     if (context_get_flags(ctx, WaitingTimeout | WaitingTimeoutExpired)) {
         scheduler_cancel_timeout(ctx);
     }
@@ -867,7 +1032,7 @@ static void jit_cancel_timeout(Context *ctx)
 
 static void jit_clear_timeout_flag(Context *ctx)
 {
-    TRACE("jit_clear_timeout_flag: ctx->process_id=%d\n", ctx->process_id);
+    TRACE("jit_clear_timeout_flag: ctx->process_id=%" PRId32 "\n", ctx->process_id);
     context_update_flags(ctx, ~WaitingTimeoutExpired, NoFlags);
 }
 
@@ -1729,3 +1894,806 @@ const ModuleNativeInterface module_native_interface = {
 };
 
 #endif
+
+#ifndef AVM_NO_JIT_DWARF
+
+// GDB JIT interface structures and constants; jit_actions_t values go into jit_descriptor.action_flag
+typedef enum
+{
+    JIT_NOACTION = 0, // initial value of action_flag in __jit_debug_descriptor
+    JIT_REGISTER_FN, // set by jit_debug_register_code after linking a new entry
+    JIT_UNREGISTER_FN // not used yet in this file; presumably signals removal of relevant_entry
+} jit_actions_t;
+
+struct jit_code_entry
+{
+    struct jit_code_entry *next_entry; // next node of the doubly linked list, NULL at the tail
+    struct jit_code_entry *prev_entry; // previous node, NULL at the head
+    const char *symfile_addr; // start of the in-memory ELF image built for the debugger
+    uint64_t symfile_size; // size of that ELF image in bytes
+};
+
+struct jit_descriptor
+{
+    uint32_t version; // interface version; the global instance below is initialized with 1
+    uint32_t action_flag; // a jit_actions_t value describing the latest list change
+    struct jit_code_entry *relevant_entry; // entry the latest action refers to
+    struct jit_code_entry *first_entry; // head of the list of registered entries
+};
+
+// Global GDB JIT interface descriptor (initially: version 1, JIT_NOACTION, empty list)
+// This must have C linkage and specific symbol names for GDB to find it
+struct jit_descriptor __jit_debug_descriptor = { 1, 0, NULL, NULL };
+
+// GDB sets a breakpoint on this function to be notified of new JIT code
+void __attribute__((noinline)) __jit_debug_register_code(void)
+{
+    __asm__ volatile(""); // side effect: keeps calls to this otherwise empty function from being optimized away
+}
+
+// DWARF parsing helpers for address patching
+
+// Read an unsigned LEB128 value (DWARF variable-length integer) from data at *offset.
+static size_t read_uleb128(const uint8_t *data, size_t *offset, uint64_t *value)
+{
+    // Advances *offset past the value and returns the byte count; the buffer end is not checked
+    *value = 0;
+    unsigned shift = 0;
+    size_t start = *offset;
+    uint8_t byte;
+    do {
+        byte = data[(*offset)++];
+        if (shift < 64) { // later groups do not fit in 64 bits; shifting by >= 64 is undefined
+            *value |= ((uint64_t) (byte & 0x7f)) << shift;
+        }
+        shift += 7;
+    } while (byte & 0x80);
+
+    return *offset - start;
+}
+
+// Structure to hold parsed abbreviation entry
+typedef struct {
+    uint64_t code; // abbreviation code that DIEs in .debug_info refer to
+    uint64_t tag; // tag value read right after the code
+    uint8_t has_children; // raw children flag byte as read from .debug_abbrev
+    // Attributes stored as pairs of (name, form)
+    uint64_t *attrs;  // Dynamic array of attribute name/form pairs: attrs[2 * i] is the name, attrs[2 * i + 1] the form; NULL when attr_count is 0
+    size_t attr_count; // number of (name, form) pairs held in attrs
+} dwarf_abbrev_t;
+
+// Parse a single abbreviation from .debug_abbrev; returns false at end of table or when allocation fails
+static bool parse_abbrev(const uint8_t *abbrev_data, size_t abbrev_size, size_t *offset, dwarf_abbrev_t *abbrev)
+{
+    if (*offset >= abbrev_size) {
+        return false;
+    }
+
+    // Read abbreviation code
+    read_uleb128(abbrev_data, offset, &abbrev->code);
+    if (abbrev->code == 0) {
+        return false;  // End of abbreviation table
+    }
+
+    // Read tag
+    read_uleb128(abbrev_data, offset, &abbrev->tag);
+
+    // Read has_children flag
+    abbrev->has_children = abbrev_data[(*offset)++];
+
+    // Count attributes first
+    size_t temp_offset = *offset;
+    size_t count = 0;
+    while (temp_offset < abbrev_size) {
+        uint64_t name, form;
+        read_uleb128(abbrev_data, &temp_offset, &name);
+        read_uleb128(abbrev_data, &temp_offset, &form);
+        if (name == 0 && form == 0) {
+            break;
+        }
+        count++;
+    }
+
+    // Allocate and read attributes; a failed allocation is reported like the end of the table
+    abbrev->attrs = (count > 0) ? malloc(count * 2 * sizeof(uint64_t)) : NULL;
+    if (count > 0 && abbrev->attrs == NULL) {
+        abbrev->attr_count = 0;
+        return false;
+    }
+    abbrev->attr_count = count;
+    for (size_t i = 0; i < count; i++) {
+        read_uleb128(abbrev_data, offset, &abbrev->attrs[i * 2]);      // name
+        read_uleb128(abbrev_data, offset, &abbrev->attrs[i * 2 + 1]);  // form
+    }
+
+    // Continue after the (0, 0) terminator exactly where the counting pass stopped
+    *offset = temp_offset;
+
+    return true;
+}
+
+// Parse all abbreviations from .debug_abbrev; *count receives the number of entries in the returned array
+static dwarf_abbrev_t *parse_abbrev_table(const uint8_t *abbrev_data, size_t abbrev_size, size_t *count)
+{
+    // First pass: count abbreviations
+    size_t offset = 0;
+    size_t abbrev_count = 0;
+
+    while (offset < abbrev_size) {
+        uint64_t code;
+        read_uleb128(abbrev_data, &offset, &code);
+        if (code == 0) {
+            break;
+        }
+
+        // Skip tag
+        uint64_t tag;
+        read_uleb128(abbrev_data, &offset, &tag);
+        offset++;  // has_children
+
+        // Skip attributes
+        while (offset < abbrev_size) {
+            uint64_t name, form;
+            read_uleb128(abbrev_data, &offset, &name);
+            read_uleb128(abbrev_data, &offset, &form);
+            if (name == 0 && form == 0) {
+                break;
+            }
+        }
+
+        abbrev_count++;
+    }
+
+    if (abbrev_count == 0) {
+        *count = 0;
+        return NULL;
+    }
+
+    // Second pass: parse abbreviations (a failed allocation yields NULL with *count == 0)
+    dwarf_abbrev_t *abbrevs = calloc(abbrev_count, sizeof(dwarf_abbrev_t));
+    size_t parsed = 0;
+    offset = 0;
+
+    while (abbrevs != NULL && offset < abbrev_size && parsed < abbrev_count) {
+        if (!parse_abbrev(abbrev_data, abbrev_size, &offset, &abbrevs[parsed])) {
+            break;
+        }
+        parsed++;
+    }
+
+    *count = parsed;
+    return abbrevs;
+}
+
+// Release an abbreviation table together with the attribute array owned by each entry
+static void free_abbrev_table(dwarf_abbrev_t *abbrevs, size_t count)
+{
+    while (count > 0) {
+        free(abbrevs[--count].attrs);
+    }
+    free(abbrevs);
+}
+
+// Look up the first abbreviation whose code matches; NULL when there is none
+static const dwarf_abbrev_t *find_abbrev(const dwarf_abbrev_t *abbrevs, size_t count, uint64_t code)
+{
+    const dwarf_abbrev_t *match = NULL;
+    for (size_t idx = 0; match == NULL && idx < count; idx++) {
+        match = (abbrevs[idx].code == code) ? &abbrevs[idx] : NULL;
+    }
+
+    return match;
+}
+
+// Get size in bytes of a DWARF form value at data[offset] (32-bit DWARF); 0 for unknown forms and DW_FORM_flag_present
+static size_t get_form_size(uint64_t form, uint8_t addr_size, const uint8_t *data, size_t offset)
+{
+    switch (form) {
+        case 0x01:  // DW_FORM_addr
+        case 0x10:  // DW_FORM_ref_addr
+            return addr_size;
+        case 0x0a:  // DW_FORM_block1
+            return 1 + (size_t) data[offset];
+        case 0x03:  // DW_FORM_block2
+            return 2 + ((size_t) data[offset] | ((size_t) data[offset + 1] << 8));
+        case 0x04:  // DW_FORM_block4
+            // Widen before shifting: a promoted int shifted left by 24 can overflow into the sign bit
+            return 4 + ((size_t) data[offset] | ((size_t) data[offset + 1] << 8) | ((size_t) data[offset + 2] << 16) | ((size_t) data[offset + 3] << 24));
+        case 0x0b:  // DW_FORM_data1
+        case 0x0c:  // DW_FORM_flag
+        case 0x11:  // DW_FORM_ref1
+            return 1;
+        case 0x05:  // DW_FORM_data2
+        case 0x12:  // DW_FORM_ref2
+            return 2;
+        case 0x06:  // DW_FORM_data4
+        case 0x0e:  // DW_FORM_strp
+        case 0x13:  // DW_FORM_ref4
+        case 0x17:  // DW_FORM_sec_offset
+            return 4;
+        case 0x07:  // DW_FORM_data8
+        case 0x14:  // DW_FORM_ref8
+        case 0x20:  // DW_FORM_ref_sig8
+            return 8;
+        case 0x08:  // DW_FORM_string
+            return strlen((const char *) &data[offset]) + 1;
+        case 0x09:  // DW_FORM_block
+        case 0x18:  // DW_FORM_exprloc
+            // Variable length - LEB128 size followed by data
+            {
+                uint64_t block_len;
+                size_t temp = offset;
+                size_t leb_size = read_uleb128(data, &temp, &block_len);
+                return leb_size + block_len;  // LEB128 size + block data
+            }
+        case 0x0d:  // DW_FORM_sdata
+        case 0x0f:  // DW_FORM_udata
+        case 0x15:  // DW_FORM_ref_udata
+            // Just a LEB128 value; signed and unsigned encodings end on the same continuation bit
+            {
+                uint64_t val;
+                size_t temp = offset;
+                return read_uleb128(data, &temp, &val);
+            }
+        case 0x19:  // DW_FORM_flag_present
+            return 0;
+        default:
+            TRACE("Unknown DWARF form: 0x%llx\n", (unsigned long long)form);
+            return 0;
+    }
+}
+
+// Patch addresses in .debug_info using parsed abbreviations: adds load_address to every DW_FORM_addr value
+static void patch_debug_info_addresses(uint8_t *debug_info, size_t debug_info_size,
+                                       const dwarf_abbrev_t *abbrevs, size_t abbrev_count,
+                                       uintptr_t load_address)
+{
+    if (debug_info_size < 11) {
+        return;
+    }
+
+    // Parse compile unit header
+    uint8_t addr_size = debug_info[10];
+    TRACE("Patching .debug_info with addr_size=%d\n", addr_size);
+
+    // Skip: length(4) + version(2) + abbrev_offset(4) + addr_size(1) = 11 bytes
+    size_t offset = 11;
+    int patch_count = 0;
+
+    // Parse DIEs
+    while (offset < debug_info_size) {
+        uint64_t abbrev_code;
+        size_t code_size = read_uleb128(debug_info, &offset, &abbrev_code);
+
+        if (abbrev_code == 0) {
+            // Null DIE - end of siblings
+            continue;
+        }
+
+        const dwarf_abbrev_t *abbrev = find_abbrev(abbrevs, abbrev_count, abbrev_code);
+        if (!abbrev) {
+            TRACE("Warning: Unknown abbreviation code %llu at offset %zu\n",
+                  (unsigned long long)abbrev_code, offset - code_size);
+            break;
+        }
+
+        // Process attributes
+        for (size_t i = 0; i < abbrev->attr_count; i++) {
+            uint64_t attr_name = abbrev->attrs[i * 2];
+            uint64_t attr_form = abbrev->attrs[i * 2 + 1];
+
+            // Rebase in-bounds DW_FORM_addr values; memcpy because offsets here carry no alignment guarantee
+            if (attr_form == 0x01 && offset + addr_size <= debug_info_size) {  // DW_FORM_addr
+                if (addr_size == 8) {
+                    uint64_t old_val;
+                    memcpy(&old_val, &debug_info[offset], sizeof(old_val));
+                    uint64_t new_val = old_val + load_address;
+                    memcpy(&debug_info[offset], &new_val, sizeof(new_val));
+                    TRACE("Patched .debug_info[%zu] (attr 0x%llx): 0x%llx -> 0x%llx\n",
+                          offset, (unsigned long long)attr_name, (unsigned long long)old_val, (unsigned long long)new_val);
+                    patch_count++;
+                } else if (addr_size == 4) {
+                    uint32_t old_val;
+                    memcpy(&old_val, &debug_info[offset], sizeof(old_val));
+                    uint32_t new_val = old_val + (uint32_t) load_address;
+                    memcpy(&debug_info[offset], &new_val, sizeof(new_val));
+                    TRACE("Patched .debug_info[%zu] (attr 0x%llx): 0x%" PRIx32 " -> 0x%" PRIx32 "\n",
+                          offset, (unsigned long long)attr_name, old_val, new_val);
+                    patch_count++;
+                }
+            }
+            // Skip to next attribute
+            size_t form_size = get_form_size(attr_form, addr_size, debug_info, offset);
+            if (form_size == 0 && attr_form != 0x19) {  // DW_FORM_flag_present legitimately occupies no bytes
+                TRACE("Failed to get form size for form 0x%llx at offset %zu\n",
+                      (unsigned long long)attr_form, offset);
+                return;
+            }
+            offset += form_size;
+
+            if (offset > debug_info_size) {
+                TRACE("Offset exceeded debug_info size\n");
+                return;
+            }
+        }
+    }
+
+    TRACE("Total .debug_info patches: %d\n", patch_count);
+}
+
+// Create a minimal ELF file for debugging with proper PIE support; returns a malloc'ed image (size in *new_elf_size) or NULL
+static uint8_t *create_minimal_elf_for_debugging(const uint8_t *original_elf_data, size_t original_elf_size,
+    uintptr_t load_address, size_t *new_elf_size)
+{
+    TRACE("create_minimal_elf_for_debugging: original_elf_size=%zu, load_address=0x%lx\n",
+        original_elf_size, (unsigned long) load_address);
+
+    // Extract symbol table, string table, and DWARF sections from original ELF
+    const char *symtab_data = NULL;
+    size_t symtab_size = 0;
+    const char *strtab_data = NULL;
+    size_t strtab_size = 0;
+    const char *debug_info_data = NULL;
+    size_t debug_info_size = 0;
+    const char *debug_line_data = NULL;
+    size_t debug_line_size = 0;
+    const char *debug_abbrev_data = NULL;
+    size_t debug_abbrev_size = 0;
+    const char *debug_str_data = NULL;
+    size_t debug_str_size = 0;
+    const char *debug_aranges_data = NULL;
+    size_t debug_aranges_size = 0;
+
+    // Parse original ELF to extract tables. NOTE(review): only the header size is checked; section offsets and sizes are trusted
+    if (original_elf_size < sizeof(Elf_Ehdr)) {
+        fprintf(stderr, "ERROR: Original ELF too small for header\n");
+        return NULL;
+    }
+
+    const Elf_Ehdr *ehdr = (const Elf_Ehdr *) original_elf_data;
+    const Elf_Shdr *shdrs = (const Elf_Shdr *) (original_elf_data + ehdr->e_shoff);
+    const char *shstrtab = (const char *) (original_elf_data + shdrs[ehdr->e_shstrndx].sh_offset);
+
+    // Find .symtab, .strtab, and .debug_* sections
+    for (int i = 0; i < ehdr->e_shnum; i++) {
+        const char *section_name = shstrtab + shdrs[i].sh_name;
+
+        if (shdrs[i].sh_type == SHT_SYMTAB) {
+            symtab_data = (const char *) original_elf_data + shdrs[i].sh_offset;
+            symtab_size = shdrs[i].sh_size;
+        } else if (shdrs[i].sh_type == SHT_STRTAB && i != ehdr->e_shstrndx) {
+            strtab_data = (const char *) original_elf_data + shdrs[i].sh_offset;
+            strtab_size = shdrs[i].sh_size;
+        } else if (strcmp(section_name, ".debug_info") == 0) {
+            debug_info_data = (const char *) original_elf_data + shdrs[i].sh_offset;
+            debug_info_size = shdrs[i].sh_size;
+        } else if (strcmp(section_name, ".debug_line") == 0) {
+            debug_line_data = (const char *) original_elf_data + shdrs[i].sh_offset;
+            debug_line_size = shdrs[i].sh_size;
+        } else if (strcmp(section_name, ".debug_abbrev") == 0) {
+            debug_abbrev_data = (const char *) original_elf_data + shdrs[i].sh_offset;
+            debug_abbrev_size = shdrs[i].sh_size;
+        } else if (strcmp(section_name, ".debug_str") == 0) {
+            debug_str_data = (const char *) original_elf_data + shdrs[i].sh_offset;
+            debug_str_size = shdrs[i].sh_size;
+        } else if (strcmp(section_name, ".debug_aranges") == 0) {
+            debug_aranges_data = (const char *) original_elf_data + shdrs[i].sh_offset;
+            debug_aranges_size = shdrs[i].sh_size;
+        }
+    }
+
+    if (!symtab_data || !strtab_data) {
+        fprintf(stderr, "ERROR: Could not find symbol or string table in original ELF\n");
+        return NULL;
+    }
+
+    TRACE("Found DWARF sections: .debug_info=%zu .debug_line=%zu .debug_abbrev=%zu .debug_str=%zu .debug_aranges=%zu\n",
+        debug_info_size, debug_line_size, debug_abbrev_size, debug_str_size, debug_aranges_size);
+
+    // Section name strings; every sh_name assigned below is the byte offset of a name inside this table
+    static const char section_names[] = "\0.text\0.symtab\0.strtab\0.shstrtab\0.debug_info\0.debug_line\0.debug_abbrev\0.debug_str\0.debug_aranges\0";
+    size_t shstrtab_size = sizeof(section_names) - 1; // 97 bytes of names; the former constant 103 read past the end of the literal
+
+    // Count how many sections we have (null + .text + .symtab + .strtab + .shstrtab + debug sections)
+    int section_count = 5; // Base sections
+    if (debug_info_data) section_count++;
+    if (debug_line_data) section_count++;
+    if (debug_abbrev_data) section_count++;
+    if (debug_str_data) section_count++;
+    if (debug_aranges_data) section_count++;
+
+    // Find the actual .text section size from the original ELF
+    const Elf_Ehdr *orig_ehdr = (const Elf_Ehdr *) original_elf_data;
+    const Elf_Shdr *orig_shdrs = (const Elf_Shdr *) (original_elf_data + orig_ehdr->e_shoff);
+
+    size_t code_size = 0;
+
+    // Look for .text section in original ELF
+    for (int i = 0; i < orig_ehdr->e_shnum; i++) {
+        const Elf_Shdr *shdr = &orig_shdrs[i];
+        if (shdr->sh_type == 1 && (shdr->sh_flags & 6) == 6) { // SHT_PROGBITS + SHF_ALLOC + SHF_EXECINSTR
+            code_size = shdr->sh_size;
+            break;
+        }
+    }
+
+    if (code_size == 0) {
+        fprintf(stderr, "ERROR: Could not find .text section in original ELF\n");
+        return NULL;
+    }
+
+    // Calculate size of new minimal ELF (ELF header + 1 program header + section headers + data)
+    // IMPORTANT: We now include code_size so we can copy the actual JIT code into the file
+    size_t elf_size = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (section_count * sizeof(Elf_Shdr)) +
+                     code_size + symtab_size + strtab_size + shstrtab_size +
+                     debug_info_size + debug_line_size + debug_abbrev_size + debug_str_size + debug_aranges_size;
+
+    uint8_t *new_elf = (uint8_t *) malloc(elf_size);
+    if (!new_elf) {
+        fprintf(stderr, "ERROR: Failed to allocate memory for new ELF\n");
+        return NULL;
+    }
+    memset(new_elf, 0, elf_size);
+
+    // Create ELF header
+    Elf_Ehdr *new_ehdr = (Elf_Ehdr *) new_elf;
+    memcpy(new_ehdr->e_ident, orig_ehdr->e_ident, 16);
+    // Use ET_EXEC for JIT debugging - code is loaded at fixed address
+    // ET_EXEC is the correct type for executables with PT_LOAD at specific addresses
+    new_ehdr->e_type = 2; // ET_EXEC
+    new_ehdr->e_machine = orig_ehdr->e_machine;
+    new_ehdr->e_version = orig_ehdr->e_version;
+    new_ehdr->e_entry = 0;
+    new_ehdr->e_phoff = sizeof(Elf_Ehdr);
+    new_ehdr->e_shoff = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr);
+    new_ehdr->e_flags = orig_ehdr->e_flags;
+    new_ehdr->e_ehsize = sizeof(Elf_Ehdr);
+    new_ehdr->e_phentsize = sizeof(Elf_Phdr);
+    new_ehdr->e_phnum = 1;
+    new_ehdr->e_shentsize = sizeof(Elf_Shdr);
+    new_ehdr->e_shnum = section_count;
+    new_ehdr->e_shstrndx = 4; // .shstrtab is the section name string table (always section 4)
+
+    // Create program header (PT_LOAD segment)
+    Elf_Phdr *new_phdr = (Elf_Phdr *) (new_elf + sizeof(Elf_Ehdr));
+    new_phdr->p_type = PT_LOAD;
+    new_phdr->p_flags = PF_R | PF_X;
+
+    // PT_LOAD will start where code is in the file and map to load_address in memory
+    // p_offset will be set after we know where code is
+    new_phdr->p_offset = 0;  // Will be set after we copy code
+    new_phdr->p_vaddr = load_address;
+    new_phdr->p_paddr = load_address;
+    new_phdr->p_filesz = 0;  // Will be set after we copy data
+    new_phdr->p_memsz = 0;   // Will be set later after we know total size
+    new_phdr->p_align = 1;
+
+    // Create section headers
+    Elf_Shdr *new_shdrs = (Elf_Shdr *) (new_elf + sizeof(Elf_Ehdr) + sizeof(Elf_Phdr));
+    size_t current_offset = sizeof(Elf_Ehdr) + sizeof(Elf_Phdr) + (section_count * sizeof(Elf_Shdr));
+
+    // Copy the actual JIT code into the file right after section headers
+    // This allows GDB's BFD to recognize it as a valid object file
+    uint8_t *code_dest = new_elf + current_offset;
+    memcpy(code_dest, (void*)load_address, code_size);
+    size_t code_file_offset = current_offset;
+    current_offset += code_size;
+
+    // Section 0: null section (required)
+    new_shdrs[0] = (Elf_Shdr){ 0 };
+
+    // Section 1: .text section
+    new_shdrs[1].sh_name = 1; // ".text\0" at offset 1 in section names
+    new_shdrs[1].sh_type = 1; // SHT_PROGBITS
+    new_shdrs[1].sh_flags = 6; // SHF_ALLOC | SHF_EXECINSTR
+    new_shdrs[1].sh_addr = load_address;
+    new_shdrs[1].sh_offset = code_file_offset; // Point to code we copied into the file
+    new_shdrs[1].sh_size = code_size;
+    new_shdrs[1].sh_addralign = 1;
+
+    // Section 2: .symtab
+    new_shdrs[2].sh_name = 7; // ".symtab\0" at offset 7 in section names
+    new_shdrs[2].sh_type = SHT_SYMTAB;
+    new_shdrs[2].sh_offset = current_offset;
+    new_shdrs[2].sh_size = symtab_size;
+    new_shdrs[2].sh_link = 3; // Points to .strtab
+
+#if TERM_BYTES == 8
+    new_shdrs[2].sh_addralign = 8;
+#else
+    new_shdrs[2].sh_addralign = 4;
+#endif
+
+    new_shdrs[2].sh_entsize = sizeof(Elf_Sym);
+    current_offset += symtab_size;
+
+    // Section 3: .strtab
+    new_shdrs[3].sh_name = 15; // ".strtab\0" at offset 15 in section names
+    new_shdrs[3].sh_type = SHT_STRTAB;
+    new_shdrs[3].sh_offset = current_offset;
+    new_shdrs[3].sh_size = strtab_size;
+    new_shdrs[3].sh_addralign = 1;
+    current_offset += strtab_size;
+
+    // Section 4: .shstrtab (section name string table)
+    new_shdrs[4].sh_name = 23; // ".shstrtab\0" at offset 23 in section names
+    new_shdrs[4].sh_type = SHT_STRTAB;
+    new_shdrs[4].sh_offset = current_offset;
+    new_shdrs[4].sh_size = shstrtab_size;
+    new_shdrs[4].sh_addralign = 1;
+    current_offset += shstrtab_size;
+
+    // Add DWARF sections if present
+    // DWARF sections don't need SHF_ALLOC - they're debug info only, not loaded at runtime
+    int next_section = 5;
+
+    // Section 5: .debug_info (if present)
+    if (debug_info_data) {
+        new_shdrs[next_section].sh_name = 33; // ".debug_info\0" at offset 33 in section names
+        new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS
+        new_shdrs[next_section].sh_flags = 0; // No ALLOC - debug info only
+        new_shdrs[next_section].sh_addr = 0;
+        new_shdrs[next_section].sh_offset = current_offset;
+        new_shdrs[next_section].sh_size = debug_info_size;
+        new_shdrs[next_section].sh_addralign = 1;
+        current_offset += debug_info_size;
+        next_section++;
+    }
+
+    // Section 6: .debug_line (if present)
+    if (debug_line_data) {
+        new_shdrs[next_section].sh_name = 45; // ".debug_line\0" at offset 45 in section names
+        new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS
+        new_shdrs[next_section].sh_flags = 0;
+        new_shdrs[next_section].sh_addr = 0;
+        new_shdrs[next_section].sh_offset = current_offset;
+        new_shdrs[next_section].sh_size = debug_line_size;
+        new_shdrs[next_section].sh_addralign = 1;
+        current_offset += debug_line_size;
+        next_section++;
+    }
+
+    // Section 7: .debug_abbrev (if present)
+    if (debug_abbrev_data) {
+        new_shdrs[next_section].sh_name = 57; // ".debug_abbrev\0" at offset 57 in section names
+        new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS
+        new_shdrs[next_section].sh_flags = 0;
+        new_shdrs[next_section].sh_addr = 0;
+        new_shdrs[next_section].sh_offset = current_offset;
+        new_shdrs[next_section].sh_size = debug_abbrev_size;
+        new_shdrs[next_section].sh_addralign = 1;
+        current_offset += debug_abbrev_size;
+        next_section++;
+    }
+
+    // Section 8: .debug_str (if present)
+    if (debug_str_data) {
+        new_shdrs[next_section].sh_name = 71; // ".debug_str\0" at offset 71 in section names
+        new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS
+        new_shdrs[next_section].sh_flags = 0;
+        new_shdrs[next_section].sh_addr = 0;
+        new_shdrs[next_section].sh_offset = current_offset;
+        new_shdrs[next_section].sh_size = debug_str_size;
+        new_shdrs[next_section].sh_addralign = 1;
+        current_offset += debug_str_size;
+        next_section++;
+    }
+
+    // PT_LOAD covers only the .text section (code)
+    // DWARF sections are not loadable - they're debug info only
+    new_phdr->p_offset = code_file_offset;
+    new_phdr->p_memsz = code_size;
+    new_phdr->p_filesz = code_size;
+    TRACE("PT_LOAD covers 0x%lx to 0x%lx (size=0x%lx), filesz=0x%lx\n",
+          (unsigned long)load_address, (unsigned long)(load_address + code_size),
+          (unsigned long)new_phdr->p_memsz, (unsigned long)new_phdr->p_filesz);
+
+    // Section 9: .debug_aranges (if present)
+    // DISABLED: LLDB uses symbols for breakpoints, not .debug_aranges
+    // The section is never emitted, although section_count and elf_size above still reserve room for it
+    if (false && debug_aranges_data) {
+        new_shdrs[next_section].sh_name = 82; // ".debug_aranges\0" at offset 82 in section names
+        new_shdrs[next_section].sh_type = 1; // SHT_PROGBITS
+        new_shdrs[next_section].sh_offset = current_offset;
+        new_shdrs[next_section].sh_size = debug_aranges_size;
+        new_shdrs[next_section].sh_addralign = 1;
+        current_offset += debug_aranges_size;
+        next_section++;
+    }
+
+    // Copy symbol table data
+    uint8_t *new_symtab = new_elf + new_shdrs[2].sh_offset;
+    memcpy(new_symtab, symtab_data, symtab_size);
+
+    // Copy string table data
+    uint8_t *new_strtab = new_elf + new_shdrs[3].sh_offset;
+    memcpy(new_strtab, strtab_data, strtab_size);
+
+    TRACE("Copied symbol table: %zu bytes, %zu symbols\n", symtab_size, symtab_size / sizeof(Elf_Sym));
+
+    // Debug: print first few function symbols
+    Elf_Sym *syms = (Elf_Sym *)new_symtab;
+    size_t num_syms = symtab_size / sizeof(Elf_Sym);
+    for (size_t i = 0; i < num_syms && i < 10; i++) {
+        if (ELF_ST_TYPE(syms[i].st_info) == STT_FUNC) {
+            const char *sym_name = (const char *)(new_strtab + syms[i].st_name);
+            TRACE("  Symbol[%zu]: %s @ 0x%lx (size=%zu)\n", i, sym_name,
+                  (unsigned long)syms[i].st_value, (size_t)syms[i].st_size);
+        }
+    }
+
+    // With PT_LOAD program header, the debugger should automatically apply the base address
+
+    // Copy section name string table data
+    uint8_t *new_shstrtab = new_elf + new_shdrs[4].sh_offset;
+    memcpy(new_shstrtab, section_names, shstrtab_size);
+
+    // Copy DWARF section data
+    next_section = 5;
+
+    if (debug_info_data) {
+        uint8_t *new_debug_info = new_elf + new_shdrs[next_section].sh_offset;
+        memcpy(new_debug_info, debug_info_data, debug_info_size);
+
+        // DWARF addresses are copied as-is: patch_debug_info_addresses() is not called here.
+        // The debug sections are emitted without SHF_ALLOC and lie outside the PT_LOAD segment.
+        TRACE(".debug_info copied without address patching\n");
+
+        next_section++;
+    }
+
+    if (debug_line_data) {
+        uint8_t *new_debug_line = new_elf + new_shdrs[next_section].sh_offset;
+        memcpy(new_debug_line, debug_line_data, debug_line_size);
+        next_section++;
+    }
+
+    if (debug_abbrev_data) {
+        uint8_t *new_debug_abbrev = new_elf + new_shdrs[next_section].sh_offset;
+        memcpy(new_debug_abbrev, debug_abbrev_data, debug_abbrev_size);
+        next_section++;
+    }
+
+    if (debug_str_data) {
+        uint8_t *new_debug_str = new_elf + new_shdrs[next_section].sh_offset;
+        memcpy(new_debug_str, debug_str_data, debug_str_size);
+        next_section++;
+    }
+
+    // DISABLED: .debug_aranges not needed for LLDB breakpoints
+    if (false && debug_aranges_data) {
+        uint8_t *new_debug_aranges = new_elf + new_shdrs[next_section].sh_offset;
+        memcpy(new_debug_aranges, debug_aranges_data, debug_aranges_size);
+
+        // Patch .debug_aranges addresses to absolute addresses
+        // Structure: [length:4][version:2][debug_info_offset:4][addr_size:1][seg_size:1][padding:variable]
+        //            [address:addr_size][length:addr_size][terminator:addr_size*2]
+        // Header is 4+2+4+1+1 = 12 bytes, then padding to align to 2*addr_size
+        if (debug_aranges_size >= 12) {
+            uint8_t addr_size = new_debug_aranges[10];  // Address size field at offset 4+2+4 = 10
+            TRACE(".debug_aranges addr_size=%d\n", addr_size);
+
+            // Calculate padding: header is 8 bytes (after the length field), align to 2*addr_size
+            size_t header_size = 8;  // version(2) + debug_info_offset(4) + addr_size(1) + seg_size(1)
+            size_t tuple_alignment = 2 * addr_size;
+            size_t padding_size = (tuple_alignment - (header_size % tuple_alignment)) % tuple_alignment;
+            size_t descriptor_offset = 4 + header_size + padding_size;  // Skip length field + header + padding
+
+            TRACE(".debug_aranges descriptor at offset %zu (header=%zu, padding=%zu)\n",
+                  descriptor_offset, header_size, padding_size);
+
+            if (debug_aranges_size >= descriptor_offset + addr_size * 2) {
+                if (addr_size == 8) {
+                    // Patch the address range start address (64-bit)
+                    uint64_t *range_start = (uint64_t *)(new_debug_aranges + descriptor_offset);
+                    uint64_t old_addr = *range_start;
+                    *range_start += load_address;
+                    TRACE("Patched .debug_aranges: 0x%llx -> 0x%llx\n", (unsigned long long)old_addr, (unsigned long long)*range_start);
+                } else if (addr_size == 4) {
+                    // Patch the address range start address (32-bit)
+                    uint32_t *range_start = (uint32_t *)(new_debug_aranges + descriptor_offset);
+                    uint32_t old_addr = *range_start;
+                    *range_start += (uint32_t)load_address;
+                    TRACE("Patched .debug_aranges: 0x%x -> 0x%x\n", old_addr, *range_start);
+                }
+            }
+        }
+
+        next_section++;
+    }
+
+    *new_elf_size = elf_size;
+    return new_elf;
+}
+
+void jit_debug_register_code(Module *mod, const void *native_code, size_t native_size, ModuleNativeEntryPoint entry_point)
+{
+    UNUSED(mod);
+
+    if (!native_code || native_size < 8) {
+        fprintf(stderr, "jit_debug_register_code: no native code or too small\n");
+        return;
+    }
+
+    // Parse the NativeCodeChunk header to find where the ELF starts
+    const uint8_t *data = (const uint8_t *) native_code;
+    uint32_t info_size = READ_32_UNALIGNED(data);
+
+    if (info_size > native_size - 4) { // native_size >= 8 here; info_size + 4 could wrap around in 32 bits
+        fprintf(stderr, "jit_debug_register_code: invalid info_size\n");
+        return;
+    }
+
+    // Check if there's an ELF header after the NativeCodeChunk header
+    const uint8_t *elf_start = data + 4 + info_size;
+    size_t elf_size = native_size - (4 + info_size);
+
+    if (elf_size < 16) {
+        fprintf(stderr, "jit_debug_register_code: no space for ELF header\n");
+        return;
+    }
+
+    // Check for ELF magic: 0x7f, 'E', 'L', 'F'
+    if (elf_start[0] != 0x7f || elf_start[1] != 'E' || elf_start[2] != 'L' || elf_start[3] != 'F') {
+        fprintf(stderr, "jit_debug_register_code: no ELF header found, not registering debug info\n");
+        return;
+    }
+
+    // Allocate memory for the JIT code entry (but not for the ELF data itself)
+    struct jit_code_entry *entry = malloc(sizeof(struct jit_code_entry));
+    if (!entry) {
+        return;
+    }
+
+    // Use the actual mapped entry point address as the load address
+    uintptr_t load_address = (uintptr_t) entry_point;
+
+    // Create a minimal ELF file with proper symbols for debugging
+    size_t new_elf_size;
+    const uint8_t *new_elf = create_minimal_elf_for_debugging(elf_start, elf_size, load_address, &new_elf_size);
+    if (!new_elf) {
+        fprintf(stderr, "ERROR: Failed to create minimal ELF for debugging\n");
+        free(entry); // not linked into the descriptor yet, so nothing else references it
+        return;
+    }
+
+    // Debug: dump ELF to file for inspection. NOTE(review): fixed path in /tmp, rewritten on every registration; consider gating it
+    FILE *f = fopen("/tmp/jit_debug.elf", "wb");
+    if (f) {
+        fwrite(new_elf, 1, new_elf_size, f);
+        fclose(f);
+        TRACE("Wrote JIT ELF to /tmp/jit_debug.elf (%zu bytes)\n", new_elf_size);
+    }
+
+    // Initialize the entry with the new ELF
+    entry->next_entry = NULL;
+    entry->prev_entry = NULL;
+    entry->symfile_addr = (const char *) new_elf;
+    entry->symfile_size = new_elf_size;
+
+    // Add to GDB's linked list
+    if (__jit_debug_descriptor.first_entry) {
+        __jit_debug_descriptor.first_entry->prev_entry = entry;
+        entry->next_entry = __jit_debug_descriptor.first_entry;
+    }
+    __jit_debug_descriptor.first_entry = entry;
+
+    // TODO: Store entry pointer in module for later unregistration
+
+    // Notify GDB that new code has been registered
+    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+    __jit_debug_descriptor.relevant_entry = entry;
+    __jit_debug_register_code();
+}
+
+void jit_debug_unregister_code(Context *ctx, Module *mod)
+{
+    UNUSED(ctx);
+    UNUSED(mod);
+
+    // TODO: Implement unregistration. Currently a no-op: entries linked by
+    // jit_debug_register_code and their ELF images stay on the descriptor's list.
+    // Need to store the jit_code_entry pointer in the module structure to retrieve it here.
+}
+
+#endif // AVM_NO_JIT_DWARF
diff --git a/src/libAtomVM/jit.h b/src/libAtomVM/jit.h
index 77caa9d578..0bfbb7aad8 100644
--- a/src/libAtomVM/jit.h
+++ b/src/libAtomVM/jit.h
@@ -172,8 +172,11 @@ enum TrapAndLoadResult
 
 #define JIT_ARCH_X86_64 1
 #define JIT_ARCH_AARCH64 2
+#define JIT_ARCH_ARMV6M 3
+#define JIT_ARCH_RISCV32 4
 
 #define JIT_VARIANT_PIC 1
+#define JIT_VARIANT_FLOAT32 2
 
 #ifndef AVM_NO_JIT
 
@@ -187,6 +190,16 @@ enum TrapAndLoadResult
 #define JIT_JUMPTABLE_ENTRY_SIZE 4
 #endif
 
+#ifdef __arm__
+#define JIT_ARCH_TARGET JIT_ARCH_ARMV6M
+#define JIT_JUMPTABLE_ENTRY_SIZE 12
+#endif
+
+#if defined(__riscv) && (__riscv_xlen == 32)
+#define JIT_ARCH_TARGET JIT_ARCH_RISCV32
+#define JIT_JUMPTABLE_ENTRY_SIZE 8
+#endif
+
 #ifndef JIT_ARCH_TARGET
 #error Unknown JIT target
 #endif
@@ -212,6 +225,34 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream);
  */
 enum TrapAndLoadResult jit_trap_and_load(Context *ctx, Module *mod, uint32_t label);
 
+#ifndef AVM_NO_JIT_DWARF
+/**
+ * @brief Register JIT-compiled code and its debug info with GDB/LLDB
+ *
+ * @details This function registers native code and associated DWARF debug
+ * information with the debugger using the GDB JIT interface. This allows
+ * debuggers to show function names and source line information for JIT code.
+ *
+ * @param mod The module containing the JIT code
+ * @param native_code Pointer to the native machine code
+ * @param native_size Size of the native code in bytes
+ * @param entry_point The actual mapped entry point address
+ */
+void jit_debug_register_code(Module *mod, const void *native_code, size_t native_size, ModuleNativeEntryPoint entry_point);
+
+/**
+ * @brief Unregister JIT-compiled code from debugger
+ *
+ * @details This function is meant to unregister previously registered JIT code
+ * from the debugger when a module is unloaded. It is currently a no-op (TODO).
+ *
+ * @param ctx The context (unused for now; module_destroy passes NULL)
+ * @param mod The module being unloaded
+ */
+void jit_debug_unregister_code(Context *ctx, Module *mod);
+
+#endif // AVM_NO_JIT_DWARF
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/libAtomVM/module.c b/src/libAtomVM/module.c
index 12961f121e..1b5528b522 100644
--- a/src/libAtomVM/module.c
+++ b/src/libAtomVM/module.c
@@ -38,6 +38,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+// #define ENABLE_TRACE
+#include "trace.h"
+
 #ifdef WITH_ZLIB
 #include <zlib.h>
 #endif
@@ -336,10 +339,23 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary
             fprintf(stderr, "Unknown native code chunk version (%d)\n", ENDIAN_SWAP_16(native_code->version));
         } else {
             for (int arch_index = 0; arch_index < ENDIAN_SWAP_16(native_code->architectures_count); arch_index++) {
-                if (ENDIAN_SWAP_16(native_code->architectures[arch_index].architecture) == JIT_ARCH_TARGET && ENDIAN_SWAP_16(native_code->architectures[arch_index].variant) == JIT_VARIANT_PIC) {
+                uint16_t runtime_variant;
+#ifdef AVM_USE_SINGLE_PRECISION
+                runtime_variant = JIT_VARIANT_FLOAT32 | JIT_VARIANT_PIC;
+#else
+                runtime_variant = JIT_VARIANT_PIC;
+#endif
+                if (ENDIAN_SWAP_16(native_code->architectures[arch_index].architecture) == JIT_ARCH_TARGET && ENDIAN_SWAP_16(native_code->architectures[arch_index].variant) == runtime_variant) {
                     size_t offset = ENDIAN_SWAP_32(native_code->info_size) + ENDIAN_SWAP_32(native_code->architectures[arch_index].offset) + sizeof(native_code->info_size);
                     ModuleNativeEntryPoint module_entry_point = sys_map_native_code((const uint8_t *) &native_code->info_size, ENDIAN_SWAP_32(native_code->size), offset);
                     module_set_native_code(mod, ENDIAN_SWAP_32(native_code->labels), module_entry_point);
+
+#ifndef AVM_NO_JIT_DWARF
+                    // Register debug info with debugger (will check for embedded ELF)
+                    const void *chunk_start = (const uint8_t *) &native_code->info_size;
+                    size_t chunk_size = ENDIAN_SWAP_32(native_code->size);
+                    jit_debug_register_code(mod, chunk_start, chunk_size, module_entry_point);
+#endif
                     break;
                 }
             }
@@ -457,6 +473,11 @@ Module *module_new_from_iff_binary(GlobalContext *global, const void *iff_binary
 
 COLD_FUNC void module_destroy(Module *module)
 {
+#ifndef AVM_NO_JIT_DWARF
+    // Unregister DWARF debug info from debugger if it was registered
+    jit_debug_unregister_code(NULL, module);
+#endif
+
     free(module->labels);
     free(module->imported_funcs);
     free(module->literals_table);
diff --git a/src/libAtomVM/nifs.c b/src/libAtomVM/nifs.c
index 2fe0b12948..17fdce2a20 100644
--- a/src/libAtomVM/nifs.c
+++ b/src/libAtomVM/nifs.c
@@ -209,6 +209,7 @@ static term nif_erlang_module_loaded(Context *ctx, int argc, term argv[]);
 static term nif_erlang_nif_error(Context *ctx, int argc, term argv[]);
 #ifndef AVM_NO_JIT
 static term nif_jit_backend_module(Context *ctx, int argc, term argv[]);
+static term nif_jit_variant(Context *ctx, int argc, term argv[]);
 #endif
 static term nif_lists_reverse(Context *ctx, int argc, term argv[]);
 static term nif_lists_keyfind(Context *ctx, int argc, term argv[]);
@@ -794,6 +795,11 @@ static const struct Nif jit_backend_module_nif = {
     .base.type = NIFFunctionType,
     .nif_ptr = nif_jit_backend_module
 };
+
+static const struct Nif jit_variant_nif = {
+    .base.type = NIFFunctionType,
+    .nif_ptr = nif_jit_variant
+};
 #endif
 
 static const struct Nif lists_reverse_nif = {
@@ -5680,10 +5686,27 @@ static term nif_jit_backend_module(Context *ctx, int argc, term argv[])
     return JIT_X86_64_ATOM;
 #elif JIT_ARCH_TARGET == JIT_ARCH_AARCH64
     return JIT_AARCH64_ATOM;
+#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M
+    return JIT_ARMV6M_ATOM;
+#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32
+    return JIT_RISCV32_ATOM;
 #else
 #error Unknown JIT target
 #endif
 }
+
+static term nif_jit_variant(Context *ctx, int argc, term argv[])
+{
+    UNUSED(ctx);
+    UNUSED(argc);
+    UNUSED(argv);
+    // jit:variant/0: the PIC flag, plus the FLOAT32 flag in single-precision builds
+#ifdef AVM_USE_SINGLE_PRECISION
+    return term_from_int(JIT_VARIANT_FLOAT32 | JIT_VARIANT_PIC);
+#else
+    return term_from_int(JIT_VARIANT_PIC);
+#endif
+}
 #endif
 
 static term nif_lists_reverse(Context *ctx, int argc, term argv[])
diff --git a/src/libAtomVM/nifs.gperf b/src/libAtomVM/nifs.gperf
index a647c1de04..a4a2591fa0 100644
--- a/src/libAtomVM/nifs.gperf
+++ b/src/libAtomVM/nifs.gperf
@@ -193,6 +193,7 @@ lists:keyfind/3, &lists_keyfind_nif
 lists:keymember/3, &lists_keymember_nif
 lists:member/2, &lists_member_nif
 jit:backend_module/0, IF_HAVE_JIT(&jit_backend_module_nif)
+jit:variant/0, IF_HAVE_JIT(&jit_variant_nif)
 lists:reverse/1, &lists_reverse_nif
 lists:reverse/2, &lists_reverse_nif
 maps:from_keys/2, &maps_from_keys_nif
diff --git a/src/libAtomVM/opcodesswitch.h b/src/libAtomVM/opcodesswitch.h
index d8fc4106b0..e9d49698a0 100644
--- a/src/libAtomVM/opcodesswitch.h
+++ b/src/libAtomVM/opcodesswitch.h
@@ -7444,7 +7444,7 @@ HOT_FUNC int scheduler_entry_point(GlobalContext *glb)
         }
 
 terminate_context:
-        TRACE("-- Code execution finished for %i--\n", ctx->process_id);
+        TRACE("-- Code execution finished for %i--\n", (int) ctx->process_id);
         GlobalContext *global = ctx->global;
         if (ctx->leader) {
             scheduler_stop_all(global);
diff --git a/src/platforms/esp32/CMakeLists.txt b/src/platforms/esp32/CMakeLists.txt
index 9dec6ec5f3..4ddc362924 100644
--- a/src/platforms/esp32/CMakeLists.txt
+++ b/src/platforms/esp32/CMakeLists.txt
@@ -51,8 +51,21 @@ endif()
 # On Esp32, select is run in a loop in a dedicated task
 set(AVM_SELECT_IN_TASK ON)
 
-# JIT is not available yet on esp32
-set(AVM_DISABLE_JIT ON)
+# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4)
+# Configuration comes from idf.py menuconfig (KConfig), not CMake options
+if(CONFIG_JIT_ENABLED)
+    if ("${IDF_TARGET}" MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4")
+        set(AVM_DISABLE_JIT OFF)
+        set(AVM_JIT_TARGET_ARCH riscv32)
+        message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)")
+    else()
+        message(WARNING "JIT is not supported on ${IDF_TARGET} (Xtensa architecture)")
+        set(AVM_DISABLE_JIT ON)
+    endif()
+else()
+    set(AVM_DISABLE_JIT ON)
+    message(STATUS "JIT compilation disabled")
+endif()
 
 project(atomvm-esp32)
 
diff --git a/src/platforms/esp32/components/avm_sys/CMakeLists.txt b/src/platforms/esp32/components/avm_sys/CMakeLists.txt
index ebcedd3b57..8156bb2ac8 100644
--- a/src/platforms/esp32/components/avm_sys/CMakeLists.txt
+++ b/src/platforms/esp32/components/avm_sys/CMakeLists.txt
@@ -25,6 +25,7 @@ set(AVM_SYS_COMPONENT_SRCS
     "sys.c"
     "platform_nifs.c"
     "platform_defaultatoms.c"
+    "jit_stream_flash.c"
     "../../../../libAtomVM/inet.c"
     "../../../../libAtomVM/otp_crypto.c"
     "../../../../libAtomVM/otp_net.c"
diff --git a/src/platforms/esp32/components/avm_sys/jit_stream_flash.c b/src/platforms/esp32/components/avm_sys/jit_stream_flash.c
new file mode 100644
index 0000000000..77dfcca908
--- /dev/null
+++ b/src/platforms/esp32/components/avm_sys/jit_stream_flash.c
@@ -0,0 +1,34 @@
+/*
+ * This file is part of AtomVM.
+ *
+ * Copyright 2025 by Paul Guyot <pguyot@kallisys.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#ifndef AVM_NO_JIT
+
+#include "context.h"
+#include "jit.h"
+#include "term.h"
+
+ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream)
+{
+    UNUSED(ctx);
+    UNUSED(jit_stream);
+    return NULL; // stub: both arguments are ignored and no entry point is ever returned
+}
+
+#endif
diff --git a/src/platforms/esp32/components/avm_sys/sys.c b/src/platforms/esp32/components/avm_sys/sys.c
index 8318ae759a..ec229d70b2 100644
--- a/src/platforms/esp32/components/avm_sys/sys.c
+++ b/src/platforms/esp32/components/avm_sys/sys.c
@@ -807,3 +807,28 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global)
     UNUSED(global);
 #endif
 }
+
+#ifndef AVM_NO_JIT
+#include <soc/soc.h>
+
+ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset)
+{
+    UNUSED(size);
+    uintptr_t entry = (uintptr_t) (native_code + offset);
+
+#ifdef CONFIG_IDF_TARGET_ARCH_RISCV
+    // Native code stored in flash has to be reached through the instruction
+    // cache window (IROM) rather than through the data cache window (DROM).
+#if defined(CONFIG_IDF_TARGET_ESP32C2) || defined(CONFIG_IDF_TARGET_ESP32C3)
+    // C2 and C3 expose DROM and IROM as two separate address ranges
+    if (SOC_DROM_LOW <= entry && entry < SOC_DROM_HIGH) {
+        // Rebase the data cache address onto the instruction cache range
+        entry = entry - SOC_DROM_LOW + SOC_IROM_LOW;
+    }
+#endif
+    // C6, H2 and P4 use a unified DROM/IROM range: nothing to convert
+#endif
+
+    return (ModuleNativeEntryPoint) entry;
+}
+#endif
diff --git a/src/platforms/esp32/components/libatomvm/CMakeLists.txt b/src/platforms/esp32/components/libatomvm/CMakeLists.txt
index 97580dbfea..00595afeef 100644
--- a/src/platforms/esp32/components/libatomvm/CMakeLists.txt
+++ b/src/platforms/esp32/components/libatomvm/CMakeLists.txt
@@ -31,7 +31,12 @@ if (HAVE_PLATFORM_ATOMIC_H)
     target_include_directories(libAtomVM PUBLIC ../avm_sys/)
 endif()
 
-target_link_libraries(${COMPONENT_LIB}
-    INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init")
+if (AVM_DISABLE_JIT)
+    target_link_libraries(${COMPONENT_LIB}
+        INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init")
+else()
+    target_link_libraries(${COMPONENT_LIB}
+        INTERFACE libAtomVM "-u platform_nifs_get_nif" "-u platform_defaultatoms_init" "-u jit_stream_entry_point" "-u sys_map_native_code")
+endif()
 
 target_compile_features(${COMPONENT_LIB} INTERFACE c_std_11)
diff --git a/src/platforms/esp32/main/Kconfig.projbuild b/src/platforms/esp32/main/Kconfig.projbuild
index 88bf92aa1a..1eba944ed7 100755
--- a/src/platforms/esp32/main/Kconfig.projbuild
+++ b/src/platforms/esp32/main/Kconfig.projbuild
@@ -39,5 +39,11 @@ menu "AtomVM configuration"
          depends on USE_USB_SERIAL
          help
              Enable TinyUSB CDC functionality if USE_USB_SERIAL is enabled.
+
+     config JIT_ENABLED
+         bool "Enable just in time compilation"
+         default n
+         help
+             Enable just-in-time (JIT) compilation, or only the execution of precompiled native code.
              
 endmenu
diff --git a/src/platforms/esp32/test/CMakeLists.txt b/src/platforms/esp32/test/CMakeLists.txt
index 2d97d91345..cee138d34c 100644
--- a/src/platforms/esp32/test/CMakeLists.txt
+++ b/src/platforms/esp32/test/CMakeLists.txt
@@ -57,8 +57,16 @@ endif()
 # On Esp32, select is run in a loop in a dedicated task
 set(AVM_SELECT_IN_TASK ON)
 
-# JIT is not available yet on esp32
-set(AVM_DISABLE_JIT ON)
+# JIT is only supported on RISC-V targets (ESP32-C2, ESP32-C3, ESP32-C6, ESP32-H2, ESP32-P4)
+# This must be set before project() so libAtomVM is configured correctly
+if ("${IDF_TARGET}" MATCHES "esp32c2|esp32c3|esp32c6|esp32h2|esp32p4")
+    set(AVM_DISABLE_JIT OFF)
+    set(AVM_JIT_TARGET_ARCH riscv32)
+    message(STATUS "JIT compilation enabled for ${IDF_TARGET} (RISC-V32)")
+else()
+    message(STATUS "JIT is not supported on ${IDF_TARGET} (Xtensa architecture) - using interpreter")
+    set(AVM_DISABLE_JIT ON)
+endif()
 
 project(atomvm-esp32-test)
 
diff --git a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt
index e2d67269e8..dc4789f374 100644
--- a/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt
+++ b/src/platforms/esp32/test/main/test_erl_sources/CMakeLists.txt
@@ -20,11 +20,31 @@
 
 add_library(esp32_test_modules)
 
+include(ExternalProject)
+if(NOT AVM_DISABLE_JIT)
+set(host_atomvm_jit_target "--target=jit")
+else()
+set(host_atomvm_jit_target "")
+endif()
 ExternalProject_Add(HostAtomVM
     SOURCE_DIR ../../../../../../../../
     INSTALL_COMMAND cmake -E echo "Skipping install step."
+    BUILD_COMMAND cmake --build . --target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM
 )
 
+macro(jit_precompile module_name)
+    if(NOT AVM_DISABLE_JIT)
+        add_custom_command(
+            OUTPUT ${AVM_JIT_TARGET_ARCH}/${module_name}.beam
+            COMMAND mkdir -p ${AVM_JIT_TARGET_ARCH}
+                && erl -pa HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/ ${module_name}.beam
+            DEPENDS ${module_name}.beam HostAtomVM
+            COMMENT "Compiling ${module_name}.beam to ${AVM_JIT_TARGET_ARCH}"
+            VERBATIM
+        )
+    endif()
+endmacro()
+
 function(compile_erlang module_name)
     add_custom_command(
         OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam"
@@ -33,6 +53,7 @@ function(compile_erlang module_name)
         COMMENT "Compiling ${module_name}.erl"
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
     )
+    jit_precompile(${module_name})
 
     set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam")
 endfunction()
@@ -55,46 +76,44 @@ compile_erlang(test_time_and_processes)
 compile_erlang(test_twdt)
 compile_erlang(test_tz)
 
+set(erlang_test_beams
+    test_esp_partition.beam
+    test_file.beam
+    test_wifi_example.beam
+    test_list_to_atom.beam
+    test_list_to_binary.beam
+    test_md5.beam
+    test_crypto.beam
+    test_monotonic_time.beam
+    test_mount.beam
+    test_net.beam
+    test_rtc_slow.beam
+    test_select.beam
+    test_socket.beam
+    test_ssl.beam
+    test_time_and_processes.beam
+    test_twdt.beam
+    test_tz.beam
+)
+
+if(NOT AVM_DISABLE_JIT)
+    set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams})
+    list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/)
+    set(erlang_test_beams_to_package ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}})
+    set(erlang_test_beams_depends ${erlang_test_beams} ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}})
+else()
+    set(erlang_test_beams_to_package ${erlang_test_beams})
+    set(erlang_test_beams_depends ${erlang_test_beams})
+endif()
+
 add_custom_command(
     OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/esp32_test_modules.avm"
     COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i esp32_test_modules.avm
         HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm
-        test_esp_partition.beam
-        test_file.beam
-        test_wifi_example.beam
-        test_list_to_atom.beam
-        test_list_to_binary.beam
-        test_md5.beam
-        test_crypto.beam
-        test_monotonic_time.beam
-        test_mount.beam
-        test_net.beam
-        test_rtc_slow.beam
-        test_select.beam
-        test_socket.beam
-        test_ssl.beam
-        test_time_and_processes.beam
-        test_twdt.beam
-        test_tz.beam
+        ${erlang_test_beams_to_package}
     DEPENDS
         HostAtomVM
-        "${CMAKE_CURRENT_BINARY_DIR}/test_esp_partition.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_wifi_example.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_file.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_atom.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_list_to_binary.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_md5.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_crypto.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_monotonic_time.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_mount.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_net.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_rtc_slow.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_select.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_socket.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_ssl.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_time_and_processes.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_twdt.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_tz.beam"
+        ${erlang_test_beams_depends}
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
     VERBATIM
 )
diff --git a/src/platforms/generic_unix/lib/jit_stream_mmap.c b/src/platforms/generic_unix/lib/jit_stream_mmap.c
index 376f7384d0..f246a9791d 100644
--- a/src/platforms/generic_unix/lib/jit_stream_mmap.c
+++ b/src/platforms/generic_unix/lib/jit_stream_mmap.c
@@ -244,7 +244,13 @@ ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream)
 #elif defined(__GNUC__)
     __builtin___clear_cache(js_obj->stream_base, js_obj->stream_base + js_obj->stream_size);
 #endif
+#if JIT_ARCH_TARGET == JIT_ARCH_ARMV6M
+    // Set thumb bit for armv6m on the integer address, before converting it to the entry point type
+    ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) ((uintptr_t) js_obj->stream_base | 1);
+#else
     ModuleNativeEntryPoint result = (ModuleNativeEntryPoint) js_obj->stream_base;
+#endif
+
     js_obj->stream_base = NULL;
     return result;
 }
diff --git a/src/platforms/generic_unix/lib/sys.c b/src/platforms/generic_unix/lib/sys.c
index 086b39de79..099164dd89 100644
--- a/src/platforms/generic_unix/lib/sys.c
+++ b/src/platforms/generic_unix/lib/sys.c
@@ -842,7 +842,12 @@ ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t si
     }
     __builtin___clear_cache((char *) native_code_mmap, (char *) (native_code_mmap + size));
 #endif
+#if JIT_ARCH_TARGET == JIT_ARCH_ARMV6M
+    // Set thumb bit for armv6m
+    return (ModuleNativeEntryPoint) (native_code_mmap + offset + 1);
+#else
     return (ModuleNativeEntryPoint) (native_code_mmap + offset);
+#endif
 #else
     UNUSED(size);
     return (ModuleNativeEntryPoint) (native_code + offset);
diff --git a/src/platforms/rp2/CMakeLists.txt b/src/platforms/rp2/CMakeLists.txt
index 9b9eb582df..86e5e6683a 100644
--- a/src/platforms/rp2/CMakeLists.txt
+++ b/src/platforms/rp2/CMakeLists.txt
@@ -54,6 +54,8 @@ set(HAVE_MKFIFO "" CACHE INTERNAL "Have symbol mkfifo" FORCE)
 set(HAVE_UNLINK "" CACHE INTERNAL "Have symbol unlink" FORCE)
 # Likewise with EXECVE
 set(HAVE_EXECVE "" CACHE INTERNAL "Have symbol execve" FORCE)
+# getcwd is defined in newlib header but not implemented
+set(HAVE_GETCWD "" CACHE INTERNAL "Have symbol getcwd" FORCE)
 
 # Options that make sense for this platform
 option(AVM_DISABLE_SMP "Disable SMP support." OFF)
@@ -63,9 +65,25 @@ option(AVM_WAIT_FOR_USB_CONNECT "Wait for USB connection before starting" OFF)
 option(AVM_WAIT_BOOTSEL_ON_EXIT "Wait in BOOTSEL rather than shutdown on exit" ON)
 option(AVM_REBOOT_ON_NOT_OK "Reboot Pico if result is not ok" OFF)
 option(AVM_CREATE_STACKTRACES "Create stacktraces" ON)
-
-# JIT is not available yet on rp2
-set(AVM_DISABLE_JIT ON FORCE)
+option(AVM_DISABLE_JIT "Disable just in time compilation." ON)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "^cortex-m.+$")
+    # We only have armv6m for now, which all cortex-m should support
+    if (NOT AVM_DISABLE_JIT)
+        set(AVM_JIT_TARGET_ARCH "armv6m")
+    endif()
+elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^hazard3$")
+    # Pico2 RISC-V processor (Hazard3)
+    if (NOT AVM_DISABLE_JIT)
+        set(AVM_JIT_TARGET_ARCH "riscv32")
+    endif()
+else()
+    # Other processors not supported yet
+    if (NOT AVM_DISABLE_JIT)
+        message("JIT is not supported on ${CMAKE_SYSTEM_PROCESSOR}")
+        set(AVM_DISABLE_JIT ON CACHE BOOL "Disable just in time compilation." FORCE)
+        set(AVM_ENABLE_PRECOMPILED OFF CACHE BOOL "Enable execution of precompiled code, even if JIT is disabled." FORCE)
+    endif()
+endif()
 
 set(AVM_DISABLE_TASK_DRIVER ON FORCE)
 
diff --git a/src/platforms/rp2/src/lib/CMakeLists.txt b/src/platforms/rp2/src/lib/CMakeLists.txt
index 957e346539..3cc69b56a3 100644
--- a/src/platforms/rp2/src/lib/CMakeLists.txt
+++ b/src/platforms/rp2/src/lib/CMakeLists.txt
@@ -31,6 +31,7 @@ set(HEADER_FILES
 
 set(SOURCE_FILES
     gpiodriver.c
+    jit_stream_flash.c
     networkdriver.c
     otp_crypto_platform.c
     platform_defaultatoms.c
diff --git a/src/platforms/rp2/src/lib/jit_stream_flash.c b/src/platforms/rp2/src/lib/jit_stream_flash.c
new file mode 100644
index 0000000000..77dfcca908
--- /dev/null
+++ b/src/platforms/rp2/src/lib/jit_stream_flash.c
@@ -0,0 +1,34 @@
+/*
+ * This file is part of AtomVM.
+ *
+ * Copyright 2025 by Paul Guyot <pguyot@kallisys.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#ifndef AVM_NO_JIT
+
+#include "context.h"
+#include "jit.h"
+#include "term.h"
+
+ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream)
+{
+    UNUSED(ctx);
+    UNUSED(jit_stream);
+    return NULL; // stub: both arguments are ignored and no entry point is ever returned
+}
+
+#endif
diff --git a/src/platforms/rp2/src/lib/sys.c b/src/platforms/rp2/src/lib/sys.c
index aeffd72870..ac4c9c717c 100644
--- a/src/platforms/rp2/src/lib/sys.c
+++ b/src/platforms/rp2/src/lib/sys.c
@@ -474,3 +474,21 @@ void sys_mbedtls_ctr_drbg_context_unlock(GlobalContext *global)
     struct RP2PlatformData *platform = global->platform_data;
     SMP_MUTEX_UNLOCK(platform->random_mutex);
 }
+
+#ifndef AVM_NO_JIT
+/**
+ * @brief Get the entry point of the native code located at native_code + offset.
+ * @details The address is used in place (size is ignored). Bit 0, the Thumb bit,
+ * is only set on ARM builds: this platform can also target RISC-V (Hazard3).
+ */
+ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset)
+{
+    UNUSED(size);
+    uintptr_t addr = (uintptr_t) (native_code + offset);
+#ifdef __arm__
+    // We need to set the Thumb bit
+    addr |= 1;
+#endif
+    return (ModuleNativeEntryPoint) addr;
+}
+#endif
diff --git a/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt b/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt
index cbdf581eef..b203d168b2 100644
--- a/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt
+++ b/src/platforms/rp2/tests/test_erl_sources/CMakeLists.txt
@@ -19,12 +19,30 @@
 #
 
 include(ExternalProject)
+if(NOT AVM_DISABLE_JIT)
+set(host_atomvm_jit_target "--target=jit")
+else()
+set(host_atomvm_jit_target "")
+endif()
 ExternalProject_Add(HostAtomVM
     SOURCE_DIR ../../../../../../
     INSTALL_COMMAND cmake -E echo "Skipping install step."
-    BUILD_COMMAND cmake --build . --target=atomvmlib --target=PackBEAM --target=UF2Tool
+    BUILD_COMMAND cmake --build . --target=atomvmlib ${host_atomvm_jit_target} --target=PackBEAM --target=UF2Tool
 )
 
+macro(jit_precompile module_name)
+    if(NOT AVM_DISABLE_JIT)
+        add_custom_command(
+            OUTPUT ${AVM_JIT_TARGET_ARCH}/${module_name}.beam
+            COMMAND mkdir -p ${AVM_JIT_TARGET_ARCH}
+                && erl -pa HostAtomVM-prefix/src/HostAtomVM-build/libs/jit/src/beams/ -noshell -s jit_precompile -s init stop -- ${AVM_JIT_TARGET_ARCH} ${AVM_JIT_TARGET_ARCH}/ ${module_name}.beam
+            DEPENDS ${module_name}.beam HostAtomVM
+            COMMENT "Compiling ${module_name}.beam to ${AVM_JIT_TARGET_ARCH}"
+            VERBATIM
+        )
+    endif()
+endmacro()
+
 function(compile_erlang module_name module_src_dir)
     add_custom_command(
         OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam"
@@ -33,6 +51,7 @@ function(compile_erlang module_name module_src_dir)
         COMMENT "Compiling ${module_name}.erl"
         WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
     )
+    jit_precompile(${module_name})
 
     set_property(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/${module_name}.beam")
 endfunction()
@@ -41,18 +60,26 @@ compile_erlang(test_clocks "")
 compile_erlang(test_smp "")
 compile_erlang(test_crypto ../../../esp32/test/main/test_erl_sources/)
 
+set(erlang_test_beams
+    test_clocks.beam
+    test_smp.beam
+    test_crypto.beam
+)
+
+if(NOT AVM_DISABLE_JIT)
+    set(erlang_test_beams_${AVM_JIT_TARGET_ARCH} ${erlang_test_beams})
+    list(TRANSFORM erlang_test_beams_${AVM_JIT_TARGET_ARCH} PREPEND ${AVM_JIT_TARGET_ARCH}/)
+    list(APPEND erlang_test_beams ${erlang_test_beams_${AVM_JIT_TARGET_ARCH}})
+endif()
+
 add_custom_command(
     OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/rp2_test_modules.avm"
     COMMAND HostAtomVM-prefix/src/HostAtomVM-build/tools/packbeam/PackBEAM -i rp2_test_modules.avm
         HostAtomVM-prefix/src/HostAtomVM-build/libs/atomvmlib.avm
-        test_clocks.beam
-        test_smp.beam
-        test_crypto.beam
+        ${erlang_test_beams}
     DEPENDS
         HostAtomVM
-        "${CMAKE_CURRENT_BINARY_DIR}/test_clocks.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_smp.beam"
-        "${CMAKE_CURRENT_BINARY_DIR}/test_crypto.beam"
+        ${erlang_test_beams}
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
     VERBATIM
 )
diff --git a/src/platforms/stm32/CMakeLists.txt b/src/platforms/stm32/CMakeLists.txt
index 569b0a5a17..51489ca5bf 100644
--- a/src/platforms/stm32/CMakeLists.txt
+++ b/src/platforms/stm32/CMakeLists.txt
@@ -36,9 +36,6 @@ option(AVM_CONFIG_REBOOT_ON_NOT_OK "Reboot when application exits with non 'ok'
 option(AVM_DISABLE_GPIO_NIFS "Disable GPIO nifs (input and output)" OFF)
 option(AVM_DISABLE_GPIO_PORT_DRIVER "Disable GPIO 'port' driver (input, output, and interrupts)" OFF)
 
-# JIT is not available yet on esp32
-set(AVM_DISABLE_JIT ON FORCE)
-
 set(AVM_DISABLE_SMP ON FORCE)
 set(AVM_DISABLE_TASK_DRIVER ON FORCE)
 
@@ -85,6 +82,11 @@ if (NOT CMAKE_TOOLCHAIN_FILE)
 endif ()
 mark_as_advanced(CMAKE_TOOLCHAIN_FILE)
 
+option(AVM_DISABLE_JIT "Disable just in time compilation." ON)
+if (NOT AVM_DISABLE_JIT)
+    set(AVM_JIT_TARGET_ARCH "armv6m")
+endif()
+
 if ((NOT ${CMAKE_C_COMPILER_ID} STREQUAL "GNU") OR
     (NOT ${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU") OR
     (${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 7.2.1))
diff --git a/src/platforms/stm32/src/lib/CMakeLists.txt b/src/platforms/stm32/src/lib/CMakeLists.txt
index f1846c070d..536d21cc88 100644
--- a/src/platforms/stm32/src/lib/CMakeLists.txt
+++ b/src/platforms/stm32/src/lib/CMakeLists.txt
@@ -33,6 +33,7 @@ set(HEADER_FILES
 
 set(SOURCE_FILES
     gpio_driver.c
+    jit_stream_flash.c
     platform_nifs.c
     sys.c
     ../../../../libAtomVM/portnifloader.c
diff --git a/src/platforms/stm32/src/lib/jit_stream_flash.c b/src/platforms/stm32/src/lib/jit_stream_flash.c
new file mode 100644
index 0000000000..77dfcca908
--- /dev/null
+++ b/src/platforms/stm32/src/lib/jit_stream_flash.c
@@ -0,0 +1,34 @@
+/*
+ * This file is part of AtomVM.
+ *
+ * Copyright 2025 by Paul Guyot <pguyot@kallisys.net>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+ */
+
+#ifndef AVM_NO_JIT
+
+#include "context.h"
+#include "jit.h"
+#include "term.h"
+
+ModuleNativeEntryPoint jit_stream_entry_point(Context *ctx, term jit_stream)
+{
+    UNUSED(ctx);
+    UNUSED(jit_stream);
+    return NULL; // stub: both arguments are ignored and no entry point is ever returned
+}
+
+#endif
diff --git a/src/platforms/stm32/src/lib/sys.c b/src/platforms/stm32/src/lib/sys.c
index c65a39cab3..262a2be8e1 100644
--- a/src/platforms/stm32/src/lib/sys.c
+++ b/src/platforms/stm32/src/lib/sys.c
@@ -296,3 +296,12 @@ void sys_init_icache()
     __dsb;
     __isb;
 }
+
+#ifndef AVM_NO_JIT
+ModuleNativeEntryPoint sys_map_native_code(const uint8_t *native_code, size_t size, size_t offset)
+{
+    UNUSED(size);
+    const uintptr_t code_address = (uintptr_t) (native_code + offset);
+    return (ModuleNativeEntryPoint) (code_address | 1); // bit 0 is the Thumb bit
+}
+#endif
diff --git a/tests/erlang_tests/CMakeLists.txt b/tests/erlang_tests/CMakeLists.txt
index 267a4b3d29..6d6285886f 100644
--- a/tests/erlang_tests/CMakeLists.txt
+++ b/tests/erlang_tests/CMakeLists.txt
@@ -27,6 +27,7 @@ macro(jit_precompile module_name)
             ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit.beam
             ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_precompile.beam
             ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_stream_binary.beam
+            ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_dwarf.beam
             ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${AVM_JIT_TARGET_ARCH}.beam
             ${CMAKE_BINARY_DIR}/libs/jit/src/beams/jit_${AVM_JIT_TARGET_ARCH}_asm.beam
         )
diff --git a/tests/libs/jit/CMakeLists.txt b/tests/libs/jit/CMakeLists.txt
index 70f46ccc09..6aa216b5dc 100644
--- a/tests/libs/jit/CMakeLists.txt
+++ b/tests/libs/jit/CMakeLists.txt
@@ -26,11 +26,20 @@ set(ERLANG_MODULES
     tests
     jit_tests
     jit_tests_common
+    jit_dwarf_tests
     jit_aarch64_tests
     jit_aarch64_asm_tests
+    jit_armv6m_tests
+    jit_armv6m_asm_tests
+    jit_riscv32_tests
+    jit_riscv32_asm_tests
     jit_x86_64_tests
     jit_x86_64_asm_tests
 )
 
-pack_archive(test_jit_lib ERLC_FLAGS -DTEST MODULES ${ERLANG_MODULES})
+if (NOT AVM_DISABLE_JIT_DWARF)
+    pack_archive(test_jit_lib ERLC_FLAGS -DTEST -DJIT_DWARF MODULES ${ERLANG_MODULES})
+else()
+    pack_archive(test_jit_lib ERLC_FLAGS -DTEST MODULES ${ERLANG_MODULES})
+endif()
 pack_eunit(test_jit estdlib eavmlib etest jit)
diff --git a/tests/libs/jit/jit_aarch64_asm_tests.erl b/tests/libs/jit/jit_aarch64_asm_tests.erl
index cf053da995..11c7fe392f 100644
--- a/tests/libs/jit/jit_aarch64_asm_tests.erl
+++ b/tests/libs/jit/jit_aarch64_asm_tests.erl
@@ -20,9 +20,7 @@
 
 -module(jit_aarch64_asm_tests).
 
--ifdef(TEST).
 -include_lib("eunit/include/eunit.hrl").
--endif.
 
 -export([
     list_to_integer/1,
diff --git a/tests/libs/jit/jit_aarch64_tests.erl b/tests/libs/jit/jit_aarch64_tests.erl
index 18bdcf88cb..247728dd29 100644
--- a/tests/libs/jit/jit_aarch64_tests.erl
+++ b/tests/libs/jit/jit_aarch64_tests.erl
@@ -88,6 +88,51 @@ call_primitive_2_args_test() ->
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
+call_primitive_5_args_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            "   0:	f9401447 	ldr	x7, [x2, #40]\n"
+            "   4:	d2800202 	mov	x2, #0x10                  	// #16\n"
+            "   8:	d2800403 	mov	x3, #0x20                  	// #32\n"
+            "   c:	d2800044 	mov	x4, #0x2                   	// #2\n"
+            "  10:	d61f00e0 	br	x7"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_6_args_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Get bin_ptr from x_reg 0 (similar to get_list_test pattern)
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK),
+    % Get another register for the last parameter to test {free, Reg} handling
+    {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}),
+    % Call PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments
+    {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [
+        ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg}
+    ]),
+    Stream = ?BACKEND:stream(State4),
+    Dump =
+        <<
+            "   0:	f9401807 	ldr	x7, [x0, #48]\n"
+            "   4:	927ef4e7 	and	x7, x7, #0xfffffffffffffffc\n"
+            "   8:	f9401c08 	ldr	x8, [x0, #56]\n"
+            "   c:	f940b850 	ldr	x16, [x2, #368]\n"
+            "  10:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+            "  14:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+            "  18:	aa0703e2 	mov	x2, x7\n"
+            "  1c:	d2800803 	mov	x3, #0x40                  	// #64\n"
+            "  20:	d2800104 	mov	x4, #0x8                   	// #8\n"
+            "  24:	aa0803e5 	mov	x5, x8\n"
+            "  28:	d63f0200 	blr	x16\n"
+            "  2c:	aa0003e7 	mov	x7, x0\n"
+            "  30:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+            "  34:	a8c103fe 	ldp	x30, x0, [sp], #16"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 call_primitive_extended_regs_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
@@ -146,6 +191,44 @@ call_primitive_extended_regs_test() ->
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
+call_primitive_few_free_regs_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, Reg1} = ?BACKEND:move_to_native_register(State0, 1),
+    {State2, Reg2} = ?BACKEND:move_to_native_register(State1, 2),
+    {State3, Reg3} = ?BACKEND:move_to_native_register(State2, 3),
+    {State4, Reg4} = ?BACKEND:move_to_native_register(State3, 4),
+    {State5, Reg5} = ?BACKEND:move_to_native_register(State4, 5),
+    {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [
+        Reg2, Reg1, {free, Reg4}, Reg3, {free, Reg5}
+    ]),
+    State7 = ?BACKEND:free_native_registers(State6, [ResultReg, Reg2, Reg1, Reg3]),
+    ?BACKEND:assert_all_native_free(State7),
+    Stream = ?BACKEND:stream(State7),
+    Dump = <<
+        "   0:	d2800027 	mov	x7, #0x1                   	// #1\n"
+        "   4:	d2800048 	mov	x8, #0x2                   	// #2\n"
+        "   8:	d2800069 	mov	x9, #0x3                   	// #3\n"
+        "   c:	d280008a 	mov	x10, #0x4                   	// #4\n"
+        "  10:	d28000ab 	mov	x11, #0x5                   	// #5\n"
+        "  14:	f940e450 	ldr	x16, [x2, #456]\n"
+        "  18:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "  1c:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "  20:	a9bf23e9 	stp	x9, x8, [sp, #-16]!\n"
+        "  24:	f81f0fe7 	str	x7, [sp, #-16]!\n"
+        "  28:	aa0803e0 	mov	x0, x8\n"
+        "  2c:	aa0703e1 	mov	x1, x7\n"
+        "  30:	aa0a03e2 	mov	x2, x10\n"
+        "  34:	aa0903e3 	mov	x3, x9\n"
+        "  38:	aa0b03e4 	mov	x4, x11\n"
+        "  3c:	d63f0200 	blr	x16\n"
+        "  40:	aa0003ea 	mov	x10, x0\n"
+        "  44:	f84107e7 	ldr	x7, [sp], #16\n"
+        "  48:	a8c123e9 	ldp	x9, x8, [sp], #16\n"
+        "  4c:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  50:	a8c103fe 	ldp	x30, x0, [sp], #16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 call_ext_only_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
@@ -168,6 +251,23 @@ call_ext_only_test() ->
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
+call_primitive_last_5_args_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [
+        ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA}
+    ]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        "   0:	f9401807 	ldr	x7, [x0, #48]\n"
+        "   4:	f9404c48 	ldr	x8, [x2, #152]\n"
+        "   8:	d2800102 	mov	x2, #0x8                   	// #8\n"
+        "   c:	d2805963 	mov	x3, #0x2cb                 	// #715\n"
+        "  10:	aa0703e4 	mov	x4, x7\n"
+        "  14:	d61f0100 	br	x8"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 call_ext_last_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
@@ -760,17 +860,34 @@ if_else_block_test() ->
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
-shift_right_test() ->
-    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
-    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
-    State2 = ?BACKEND:shift_right(State1, Reg, 3),
-    Stream = ?BACKEND:stream(State2),
-    Dump =
-        <<
-            "   0:	f9401807 	ldr	x7, [x0, #48]\n"
-            "   4:	d343fce7 	lsr	x7, x7, #3"
-        >>,
-    ?assertEqual(dump_to_bin(Dump), Stream).
+shift_right_test_() ->
+    [
+        ?_test(begin
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, Reg} = ?BACKEND:shift_right(State1, {free, Reg}, 3),
+            Stream = ?BACKEND:stream(State2),
+            Dump =
+                <<
+                    "   0:	f9401807 	ldr	x7, [x0, #48]\n"
+                    "   4:	d343fce7 	lsr	x7, x7, #3"
+                >>,
+            ?assertEqual(dump_to_bin(Dump), Stream)
+        end),
+        ?_test(begin
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, OtherReg} = ?BACKEND:shift_right(State1, Reg, 3),
+            ?assertNotEqual(OtherReg, Reg),
+            Stream = ?BACKEND:stream(State2),
+            Dump =
+                <<
+                    "   0:	f9401807 	ldr	x7, [x0, #48]\n"
+                    "   4:	d343fce8 	lsr	x8, x7, #3"
+                >>,
+            ?assertEqual(dump_to_bin(Dump), Stream)
+        end)
+    ].
 
 shift_left_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
@@ -1021,6 +1138,179 @@ is_boolean_test() ->
     >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
+%% Test OP_WAIT_TIMEOUT pattern
+wait_timeout_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    Label = 42,
+    {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0),
+    {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000),
+    State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [
+        ctx, jit_state, {free, TimeoutReg}, Label
+    ]),
+    State4 = ?BACKEND:add_label(State3, OffsetRef0),
+    State5 = ?BACKEND:continuation_entry_point(State4),
+    {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+        ctx, jit_state
+    ]),
+    State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}),
+    % ?WAITING_TIMEOUT_EXPIRED
+    {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
+    State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) ->
+        ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
+            ctx, jit_state, Label
+        ])
+    end),
+    State10 = ?BACKEND:update_branches(State9),
+
+    Stream = ?BACKEND:stream(State10),
+    Dump = <<
+        "   0:	100000e7 	adr	x7, 0x1c\n"
+        "   4:	f9000427 	str	x7, [x1, #8]\n"
+        "   8:	d2827107 	mov	x7, #0x1388                	// #5000\n"
+        "   c:	f9407848 	ldr	x8, [x2, #240]\n"
+        "  10:	aa0703e2 	mov	x2, x7\n"
+        "  14:	d2800543 	mov	x3, #0x2a                  	// #42\n"
+        "  18:	d61f0100 	br	x8\n"
+        "  1c:	f9405450 	ldr	x16, [x2, #168]\n"
+        "  20:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "  24:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "  28:	d63f0200 	blr	x16\n"
+        "  2c:	aa0003e7 	mov	x7, x0\n"
+        "  30:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  34:	a8c103fe 	ldp	x30, x0, [sp], #16\n"
+        "  38:	eb0000ff 	cmp	x7, x0\n"
+        "  3c:	54000060 	b.eq	0x48  // b.none\n"
+        "  40:	aa0703e0 	mov	x0, x7\n"
+        "  44:	d65f03c0 	ret\n"
+        "  48:	f9408450 	ldr	x16, [x2, #264]\n"
+        "  4c:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "  50:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "  54:	d2800041 	mov	x1, #0x2                   	// #2\n"
+        "  58:	d63f0200 	blr	x16\n"
+        "  5c:	aa0003e7 	mov	x7, x0\n"
+        "  60:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  64:	a8c103fe 	ldp	x30, x0, [sp], #16\n"
+        "  68:	b5000087 	cbnz	x7, 0x78\n"
+        "  6c:	f9407c47 	ldr	x7, [x2, #248]\n"
+        "  70:	d2800542 	mov	x2, #0x2a                  	// #42\n"
+        "  74:	d61f00e0 	br	x7"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT pattern that uses set_continuation_to_label
+wait_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    State1 = ?BACKEND:jump_table(State0, 5),
+    State2 = ?BACKEND:add_label(State1, 1),
+    Label = 2,
+    State3 = ?BACKEND:set_continuation_to_label(State2, Label),
+    State4 = ?BACKEND:call_primitive_last(State3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+
+    Stream = ?BACKEND:stream(State4),
+    Dump = <<
+        "   0:	14000000 	b	0x0\n"
+        "   4:	14000000 	b	0x4\n"
+        "   8:	14000000 	b	0x8\n"
+        "   c:	14000000 	b	0xc\n"
+        "  10:	14000000 	b	0x10\n"
+        "  14:	14000000 	b	0x14\n"
+        "  18:	10000007 	adr	x7, 0x18\n"
+        "  1c:	f9000427 	str	x7, [x1, #8]\n"
+        "  20:	f9407447 	ldr	x7, [x2, #232]\n"
+        "  24:	d61f00e0 	br	x7"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+return_labels_and_lines_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    % Test return_labels_and_lines with some sample labels and lines
+    State1 = ?BACKEND:add_label(State0, 2, 32),
+    State2 = ?BACKEND:add_label(State1, 1, 16),
+
+    % {Line, Offset} pairs
+    SortedLines = [{10, 16}, {20, 32}],
+
+    State3 = ?BACKEND:return_labels_and_lines(State2, SortedLines),
+    Stream = ?BACKEND:stream(State3),
+
+    % Should have generated adr + ret + labels table + lines table
+    % adr = 4 bytes, ret = 4 bytes, each table = 2-byte count + 2 entries * 6 bytes = 14 bytes
+    % Total minimum: 36 bytes
+    ?assert(byte_size(Stream) >= 36),
+
+    % Expected: adr x0, #8 + ret + labels table + lines table
+    % The data tables start at offset 0x8, so we load PC + 8 into x0
+    Dump = <<
+        "   0:	10000040 	adr	x0, 0x8\n"
+        "   4:	d65f03c0 	ret\n"
+        "   8:	01000200 	.word	0x01000200\n"
+        "   c:	10000000 	adr	x0, 0xc\n"
+        "  10:	00000200 	.word	0x00000200\n"
+        "  14:	02002000 	.word	0x02002000\n"
+        "  18:	00000a00 	.word	0x00000a00\n"
+        "  1c:	14001000 	.word	0x14001000\n"
+        "  20:	20000000 	.word	0x20000000"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test call_func_ptr with a {free, {x_reg, X}} argument
+gc_bif2_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]),
+    {State2, _ResultReg} = ?BACKEND:call_func_ptr(State1, {free, FuncPtr}, [
+        ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}
+    ]),
+
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        "   0:	f9402050 	ldr	x16, [x2, #64]\n"
+        "   4:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "   8:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "   c:	aa0103e0 	mov	x0, x1\n"
+        "  10:	d2800541 	mov	x1, #0x2a                  	// #42\n"
+        "  14:	d63f0200 	blr	x16\n"
+        "  18:	aa0003e7 	mov	x7, x0\n"
+        "  1c:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  20:	a8c103fe 	ldp	x30, x0, [sp], #16\n"
+        "  24:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "  28:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "  2c:	d2800001 	mov	x1, #0x0                   	// #0\n"
+        "  30:	d2800062 	mov	x2, #0x3                   	// #3\n"
+        "  34:	f9401403 	ldr	x3, [x0, #40]\n"
+        "  38:	f9400063 	ldr	x3, [x3]\n"
+        "  3c:	f9401804 	ldr	x4, [x0, #48]\n"
+        "  40:	d63f00e0 	blr	x7\n"
+        "  44:	aa0003e7 	mov	x7, x0\n"
+        "  48:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  4c:	a8c103fe 	ldp	x30, x0, [sp], #16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test case where parameter value is in r1
+memory_ensure_free_with_roots_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, _FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [
+        ctx, jit_state, {free, r1}, 4, 1
+    ]),
+
+    Stream = ?BACKEND:stream(State1),
+    Dump = <<
+        "   0:	f940b050 	ldr	x16, [x2, #352]\n"
+        "   4:	a9bf03fe 	stp	x30, x0, [sp, #-16]!\n"
+        "   8:	a9bf0be1 	stp	x1, x2, [sp, #-16]!\n"
+        "   c:	aa0103e2 	mov	x2, x1\n"
+        "  10:	d2800083 	mov	x3, #0x4                   	// #4\n"
+        "  14:	d2800024 	mov	x4, #0x1                   	// #1\n"
+        "  18:	d63f0200 	blr	x16\n"
+        "  1c:	aa0003e7 	mov	x7, x0\n"
+        "  20:	a8c10be1 	ldp	x1, x2, [sp], #16\n"
+        "  24:	a8c103fe 	ldp	x30, x0, [sp], #16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 call_ext_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
     State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
@@ -1645,6 +1935,66 @@ move_to_native_register_test_() ->
             ]
         end}.
 
+add_test0(State0, Reg, Imm, Dump) ->
+    State1 = ?BACKEND:add(State0, Reg, Imm),
+    Stream = ?BACKEND:stream(State1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+add_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    add_test0(State0, r2, 2, <<
+                        "   0:	91000842 	add	x2, x2, #0x2"
+                    >>)
+                end),
+                ?_test(begin
+                    add_test0(State0, r2, 256, <<
+                        "   0:	91040042 	add	x2, x2, #0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    add_test0(State0, r2, r3, <<
+                        "   0:	8b030042 	add	x2, x2, x3"
+                    >>)
+                end)
+            ]
+        end}.
+
+sub_test0(State0, Reg, Imm, Dump) ->
+    State1 = ?BACKEND:sub(State0, Reg, Imm),
+    Stream = ?BACKEND:stream(State1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+sub_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    sub_test0(State0, r2, 2, <<
+                        "   0:	d1000842 	sub	x2, x2, #0x2"
+                    >>)
+                end),
+                ?_test(begin
+                    sub_test0(State0, r2, 256, <<
+                        "   0:	d1040042 	sub	x2, x2, #0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    sub_test0(State0, r2, r3, <<
+                        "   0:	cb030042 	sub	x2, x2, x3"
+                    >>)
+                end)
+            ]
+        end}.
+
 mul_test0(State0, Reg, Imm, Dump) ->
     State1 = ?BACKEND:mul(State0, Reg, Imm),
     Stream = ?BACKEND:stream(State1),
@@ -1719,6 +2069,21 @@ mul_test_() ->
             ]
         end}.
 
+%% Test jump_to_continuation optimization for intra-module returns
+jump_to_continuation_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_to_continuation(State0, {free, r0}),
+    Stream = ?BACKEND:stream(State1),
+    % Expected: adr x7, NetOffset; add x7, x7, x0; br x7
+    % With default offset 0, NetOffset = 0 - 0 = 0, temp register is r7
+    Dump =
+        <<
+            "   0:	10000007 	adr	x7, 0x0\n"
+            "   4:	8b0000e7 	add	x7, x7, x0\n"
+            "   8:	d61f00e0 	br	x7"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 dump_to_bin(Dump) ->
     dump_to_bin0(Dump, addr, []).
 
diff --git a/tests/libs/jit/jit_armv6m_asm_tests.erl b/tests/libs/jit/jit_armv6m_asm_tests.erl
new file mode 100644
index 0000000000..eefe6781ef
--- /dev/null
+++ b/tests/libs/jit/jit_armv6m_asm_tests.erl
@@ -0,0 +1,343 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_armv6m_asm_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(_assertAsmEqual(Bin, Str, Value),
+    ?_assertEqual(jit_tests_common:asm(arm, Bin, Str), Value)
+).
+
+adds_test_() ->
+    [
+        ?_assertAsmEqual(<<16#3038:16/little>>, "adds r0, #56", jit_armv6m_asm:adds(r0, 56)),
+        ?_assertAsmEqual(
+            <<16#3038:16/little>>, "adds r0, r0, #56", jit_armv6m_asm:adds(r0, r0, 56)
+        ),
+        ?_assertAsmEqual(<<16#3000:16/little>>, "adds r0, #0", jit_armv6m_asm:adds(r0, 0)),
+        ?_assertAsmEqual(<<16#3101:16/little>>, "adds r1, #1", jit_armv6m_asm:adds(r1, 1)),
+        ?_assertAsmEqual(<<16#1C42:16/little>>, "adds r2, r0, #1", jit_armv6m_asm:adds(r2, r0, 1)),
+        ?_assertAsmEqual(<<16#18c9:16/little>>, "adds r1, r1, r3", jit_armv6m_asm:adds(r1, r1, r3)),
+        ?_assertAsmEqual(<<16#1850:16/little>>, "adds r0, r2, r1", jit_armv6m_asm:adds(r0, r2, r1))
+    ].
+
+add_test_() ->
+    [
+        %% ARMv6-M Thumb ADD instructions (register, high registers supported)
+        %% ADD Rd, Rm - adds register value to register (supports PC)
+        ?_assertAsmEqual(<<16#449f:16/little>>, "add pc, r3", jit_armv6m_asm:add(pc, r3)),
+        ?_assertAsmEqual(<<16#4440:16/little>>, "add r0, r8", jit_armv6m_asm:add(r0, r8)),
+        ?_assertAsmEqual(<<16#4488:16/little>>, "add r8, r1", jit_armv6m_asm:add(r8, r1)),
+        ?_assertAsmEqual(<<16#44c9:16/little>>, "add r9, r9", jit_armv6m_asm:add(r9, r9)),
+        ?_assertAsmEqual(<<16#4419:16/little>>, "add r1, r3", jit_armv6m_asm:add(r1, r3))
+    ].
+
+subs_test_() ->
+    [
+        ?_assertAsmEqual(<<16#3f38:16/little>>, "subs r7, #56", jit_armv6m_asm:subs(r7, 56)),
+        ?_assertAsmEqual(
+            <<16#3f38:16/little>>, "subs r7, r7, #56", jit_armv6m_asm:subs(r7, r7, 56)
+        ),
+        ?_assertAsmEqual(<<16#3800:16/little>>, "subs r0, #0", jit_armv6m_asm:subs(r0, 0)),
+        ?_assertAsmEqual(<<16#1e42:16/little>>, "subs r2, r0, #1", jit_armv6m_asm:subs(r2, r0, 1)),
+        ?_assertAsmEqual(<<16#1ad1:16/little>>, "subs r1, r2, r3", jit_armv6m_asm:subs(r1, r2, r3))
+    ].
+
+sub_test_() ->
+    [
+        ?_assertAsmEqual(<<16#B082:16/little>>, "sub sp, #8", jit_armv6m_asm:sub(sp, 8)),
+        ?_assertAsmEqual(<<16#B082:16/little>>, "sub sp, sp, #8", jit_armv6m_asm:sub(sp, sp, 8)),
+        ?_assertAsmEqual(<<16#B080:16/little>>, "sub sp, #0", jit_armv6m_asm:sub(sp, 0)),
+        ?_assertAsmEqual(<<16#B084:16/little>>, "sub sp, #16", jit_armv6m_asm:sub(sp, 16)),
+        ?_assertAsmEqual(<<16#B0FF:16/little>>, "sub sp, #508", jit_armv6m_asm:sub(sp, 508))
+    ].
+
+muls_test_() ->
+    [
+        ?_assertAsmEqual(<<16#4359:16/little>>, "muls r1, r3", jit_armv6m_asm:muls(r1, r3)),
+        ?_assertAsmEqual(<<16#4348:16/little>>, "muls r0, r1", jit_armv6m_asm:muls(r0, r1))
+    ].
+
+b_test_() ->
+    [
+        %% Thumb B (unconditional) encoding tests - ARMv6-M 16-bit only
+        ?_assertAsmEqual(<<16#E7FE:16/little>>, "b .+0", jit_armv6m_asm:b(0)),
+        ?_assertAsmEqual(<<16#E006:16/little>>, "b .+16", jit_armv6m_asm:b(16)),
+        ?_assertAsmEqual(<<16#E7DE:16/little>>, "b .-64", jit_armv6m_asm:b(-64)),
+        ?_assertAsmEqual(<<16#E000:16/little>>, "b .+4", jit_armv6m_asm:b(4)),
+        ?_assertAsmEqual(<<16#E3FF:16/little>>, "b .+2050", jit_armv6m_asm:b(2050)),
+        ?_assertAsmEqual(<<16#E400:16/little>>, "b .-2044", jit_armv6m_asm:b(-2044)),
+        %% Test error cases for offsets too large for ARMv6-M
+        ?_assertError({unencodable_offset, 2052}, jit_armv6m_asm:b(2052)),
+        ?_assertError({unencodable_offset, -2046}, jit_armv6m_asm:b(-2046))
+    ].
+
+blx_test_() ->
+    [
+        %% Thumb BLX (register) encoding tests
+        ?_assertAsmEqual(<<16#4780:16/little>>, "blx r0", jit_armv6m_asm:blx(r0)),
+        ?_assertAsmEqual(<<16#4788:16/little>>, "blx r1", jit_armv6m_asm:blx(r1)),
+        ?_assertAsmEqual(<<16#47E8:16/little>>, "blx r13", jit_armv6m_asm:blx(r13))
+    ].
+
+bx_test_() ->
+    [
+        %% Thumb BX (branch exchange) encoding tests
+        ?_assertAsmEqual(<<16#4700:16/little>>, "bx r0", jit_armv6m_asm:bx(r0)),
+        ?_assertAsmEqual(<<16#4708:16/little>>, "bx r1", jit_armv6m_asm:bx(r1)),
+        ?_assertAsmEqual(<<16#4768:16/little>>, "bx r13", jit_armv6m_asm:bx(r13))
+    ].
+
+ldr_test_() ->
+    [
+        %% ARMv6-M Thumb LDR immediate offset (0-124, multiple of 4)
+        ?_assertAsmEqual(
+            <<16#6889:16/little>>, "ldr r1, [r1, #8]", jit_armv6m_asm:ldr(r1, {r1, 8})
+        ),
+        ?_assertAsmEqual(
+            <<16#6982:16/little>>, "ldr r2, [r0, #24]", jit_armv6m_asm:ldr(r2, {r0, 24})
+        ),
+        %% SP-relative load (0-1020, multiple of 4)
+        ?_assertAsmEqual(
+            <<16#9f00:16/little>>, "ldr r7, [sp, #0]", jit_armv6m_asm:ldr(r7, {sp, 0})
+        ),
+        ?_assertAsmEqual(
+            <<16#9801:16/little>>, "ldr r0, [sp, #4]", jit_armv6m_asm:ldr(r0, {sp, 4})
+        ),
+        %% PC-relative load (0-1020, multiple of 4)
+        ?_assertAsmEqual(
+            <<16#4a18:16/little>>, "ldr r2, [pc, #96]", jit_armv6m_asm:ldr(r2, {pc, 96})
+        ),
+        %% Register offset
+        ?_assertAsmEqual(
+            <<16#58d1:16/little>>, "ldr r1, [r2, r3]", jit_armv6m_asm:ldr(r1, {r2, r3})
+        )
+    ].
+
+movs_test_() ->
+    [
+        %% ARMv6-M Thumb MOVS instructions (sets flags)
+        %% MOVS immediate (8-bit only, 0-255)
+        ?_assertAsmEqual(<<16#2000:16/little>>, "movs r0, #0", jit_armv6m_asm:movs(r0, 0)),
+        ?_assertAsmEqual(<<16#2101:16/little>>, "movs r1, #1", jit_armv6m_asm:movs(r1, 1)),
+        ?_assertAsmEqual(<<16#22ff:16/little>>, "movs r2, #255", jit_armv6m_asm:movs(r2, 255)),
+        %% MOVS register - low registers only (r0-r7)
+        ?_assertAsmEqual(<<16#0008:16/little>>, "movs r0, r1", jit_armv6m_asm:movs(r0, r1)),
+        ?_assertAsmEqual(<<16#001a:16/little>>, "movs r2, r3", jit_armv6m_asm:movs(r2, r3))
+    ].
+
+mov_test_() ->
+    [
+        %% ARMv6-M Thumb MOV instructions (no flags, any of r0-r15)
+        %% MOV register - high or low registers on either side; nop is mov r8, r8
+        ?_assertAsmEqual(<<16#4680:16/little>>, "mov r8, r0", jit_armv6m_asm:mov(r8, r0)),
+        ?_assertAsmEqual(<<16#4640:16/little>>, "mov r0, r8", jit_armv6m_asm:mov(r0, r8)),
+        ?_assertAsmEqual(<<16#46c8:16/little>>, "mov r8, r9", jit_armv6m_asm:mov(r8, r9)),
+        ?_assertAsmEqual(<<16#46c0:16/little>>, "mov r8, r8", jit_armv6m_asm:mov(r8, r8)),
+        ?_assertAsmEqual(<<16#4619:16/little>>, "mov r1, r3", jit_armv6m_asm:mov(r1, r3)),
+        ?_assertAsmEqual(<<16#46c0:16/little>>, "nop", jit_armv6m_asm:nop())
+    ].
+
+str_test_() ->
+    [
+        %% ARMv6-M Thumb STR immediate offset (0-124, multiple of 4)
+        ?_assertAsmEqual(
+            <<16#6089:16/little>>, "str r1, [r1, #8]", jit_armv6m_asm:str(r1, {r1, 8})
+        ),
+        ?_assertAsmEqual(
+            <<16#6182:16/little>>, "str r2, [r0, #24]", jit_armv6m_asm:str(r2, {r0, 24})
+        ),
+        %% SP-relative store (0-1020, multiple of 4)
+        ?_assertAsmEqual(
+            <<16#9700:16/little>>, "str r7, [sp, #0]", jit_armv6m_asm:str(r7, {sp, 0})
+        ),
+        ?_assertAsmEqual(
+            <<16#9001:16/little>>, "str r0, [sp, #4]", jit_armv6m_asm:str(r0, {sp, 4})
+        ),
+        %% Register offset
+        ?_assertAsmEqual(
+            <<16#50d1:16/little>>, "str r1, [r2, r3]", jit_armv6m_asm:str(r1, {r2, r3})
+        )
+    ].
+
+cmp_test_() ->
+    [
+        %% ARMv6-M Thumb CMP register (low registers only)
+        ?_assertAsmEqual(<<16#4288:16/little>>, "cmp r0, r1", jit_armv6m_asm:cmp(r0, r1)),
+        ?_assertAsmEqual(<<16#42bb:16/little>>, "cmp r3, r7", jit_armv6m_asm:cmp(r3, r7)),
+        %% ARMv6-M Thumb CMP immediate (8-bit, 0-255, low registers only)
+        ?_assertAsmEqual(<<16#2800:16/little>>, "cmp r0, #0", jit_armv6m_asm:cmp(r0, 0)),
+        ?_assertAsmEqual(<<16#2805:16/little>>, "cmp r0, #5", jit_armv6m_asm:cmp(r0, 5)),
+        ?_assertAsmEqual(<<16#2fff:16/little>>, "cmp r7, #255", jit_armv6m_asm:cmp(r7, 255))
+    ].
+
+ands_test_() ->
+    [
+        %% ARMv6-M Thumb ANDS register (2-operand: Rd = Rd AND Rm)
+        ?_assertAsmEqual(<<16#4008:16/little>>, "ands r0, r1", jit_armv6m_asm:ands(r0, r1)),
+        ?_assertAsmEqual(<<16#4011:16/little>>, "ands r1, r2", jit_armv6m_asm:ands(r1, r2)),
+        ?_assertAsmEqual(<<16#401a:16/little>>, "ands r2, r3", jit_armv6m_asm:ands(r2, r3))
+    ].
+
+orrs_test_() ->
+    [
+        %% ARMv6-M Thumb ORRS register (2-operand: Rd = Rd OR Rm, sets flags)
+        ?_assertAsmEqual(<<16#4308:16/little>>, "orrs r0, r1", jit_armv6m_asm:orrs(r0, r1)),
+        ?_assertAsmEqual(<<16#4311:16/little>>, "orrs r1, r2", jit_armv6m_asm:orrs(r1, r2)),
+        ?_assertAsmEqual(<<16#431a:16/little>>, "orrs r2, r3", jit_armv6m_asm:orrs(r2, r3))
+    ].
+
+bics_test_() ->
+    [
+        ?_assertAsmEqual(<<16#4391:16/little>>, "bics r1, r2", jit_armv6m_asm:bics(r1, r2)),
+        ?_assertAsmEqual(<<16#43a3:16/little>>, "bics r3, r4", jit_armv6m_asm:bics(r3, r4))
+    ].
+
+negs_test_() ->
+    [
+        ?_assertAsmEqual(<<16#4251:16/little>>, "negs r1, r2", jit_armv6m_asm:negs(r1, r2)),
+        ?_assertAsmEqual(<<16#4263:16/little>>, "negs r3, r4", jit_armv6m_asm:negs(r3, r4))
+    ].
+
+rsbs_test_() ->
+    [
+        ?_assertAsmEqual(<<16#4251:16/little>>, "rsbs r1, r2, 0", jit_armv6m_asm:rsbs(r1, r2, 0)),
+        ?_assertAsmEqual(<<16#4263:16/little>>, "rsbs r3, r4, 0", jit_armv6m_asm:rsbs(r3, r4, 0))
+    ].
+
+lsls_test_() ->
+    [
+        %% ARMv6-M Thumb LSLS immediate shift (1-31)
+        ?_assertAsmEqual(<<16#0148:16/little>>, "lsls r0, r1, #5", jit_armv6m_asm:lsls(r0, r1, 5)),
+        ?_assertAsmEqual(<<16#0212:16/little>>, "lsls r2, r2, #8", jit_armv6m_asm:lsls(r2, r2, 8)),
+        %% LSLS register shift
+        ?_assertAsmEqual(<<16#409a:16/little>>, "lsls r2, r3", jit_armv6m_asm:lsls(r2, r3))
+    ].
+
+lsrs_test_() ->
+    [
+        %% ARMv6-M Thumb LSRS immediate shift (1-32)
+        ?_assertAsmEqual(<<16#0948:16/little>>, "lsrs r0, r1, #5", jit_armv6m_asm:lsrs(r0, r1, 5)),
+        ?_assertAsmEqual(<<16#0a12:16/little>>, "lsrs r2, r2, #8", jit_armv6m_asm:lsrs(r2, r2, 8)),
+        %% LSRS register shift
+        ?_assertAsmEqual(<<16#40da:16/little>>, "lsrs r2, r3", jit_armv6m_asm:lsrs(r2, r3))
+    ].
+
+tst_test_() ->
+    [
+        %% ARMv6-M Thumb TST instructions (register only, low registers)
+        %% TST Rn, Rm - test bits (performs Rn & Rm, updates flags)
+        ?_assertAsmEqual(<<16#4208:16/little>>, "tst r0, r1", jit_armv6m_asm:tst(r0, r1)),
+        ?_assertAsmEqual(<<16#421a:16/little>>, "tst r2, r3", jit_armv6m_asm:tst(r2, r3)),
+        ?_assertAsmEqual(<<16#4239:16/little>>, "tst r1, r7", jit_armv6m_asm:tst(r1, r7))
+    ].
+
+bcc_test_() ->
+    [
+        %% Thumb conditional branch encoding tests - ARMv6-M 16-bit only
+        ?_assertAsmEqual(<<16#D0FE:16/little>>, "beq .+0", jit_armv6m_asm:bcc(eq, 0)),
+        ?_assertAsmEqual(<<16#D1FE:16/little>>, "bne .+0", jit_armv6m_asm:bcc(ne, 0)),
+        ?_assertAsmEqual(<<16#D1DE:16/little>>, "bne .-64", jit_armv6m_asm:bcc(ne, -64)),
+        ?_assertAsmEqual(<<16#D03E:16/little>>, "beq .+128", jit_armv6m_asm:bcc(eq, 128)),
+        ?_assertAsmEqual(<<16#D23E:16/little>>, "bcs .+128", jit_armv6m_asm:bcc(cs, 128)),
+        ?_assertAsmEqual(<<16#D33E:16/little>>, "bcc .+128", jit_armv6m_asm:bcc(cc, 128)),
+        ?_assertAsmEqual(<<16#D43E:16/little>>, "bmi .+128", jit_armv6m_asm:bcc(mi, 128)),
+        ?_assertAsmEqual(<<16#D53E:16/little>>, "bpl .+128", jit_armv6m_asm:bcc(pl, 128)),
+        ?_assertAsmEqual(<<16#D63E:16/little>>, "bvs .+128", jit_armv6m_asm:bcc(vs, 128)),
+        ?_assertAsmEqual(<<16#D83E:16/little>>, "bhi .+128", jit_armv6m_asm:bcc(hi, 128)),
+        ?_assertAsmEqual(<<16#D93E:16/little>>, "bls .+128", jit_armv6m_asm:bcc(ls, 128)),
+        ?_assertAsmEqual(<<16#DA3E:16/little>>, "bge .+128", jit_armv6m_asm:bcc(ge, 128)),
+        ?_assertAsmEqual(<<16#DB3E:16/little>>, "blt .+128", jit_armv6m_asm:bcc(lt, 128)),
+        ?_assertAsmEqual(<<16#DC3E:16/little>>, "bgt .+128", jit_armv6m_asm:bcc(gt, 128)),
+        ?_assertAsmEqual(<<16#DD3E:16/little>>, "ble .+128", jit_armv6m_asm:bcc(le, 128)),
+        ?_assertAsmEqual(<<16#E03E:16/little>>, "bal .+128", jit_armv6m_asm:bcc(al, 128)),
+        ?_assertAsmEqual(<<16#D07F:16/little>>, "beq .+258", jit_armv6m_asm:bcc(eq, 258)),
+        ?_assertAsmEqual(<<16#D180:16/little>>, "bne .-252", jit_armv6m_asm:bcc(ne, -252)),
+        %% Test error cases for offsets too large for ARMv6-M
+        ?_assertError({unencodable_offset, 260}, jit_armv6m_asm:bcc(eq, 260)),
+        ?_assertError({unencodable_offset, -254}, jit_armv6m_asm:bcc(ne, -254))
+    ].
+
+adr_test_() ->
+    [
+        %% ARMv6-M Thumb ADR (PC-relative address) - implemented as ADD Rd, PC, #imm
+        %% adr(Rd, N) means "Rd = current_PC + N" where PC is instruction address
+        %% Range: 4-1024, must be multiple of 4
+        ?_assertAsmEqual(<<16#a000:16/little>>, "adr r0, .+4", jit_armv6m_asm:adr(r0, 4)),
+        ?_assertAsmEqual(<<16#a101:16/little>>, "adr r1, .+8", jit_armv6m_asm:adr(r1, 8)),
+        ?_assertAsmEqual(<<16#a202:16/little>>, "adr r2, .+12", jit_armv6m_asm:adr(r2, 12)),
+        ?_assertAsmEqual(<<16#a708:16/little>>, "adr r7, .+36", jit_armv6m_asm:adr(r7, 36)),
+        %% Test maximum offset value (1024 bytes)
+        ?_assertAsmEqual(<<16#a0ff:16/little>>, "adr r0, .+1024", jit_armv6m_asm:adr(r0, 1024))
+    ].
+
+push_test_() ->
+    [
+        %% ARMv6-M Thumb PUSH instruction (low registers + optional LR)
+        %% Single register push
+        ?_assertAsmEqual(<<16#b401:16/little>>, "push {r0}", jit_armv6m_asm:push([r0])),
+        %% Multiple register push
+        ?_assertAsmEqual(
+            <<16#b407:16/little>>, "push {r0, r1, r2}", jit_armv6m_asm:push([r0, r1, r2])
+        ),
+        %% Push with LR
+        ?_assertAsmEqual(<<16#b500:16/little>>, "push {lr}", jit_armv6m_asm:push([lr])),
+        %% Push registers + LR
+        ?_assertAsmEqual(
+            <<16#b507:16/little>>, "push {r0, r1, r2, lr}", jit_armv6m_asm:push([r0, r1, r2, lr])
+        )
+    ].
+
+pop_test_() ->
+    [
+        %% ARMv6-M Thumb POP instruction (low registers + optional PC)
+        %% Single register pop
+        ?_assertAsmEqual(<<16#bc01:16/little>>, "pop {r0}", jit_armv6m_asm:pop([r0])),
+        %% Multiple register pop
+        ?_assertAsmEqual(
+            <<16#bc07:16/little>>, "pop {r0, r1, r2}", jit_armv6m_asm:pop([r0, r1, r2])
+        ),
+        %% Pop with PC
+        ?_assertAsmEqual(<<16#bd00:16/little>>, "pop {pc}", jit_armv6m_asm:pop([pc])),
+        %% Pop registers + PC
+        ?_assertAsmEqual(
+            <<16#bd07:16/little>>, "pop {r0, r1, r2, pc}", jit_armv6m_asm:pop([r0, r1, r2, pc])
+        )
+    ].
+
+bkpt_test_() ->
+    [
+        %% BKPT #0
+        ?_assertAsmEqual(<<16#be00:16/little>>, "bkpt #0", jit_armv6m_asm:bkpt(0)),
+        %% BKPT #1
+        ?_assertAsmEqual(<<16#be01:16/little>>, "bkpt #1", jit_armv6m_asm:bkpt(1)),
+        %% BKPT #255
+        ?_assertAsmEqual(<<16#beff:16/little>>, "bkpt #255", jit_armv6m_asm:bkpt(255))
+    ].
+
+mvns_test_() ->
+    [
+        %% ARMv6-M Thumb MVNS instructions (register only, low registers)
+        %% MVNS Rd, Rm - bitwise NOT (performs ~Rm -> Rd, sets flags)
+        ?_assertAsmEqual(<<16#43e3:16/little>>, "mvns r3, r4", jit_armv6m_asm:mvns(r3, r4)),
+        ?_assertAsmEqual(<<16#43f3:16/little>>, "mvns r3, r6", jit_armv6m_asm:mvns(r3, r6)),
+        ?_assertAsmEqual(<<16#43c8:16/little>>, "mvns r0, r1", jit_armv6m_asm:mvns(r0, r1))
+    ].
diff --git a/tests/libs/jit/jit_armv6m_tests.erl b/tests/libs/jit/jit_armv6m_tests.erl
new file mode 100644
index 0000000000..c7cf14ae75
--- /dev/null
+++ b/tests/libs/jit/jit_armv6m_tests.erl
@@ -0,0 +1,3749 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_armv6m_tests).
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.
+
+-include("jit/include/jit.hrl").
+-include("jit/src/term.hrl").
+-include("jit/src/default_atoms.hrl").
+-include("jit/src/primitives.hrl").
+
+-define(BACKEND, jit_armv6m).
+
+% disassembly obtained with:
+% arm-elf-objdump -b binary -m arm -M force-thumb -D dump.bin
+
+call_primitive_0_test() -> % call_primitive/3 with primitive index 0 and arguments [ctx, jit_state]
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, ResultReg} = ?BACKEND:call_primitive(State0, 0, [ctx, jit_state]),
+    ?assertEqual(r7, ResultReg), % the returned result register must be r7
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            "   0:	6817      	ldr	r7, [r2, #0]\n" % index 0: function pointer read at offset 0 from r2
+            "   2:	b405      	push	{r0, r2}\n" % r0 and r2 are saved around the call
+            "   4:	9902      	ldr	r1, [sp, #8]\n" % second argument taken from the stack (presumably jit_state)
+            "   6:	47b8      	blx	r7\n"
+            "   8:	4607      	mov	r7, r0\n" % return value copied from r0 into the result register
+            "   a:	bc05      	pop	{r0, r2}"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_1_test() -> % call_primitive/3 with primitive index 1 and arguments [ctx, jit_state]
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, ResultReg} = ?BACKEND:call_primitive(State0, 1, [ctx, jit_state]),
+    ?assertEqual(r7, ResultReg), % the returned result register must be r7
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            "   0:	6857      	ldr	r7, [r2, #4]\n" % index 1: function pointer read at offset 4 from r2
+            "   2:	b405      	push	{r0, r2}\n" % r0 and r2 are saved around the call
+            "   4:	9902      	ldr	r1, [sp, #8]\n" % second argument taken from the stack (presumably jit_state)
+            "   6:	47b8      	blx	r7\n"
+            "   8:	4607      	mov	r7, r0\n" % return value copied from r0 into the result register
+            "   a:	bc05      	pop	{r0, r2}"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_2_args_test() -> % call_primitive/3 with index 2 and three integer arguments after ctx
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, ResultReg} = ?BACKEND:call_primitive(State0, 2, [ctx, 42, 43, 44]),
+    ?assertEqual(r7, ResultReg), % the returned result register must be r7
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            "   0:	6897      	ldr	r7, [r2, #8]\n" % index 2: function pointer read at offset 8 from r2
+            "   2:	b405      	push	{r0, r2}\n"
+            "   4:	212a      	movs	r1, #42	; 0x2a\n" % 42, 43 and 44 are loaded as immediates into r1, r2, r3
+            "   6:	222b      	movs	r2, #43	; 0x2b\n"
+            "   8:	232c      	movs	r3, #44	; 0x2c\n"
+            "   a:	47b8      	blx	r7\n"
+            "   c:	4607      	mov	r7, r0\n" % return value copied from r0 into the result register
+            "   e:	bc05      	pop	{r0, r2}"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_5_args_test() -> % five arguments; note that this drives call_primitive_last/3
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:call_primitive_last(State0, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            "   0:	6957      	ldr	r7, [r2, #20]\n"
+            "   2:	b082      	sub	sp, #8\n" % 8 bytes of stack reserved for the extra argument
+            "   4:	2602      	movs	r6, #2\n"
+            "   6:	9600      	str	r6, [sp, #0]\n" % fifth argument (2) is stored at [sp, #0]
+            "   8:	9902      	ldr	r1, [sp, #8]\n"
+            "   a:	2210      	movs	r2, #16\n" % third and fourth arguments (16, 32) go to r2 and r3
+            "   c:	2320      	movs	r3, #32\n"
+            "   e:	47b8      	blx	r7\n"
+            "  10:	b002      	add	sp, #8\n" % reserved stack space released after the call
+            "  12:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}" % sequence ends by popping pc
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_6_args_test() -> % six arguments: the last two are expected on the stack
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Load {x_reg, 0} and mask it with ?TERM_PRIMARY_CLEAR_MASK to get bin_ptr (as in get_list_test)
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:and_(State1, RegA, ?TERM_PRIMARY_CLEAR_MASK),
+    % Load {x_reg, 1} into a second register so the last argument also uses the {free, Reg} form
+    {State3, OtherReg} = ?BACKEND:move_to_native_register(State2, {x_reg, 1}),
+    % Call ?PRIM_BITSTRING_EXTRACT_INTEGER with 6 arguments; the returned register is not asserted
+    {State4, _ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_BITSTRING_EXTRACT_INTEGER, [
+        ctx, jit_state, {free, RegA}, 64, 8, {free, OtherReg}
+    ]),
+    Stream = ?BACKEND:stream(State4),
+    Dump =
+        <<
+            "   0:	6987      	ldr	r7, [r0, #24]\n" % {x_reg, 0} loaded into r7 (RegA)
+            "   2:	2603      	movs	r6, #3\n" % mask applied as movs #3 followed by bics
+            "   4:	43b7      	bics	r7, r6\n"
+            "   6:	69c6      	ldr	r6, [r0, #28]\n" % {x_reg, 1} loaded into r6 (OtherReg)
+            "   8:	25b8      	movs	r5, #184	; 0xb8\n" % function pointer read at offset 184 from r2 via r5
+            "   a:	5955      	ldr	r5, [r2, r5]\n"
+            "   c:	b405      	push	{r0, r2}\n"
+            "   e:	b082      	sub	sp, #8\n" % 8 bytes of stack reserved for arguments five and six
+            "  10:	9601      	str	r6, [sp, #4]\n" % sixth argument (OtherReg) stored at [sp, #4]
+            "  12:	2608      	movs	r6, #8\n"
+            "  14:	9600      	str	r6, [sp, #0]\n" % fifth argument (8) stored at [sp, #0]
+            "  16:	9904      	ldr	r1, [sp, #16]\n"
+            "  18:	463a      	mov	r2, r7\n" % third argument (RegA) moved to r2
+            "  1a:	2340      	movs	r3, #64	; 0x40\n" % fourth argument (64) in r3
+            "  1c:	47a8      	blx	r5\n"
+            "  1e:	4605      	mov	r5, r0\n" % return value copied from r0 into r5
+            "  20:	b002      	add	sp, #8\n"
+            "  22:	bc05      	pop	{r0, r2}"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_extended_regs_test() -> % chained calls whose results are passed on as {ptr, Reg}
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, RegA} = ?BACKEND:call_primitive(State0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    {State2, RegB} = ?BACKEND:call_primitive(State1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]),
+    {State3, RegC} = ?BACKEND:call_primitive(State2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    {State4, ResultReg} = ?BACKEND:call_primitive(State3, ?PRIM_PUT_LIST, [
+        ctx, {free, {ptr, RegA}}, {free, {ptr, RegB}}
+    ]),
+    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {ptr, RegC}), % store result through RegC
+    State6 = ?BACKEND:free_native_registers(State5, [ResultReg, {ptr, RegC}]),
+    ?BACKEND:assert_all_native_free(State6), % every native register must have been released by now
+    Stream = ?BACKEND:stream(State6),
+    Dump = <<
+        "   0:	6c97      	ldr	r7, [r2, #72]	; 0x48\n" % first call, argument 19
+        "   2:	b405      	push	{r0, r2}\n"
+        "   4:	2113      	movs	r1, #19\n"
+        "   6:	47b8      	blx	r7\n"
+        "   8:	4607      	mov	r7, r0\n" % first result kept in r7
+        "   a:	bc05      	pop	{r0, r2}\n"
+        "   c:	6c96      	ldr	r6, [r2, #72]	; 0x48\n" % second call, argument 20
+        "   e:	b4c5      	push	{r0, r2, r6, r7}\n" % r7 (first result) is saved as well
+        "  10:	2114      	movs	r1, #20\n"
+        "  12:	47b0      	blx	r6\n"
+        "  14:	4605      	mov	r5, r0\n" % second result kept in r5
+        "  16:	bcc5      	pop	{r0, r2, r6, r7}\n"
+        "  18:	6c96      	ldr	r6, [r2, #72]	; 0x48\n" % third call, argument 19 again
+        "  1a:	b4a5      	push	{r0, r2, r5, r7}\n" % both earlier results are saved
+        "  1c:	2113      	movs	r1, #19\n"
+        "  1e:	47b0      	blx	r6\n"
+        "  20:	4606      	mov	r6, r0\n" % third result kept in r6
+        "  22:	bca5      	pop	{r0, r2, r5, r7}\n"
+        "  24:	6b54      	ldr	r4, [r2, #52]	; 0x34\n" % ?PRIM_PUT_LIST call through r4
+        "  26:	b455      	push	{r0, r2, r4, r6}\n"
+        "  28:	6839      	ldr	r1, [r7, #0]\n" % {ptr, RegA} argument: r7 is dereferenced into r1
+        "  2a:	682a      	ldr	r2, [r5, #0]\n" % {ptr, RegB} argument: r5 is dereferenced into r2
+        "  2c:	47a0      	blx	r4\n"
+        "  2e:	4607      	mov	r7, r0\n"
+        "  30:	bc55      	pop	{r0, r2, r4, r6}\n"
+        "  32:	6037      	str	r7, [r6, #0]" % move_to_vm_register: result written through r6
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_few_free_regs_test() -> % call with r7..r3 already allocated before the call
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, r7} = ?BACKEND:move_to_native_register(State0, 1), % matches assert the allocation order
+    {State2, r6} = ?BACKEND:move_to_native_register(State1, 2),
+    {State3, r5} = ?BACKEND:move_to_native_register(State2, 3),
+    {State4, r4} = ?BACKEND:move_to_native_register(State3, 4),
+    {State5, r3} = ?BACKEND:move_to_native_register(State4, 5),
+    {State6, ResultReg} = ?BACKEND:call_primitive(State5, ?PRIM_BITSTRING_INSERT_INTEGER, [
+        r6, r7, {free, r4}, r5, {free, r3}
+    ]),
+    State7 = ?BACKEND:free_native_registers(State6, [ResultReg, r6, r7, r5]),
+    ?BACKEND:assert_all_native_free(State7), % r4 and r3 were passed as {free, _}, the rest freed above
+    Stream = ?BACKEND:stream(State7),
+    Dump = <<
+        "   0:	2701      	movs	r7, #1\n"
+        "   2:	2602      	movs	r6, #2\n"
+        "   4:	2503      	movs	r5, #3\n"
+        "   6:	2404      	movs	r4, #4\n"
+        "   8:	2305      	movs	r3, #5\n"
+        "   a:	21e4      	movs	r1, #228	@ 0xe4\n" % function pointer read at offset 228 from r2 via r1
+        "   c:	5851      	ldr	r1, [r2, r1]\n"
+        "   e:	b4e7      	push	{r0, r1, r2, r5, r6, r7}\n"
+        "  10:	b082      	sub	sp, #8\n"
+        "  12:	9300      	str	r3, [sp, #0]\n" % fifth argument (r3) stored at [sp, #0]
+        "  14:	4633      	mov	r3, r6\n" % first argument (r6) parked in r3 ...
+        "  16:	460e      	mov	r6, r1\n" % ... so the function pointer can move from r1 to r6
+        "  18:	4618      	mov	r0, r3\n" % first argument reaches r0
+        "  1a:	4639      	mov	r1, r7\n" % second argument (r7) to r1
+        "  1c:	4622      	mov	r2, r4\n" % third argument (r4) to r2
+        "  1e:	462b      	mov	r3, r5\n" % fourth argument (r5) to r3
+        "  20:	47b0      	blx	r6\n"
+        "  22:	4604      	mov	r4, r0\n" % return value copied from r0 into r4
+        "  24:	b002      	add	sp, #8\n"
+        "  26:	bce7      	pop	{r0, r1, r2, r5, r6, r7}"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_ext_only_test() -> % reduction check followed by ?PRIM_CALL_EXT with last argument -1
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        "   0:	9e00      	ldr	r6, [sp, #0]\n"
+        "   2:	68b7      	ldr	r7, [r6, #8]\n" % word at [r6, #8] is decremented and stored back
+        "   4:	3f01      	subs	r7, #1\n"
+        "   6:	60b7      	str	r7, [r6, #8]\n"
+        "   8:	d109      	bne.n	0x1e\n" % non-zero after the decrement: skip to the call at 0x1e
+        "   a:	a704      	add	r7, pc, #16	; (adr r7, 0x1c)\n" % address of 0x1c, bit 0 set by the adds
+        "   c:	3701      	adds	r7, #1\n"
+        "   e:	6077      	str	r7, [r6, #4]\n"
+        "  10:	6897      	ldr	r7, [r2, #8]\n"
+        "  12:	9e05      	ldr	r6, [sp, #20]\n"
+        "  14:	9705      	str	r7, [sp, #20]\n"
+        "  16:	46b6      	mov	lr, r6\n"
+        "  18:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  1a:	46c0      	nop			; (mov r8, r8)\n" % presumably padding so 0x1c is 4-byte aligned
+        "  1c:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  1e:	6917      	ldr	r7, [r2, #16]\n" % call_primitive_last/3 output starts here
+        "  20:	b082      	sub	sp, #8\n"
+        "  22:	2601      	movs	r6, #1\n" % -1 is built as movs #1 followed by negs
+        "  24:	4276      	negs	r6, r6\n"
+        "  26:	9601      	str	r6, [sp, #4]\n" % sixth argument (-1) stored at [sp, #4]
+        "  28:	2602      	movs	r6, #2\n"
+        "  2a:	9600      	str	r6, [sp, #0]\n" % fifth argument (2) stored at [sp, #0]
+        "  2c:	9902      	ldr	r1, [sp, #8]\n"
+        "  2e:	2220      	movs	r2, #32\n" % value emitted for the offset argument
+        "  30:	2302      	movs	r3, #2\n"
+        "  32:	47b8      	blx	r7\n"
+        "  34:	b002      	add	sp, #8\n"
+        "  36:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_ext_only_unaligned_test() -> % same sequence as call_ext_only_test, shifted by 2 bytes
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    %% Emit one 2-byte instruction first so the reduction check starts at offset 2 instead of 0
+    State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}),
+    State2 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State1),
+    State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]),
+    Stream = ?BACKEND:stream(State3),
+    Dump = <<
+        % Output of move_to_vm_register(State0, r1, {ptr, r3}): a single 2-byte store
+        "   0:	6019      	str	r1, [r3, #0]\n"
+        % Output of decrement_reductions_and_maybe_schedule_next(State1), from 0x2 to 0x1c
+        "   2:	9e00      	ldr	r6, [sp, #0]\n"
+        "   4:	68b7      	ldr	r7, [r6, #8]\n"
+        "   6:	3f01      	subs	r7, #1\n"
+        "   8:	60b7      	str	r7, [r6, #8]\n"
+        "   a:	d108      	bne.n	0x1e\n" % non-zero after the decrement: skip to the call at 0x1e
+        "   c:	a703      	add	r7, pc, #12	; (adr r7, 0x1c)\n" % adr still targets 0x1c
+        "   e:	3701      	adds	r7, #1\n"
+        "  10:	6077      	str	r7, [r6, #4]\n"
+        "  12:	6897      	ldr	r7, [r2, #8]\n"
+        "  14:	9e05      	ldr	r6, [sp, #20]\n"
+        "  16:	9705      	str	r7, [sp, #20]\n"
+        "  18:	46b6      	mov	lr, r6\n"
+        "  1a:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n" % no nop follows: the push already sits at 0x1c
+        "  1c:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        % Output of call_primitive_last(State2, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1])
+        "  1e:	6917      	ldr	r7, [r2, #16]\n"
+        "  20:	b082      	sub	sp, #8\n"
+        "  22:	2601      	movs	r6, #1\n" % -1 is built as movs #1 followed by negs
+        "  24:	4276      	negs	r6, r6\n"
+        "  26:	9601      	str	r6, [sp, #4]\n"
+        "  28:	2602      	movs	r6, #2\n"
+        "  2a:	9600      	str	r6, [sp, #0]\n"
+        "  2c:	9902      	ldr	r1, [sp, #8]\n"
+        "  2e:	2220      	movs	r2, #32\n"
+        "  30:	2302      	movs	r3, #2\n"
+        "  32:	47b8      	blx	r7\n"
+        "  34:	b002      	add	sp, #8\n"
+        "  36:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_last_5_args_test() -> % call_primitive_last/3 with a {free, Reg} fifth argument
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [
+        ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, RegA}
+    ]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        % Output of move_to_native_register(State0, {x_reg, 0}): RegA is r7
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        % Output of call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [...]), literal pool included
+        "   2:	6cd6      	ldr	r6, [r2, #76]	; 0x4c\n"
+        "   4:	b082      	sub	sp, #8\n"
+        "   6:	9700      	str	r7, [sp, #0]\n" % fifth argument (RegA) stored at [sp, #0]
+        "   8:	9902      	ldr	r1, [sp, #8]\n"
+        "   a:	2204      	movs	r2, #4\n" % value emitted for the offset argument
+        "   c:	4b01      	ldr	r3, [pc, #4]	; (0x14)\n" % fourth argument comes from the literal pool
+        "   e:	47b0      	blx	r6\n"
+        "  10:	b002      	add	sp, #8\n"
+        "  12:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        % Literal pool: the 32-bit word 0x000002cb read by the ldr at 0xc, shown here as instructions
+        "  14:	02cb      	lsls	r3, r1, #11\n"
+        "  16:	0000      	movs	r0, r0"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_ext_last_test() -> % reduction check followed by ?PRIM_CALL_EXT with last argument 10
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]),
+    Stream = ?BACKEND:stream(State2),
+    Dump = <<
+        % Output of decrement_reductions_and_maybe_schedule_next(State0), from 0x0 to 0x1c
+        "   0:	9e00      	ldr	r6, [sp, #0]\n"
+        "   2:	68b7      	ldr	r7, [r6, #8]\n" % word at [r6, #8] is decremented and stored back
+        "   4:	3f01      	subs	r7, #1\n"
+        "   6:	60b7      	str	r7, [r6, #8]\n"
+        "   8:	d109      	bne.n	0x1e\n" % non-zero after the decrement: skip to the call at 0x1e
+        "   a:	a704      	add	r7, pc, #16	; (adr r7, 0x1c)\n"
+        "   c:	3701      	adds	r7, #1\n"
+        "   e:	6077      	str	r7, [r6, #4]\n"
+        "  10:	6897      	ldr	r7, [r2, #8]\n"
+        "  12:	9e05      	ldr	r6, [sp, #20]\n"
+        "  14:	9705      	str	r7, [sp, #20]\n"
+        "  16:	46b6      	mov	lr, r6\n"
+        "  18:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  1a:	46c0      	nop			; (mov r8, r8)\n" % presumably padding so 0x1c is 4-byte aligned
+        "  1c:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        % Output of call_primitive_last(State1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10])
+        "  1e:	6917      	ldr	r7, [r2, #16]\n"
+        "  20:	b082      	sub	sp, #8\n"
+        "  22:	260a      	movs	r6, #10\n" % 10 is loaded with a single movs
+        "  24:	9601      	str	r6, [sp, #4]\n" % sixth argument (10) stored at [sp, #4]
+        "  26:	2602      	movs	r6, #2\n"
+        "  28:	9600      	str	r6, [sp, #0]\n" % fifth argument (2) stored at [sp, #0]
+        "  2a:	9902      	ldr	r1, [sp, #8]\n"
+        "  2c:	2220      	movs	r2, #32\n" % value emitted for the offset argument
+        "  2e:	2302      	movs	r3, #2\n"
+        "  30:	47b8      	blx	r7\n"
+        "  32:	b002      	add	sp, #8\n"
+        "  34:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_last_test() -> % call_primitive_last/3 with index 0 and three arguments
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:call_primitive_last(State0, 0, [ctx, jit_state, 42]),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            "   0:	6817      	ldr	r7, [r2, #0]\n" % index 0: function pointer read at offset 0 from r2
+            "   2:	222a      	movs	r2, #42	; 0x2a\n" % third argument (42) in r2
+            "   4:	9e05      	ldr	r6, [sp, #20]\n" % word at [sp, #20] is moved to lr ...
+            "   6:	9705      	str	r7, [sp, #20]\n" % ... and replaced by the function pointer
+            "   8:	46b6      	mov	lr, r6\n"
+            "   a:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n" % no blx: the pop loads pc from that slot
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+return_if_not_equal_to_ctx_test_() -> % return_if_not_equal_to_ctx/2 on a result and on its copy
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) -> % both cases below start from the same State0
+            [
+                ?_test(begin % case 1: the primitive's result register is passed as {free, Reg}
+                    {State1, ResultReg} = ?BACKEND:call_primitive(
+                        State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+                            ctx, jit_state
+                        ]
+                    ),
+                    ?assertEqual(r7, ResultReg),
+                    State2 = ?BACKEND:return_if_not_equal_to_ctx(State1, {free, ResultReg}),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump =
+                        <<
+                            "   0:	6d57      	ldr	r7, [r2, #84]	; 0x54\n"
+                            "   2:	b405      	push	{r0, r2}\n"
+                            "   4:	9902      	ldr	r1, [sp, #8]\n"
+                            "   6:	47b8      	blx	r7\n"
+                            "   8:	4607      	mov	r7, r0\n"
+                            "   a:	bc05      	pop	{r0, r2}\n"
+                            "   c:	4287      	cmp	r7, r0\n" % result compared with r0
+                            "   e:	d001      	beq.n	0x14\n" % equal: skip the return sequence
+                            "  10:	4638      	mov	r0, r7\n" % different: result moved to r0 ...
+                            "  12:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}" % ... and pc is popped
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin % case 2: the result is first copied and the copy is passed instead
+                    {State1, ResultReg} = ?BACKEND:call_primitive(
+                        State0, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+                            ctx, jit_state
+                        ]
+                    ),
+                    ?assertEqual(r7, ResultReg),
+                    {State2, OtherReg} = ?BACKEND:copy_to_native_register(State1, ResultReg),
+                    ?assertEqual(r6, OtherReg), % the copy must land in r6
+                    State3 = ?BACKEND:return_if_not_equal_to_ctx(State2, {free, OtherReg}),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump =
+                        <<
+                            "   0:	6d57      	ldr	r7, [r2, #84]	; 0x54\n"
+                            "   2:	b405      	push	{r0, r2}\n"
+                            "   4:	9902      	ldr	r1, [sp, #8]\n"
+                            "   6:	47b8      	blx	r7\n"
+                            "   8:	4607      	mov	r7, r0\n"
+                            "   a:	bc05      	pop	{r0, r2}\n"
+                            "   c:	463e      	mov	r6, r7\n" % copy_to_native_register: r7 copied to r6
+                            "   e:	4286      	cmp	r6, r0\n" % the copy is compared with r0
+                            "  10:	d001      	beq.n	0x16\n" % equal: skip the return sequence
+                            "  12:	4630      	mov	r0, r6\n"
+                            "  14:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+move_to_cp_test() -> % move_to_cp/2 with {y_reg, 0} as the source
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:move_to_cp(State0, {y_reg, 0}),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            "   0:	6946      	ldr	r6, [r0, #20]\n" % pointer read at offset 20 from r0
+            "   2:	6837      	ldr	r7, [r6, #0]\n" % first word behind that pointer loaded into r7
+            "   4:	65c7      	str	r7, [r0, #92]	; 0x5c" % value stored at offset 92 from r0
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+increment_sp_test() -> % increment_sp/2 with an increment of 7
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:increment_sp(State0, 7),
+    Stream = ?BACKEND:stream(State1),
+    Dump =
+        <<
+            "   0:	6947      	ldr	r7, [r0, #20]\n" % word at offset 20 from r0 is loaded
+            "   2:	371c      	adds	r7, #28\n" % 7 is expected as a 28-byte step (7 * 4)
+            "   4:	6147      	str	r7, [r0, #20]" % updated value written back to the same slot
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+if_block_test_() ->
+    {setup,
+        fun() ->
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+            {State2, RegA, RegB}
+        end,
+        fun({State0, RegA, RegB}) ->
+            [
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f00      	cmp	r7, #0\n"
+                        "   6:	d500      	bpl.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', RegB},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	42b7      	cmp	r7, r6\n"
+                        "   6:	da00      	bge.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f2a      	cmp	r7, #42	; 0x2a\n"
+                        "   6:	da00      	bge.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 1024},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	4d02      	ldr	r5, [pc, #8]	; (0x10)\n"
+                        "   6:	da01      	bge.n	0xc\n"
+                        "   8:	dafe      	bge.n	0x8\n"
+                        "   a:	3602      	adds	r6, #2\n"
+                        "   c:	e078      	b.n	0x100\n"
+                        "   e:	0000      	movs	r0, r0\n"
+                        "  10:	0400      	lsls	r0, r0, #16\n"
+                        "  12:	0000      	movs	r0, r0"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f00      	cmp	r7, #0\n"
+                        "   6:	d100      	bne.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f00      	cmp	r7, #0\n"
+                        "   6:	d100      	bne.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', -1},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2501      	movs	r5, #1\n"
+                        "   6:	426d      	negs	r5, r5\n"
+                        "   8:	42af      	cmp	r7, r5\n"
+                        "   a:	d100      	bne.n	0xe\n"
+                        "   c:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f00      	cmp	r7, #0\n"
+                        "   6:	d100      	bne.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f00      	cmp	r7, #0\n"
+                        "   6:	d100      	bne.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '!=', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f3b      	cmp	r7, #59	; 0x3b\n"
+                        "   6:	d000      	beq.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '!=', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f3b      	cmp	r7, #59	; 0x3b\n"
+                        "   6:	d000      	beq.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '!=', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f2a      	cmp	r7, #42	; 0x2a\n"
+                        "   6:	d000      	beq.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    % Test large immediate (1995) that requires temporary register
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '!=', 1995},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 1)
+                        end
+                    ),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	4d02      	ldr	r5, [pc, #8]	; (0x10)\n"
+                        "   6:	42af      	cmp	r7, r5\n"
+                        "   8:	d000      	beq.n	0xc\n"
+                        "   a:	3601      	adds	r6, #1\n"
+                        "   c:	e078      	b.n	0x100\n"
+                        "   e:	0000      	movs	r0, r0\n"
+                        "  10:	07cb      	lsls	r3, r1, #31\n"
+                        "  12:	0000      	movs	r0, r0"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '!=', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f2a      	cmp	r7, #42	; 0x2a\n"
+                        "   6:	d000      	beq.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f3b      	cmp	r7, #59	; 0x3b\n"
+                        "   6:	d100      	bne.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '==', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f3b      	cmp	r7, #59	; 0x3b\n"
+                        "   6:	d100      	bne.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '==', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f2a      	cmp	r7, #42	; 0x2a\n"
+                        "   6:	d100      	bne.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '==', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2f2a      	cmp	r7, #42	; 0x2a\n"
+                        "   6:	d100      	bne.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', RegA, '==', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	07fd      	lsls	r5, r7, #31\n"
+                        "   6:	d400      	bmi.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', {free, RegA}, '==', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	07fd      	lsls	r5, r7, #31\n"
+                        "   6:	d400      	bmi.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', RegA, '!=', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	07fd      	lsls	r5, r7, #31\n"
+                        "   6:	d500      	bpl.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', {free, RegA}, '!=', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	07fd      	lsls	r5, r7, #31\n"
+                        "   6:	d500      	bpl.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#7, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	077d      	lsls	r5, r7, #29\n"
+                        "   6:	d000      	beq.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#5, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	2505      	movs	r5, #5\n"
+                        "   6:	422f      	tst	r7, r5\n"
+                        "   8:	d000      	beq.n	0xc\n"
+                        "   a:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '&', 16#7, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	077d      	lsls	r5, r7, #29\n"
+                        "   6:	d000      	beq.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	43fd      	mvns	r5, r7\n"
+                        "   6:	072d      	lsls	r5, r5, #28\n"
+                        "   8:	d000      	beq.n	0xc\n"
+                        "   a:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	43ff      	mvns	r7, r7\n"
+                        "   6:	073f      	lsls	r7, r7, #28\n"
+                        "   8:	d000      	beq.n	0xc\n"
+                        "   a:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	463d      	mov	r5, r7\n"
+                        "   6:	243f      	movs	r4, #63	; 0x3f\n"
+                        "   8:	4025      	ands	r5, r4\n"
+                        "   a:	2d08      	cmp	r5, #8\n"
+                        "   c:	d000      	beq.n	0x10\n"
+                        "   e:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '<', RegB},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	42b7      	cmp	r7, r6\n"
+                        "   6:	da00      	bge.n	0xa\n"
+                        "   8:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {
+                            {free, RegA},
+                            '&',
+                            ?TERM_BOXED_TAG_MASK,
+                            '!=',
+                            ?TERM_BOXED_POSITIVE_INTEGER
+                        },
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                        "   4:	253f      	movs	r5, #63	; 0x3f\n"
+                        "   6:	402f      	ands	r7, r5\n"
+                        "   8:	2f08      	cmp	r7, #8\n"
+                        "   a:	d000      	beq.n	0xe\n"
+                        "   c:	3602      	adds	r6, #2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end)
+            ]
+        end}.
+
+%% Checks the code if_block/3 emits for `Reg & Mask != 0` conditions.
+%% Per the expected dumps, the masks 16#3, 16#F and 16#3F are tested with a
+%% single `lsls` into a scratch register, whereas mask 5 needs `movs` + `tst`.
+bitwise_and_optimization_test_() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {WithTested, TestedReg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 6}),
+    {Base, BumpedReg} = ?BACKEND:move_to_native_register(WithTested, {x_reg, 7}),
+    %% Emits the guarded `add` for one mask, then compares the stream with the
+    %% expected dump and checks that used_regs/1 still reports both registers.
+    Check = fun(Mask, ExpectedDump) ->
+        Guarded = ?BACKEND:if_block(
+            Base,
+            {TestedReg, '&', Mask, '!=', 0},
+            fun(Inner) -> ?BACKEND:add(Inner, BumpedReg, 2) end
+        ),
+        Emitted = ?BACKEND:stream(Guarded),
+        ?assertEqual(dump_to_bin(ExpectedDump), Emitted),
+        ?assertEqual([BumpedReg, TestedReg], ?BACKEND:used_regs(Guarded))
+    end,
+    [
+        %% Optimized: 16#3 (mask of the 2 low bits) -> lsls r5, r7, #30
+        ?_test(
+            Check(
+                16#3,
+                <<
+                    "   0:	6b07      	ldr	r7, [r0, #48]	; 0x30\n"
+                    "   2:	6b46      	ldr	r6, [r0, #52]	; 0x34\n"
+                    "   4:	07bd      	lsls	r5, r7, #30\n"
+                    "   6:	d000      	beq.n	0xa\n"
+                    "   8:	3602      	adds	r6, #2"
+                >>
+            )
+        ),
+        %% Optimized: 16#F (mask of the 4 low bits) -> lsls r5, r7, #28
+        ?_test(
+            Check(
+                16#F,
+                <<
+                    "   0:	6b07      	ldr	r7, [r0, #48]	; 0x30\n"
+                    "   2:	6b46      	ldr	r6, [r0, #52]	; 0x34\n"
+                    "   4:	073d      	lsls	r5, r7, #28\n"
+                    "   6:	d000      	beq.n	0xa\n"
+                    "   8:	3602      	adds	r6, #2"
+                >>
+            )
+        ),
+        %% Optimized: 16#3F (mask of the 6 low bits) -> lsls r5, r7, #26
+        ?_test(
+            Check(
+                16#3F,
+                <<
+                    "   0:	6b07      	ldr	r7, [r0, #48]	; 0x30\n"
+                    "   2:	6b46      	ldr	r6, [r0, #52]	; 0x34\n"
+                    "   4:	06bd      	lsls	r5, r7, #26\n"
+                    "   6:	d000      	beq.n	0xa\n"
+                    "   8:	3602      	adds	r6, #2"
+                >>
+            )
+        ),
+        %% Not optimized: 5 is neither a single bit nor a low-bits mask -> movs + tst
+        ?_test(
+            Check(
+                5,
+                <<
+                    "   0:	6b07      	ldr	r7, [r0, #48]	; 0x30\n"
+                    "   2:	6b46      	ldr	r6, [r0, #52]	; 0x34\n"
+                    "   4:	2505      	movs	r5, #5\n"
+                    "   6:	422f      	tst	r7, r5\n"
+                    "   8:	d000      	beq.n	0xc\n"
+                    "   a:	3602      	adds	r6, #2"
+                >>
+            )
+        )
+    ].
+
+%% Checks the code if_else_block/4 emits for a `Reg == ?TERM_NIL` condition:
+%% per the expected dump, a `cmp` + `bne.n` to the else arm, the then arm
+%% followed by an unconditional `b.n` over the else arm, then the else arm.
+if_else_block_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {WithCond, CondReg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {WithBoth, AccReg} = ?BACKEND:move_to_native_register(WithCond, {x_reg, 1}),
+    %% Both arms add to the same register; only the amount differs.
+    AddTo = fun(Amount) ->
+        fun(Inner) -> ?BACKEND:add(Inner, AccReg, Amount) end
+    end,
+    Branched = ?BACKEND:if_else_block(
+        WithBoth,
+        {CondReg, '==', ?TERM_NIL},
+        AddTo(2),
+        AddTo(4)
+    ),
+    Emitted = ?BACKEND:stream(Branched),
+    Expected = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+        "   4:	2f3b      	cmp	r7, #59	; 0x3b\n"
+        "   6:	d101      	bne.n	0xc\n"
+        "   8:	3602      	adds	r6, #2\n"
+        "   a:	e000      	b.n	0xe\n"
+        "   c:	3604      	adds	r6, #4"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% Checks shift_right/3 with both operand forms: `{free, Reg}` and a plain `Reg`.
+shift_right_test_() ->
+    %% Every case starts from a fresh backend with x_reg 0 loaded into a native
+    %% register; the fun is called inside each test, not at generation time.
+    LoadX0 = fun() ->
+        Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+        ?BACKEND:move_to_native_register(Fresh, {x_reg, 0})
+    end,
+    [
+        %% {free, Reg}: re-matching Src asserts the result comes back in the
+        %% source register (lsrs r7, r7 in the expected dump).
+        ?_test(begin
+            {Loaded, Src} = LoadX0(),
+            {Shifted, Src} = ?BACKEND:shift_right(Loaded, {free, Src}, 3),
+            Emitted = ?BACKEND:stream(Shifted),
+            Expected = <<
+                "   0:	6987      	ldr	r7, [r0, #24]\n"
+                "   2:	08ff      	lsrs	r7, r7, #3"
+            >>,
+            ?assertEqual(dump_to_bin(Expected), Emitted)
+        end),
+        %% Plain Reg: the result register must differ from the source
+        %% (lsrs r6, r7 in the expected dump).
+        ?_test(begin
+            {Loaded, Src} = LoadX0(),
+            {Shifted, Dst} = ?BACKEND:shift_right(Loaded, Src, 3),
+            ?assertNotEqual(Dst, Src),
+            Emitted = ?BACKEND:stream(Shifted),
+            Expected = <<
+                "   0:	6987      	ldr	r7, [r0, #24]\n"
+                "   2:	08fe      	lsrs	r6, r7, #3"
+            >>,
+            ?assertEqual(dump_to_bin(Expected), Emitted)
+        end)
+    ].
+
+%% Checks shift_left/3 on a native register loaded from x_reg 0; the expected
+%% dump shifts that register in place (lsls r7, r7, #3).
+shift_left_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Emitted = ?BACKEND:stream(?BACKEND:shift_left(Loaded, Reg, 3)),
+    Expected = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	00ff      	lsls	r7, r7, #3"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% Emits a jump table, a call_only_or_schedule_next/2 to label 2 under label 1,
+%% and two call_primitive_last/3 bodies under labels 2 and 0, then runs
+%% update_branches/1 and compares the whole stream with the expected dump.
+%% The dump starts with three 12-byte table entries (0x0, 0xc, 0x18) whose
+%% trailing words hold the offsets the test expects after relocation.
+call_only_or_schedule_next_and_label_relocation_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    %% Emission steps, applied to the backend state in list order.
+    Steps = [
+        fun(S) -> ?BACKEND:jump_table(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 1) end,
+        fun(S) -> ?BACKEND:call_only_or_schedule_next(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 2) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 0, [ctx, jit_state]) end,
+        %% Label 0 is OP_INT_CALL_END
+        fun(S) -> ?BACKEND:add_label(S, 0) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 1, [ctx, jit_state]) end,
+        fun(S) -> ?BACKEND:update_branches(S) end
+    ],
+    Final = lists:foldl(fun(Step, S) -> Step(S) end, Fresh, Steps),
+    Emitted = ?BACKEND:stream(Final),
+    Expected = <<
+        "   0:	4b01      	ldr	r3, [pc, #4]	; (0x8)\n"
+        "   2:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "   4:	449f      	add	pc, r3\n"
+        "   6:	46c0      	nop			; (mov r8, r8)\n"
+        "   8:	0054      	lsls	r4, r2, #1\n"
+        "   a:	0000      	movs	r0, r0\n"
+        "   c:	4b01      	ldr	r3, [pc, #4]	; (0x14)\n"
+        "   e:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  10:	449f      	add	pc, r3\n"
+        "  12:	46c0      	nop			; (mov r8, r8)\n"
+        "  14:	0010      	movs	r0, r2\n"
+        "  16:	0000      	movs	r0, r0\n"
+        "  18:	4b01      	ldr	r3, [pc, #4]	; (0x20)\n"
+        "  1a:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  1c:	449f      	add	pc, r3\n"
+        "  1e:	46c0      	nop			; (mov r8, r8)\n"
+        "  20:	0030      	movs	r0, r6\n"
+        "  22:	0000      	movs	r0, r0\n"
+        "  24:	9e00      	ldr	r6, [sp, #0]\n"
+        "  26:	68b7      	ldr	r7, [r6, #8]\n"
+        "  28:	3f01      	subs	r7, #1\n"
+        "  2a:	60b7      	str	r7, [r6, #8]\n"
+        "  2c:	d004      	beq.n	0x38\n"
+        "  2e:	e00f      	b.n	0x50\n"
+        "  30:	46c0      	nop			; (mov r8, r8)\n"
+        "  32:	46c0      	nop			; (mov r8, r8)\n"
+        "  34:	46c0      	nop			; (mov r8, r8)\n"
+        "  36:	46c0      	nop			; (mov r8, r8)\n"
+        "  38:	a700      	add	r7, pc, #0	; (adr r7, 0x3c)\n"
+        "  3a:	2623      	movs	r6, #35	; 0x23\n"
+        "  3c:	4276      	negs	r6, r6\n"
+        "  3e:	19f6      	adds	r6, r6, r7\n"
+        "  40:	9f00      	ldr	r7, [sp, #0]\n"
+        "  42:	607e      	str	r6, [r7, #4]\n"
+        "  44:	6897      	ldr	r7, [r2, #8]\n"
+        "  46:	9e05      	ldr	r6, [sp, #20]\n"
+        "  48:	9705      	str	r7, [sp, #20]\n"
+        "  4a:	46b6      	mov	lr, r6\n"
+        "  4c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  4e:	46c0      	nop			; (mov r8, r8)\n"
+        "  50:	6817      	ldr	r7, [r2, #0]\n"
+        "  52:	9e05      	ldr	r6, [sp, #20]\n"
+        "  54:	9705      	str	r7, [sp, #20]\n"
+        "  56:	46b6      	mov	lr, r6\n"
+        "  58:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  5a:	46c0      	nop			; (mov r8, r8)\n"
+        "  5c:	6857      	ldr	r7, [r2, #4]\n"
+        "  5e:	9e05      	ldr	r6, [sp, #20]\n"
+        "  60:	9705      	str	r7, [sp, #20]\n"
+        "  62:	46b6      	mov	lr, r6\n"
+        "  64:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% Same scenario as the aligned label-relocation test, but a single 2-byte
+%% `str` is emitted first so that the jump table starts at offset 2.
+%% NOTE(review): in the expected dump the table offset words sit at
+%% 0xa/0x16/0x22 (not word-aligned) while objdump resolves the
+%% `ldr r3, [pc, #4]` loads to 0x8/0x14/0x20 -- confirm that a jump table is
+%% never emitted at a non-word-aligned offset outside of this test.
+call_only_or_schedule_next_and_label_relocation_unaligned_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    %% Emission steps, applied to the backend state in list order.
+    Steps = [
+        %% One 2-byte instruction up front shifts everything after it by 2
+        fun(S) -> ?BACKEND:move_to_vm_register(S, r1, {ptr, r3}) end,
+        fun(S) -> ?BACKEND:jump_table(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 1) end,
+        fun(S) -> ?BACKEND:call_only_or_schedule_next(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 2) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 0, [ctx, jit_state]) end,
+        %% Label 0 is OP_INT_CALL_END
+        fun(S) -> ?BACKEND:add_label(S, 0) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 1, [ctx, jit_state]) end,
+        fun(S) -> ?BACKEND:update_branches(S) end
+    ],
+    Final = lists:foldl(fun(Step, S) -> Step(S) end, Fresh, Steps),
+    Emitted = ?BACKEND:stream(Final),
+    Expected = <<
+        "   0:	6019      	str	r1, [r3, #0]\n"
+        "   2:	4b01      	ldr	r3, [pc, #4]	; (0x8)\n"
+        "   4:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "   6:	449f      	add	pc, r3\n"
+        "   8:	46c0      	nop			; (mov r8, r8)\n"
+        "   a:	0056      	lsls	r6, r2, #1\n"
+        "   c:	0000      	movs	r0, r0\n"
+        "   e:	4b01      	ldr	r3, [pc, #4]	; (0x14)\n"
+        "  10:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  12:	449f      	add	pc, r3\n"
+        "  14:	46c0      	nop			; (mov r8, r8)\n"
+        "  16:	0012      	movs	r2, r2\n"
+        "  18:	0000      	movs	r0, r0\n"
+        "  1a:	4b01      	ldr	r3, [pc, #4]	; (0x20)\n"
+        "  1c:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  1e:	449f      	add	pc, r3\n"
+        "  20:	46c0      	nop			; (mov r8, r8)\n"
+        "  22:	0032      	movs	r2, r6\n"
+        "  24:	0000      	movs	r0, r0\n"
+        "  26:	46c0      	nop			; (mov r8, r8)\n"
+        "  28:	9e00      	ldr	r6, [sp, #0]\n"
+        "  2a:	68b7      	ldr	r7, [r6, #8]\n"
+        "  2c:	3f01      	subs	r7, #1\n"
+        "  2e:	60b7      	str	r7, [r6, #8]\n"
+        "  30:	d004      	beq.n	0x3c\n"
+        "  32:	e00f      	b.n	0x54\n"
+        "  34:	46c0      	nop			; (mov r8, r8)\n"
+        "  36:	46c0      	nop			; (mov r8, r8)\n"
+        "  38:	46c0      	nop			; (mov r8, r8)\n"
+        "  3a:	46c0      	nop			; (mov r8, r8)\n"
+        "  3c:	a700      	add	r7, pc, #0	; (adr r7, 0x40)\n"
+        "  3e:	2627      	movs	r6, #39	; 0x27\n"
+        "  40:	4276      	negs	r6, r6\n"
+        "  42:	19f6      	adds	r6, r6, r7\n"
+        "  44:	9f00      	ldr	r7, [sp, #0]\n"
+        "  46:	607e      	str	r6, [r7, #4]\n"
+        "  48:	6897      	ldr	r7, [r2, #8]\n"
+        "  4a:	9e05      	ldr	r6, [sp, #20]\n"
+        "  4c:	9705      	str	r7, [sp, #20]\n"
+        "  4e:	46b6      	mov	lr, r6\n"
+        "  50:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  52:	46c0      	nop			; (mov r8, r8)\n"
+        "  54:	6817      	ldr	r7, [r2, #0]\n"
+        "  56:	9e05      	ldr	r6, [sp, #20]\n"
+        "  58:	9705      	str	r7, [sp, #20]\n"
+        "  5a:	46b6      	mov	lr, r6\n"
+        "  5c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  5e:	46c0      	nop			; (mov r8, r8)\n"
+        "  60:	6857      	ldr	r7, [r2, #4]\n"
+        "  62:	9e05      	ldr	r6, [sp, #20]\n"
+        "  64:	9705      	str	r7, [sp, #20]\n"
+        "  66:	46b6      	mov	lr, r6\n"
+        "  68:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% Label-relocation scenario with 256+ bytes of padding between the jump table
+%% and the labelled code, to force the mov_immediate path: in the expected dump
+%% the value added to the `adr` result is loaded with `ldr r6, [pc, #16]` from
+%% a literal word at 0x14c instead of being built with `movs` + `negs`.
+call_only_or_schedule_next_and_label_relocation_large_gap_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    %% Padding step: each load emits ~2 bytes, so 128 of them give ~256 bytes
+    %% between the jump table and the function bodies.
+    PadStep = fun(S) -> ?BACKEND:move_to_native_register(S, {x_reg, 2}, r3) end,
+    %% Emission steps, applied to the backend state in list order.
+    Steps =
+        [fun(S) -> ?BACKEND:jump_table(S, 2) end] ++
+            lists:duplicate(128, PadStep) ++
+            [
+                fun(S) -> ?BACKEND:add_label(S, 1) end,
+                fun(S) -> ?BACKEND:call_only_or_schedule_next(S, 2) end,
+                fun(S) -> ?BACKEND:add_label(S, 2) end,
+                fun(S) -> ?BACKEND:call_primitive_last(S, 0, [ctx, jit_state]) end,
+                %% Label 0 is OP_INT_CALL_END
+                fun(S) -> ?BACKEND:add_label(S, 0) end,
+                fun(S) -> ?BACKEND:call_primitive_last(S, 1, [ctx, jit_state]) end,
+                fun(S) -> ?BACKEND:update_branches(S) end
+            ],
+    Final = lists:foldl(fun(Step, S) -> Step(S) end, Fresh, Steps),
+    Emitted = ?BACKEND:stream(Final),
+    %% Only the section from 0x124 onwards is compared; it holds the literal
+    %% pool pattern. 0x124 = 0x24 (presumably the jump table) + 0x100 of padding.
+    Expected = <<
+        " 124:	9e00      	ldr	r6, [sp, #0]\n"
+        " 126:	68b7      	ldr	r7, [r6, #8]\n"
+        " 128:	3f01      	subs	r7, #1\n"
+        " 12a:	60b7      	str	r7, [r6, #8]\n"
+        " 12c:	d004      	beq.n	0x138\n"
+        " 12e:	e00f      	b.n	0x150\n"
+        " 130:	46c0      	nop			; (mov r8, r8)\n"
+        " 132:	46c0      	nop			; (mov r8, r8)\n"
+        " 134:	46c0      	nop			; (mov r8, r8)\n"
+        " 136:	46c0      	nop			; (mov r8, r8)\n"
+        " 138:	a700      	add	r7, pc, #0	; (adr r7, 0x13c)\n"
+        " 13a:	4e04      	ldr	r6, [pc, #16]	; (0x14c)\n"
+        " 13c:	19f6      	adds	r6, r6, r7\n"
+        " 13e:	9f00      	ldr	r7, [sp, #0]\n"
+        " 140:	607e      	str	r6, [r7, #4]\n"
+        " 142:	6897      	ldr	r7, [r2, #8]\n"
+        " 144:	9e05      	ldr	r6, [sp, #20]\n"
+        " 146:	9705      	str	r7, [sp, #20]\n"
+        " 148:	46b6      	mov	lr, r6\n"
+        " 14a:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        " 14c:	fedd ffff 	mrc2	15, 6, pc, cr13, cr15, {7}\n"
+        " 150:	6817      	ldr	r7, [r2, #0]\n"
+        " 152:	9e05      	ldr	r6, [sp, #20]\n"
+        " 154:	9705      	str	r7, [sp, #20]\n"
+        " 156:	46b6      	mov	lr, r6\n"
+        " 158:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        " 15a:	46c0      	nop			; (mov r8, r8)\n"
+        " 15c:	6857      	ldr	r7, [r2, #4]\n"
+        " 15e:	9e05      	ldr	r6, [sp, #20]\n"
+        " 160:	9705      	str	r7, [sp, #20]\n"
+        " 162:	46b6      	mov	lr, r6\n"
+        " 164:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+    >>,
+    {_Skipped, Tail} = split_binary(Emitted, 16#124),
+    ?assertEqual(dump_to_bin(Expected), Tail).
+
+%% Large gap (256+ bytes) again, with a different alignment, still on the literal pool path
+call_only_or_schedule_next_and_label_relocation_large_gap_unaligned_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:jump_table(St0, 2),
+    % Pad the stream by repeating a move_to_native_register operation, which
+    % pushes the remaining code far away from the jump table.
+    % 127 operations (one fewer than the aligned variant) shift the alignment
+    Padded = lists:foldl(
+        fun(_N, Acc) ->
+            ?BACKEND:move_to_native_register(Acc, {x_reg, 2}, r3)
+        end,
+        St1,
+        lists:seq(1, 127)
+    ),
+    St2 = ?BACKEND:add_label(Padded, 1),
+    St3 = ?BACKEND:call_only_or_schedule_next(St2, 2),
+    St4 = ?BACKEND:add_label(St3, 2),
+    St5 = ?BACKEND:call_primitive_last(St4, 0, [ctx, jit_state]),
+    % OP_INT_CALL_END
+    St6 = ?BACKEND:add_label(St5, 0),
+    St7 = ?BACKEND:call_primitive_last(St6, 1, [ctx, jit_state]),
+    St8 = ?BACKEND:update_branches(St7),
+    Emitted = ?BACKEND:stream(St8),
+    % Only the tail from 0x122 on is compared: literal pool pattern, different alignment
+    Expected = <<
+        " 122:	46c0      	nop			; (mov r8, r8)\n"
+        " 124:	9e00      	ldr	r6, [sp, #0]\n"
+        " 126:	68b7      	ldr	r7, [r6, #8]\n"
+        " 128:	3f01      	subs	r7, #1\n"
+        " 12a:	60b7      	str	r7, [r6, #8]\n"
+        " 12c:	d004      	beq.n	0x138\n"
+        " 12e:	e00f      	b.n	0x150\n"
+        " 130:	46c0      	nop			; (mov r8, r8)\n"
+        " 132:	46c0      	nop			; (mov r8, r8)\n"
+        " 134:	46c0      	nop			; (mov r8, r8)\n"
+        " 136:	46c0      	nop			; (mov r8, r8)\n"
+        " 138:	a700      	add	r7, pc, #0	; (adr r7, 0x13c)\n"
+        " 13a:	4e04      	ldr	r6, [pc, #16]	; (0x14c)\n"
+        " 13c:	19f6      	adds	r6, r6, r7\n"
+        " 13e:	9f00      	ldr	r7, [sp, #0]\n"
+        " 140:	607e      	str	r6, [r7, #4]\n"
+        " 142:	6897      	ldr	r7, [r2, #8]\n"
+        " 144:	9e05      	ldr	r6, [sp, #20]\n"
+        " 146:	9705      	str	r7, [sp, #20]\n"
+        " 148:	46b6      	mov	lr, r6\n"
+        " 14a:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        " 14c:	fedd ffff 	mrc2	15, 6, pc, cr13, cr15, {7}\n"
+        " 150:	6817      	ldr	r7, [r2, #0]\n"
+        " 152:	9e05      	ldr	r6, [sp, #20]\n"
+        " 154:	9705      	str	r7, [sp, #20]\n"
+        " 156:	46b6      	mov	lr, r6\n"
+        " 158:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        " 15a:	46c0      	nop			; (mov r8, r8)\n"
+        " 15c:	6857      	ldr	r7, [r2, #4]\n"
+        " 15e:	9e05      	ldr	r6, [sp, #20]\n"
+        " 160:	9705      	str	r7, [sp, #20]\n"
+        " 162:	46b6      	mov	lr, r6\n"
+        " 164:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+    >>,
+    {_, Tail} = split_binary(Emitted, 16#122),
+    ?assertEqual(dump_to_bin(Expected), Tail).
+
+call_bif_with_large_literal_integer_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, FuncPtr} = ?BACKEND:call_primitive(State0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 2]),
+    {State2, ArgReg} = ?BACKEND:call_primitive(State1, 15, [ctx, 998238357]),
+    {State3, ResultReg} = ?BACKEND:call_func_ptr(State2, {free, FuncPtr}, [
+        ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg}
+    ]),
+    State4 = ?BACKEND:if_block(State3, {ResultReg, '==', 0}, fun(BSt0) ->
+        ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
+    end),
+    State5 = ?BACKEND:move_to_vm_register(State4, ResultReg, {x_reg, 0}),
+    State6 = ?BACKEND:free_native_registers(State5, [ResultReg]),
+    ?BACKEND:assert_all_native_free(State6),
+    Stream = ?BACKEND:stream(State6),
+    % 998238357 (0x3b7fe895) is loaded with a pc-relative ldr from the literal word at 0x40
+    Dump = <<
+        "   0:	6a17      	ldr	r7, [r2, #32]\n"
+        "   2:	b405      	push	{r0, r2}\n"
+        "   4:	9802      	ldr	r0, [sp, #8]\n"
+        "   6:	2102      	movs	r1, #2\n"
+        "   8:	47b8      	blx	r7\n"
+        "   a:	4607      	mov	r7, r0\n"
+        "   c:	bc05      	pop	{r0, r2}\n"
+        "   e:	6bd6      	ldr	r6, [r2, #60]	; 0x3c\n"
+        "  10:	b4c5      	push	{r0, r2, r6, r7}\n"
+        "  12:	490b      	ldr	r1, [pc, #44]	; (0x40)\n"
+        "  14:	47b0      	blx	r6\n"
+        "  16:	4605      	mov	r5, r0\n"
+        "  18:	bcc5      	pop	{r0, r2, r6, r7}\n"
+        "  1a:	b405      	push	{r0, r2}\n"
+        "  1c:	b082      	sub	sp, #8\n"
+        "  1e:	9500      	str	r5, [sp, #0]\n"
+        "  20:	2100      	movs	r1, #0\n"
+        "  22:	2201      	movs	r2, #1\n"
+        "  24:	6983      	ldr	r3, [r0, #24]\n"
+        "  26:	47b8      	blx	r7\n"
+        "  28:	4607      	mov	r7, r0\n"
+        "  2a:	b002      	add	sp, #8\n"
+        "  2c:	bc05      	pop	{r0, r2}\n"
+        "  2e:	2f00      	cmp	r7, #0\n"
+        "  30:	d108      	bne.n	0x44\n"
+        "  32:	6997      	ldr	r7, [r2, #24]\n"
+        "  34:	2234      	movs	r2, #52	; 0x34\n"
+        "  36:	9e05      	ldr	r6, [sp, #20]\n"
+        "  38:	9705      	str	r7, [sp, #20]\n"
+        "  3a:	46b6      	mov	lr, r6\n"
+        "  3c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  3e:	0000      	movs	r0, r0\n"
+        "  40:	e895 3b7f 	ldmia.w	r5, {r0, r1, r2, r3, r4, r5, r6, r8, r9, fp, ip, sp}\n"
+        "  44:	6187      	str	r7, [r0, #24]"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+get_list_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, ListReg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+    St2 = ?BACKEND:and_(St1, ListReg, ?TERM_PRIMARY_CLEAR_MASK),
+    St3 = ?BACKEND:move_array_element(St2, ListReg, 1, {y_reg, 1}),
+    St4 = ?BACKEND:move_array_element(St3, ListReg, 0, {y_reg, 0}),
+    St5 = ?BACKEND:free_native_registers(St4, [ListReg]),
+    ?BACKEND:assert_all_native_free(St5),
+    Emitted = ?BACKEND:stream(St5),
+    Expected = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	2603      	movs	r6, #3\n"
+        "   4:	43b7      	bics	r7, r6\n"
+        "   6:	687d      	ldr	r5, [r7, #4]\n"
+        "   8:	6946      	ldr	r6, [r0, #20]\n"
+        "   a:	6075      	str	r5, [r6, #4]\n"
+        "   c:	683d      	ldr	r5, [r7, #0]\n"
+        "   e:	6946      	ldr	r6, [r0, #20]\n"
+        "  10:	6035      	str	r5, [r6, #0]"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+is_integer_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    FailLabel = 1,
+    Src = {x_reg, 0},
+    {St1, Val} = ?BACKEND:move_to_native_register(St0, Src),
+    St2 = ?BACKEND:if_block(
+        St1, {Val, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(In0) ->
+            In1 = ?BACKEND:if_block(
+                In0, {Val, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(Blk) ->
+                    ?BACKEND:jump_to_label(Blk, FailLabel)
+                end
+            ),
+            In2 = ?BACKEND:and_(In1, Val, ?TERM_PRIMARY_CLEAR_MASK),
+            In3 = ?BACKEND:move_array_element(In2, Val, 0, Val),
+            ?BACKEND:if_block(
+                In3,
+                {{free, Val}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                fun(Blk) ->
+                    ?BACKEND:jump_to_label(Blk, FailLabel)
+                end
+            )
+        end
+    ),
+    St3 = ?BACKEND:free_native_registers(St2, [Val]),
+    ?BACKEND:assert_all_native_free(St3),
+    St4 = ?BACKEND:add_label(St3, FailLabel, 16#100),
+    St5 = ?BACKEND:update_branches(St4),
+    Emitted = ?BACKEND:stream(St5),
+    Expected = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	43fe      	mvns	r6, r7\n"
+        "   4:	0736      	lsls	r6, r6, #28\n"
+        "   6:	d015      	beq.n	0x34\n"
+        "   8:	463e      	mov	r6, r7\n"
+        "   a:	2503      	movs	r5, #3\n"
+        "   c:	402e      	ands	r6, r5\n"
+        "   e:	2e02      	cmp	r6, #2\n"
+        "  10:	d004      	beq.n	0x1c\n"
+        "  12:	e075      	b.n	0x100\n"
+        "  14:	46c0      	nop			; (mov r8, r8)\n"
+        "  16:	46c0      	nop			; (mov r8, r8)\n"
+        "  18:	46c0      	nop			; (mov r8, r8)\n"
+        "  1a:	46c0      	nop			; (mov r8, r8)\n"
+        "  1c:	2603      	movs	r6, #3\n"
+        "  1e:	43b7      	bics	r7, r6\n"
+        "  20:	683f      	ldr	r7, [r7, #0]\n"
+        "  22:	263f      	movs	r6, #63	; 0x3f\n"
+        "  24:	4037      	ands	r7, r6\n"
+        "  26:	2f08      	cmp	r7, #8\n"
+        "  28:	d004      	beq.n	0x34\n"
+        "  2a:	e069      	b.n	0x100\n"
+        "  2c:	46c0      	nop			; (mov r8, r8)\n"
+        "  2e:	46c0      	nop			; (mov r8, r8)\n"
+        "  30:	46c0      	nop			; (mov r8, r8)\n"
+        "  32:	46c0      	nop			; (mov r8, r8)"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+cond_jump_to_label(Cond, Label, MMod, MSt0) ->
+    % The conditional block consists of a single jump to Label.
+    JumpFun = fun(BlockSt) -> MMod:jump_to_label(BlockSt, Label) end,
+    MMod:if_block(MSt0, Cond, JumpFun).
+
+%% Deliberately kept in its unoptimized form so that the 'and' condition is exercised.
+is_number_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    FailLabel = 1,
+    Src = {x_reg, 0},
+    {St1, Val} = ?BACKEND:move_to_native_register(St0, Src),
+    St2 = ?BACKEND:if_block(
+        St1, {Val, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG}, fun(In0) ->
+            In1 = cond_jump_to_label(
+                {Val, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, FailLabel, ?BACKEND, In0
+            ),
+            In2 = ?BACKEND:and_(In1, Val, ?TERM_PRIMARY_CLEAR_MASK),
+            In3 = ?BACKEND:move_array_element(In2, Val, 0, Val),
+            cond_jump_to_label(
+                {'and', [
+                    {Val, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                    {{free, Val}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT}
+                ]},
+                FailLabel,
+                ?BACKEND,
+                In3
+            )
+        end
+    ),
+    St3 = ?BACKEND:free_native_registers(St2, [Val]),
+    ?BACKEND:assert_all_native_free(St3),
+    St4 = ?BACKEND:add_label(St3, FailLabel, 16#100),
+    St5 = ?BACKEND:update_branches(St4),
+    Emitted = ?BACKEND:stream(St5),
+    Expected = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	43fe      	mvns	r6, r7\n"
+        "   4:	0736      	lsls	r6, r6, #28\n"
+        "   6:	d01b      	beq.n	0x40\n"
+        "   8:	463e      	mov	r6, r7\n"
+        "   a:	2503      	movs	r5, #3\n"
+        "   c:	402e      	ands	r6, r5\n"
+        "   e:	2e02      	cmp	r6, #2\n"
+        "  10:	d004      	beq.n	0x1c\n"
+        "  12:	e075      	b.n	0x100\n"
+        "  14:	46c0      	nop			; (mov r8, r8)\n"
+        "  16:	46c0      	nop			; (mov r8, r8)\n"
+        "  18:	46c0      	nop			; (mov r8, r8)\n"
+        "  1a:	46c0      	nop			; (mov r8, r8)\n"
+        "  1c:	2603      	movs	r6, #3\n"
+        "  1e:	43b7      	bics	r7, r6\n"
+        "  20:	683f      	ldr	r7, [r7, #0]\n"
+        "  22:	463e      	mov	r6, r7\n"
+        "  24:	253f      	movs	r5, #63	; 0x3f\n"
+        "  26:	402e      	ands	r6, r5\n"
+        "  28:	2e08      	cmp	r6, #8\n"
+        "  2a:	d009      	beq.n	0x40\n"
+        "  2c:	263f      	movs	r6, #63	; 0x3f\n"
+        "  2e:	4037      	ands	r7, r6\n"
+        "  30:	2f18      	cmp	r7, #24\n"
+        "  32:	d005      	beq.n	0x40\n"
+        "  34:	e064      	b.n	0x100\n"
+        "  36:	46c0      	nop			; (mov r8, r8)\n"
+        "  38:	46c0      	nop			; (mov r8, r8)\n"
+        "  3a:	46c0      	nop			; (mov r8, r8)\n"
+        "  3c:	46c0      	nop			; (mov r8, r8)\n"
+        "  3e:	46c0      	nop			; (mov r8, r8)"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+is_boolean_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    FailLabel = 1,
+    {St1, Val} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+    St2 = ?BACKEND:if_block(St1, {Val, '!=', ?TRUE_ATOM}, fun(NotTrueSt) ->
+        ?BACKEND:if_block(NotTrueSt, {Val, '!=', ?FALSE_ATOM}, fun(NotBoolSt) ->
+            ?BACKEND:jump_to_label(NotBoolSt, FailLabel)
+        end)
+    end),
+    St3 = ?BACKEND:free_native_registers(St2, [Val]),
+    ?BACKEND:assert_all_native_free(St3),
+    St4 = ?BACKEND:add_label(St3, FailLabel, 16#100),
+    St5 = ?BACKEND:update_branches(St4),
+    Emitted = ?BACKEND:stream(St5),
+    Expected = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	2f4b      	cmp	r7, #75	; 0x4b\n"
+        "   4:	d006      	beq.n	0x14\n"
+        "   6:	2f0b      	cmp	r7, #11\n"
+        "   8:	d004      	beq.n	0x14\n"
+        "   a:	e079      	b.n	0x100\n"
+        "   c:	46c0      	nop			; (mov r8, r8)\n"
+        "   e:	46c0      	nop			; (mov r8, r8)\n"
+        "  10:	46c0      	nop			; (mov r8, r8)\n"
+        "  12:	46c0      	nop			; (mov r8, r8)"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+is_boolean_far_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Label = 1,
+    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    State2 = ?BACKEND:if_block(State1, {Reg, '!=', ?TRUE_ATOM}, fun(BSt0) ->
+        ?BACKEND:if_block(BSt0, {Reg, '!=', ?FALSE_ATOM}, fun(BSt1) ->
+            ?BACKEND:jump_to_label(BSt1, Label)
+        end)
+    end),
+    State3 = ?BACKEND:free_native_registers(State2, [Reg]),
+    ?BACKEND:assert_all_native_free(State3),
+    State4 = ?BACKEND:add_label(State3, Label, 16#1000), % far label: literal + add pc + bx
+    State5 = ?BACKEND:update_branches(State4),
+    Stream = ?BACKEND:stream(State5),
+    Dump = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	2f4b      	cmp	r7, #75	; 0x4b\n"
+        "   4:	d006      	beq.n	0x14\n"
+        "   6:	2f0b      	cmp	r7, #11\n"
+        "   8:	d004      	beq.n	0x14\n"
+        "   a:	4e01      	ldr	r6, [pc, #4]	; (0x10)\n"
+        "   c:	447e      	add	r6, pc\n"
+        "   e:	4730      	bx	r6\n"
+        "  10:	0ff1      	lsrs	r1, r6, #31\n"
+        "  12:	0000      	movs	r0, r0"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+is_boolean_far_unaligned_test() ->
+    % Seed the stream with one 2-byte instruction (bx lr) before creating the state,
+    % so code starts at offset 2: halfword-aligned but not word-aligned
+    BxLr = jit_armv6m_asm:bx(lr),
+    Scratch = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Seeded = jit_stream_binary:append(?BACKEND:stream(Scratch), BxLr),
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, Seeded),
+
+    FailLabel = 1,
+    {St1, Val} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+    St2 = ?BACKEND:if_block(St1, {Val, '!=', ?TRUE_ATOM}, fun(NotTrueSt) ->
+        ?BACKEND:if_block(NotTrueSt, {Val, '!=', ?FALSE_ATOM}, fun(NotBoolSt) ->
+            ?BACKEND:jump_to_label(NotBoolSt, FailLabel)
+        end)
+    end),
+    St3 = ?BACKEND:free_native_registers(St2, [Val]),
+    ?BACKEND:assert_all_native_free(St3),
+    St4 = ?BACKEND:add_label(St3, FailLabel, 16#1000),
+    St5 = ?BACKEND:update_branches(St4),
+    Emitted = ?BACKEND:stream(St5),
+    Expected = <<
+        "   0:	4770      	bx	lr\n"
+        "   2:	6987      	ldr	r7, [r0, #24]\n"
+        "   4:	2f4b      	cmp	r7, #75	@ 0x4b\n"
+        "   6:	d007      	beq.n	0x18\n"
+        "   8:	2f0b      	cmp	r7, #11\n"
+        "   a:	d005      	beq.n	0x18\n"
+        "   c:	4e01      	ldr	r6, [pc, #4]	@ (0x14)\n"
+        "   e:	447e      	add	r6, pc\n"
+        "  10:	4730      	bx	r6\n"
+        "  12:	46c0      	nop			@ (mov r8, r8)\n"
+        "  14:	0fef      	lsrs	r7, r5, #31\n"
+        "  16:	0000      	movs	r0, r0"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+is_boolean_far_known_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    FailLabel = 1,
+    St1 = ?BACKEND:add_label(St0, FailLabel, 16#1000),
+    {St2, Val} = ?BACKEND:move_to_native_register(St1, {x_reg, 0}),
+    St3 = ?BACKEND:if_block(St2, {Val, '!=', ?TRUE_ATOM}, fun(NotTrueSt) ->
+        ?BACKEND:if_block(NotTrueSt, {Val, '!=', ?FALSE_ATOM}, fun(NotBoolSt) ->
+            ?BACKEND:jump_to_label(NotBoolSt, FailLabel)
+        end)
+    end),
+    St4 = ?BACKEND:free_native_registers(St3, [Val]),
+    ?BACKEND:assert_all_native_free(St4),
+    St5 = ?BACKEND:update_branches(St4),
+    Emitted = ?BACKEND:stream(St5),
+    Expected = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	2f4b      	cmp	r7, #75	; 0x4b\n"
+        "   4:	d006      	beq.n	0x14\n"
+        "   6:	2f0b      	cmp	r7, #11\n"
+        "   8:	d004      	beq.n	0x14\n"
+        "   a:	4e01      	ldr	r6, [pc, #4]	; (0x10)\n"
+        "   c:	447e      	add	r6, pc\n"
+        "   e:	4730      	bx	r6\n"
+        "  10:	0ff1      	lsrs	r1, r6, #31\n"
+        "  12:	0000      	movs	r0, r0"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+is_boolean_far_known_unaligned_test() ->
+    % Seed the stream with one 2-byte instruction (bx lr) before creating the state,
+    % so code starts at offset 2: halfword-aligned but not word-aligned
+    BxLr = jit_armv6m_asm:bx(lr),
+    Scratch = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Seeded = jit_stream_binary:append(?BACKEND:stream(Scratch), BxLr),
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, Seeded),
+
+    FailLabel = 1,
+    St1 = ?BACKEND:add_label(St0, FailLabel, 16#1000),
+    {St2, Val} = ?BACKEND:move_to_native_register(St1, {x_reg, 0}),
+    St3 = ?BACKEND:if_block(St2, {Val, '!=', ?TRUE_ATOM}, fun(NotTrueSt) ->
+        ?BACKEND:if_block(NotTrueSt, {Val, '!=', ?FALSE_ATOM}, fun(NotBoolSt) ->
+            ?BACKEND:jump_to_label(NotBoolSt, FailLabel)
+        end)
+    end),
+    St4 = ?BACKEND:free_native_registers(St3, [Val]),
+    ?BACKEND:assert_all_native_free(St4),
+    St5 = ?BACKEND:update_branches(St4),
+    Emitted = ?BACKEND:stream(St5),
+    Expected = <<
+        "   0:	4770      	bx	lr\n"
+        "   2:	6987      	ldr	r7, [r0, #24]\n"
+        "   4:	2f4b      	cmp	r7, #75	; 0x4b\n"
+        "   6:	d007      	beq.n	0x18\n"
+        "   8:	2f0b      	cmp	r7, #11\n"
+        "   a:	d005      	beq.n	0x18\n"
+        "   c:	4e01      	ldr	r6, [pc, #4]	; (0x14)\n"
+        "   e:	447e      	add	r6, pc\n"
+        "  10:	4730      	bx	r6\n"
+        "  12:	46c0      	nop			; (mov r8, r8)\n"
+        "  14:	0fef      	lsrs	r7, r5, #31\n"
+        "  16:	0000      	movs	r0, r0"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% OP_WAIT_TIMEOUT pattern, built on set_continuation_to_offset and continuation_entry_point
+wait_timeout_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    TimeoutLabel = 42,
+    {St1, ContRef} = ?BACKEND:set_continuation_to_offset(St0),
+    {St2, MsReg} = ?BACKEND:move_to_native_register(St1, 5000),
+    St3 = ?BACKEND:call_primitive_last(St2, ?PRIM_WAIT_TIMEOUT, [
+        ctx, jit_state, {free, MsReg}, TimeoutLabel
+    ]),
+    St4 = ?BACKEND:add_label(St3, ContRef),
+    St5 = ?BACKEND:continuation_entry_point(St4),
+    {St6, SignalReg} = ?BACKEND:call_primitive(St5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+        ctx, jit_state
+    ]),
+    St7 = ?BACKEND:return_if_not_equal_to_ctx(St6, {free, SignalReg}),
+    % The literal 2 below is ?WAITING_TIMEOUT_EXPIRED
+    {St8, FlagsReg} = ?BACKEND:call_primitive(St7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
+    St9 = ?BACKEND:if_block(St8, {{free, FlagsReg}, '==', 0}, fun(Blk) ->
+        ?BACKEND:call_primitive_last(Blk, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
+            ctx, jit_state, TimeoutLabel
+        ])
+    end),
+    St10 = ?BACKEND:update_branches(St9),
+
+    Emitted = ?BACKEND:stream(St10),
+    Expected = <<
+        "   0:	a706      	add	r7, pc, #24	; (adr r7, 0x1c)\n"
+        "   2:	3701      	adds	r7, #1\n"
+        "   4:	9e00      	ldr	r6, [sp, #0]\n"
+        "   6:	6077      	str	r7, [r6, #4]\n"
+        "   8:	4f03      	ldr	r7, [pc, #12]	; (0x18)\n"
+        "   a:	6f96      	ldr	r6, [r2, #120]	; 0x78\n"
+        "   c:	463a      	mov	r2, r7\n"
+        "   e:	232a      	movs	r3, #42	; 0x2a\n"
+        "  10:	9f05      	ldr	r7, [sp, #20]\n"
+        "  12:	9605      	str	r6, [sp, #20]\n"
+        "  14:	46be      	mov	lr, r7\n"
+        "  16:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  18:	1388      	asrs	r0, r1, #14\n"
+        "  1a:	0000      	movs	r0, r0\n"
+        "  1c:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  1e:	6d57      	ldr	r7, [r2, #84]	; 0x54\n"
+        "  20:	b405      	push	{r0, r2}\n"
+        "  22:	9902      	ldr	r1, [sp, #8]\n"
+        "  24:	47b8      	blx	r7\n"
+        "  26:	4607      	mov	r7, r0\n"
+        "  28:	bc05      	pop	{r0, r2}\n"
+        "  2a:	4287      	cmp	r7, r0\n"
+        "  2c:	d001      	beq.n	0x32\n"
+        "  2e:	4638      	mov	r0, r7\n"
+        "  30:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  32:	2784      	movs	r7, #132	; 0x84\n"
+        "  34:	59d7      	ldr	r7, [r2, r7]\n"
+        "  36:	b405      	push	{r0, r2}\n"
+        "  38:	2102      	movs	r1, #2\n"
+        "  3a:	47b8      	blx	r7\n"
+        "  3c:	4607      	mov	r7, r0\n"
+        "  3e:	bc05      	pop	{r0, r2}\n"
+        "  40:	2f00      	cmp	r7, #0\n"
+        "  42:	d105      	bne.n	0x50\n"
+        "  44:	6fd7      	ldr	r7, [r2, #124]	; 0x7c\n"
+        "  46:	222a      	movs	r2, #42	; 0x2a\n"
+        "  48:	9e05      	ldr	r6, [sp, #20]\n"
+        "  4a:	9705      	str	r7, [sp, #20]\n"
+        "  4c:	46b6      	mov	lr, r6\n"
+        "  4e:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% OP_WAIT pattern, built on set_continuation_to_label
+wait_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    St1 = ?BACKEND:jump_table(St0, 5),
+    St2 = ?BACKEND:add_label(St1, 1),
+    ResumeLabel = 2,
+    St3 = ?BACKEND:set_continuation_to_label(St2, ResumeLabel),
+    St4 = ?BACKEND:call_primitive_last(St3, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+
+    Emitted = ?BACKEND:stream(St4),
+    Expected = <<
+        "   0:	4b01      	ldr	r3, [pc, #4]	; (0x8)\n"
+        "   2:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "   4:	449f      	add	pc, r3\n"
+        "   6:	46c0      	nop			; (mov r8, r8)\n"
+        "   8:	0000      	movs	r0, r0\n"
+        "   a:	0000      	movs	r0, r0\n"
+        "   c:	4b01      	ldr	r3, [pc, #4]	; (0x14)\n"
+        "   e:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  10:	449f      	add	pc, r3\n"
+        "  12:	46c0      	nop			; (mov r8, r8)\n"
+        "  14:	0000      	movs	r0, r0\n"
+        "  16:	0000      	movs	r0, r0\n"
+        "  18:	4b01      	ldr	r3, [pc, #4]	; (0x20)\n"
+        "  1a:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  1c:	449f      	add	pc, r3\n"
+        "  1e:	46c0      	nop			; (mov r8, r8)\n"
+        "  20:	0000      	movs	r0, r0\n"
+        "  22:	0000      	movs	r0, r0\n"
+        "  24:	4b01      	ldr	r3, [pc, #4]	; (0x2c)\n"
+        "  26:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  28:	449f      	add	pc, r3\n"
+        "  2a:	46c0      	nop			; (mov r8, r8)\n"
+        "  2c:	0000      	movs	r0, r0\n"
+        "  2e:	0000      	movs	r0, r0\n"
+        "  30:	4b01      	ldr	r3, [pc, #4]	; (0x38)\n"
+        "  32:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  34:	449f      	add	pc, r3\n"
+        "  36:	46c0      	nop			; (mov r8, r8)\n"
+        "  38:	0000      	movs	r0, r0\n"
+        "  3a:	0000      	movs	r0, r0\n"
+        "  3c:	4b01      	ldr	r3, [pc, #4]	; (0x44)\n"
+        "  3e:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  40:	449f      	add	pc, r3\n"
+        "  42:	46c0      	nop			; (mov r8, r8)\n"
+        "  44:	0000      	movs	r0, r0\n"
+        "  46:	0000      	movs	r0, r0\n"
+        "  48:	a700      	add	r7, pc, #0	; (adr r7, 0x4c)\n"
+        "  4a:	2633      	movs	r6, #51	; 0x33\n"
+        "  4c:	4276      	negs	r6, r6\n"
+        "  4e:	19f6      	adds	r6, r6, r7\n"
+        "  50:	9f00      	ldr	r7, [sp, #0]\n"
+        "  52:	607e      	str	r6, [r7, #4]\n"
+        "  54:	6f57      	ldr	r7, [r2, #116]	; 0x74\n"
+        "  56:	9e05      	ldr	r6, [sp, #20]\n"
+        "  58:	9705      	str	r7, [sp, #20]\n"
+        "  5a:	46b6      	mov	lr, r6\n"
+        "  5c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% Exercise return_labels_and_lines/2 on an empty stream
+return_labels_and_lines_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    % Register two sample labels, deliberately added out of numeric order
+    St1 = ?BACKEND:add_label(St0, 2, 32),
+    St2 = ?BACKEND:add_label(St1, 1, 16),
+
+    % List of {Line, Offset} pairs
+    LineTable = [{10, 16}, {20, 32}],
+
+    St3 = ?BACKEND:return_labels_and_lines(St2, LineTable),
+    Emitted = ?BACKEND:stream(St3),
+
+    % Should have generated adr + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table
+    % adr = 2 bytes, pop = 2 bytes, each table = 2-byte count + 2 entries * 6 bytes = 14 bytes
+    % Actual total: 32 bytes (the check below is only a loose lower bound)
+    ?assert(byte_size(Emitted) >= 30),
+
+    % Expected: adr r0, <offset> + pop {r1,r4,r5,r6,r7,pc} + labels table + lines table
+    % The data tables start at offset 4, so adr should be adr r0, 4 not adr r0, 8
+    Expected = <<
+        "   0:	a000      	add	r0, pc, #0	; (adr r0, 0x4)\n"
+        "   2:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "   4:	0200      	lsls	r0, r0, #8\n"
+        "   6:	0100      	lsls	r0, r0, #4\n"
+        "   8:	0000      	movs	r0, r0\n"
+        "   a:	1000      	asrs	r0, r0, #32\n"
+        "   c:	0200      	lsls	r0, r0, #8\n"
+        "   e:	0000      	movs	r0, r0\n"
+        "  10:	2000      	movs	r0, #0\n"
+        "  12:	0200      	lsls	r0, r0, #8\n"
+        "  14:	0a00      	lsrs	r0, r0, #8\n"
+        "  16:	0000      	movs	r0, r0\n"
+        "  18:	1000      	asrs	r0, r0, #32\n"
+        "  1a:	1400      	asrs	r0, r0, #16\n"
+        "  1c:	0000      	movs	r0, r0\n"
+        "  1e:	2000      	movs	r0, #0"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% Exercise return_labels_and_lines/2 when the stream is not word-aligned
+return_labels_and_lines_unaligned_test() ->
+    % Seed the stream with one 2-byte instruction (bx lr) before creating the state,
+    % so code starts at offset 2: halfword-aligned but not word-aligned
+    BxLr = jit_armv6m_asm:bx(lr),
+    Scratch = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Seeded = jit_stream_binary:append(?BACKEND:stream(Scratch), BxLr),
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, Seeded),
+
+    % Register two sample labels, deliberately added out of numeric order
+    St1 = ?BACKEND:add_label(St0, 2, 32),
+    St2 = ?BACKEND:add_label(St1, 1, 16),
+
+    % List of {Line, Offset} pairs
+    LineTable = [{10, 16}, {20, 32}],
+
+    St3 = ?BACKEND:return_labels_and_lines(St2, LineTable),
+    Emitted = ?BACKEND:stream(St3),
+
+    Expected = <<
+        "   0:	4770      	bx	lr\n"
+        "2:	a001      	add	r0, pc, #4	; (adr r0, 0x8)\n"
+        "4:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "6:	0000      	movs	r0, r0\n"
+        "8:	0200      	lsls	r0, r0, #8\n"
+        "a:	0100      	lsls	r0, r0, #4\n"
+        "c:	0000      	movs	r0, r0\n"
+        "e:	1000      	asrs	r0, r0, #32\n"
+        "10:	0200      	lsls	r0, r0, #8\n"
+        "12:	0000      	movs	r0, r0\n"
+        "14:	2000      	movs	r0, #0\n"
+        "16:	0200      	lsls	r0, r0, #8\n"
+        "18:	0a00      	lsrs	r0, r0, #8\n"
+        "1a:	0000      	movs	r0, r0\n"
+        "1c:	1000      	asrs	r0, r0, #32\n"
+        "1e:	1400      	asrs	r0, r0, #16\n"
+        "20:	0000      	movs	r0, r0\n"
+        "22:	2000      	movs	r0, #0"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% Test call_func_ptr with a {free, {x_reg, X}} argument
+gc_bif2_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, BifPtr} = ?BACKEND:call_primitive(St0, ?PRIM_GET_IMPORTED_BIF, [jit_state, 42]),
+    {St2, _Result} = ?BACKEND:call_func_ptr(St1, {free, BifPtr}, [
+        ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}
+    ]),
+
+    Emitted = ?BACKEND:stream(St2),
+    Expected = <<
+        "   0:	6a17      	ldr	r7, [r2, #32]\n"
+        "   2:	b405      	push	{r0, r2}\n"
+        "   4:	9802      	ldr	r0, [sp, #8]\n"
+        "   6:	212a      	movs	r1, #42	; 0x2a\n"
+        "   8:	47b8      	blx	r7\n"
+        "   a:	4607      	mov	r7, r0\n"
+        "   c:	bc05      	pop	{r0, r2}\n"
+        "   e:	b405      	push	{r0, r2}\n"
+        "  10:	b082      	sub	sp, #8\n"
+        "  12:	6986      	ldr	r6, [r0, #24]\n"
+        "  14:	9600      	str	r6, [sp, #0]\n"
+        "  16:	2100      	movs	r1, #0\n"
+        "  18:	2203      	movs	r2, #3\n"
+        "  1a:	6946      	ldr	r6, [r0, #20]\n"
+        "  1c:	6833      	ldr	r3, [r6, #0]\n"
+        "  1e:	47b8      	blx	r7\n"
+        "  20:	4607      	mov	r7, r0\n"
+        "  22:	b002      	add	sp, #8\n"
+        "  24:	bc05      	pop	{r0, r2}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% A call_primitive argument that already lives in r1 ({free, r1})
+memory_ensure_free_with_roots_test() ->
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, _Result} = ?BACKEND:call_primitive(St0, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, [
+        ctx, jit_state, {free, r1}, 4, 1
+    ]),
+
+    Emitted = ?BACKEND:stream(St1),
+    Expected = <<
+        "   0:	27b0      	movs	r7, #176	; 0xb0\n"
+        "   2:	59d7      	ldr	r7, [r2, r7]\n"
+        "   4:	b405      	push	{r0, r2}\n"
+        "   6:	b082      	sub	sp, #8\n"
+        "   8:	2601      	movs	r6, #1\n"
+        "   a:	9600      	str	r6, [sp, #0]\n"
+        "   c:	460e      	mov	r6, r1\n"
+        "   e:	9904      	ldr	r1, [sp, #16]\n"
+        "  10:	4632      	mov	r2, r6\n"
+        "  12:	2304      	movs	r3, #4\n"
+        "  14:	47b8      	blx	r7\n"
+        "  16:	4607      	mov	r7, r0\n"
+        "  18:	b002      	add	sp, #8\n"
+        "  1a:	bc05      	pop	{r0, r2}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+call_ext_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Scheduled = ?BACKEND:decrement_reductions_and_maybe_schedule_next(Fresh),
+    %% NOTE(review): primitive index 4 is presumably ?PRIM_CALL_EXT — confirm and use the macro.
+    Called = ?BACKEND:call_primitive_with_cp(Scheduled, 4, [ctx, jit_state, 2, 5, -1]),
+    ?BACKEND:assert_all_native_free(Called),
+    Expected = <<
+        "   0:	9e00      	ldr	r6, [sp, #0]\n"
+        "   2:	68b7      	ldr	r7, [r6, #8]\n"
+        "   4:	3f01      	subs	r7, #1\n"
+        "   6:	60b7      	str	r7, [r6, #8]\n"
+        "   8:	d109      	bne.n	0x1e\n"
+        "   a:	a704      	add	r7, pc, #16	; (adr r7, 0x1c)\n"
+        "   c:	3701      	adds	r7, #1\n"
+        "   e:	6077      	str	r7, [r6, #4]\n"
+        "  10:	6897      	ldr	r7, [r2, #8]\n"
+        "  12:	9e05      	ldr	r6, [sp, #20]\n"
+        "  14:	9705      	str	r7, [sp, #20]\n"
+        "  16:	46b6      	mov	lr, r6\n"
+        "  18:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  1a:	46c0      	nop			; (mov r8, r8)\n"
+        "  1c:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  1e:	9e00      	ldr	r6, [sp, #0]\n"
+        "  20:	6837      	ldr	r7, [r6, #0]\n"
+        "  22:	683f      	ldr	r7, [r7, #0]\n"
+        "  24:	063f      	lsls	r7, r7, #24\n"
+        "  26:	4e07      	ldr	r6, [pc, #28]	; (0x44)\n"
+        "  28:	4337      	orrs	r7, r6\n"
+        "  2a:	65c7      	str	r7, [r0, #92]	; 0x5c\n"
+        "  2c:	6917      	ldr	r7, [r2, #16]\n"
+        "  2e:	b082      	sub	sp, #8\n"
+        "  30:	2601      	movs	r6, #1\n"
+        "  32:	4276      	negs	r6, r6\n"
+        "  34:	9600      	str	r6, [sp, #0]\n"
+        "  36:	9902      	ldr	r1, [sp, #8]\n"
+        "  38:	2202      	movs	r2, #2\n"
+        "  3a:	2305      	movs	r3, #5\n"
+        "  3c:	47b8      	blx	r7\n"
+        "  3e:	b002      	add	sp, #8\n"
+        "  40:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  42:	0000      	movs	r0, r0\n"
+        "  44:	0120      	lsls	r0, r4, #4\n"
+        "  46:	0000      	movs	r0, r0\n"
+        "  48:	b5f2      	push	{r1, r4, r5, r6, r7, lr}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), ?BACKEND:stream(Called)).
+
+call_fun_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Scheduled = ?BACKEND:decrement_reductions_and_maybe_schedule_next(Fresh),
+    %% Load the candidate fun term from x[0] and keep a scratch copy for the tag checks.
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Scheduled, {x_reg, 0}),
+    {Copied, RegCopy} = ?BACKEND:copy_to_native_register(Loaded, Reg),
+    %% Both guards below raise the same badfun error tuple, so they share one block body.
+    RaiseBadfun = fun(BSt0) ->
+        ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
+            ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
+        ])
+    end,
+    %% Guard 1: raise unless the primary tag bits equal ?TERM_PRIMARY_BOXED.
+    NotBoxed = {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED},
+    Checked1 = ?BACKEND:if_block(Copied, NotBoxed, RaiseBadfun),
+    %% Clear the primary tag bits, then load word 0 through the resulting pointer.
+    Untagged = ?BACKEND:and_(Checked1, RegCopy, ?TERM_PRIMARY_CLEAR_MASK),
+    Header = ?BACKEND:move_array_element(Untagged, RegCopy, 0, RegCopy),
+    %% Guard 2: raise unless the boxed tag bits of that word equal ?TERM_BOXED_FUN.
+    NotFun = {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN},
+    Checked2 = ?BACKEND:if_block(Header, NotFun, RaiseBadfun),
+    %% The scratch copy is released before the call; Reg is passed on as an argument.
+    Released = ?BACKEND:free_native_registers(Checked2, [RegCopy]),
+    ArgsCount = 0,
+    CallArgs = [ctx, jit_state, Reg, ArgsCount],
+    Called = ?BACKEND:call_primitive_with_cp(Released, ?PRIM_CALL_FUN, CallArgs),
+    ?BACKEND:assert_all_native_free(Called),
+
+    Emitted = ?BACKEND:stream(Called),
+    Expected = <<
+        "   0:	9e00      	ldr	r6, [sp, #0]\n"
+        "   2:	68b7      	ldr	r7, [r6, #8]\n"
+        "   4:	3f01      	subs	r7, #1\n"
+        "   6:	60b7      	str	r7, [r6, #8]\n"
+        "   8:	d109      	bne.n	0x1e\n"
+        "   a:	a704      	add	r7, pc, #16	; (adr r7, 0x1c)\n"
+        "   c:	3701      	adds	r7, #1\n"
+        "   e:	6077      	str	r7, [r6, #4]\n"
+        "  10:	6897      	ldr	r7, [r2, #8]\n"
+        "  12:	9e05      	ldr	r6, [sp, #20]\n"
+        "  14:	9705      	str	r7, [sp, #20]\n"
+        "  16:	46b6      	mov	lr, r6\n"
+        "  18:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  1a:	46c0      	nop			; (mov r8, r8)\n"
+        "  1c:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+        "  1e:	6987      	ldr	r7, [r0, #24]\n"
+        "  20:	463e      	mov	r6, r7\n"
+        "  22:	4635      	mov	r5, r6\n"
+        "  24:	2403      	movs	r4, #3\n"
+        "  26:	4025      	ands	r5, r4\n"
+        "  28:	2d02      	cmp	r5, #2\n"
+        "  2a:	d00b      	beq.n	0x44\n"
+        "  2c:	6cd7      	ldr	r7, [r2, #76]	; 0x4c\n"
+        "  2e:	b082      	sub	sp, #8\n"
+        "  30:	9600      	str	r6, [sp, #0]\n"
+        "  32:	9902      	ldr	r1, [sp, #8]\n"
+        "  34:	222e      	movs	r2, #46	; 0x2e\n"
+        "  36:	4b02      	ldr	r3, [pc, #8]	; (0x40)\n"
+        "  38:	47b8      	blx	r7\n"
+        "  3a:	b002      	add	sp, #8\n"
+        "  3c:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  3e:	0000      	movs	r0, r0\n"
+        "  40:	018b      	lsls	r3, r1, #6\n"
+        "  42:	0000      	movs	r0, r0\n"
+        "  44:	2503      	movs	r5, #3\n"
+        "  46:	43ae      	bics	r6, r5\n"
+        "  48:	6836      	ldr	r6, [r6, #0]\n"
+        "  4a:	4635      	mov	r5, r6\n"
+        "  4c:	243f      	movs	r4, #63	; 0x3f\n"
+        "  4e:	4025      	ands	r5, r4\n"
+        "  50:	2d14      	cmp	r5, #20\n"
+        "  52:	d00b      	beq.n	0x6c\n"
+        "  54:	6cd7      	ldr	r7, [r2, #76]	; 0x4c\n"
+        "  56:	b082      	sub	sp, #8\n"
+        "  58:	9600      	str	r6, [sp, #0]\n"
+        "  5a:	9902      	ldr	r1, [sp, #8]\n"
+        "  5c:	2256      	movs	r2, #86	; 0x56\n"
+        "  5e:	4b02      	ldr	r3, [pc, #8]	; (0x68)\n"
+        "  60:	47b8      	blx	r7\n"
+        "  62:	b002      	add	sp, #8\n"
+        "  64:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  66:	0000      	movs	r0, r0\n"
+        "  68:	018b      	lsls	r3, r1, #6\n"
+        "  6a:	0000      	movs	r0, r0\n"
+        "  6c:	9d00      	ldr	r5, [sp, #0]\n"
+        "  6e:	682e      	ldr	r6, [r5, #0]\n"
+        "  70:	6836      	ldr	r6, [r6, #0]\n"
+        "  72:	0636      	lsls	r6, r6, #24\n"
+        "  74:	4d05      	ldr	r5, [pc, #20]	; (0x8c)\n"
+        "  76:	432e      	orrs	r6, r5\n"
+        "  78:	65c6      	str	r6, [r0, #92]	; 0x5c\n"
+        "  7a:	2680      	movs	r6, #128	; 0x80\n"
+        "  7c:	5996      	ldr	r6, [r2, r6]\n"
+        "  7e:	463a      	mov	r2, r7\n"
+        "  80:	2300      	movs	r3, #0\n"
+        "  82:	9f05      	ldr	r7, [sp, #20]\n"
+        "  84:	9605      	str	r6, [sp, #20]\n"
+        "  86:	46be      	mov	lr, r7\n"
+        "  88:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        "  8a:	0000      	movs	r0, r0\n"
+        "  8c:	0240      	lsls	r0, r0, #9\n"
+        "  8e:	0000      	movs	r0, r0\n"
+        "  90:	b5f2      	push	{r1, r4, r5, r6, r7, lr}"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+move_to_vm_register_test0(State, Source, Dest, Dump) ->
+    %% Emit the move, then a branch to 0x100; callers' expected dumps include that branch.
+    Moved = ?BACKEND:move_to_vm_register(State, Source, Dest),
+    Emitted = ?BACKEND:stream(?BACKEND:jump_to_offset(Moved, 16#100)),
+    ?assertEqual(dump_to_bin(Dump), Emitted).
+
+move_to_vm_register_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {x_reg, 0}, <<
+                        "   0:	2700      	movs	r7, #0\n"
+                        "   2:	6187      	str	r7, [r0, #24]\n"
+                        "   4:	e07c      	b.n	0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {x_reg, extra}, <<
+                        "   0:	2700      	movs	r7, #0\n"
+                        "   2:	6587      	str	r7, [r0, #88]	; 0x58\n"
+                        "   4:	e07c      	b.n	0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {ptr, r6}, <<
+                        "   0:	2700      	movs	r7, #0\n"
+                        "   2:	6037      	str	r7, [r6, #0]\n"
+                        "   4:	e07c      	b.n	0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {y_reg, 2}, <<
+                        "   0:	2600      	movs	r6, #0\n"
+                        "   2:	6947      	ldr	r7, [r0, #20]\n"
+                        "   4:	60be      	str	r6, [r7, #8]\n"
+                        "   6:	e07b      	b.n	0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {y_reg, 20}, <<
+                        "   0:	2600      	movs	r6, #0\n"
+                        "   2:	6947      	ldr	r7, [r0, #20]\n"
+                        "   4:	653e      	str	r6, [r7, #80]	; 0x50\n"
+                        "   6:	e07b      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: Non-zero small immediate (42) to x_reg and y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {x_reg, 0}, <<
+                        "   0:	272a      	movs	r7, #42	; 0x2a\n"
+                        "   2:	6187      	str	r7, [r0, #24]\n"
+                        "   4:	e07c      	b.n	0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {x_reg, extra}, <<
+                        "   0:	272a      	movs	r7, #42	; 0x2a\n"
+                        "   2:	6587      	str	r7, [r0, #88]	; 0x58\n"
+                        "   4:	e07c      	b.n	0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 2}, <<
+                        "   0:	262a      	movs	r6, #42	; 0x2a\n"
+                        "   2:	6947      	ldr	r7, [r0, #20]\n"
+                        "   4:	60be      	str	r6, [r7, #8]\n"
+                        "   6:	e07b      	b.n	0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 20}, <<
+                        "   0:	262a      	movs	r6, #42	; 0x2a\n"
+                        "   2:	6947      	ldr	r7, [r0, #20]\n"
+                        "   4:	653e      	str	r6, [r7, #80]	; 0x50\n"
+                        "   6:	e07b      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: Immediate to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 99, {ptr, r3}, <<
+                        "   0:	2763      	movs	r7, #99	; 0x63\n"
+                        "   2:	601f      	str	r7, [r3, #0]\n"
+                        "   4:	e07c      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: x_reg to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, <<
+                        "   0:	69c7      	ldr	r7, [r0, #28]\n"
+                        "   2:	6207      	str	r7, [r0, #32]\n"
+                        "   4:	e07c      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: x_reg to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, r1}, <<
+                        "   0:	69c7      	ldr	r7, [r0, #28]\n"
+                        "   2:	600f      	str	r7, [r1, #0]\n"
+                        "   4:	e07c      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: ptr to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {ptr, r4}, {x_reg, 3}, <<
+                        "   0:	6827      	ldr	r7, [r4, #0]\n"
+                        "   2:	6247      	str	r7, [r0, #36]	; 0x24\n"
+                        "   4:	e07c      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: x_reg to y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	6946      	ldr	r6, [r0, #20]\n"
+                        "   4:	6077      	str	r7, [r6, #4]\n"
+                        "   6:	e07b      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, <<
+                        "   0:	6946      	ldr	r6, [r0, #20]\n"
+                        "   2:	6837      	ldr	r7, [r6, #0]\n"
+                        "   4:	6247      	str	r7, [r0, #36]	; 0x24\n"
+                        "   6:	e07b      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg (non-zero y index; there is no y_reg -> y_reg case here)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, <<
+                        "   0:	6946      	ldr	r6, [r0, #20]\n"
+                        "   2:	6877      	ldr	r7, [r6, #4]\n"
+                        "   4:	6247      	str	r7, [r0, #36]	; 0x24\n"
+                        "   6:	e07b      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: Native register to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, r5, {x_reg, 0}, <<
+                        "   0:	6185      	str	r5, [r0, #24]\n"
+                        "   2:	e07d      	b.n	0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, r6, {x_reg, extra}, <<
+                        "   0:	6586      	str	r6, [r0, #88]	; 0x58\n"
+                        "   2:	e07d      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: Native register to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, r4, {ptr, r3}, <<
+                        "   0:	601c      	str	r4, [r3, #0]\n"
+                        "   2:	e07d      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: Native register to y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, r1, {y_reg, 0}, <<
+                        "   0:	6947      	ldr	r7, [r0, #20]\n"
+                        "   2:	6039      	str	r1, [r7, #0]\n"
+                        "   4:	e07c      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: Large immediate to x_reg (32-bit literal pool, aligned case)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, <<
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	6187      	str	r7, [r0, #24]\n"
+                        "   4:	e07c      	b.n	0x100\n"
+                        "   6:	0000      	movs	r0, r0\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
+                    >>)
+                end),
+                %% Test: Large immediate to x_reg (32-bit literal pool, unaligned case)
+                ?_test(begin
+                    %% First do a 2-byte instruction to create unaligned start
+                    State1 = ?BACKEND:move_to_vm_register(State0, r1, {ptr, r3}),
+                    %% Then do large immediate which should handle unaligned case
+                    State2 = ?BACKEND:move_to_vm_register(State1, 16#12345678, {x_reg, 0}),
+                    State3 = ?BACKEND:jump_to_offset(State2, 16#100),
+                    Stream = ?BACKEND:stream(State3),
+                    Expected = dump_to_bin(<<
+                        "   0:	6019      	str	r1, [r3, #0]\n"
+                        "   2:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   4:	6187      	str	r7, [r0, #24]\n"
+                        "   6:	e07b      	b.n	0x100\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
+                    >>),
+                    ?assertEqual(Expected, Stream)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, <<
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	6587      	str	r7, [r0, #88]	; 0x58\n"
+                        "   4:	e07c      	b.n	0x100\n"
+                        "   6:	0000      	movs	r0, r0\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, <<
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	6946      	ldr	r6, [r0, #20]\n"
+                        "   4:	60b7      	str	r7, [r6, #8]\n"
+                        "   6:	e07b      	b.n	0x100\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, <<
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	6946      	ldr	r6, [r0, #20]\n"
+                        "   4:	6537      	str	r7, [r6, #80]	; 0x50\n"
+                        "   6:	e07b      	b.n	0x100\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
+                    >>)
+                end),
+                %% Test: Large immediate to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {ptr, r3}, <<
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	601f      	str	r7, [r3, #0]\n"
+                        "   4:	e07c      	b.n	0x100\n"
+                        "   6:	0000      	movs	r0, r0\n"
+                        "   8:	5678      	ldrsb	r0, [r7, r1]\n"
+                        "   a:	1234      	asrs	r4, r6, #8"
+                    >>)
+                end),
+                %% Test: x_reg to y_reg (high index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, <<
+                        "   0:	6d47      	ldr	r7, [r0, #84]	; 0x54\n"
+                        "   2:	6946      	ldr	r6, [r0, #20]\n"
+                        "   4:	67f7      	str	r7, [r6, #124]	; 0x7c\n"
+                        "   6:	e07b      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg (high index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, <<
+                        "   0:	6946      	ldr	r6, [r0, #20]\n"
+                        "   2:	6ff7      	ldr	r7, [r6, #124]	; 0x7c\n"
+                        "   4:	6547      	str	r7, [r0, #84]	; 0x54\n"
+                        "   6:	e07b      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: Large y_reg index (32) that exceeds str immediate offset limit
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 32}, <<
+                        "   0:	262a      	movs	r6, #42	; 0x2a\n"
+                        "   2:	6947      	ldr	r7, [r0, #20]\n"
+                        "   4:	2580      	movs	r5, #128	; 0x80\n"
+                        "   6:	443d      	add	r5, r7\n"
+                        "   8:	602e      	str	r6, [r5, #0]\n"
+                        "   a:	e079      	b.n	0x100"
+                    >>)
+                end),
+                %% Test: Negative immediate to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -1, {x_reg, 0}, <<
+                        "   0:	2701      	movs	r7, #1\n"
+                        "   2:	427f      	negs	r7, r7\n"
+                        "   4:	6187      	str	r7, [r0, #24]\n"
+                        "   6:	e07b      	b.n	0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+move_array_element_test0(State, Reg, Index, Dest, Dump) ->
+    %% Compare the code emitted for a single move_array_element call against Dump.
+    Emitted = ?BACKEND:stream(?BACKEND:move_array_element(State, Reg, Index, Dest)),
+    ?assertEqual(dump_to_bin(Dump), Emitted).
+
+move_array_element_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_array_element: reg[x] to x_reg
+                ?_test(begin
+                    move_array_element_test0(State0, r3, 2, {x_reg, 0}, <<
+                        "   0:	689f      	ldr	r7, [r3, #8]\n"
+                        "   2:	6187      	str	r7, [r0, #24]"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to ptr
+                ?_test(begin
+                    move_array_element_test0(State0, r3, 3, {ptr, r5}, <<
+                        "   0:	68df      	ldr	r7, [r3, #12]\n"
+                        "   2:	602f      	str	r7, [r5, #0]"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to y_reg
+                ?_test(begin
+                    move_array_element_test0(State0, r3, 1, {y_reg, 2}, <<
+                        "   0:	685e      	ldr	r6, [r3, #4]\n"
+                        "   2:	6947      	ldr	r7, [r0, #20]\n"
+                        "   4:	60be      	str	r6, [r7, #8]"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to native reg (r5)
+                ?_test(begin
+                    move_array_element_test0(State0, r3, 1, r5, <<
+                        "   0:	685d      	ldr	r5, [r3, #4]"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to y_reg (y index 31)
+                ?_test(begin
+                    move_array_element_test0(State0, r3, 7, {y_reg, 31}, <<
+                        "   0:	69de      	ldr	r6, [r3, #28]\n"
+                        "   2:	6947      	ldr	r7, [r0, #20]\n"
+                        "   4:	67fe      	str	r6, [r7, #124]	; 0x7c"
+                    >>)
+                end),
+                %% move_array_element: reg[x] to x_reg (x index 15)
+                ?_test(begin
+                    move_array_element_test0(State0, r3, 7, {x_reg, 15}, <<
+                        "   0:	69df      	ldr	r7, [r3, #28]\n"
+                        "   2:	6547      	str	r7, [r0, #84]	; 0x54"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to x_reg
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, r3, 4),
+                    move_array_element_test0(State1, r3, {free, Reg}, {x_reg, 2}, <<
+                        "   0:	691f      	ldr	r7, [r3, #16]\n"
+                        "   2:	00bf      	lsls	r7, r7, #2\n"
+                        "   4:	59df      	ldr	r7, [r3, r7]\n"
+                        "   6:	6207      	str	r7, [r0, #32]"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to ptr
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, r3, 4),
+                    move_array_element_test0(State1, r3, {free, Reg}, {ptr, r5}, <<
+                        "   0:	691f      	ldr	r7, [r3, #16]\n"
+                        "   2:	00bf      	lsls	r7, r7, #2\n"
+                        "   4:	59df      	ldr	r7, [r3, r7]\n"
+                        "   6:	602f      	str	r7, [r5, #0]"
+                    >>)
+                end),
+                %% move_array_element: reg_x[reg_y] to y_reg
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, r3, 4),
+                    move_array_element_test0(State1, r3, {free, Reg}, {y_reg, 31}, <<
+                        "   0:	691f      	ldr	r7, [r3, #16]\n"
+                        "   2:	00bf      	lsls	r7, r7, #2\n"
+                        "   4:	59df      	ldr	r7, [r3, r7]\n"
+                        "   6:	6946      	ldr	r6, [r0, #20]\n"
+                        "   8:	67f7      	str	r7, [r6, #124]	; 0x7c"
+                    >>)
+                end),
+                %% move_array_element with integer index and x_reg destination
+                ?_test(begin
+                    {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+                    move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	68be      	ldr	r6, [r7, #8]\n"
+                        "   4:	62c6      	str	r6, [r0, #44]	; 0x2c"
+                    >>)
+                end)
+            ]
+        end}.
+
+get_array_element_test_() ->
+    %% Fixture: the single case below runs against a newly created PIC-variant backend state.
+    NewState = fun() ->
+        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+    end,
+    {setup, NewState, fun(Fresh) ->
+        [
+            %% get_array_element: reg[x] to new native reg (expected to be r7)
+            ?_test(begin
+                {Loaded, Target} = ?BACKEND:get_array_element(Fresh, r4, 4),
+                Emitted = ?BACKEND:stream(Loaded),
+                Expected = <<
+                    "   0:	6927      	ldr	r7, [r4, #16]"
+                >>,
+                ?assertEqual(dump_to_bin(Expected), Emitted),
+                ?assertEqual(r7, Target)
+            end)
+        ]
+    end}.
+
+move_to_array_element_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_to_array_element/4: x_reg to reg[x]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, 2),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	609f      	str	r7, [r3, #8]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: x_reg to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, r4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	4626      	mov	r6, r4\n"
+                        "   4:	00b6      	lsls	r6, r6, #2\n"
+                        "   6:	519f      	str	r7, [r3, r6]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: ptr to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {ptr, r7}, r3, r4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	683f      	ldr	r7, [r7, #0]\n"
+                        "   2:	4626      	mov	r6, r4\n"
+                        "   4:	00b6      	lsls	r6, r6, #2\n"
+                        "   6:	519f      	str	r7, [r3, r6]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: y_reg to reg[reg]
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, r3, r4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6946      	ldr	r6, [r0, #20]\n"
+                        "   2:	68b7      	ldr	r7, [r6, #8]\n"
+                        "   4:	4626      	mov	r6, r4\n"
+                        "   6:	00b6      	lsls	r6, r6, #2\n"
+                        "   8:	519f      	str	r7, [r3, r6]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% /5: x_reg to reg[2+1]. NOTE(review): stores at #8 (word 2), not #12; confirm
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, r3, 2, 1),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	609f      	str	r7, [r3, #8]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: x_reg to reg[reg+offset] (r3/r4 marked used)
+                ?_test(begin
+                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r3, r4]),
+                    State2 = setelement(7, State1, [r3, r4]),
+                    [r3, r4] = ?BACKEND:used_regs(State2),
+                    State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, r3, r4, 1),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	1c66      	adds	r6, r4, #1\n"
+                        "   4:	00b6      	lsls	r6, r6, #2\n"
+                        "   6:	519f      	str	r7, [r3, r6]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: imm to reg[reg+offset] (r3/r4 marked used)
+                ?_test(begin
+                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [r3, r4]),
+                    State2 = setelement(7, State1, [r3, r4]),
+                    [r3, r4] = ?BACKEND:used_regs(State2),
+                    State3 = ?BACKEND:move_to_array_element(State2, 42, r3, r4, 1),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump = <<
+                        "   0:	272a      	movs	r7, #42	; 0x2a\n"
+                        "   2:	1c66      	adds	r6, r4, #1\n"
+                        "   4:	00b6      	lsls	r6, r6, #2\n"
+                        "   6:	519f      	str	r7, [r3, r6]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+move_to_native_register_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_to_native_register/2: small positive immediate (single movs)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, 42),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(r7, Reg),
+                    Dump = <<
+                        "   0:	272a      	movs	r7, #42	; 0x2a"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: small negative value (movs followed by negs)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -42),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(r7, Reg),
+                    Dump = <<
+                        "   0:	272a      	movs	r7, #42	; 0x2a\n"
+                        "   2:	427f      	negs	r7, r7"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: -255 (boundary case, still movs followed by negs)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -255),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(r7, Reg),
+                    Dump = <<
+                        "   0:	27ff      	movs	r7, #255	; 0xff\n"
+                        "   2:	427f      	negs	r7, r7"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: -256 (boundary case, should use literal pool)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    ?assertEqual(r7, Reg),
+                    Dump = <<
+                        "   0:	4f00      	ldr	r7, [pc, #0]	; (0x4)\n"
+                        "   2:	e07d      	b.n	0x100\n"
+                        "   4:	ff00 ffff 	vmaxnm.f32	<illegal reg q7.5>, q8, <illegal reg q15.5>"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {ptr, reg} is dereferenced in place (same reg returned)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, r6}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(r6, Reg),
+                    Dump = <<
+                        "   0:	6836      	ldr	r6, [r6, #0]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {x_reg, N}
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 3}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(r7, Reg),
+                    Dump = <<
+                        "   0:	6a47      	ldr	r7, [r0, #36]	; 0x24"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {y_reg, N} (two loads, the first one into a temp)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 3}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(r7, Reg),
+                    Dump = <<
+                        "   0:	6946      	ldr	r6, [r0, #20]\n"
+                        "   2:	68f7      	ldr	r7, [r6, #12]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: imm to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, 42, r6),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	262a      	movs	r6, #42	; 0x2a"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: reg to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, r7, r5),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	463d      	mov	r5, r7"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {ptr, reg} to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {ptr, r7}, r4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	683c      	ldr	r4, [r7, #0]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {x_reg, N} to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, r3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6a03      	ldr	r3, [r0, #32]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {y_reg, N} to reg (first load goes through a temp)
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, r1),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:	6947      	ldr	r7, [r0, #20]\n"
+                        "   2:	68b9      	ldr	r1, [r7, #8]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_vm_register/3: {ptr, Reg, 1} to fp_reg, copied as two words (term_to_float)
+                ?_test(begin
+                    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+                    State2 = ?BACKEND:move_to_vm_register(
+                        State1, {free, {ptr, RegA, 1}}, {fp_reg, 3}
+                    ),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        "   0:	6987      	ldr	r7, [r0, #24]\n"
+                        "   2:	6e06      	ldr	r6, [r0, #96]	; 0x60\n"
+                        "   4:	687d      	ldr	r5, [r7, #4]\n"
+                        "   6:	61b5      	str	r5, [r6, #24]\n"
+                        "   8:	68bd      	ldr	r5, [r7, #8]\n"
+                        "   a:	61f5      	str	r5, [r6, #28]"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+add_test0(State0, Reg, Imm, Dump) ->
+    Added = ?BACKEND:add(State0, Reg, Imm),
+    % Jumping far ahead forces emission of the literal pool
+    Flushed = ?BACKEND:jump_to_offset(Added, 16#100),
+    Emitted = ?BACKEND:stream(Flushed),
+    ?assertEqual(dump_to_bin(Dump), Emitted).
+
+add_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(Fresh) ->
+            [
+                ?_test(
+                    add_test0(Fresh, r2, 2, <<
+                        "   0:	3202      	adds	r2, #2\n"
+                        "   2:	e07d      	b.n	0x100"
+                    >>)
+                ),
+                ?_test(
+                    add_test0(Fresh, r2, 256, <<
+                        "   0:	4f01      	ldr	r7, [pc, #4]	; (0x8)\n"
+                        "   2:	19d2      	adds	r2, r2, r7\n"
+                        "   4:	e07c      	b.n	0x100\n"
+                        "   6:	0000      	movs	r0, r0\n"
+                        "   8:	0100      	lsls	r0, r0, #4\n"
+                        "   a:	0000      	movs	r0, r0"
+                    >>)
+                ),
+                ?_test(
+                    add_test0(Fresh, r2, r3, <<
+                        "   0:	18d2      	adds	r2, r2, r3\n"
+                        "   2:	e07d      	b.n	0x100"
+                    >>)
+                )
+            ]
+        end}.
+
+sub_test0(State0, Reg, Imm, Dump) ->
+    Subtracted = ?BACKEND:sub(State0, Reg, Imm),
+    % Jumping far ahead forces emission of the literal pool
+    Flushed = ?BACKEND:jump_to_offset(Subtracted, 16#100),
+    Emitted = ?BACKEND:stream(Flushed),
+    ?assertEqual(dump_to_bin(Dump), Emitted).
+
+sub_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    sub_test0(State0, r2, 2, <<
+                        "   0:	3a02      	subs	r2, #2\n"
+                        "   2:	e07d      	b.n	0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    sub_test0(State0, r2, 256, <<
+                        "   0:	4f01      	ldr	r7, [pc, #4]	@ (0x8)\n"
+                        "   2:	1bd2      	subs	r2, r2, r7\n"
+                        "   4:	e07c      	b.n	0x100\n"
+                        "   6:	0000      	movs	r0, r0\n"
+                        "   8:	0100      	lsls	r0, r0, #4\n"
+                        "   a:	0000      	movs	r0, r0"
+                    >>)
+                end),
+                ?_test(begin
+                    sub_test0(State0, r2, r3, <<
+                        "   0:	1ad2      	subs	r2, r2, r3\n"
+                        "   2:	e07d      	b.n	0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+mul_test0(State0, Reg, Imm, Dump) ->
+    Multiplied = ?BACKEND:mul(State0, Reg, Imm),
+    Emitted = ?BACKEND:stream(Multiplied),
+    ?assertEqual(dump_to_bin(Dump), Emitted).
+
+mul_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(Fresh) ->
+            [
+                ?_test(
+                    mul_test0(Fresh, r2, 2, <<
+                        "   0:	0052      	lsls	r2, r2, #1"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, r2, 3, <<
+                        "   0:	0057      	lsls	r7, r2, #1\n"
+                        "   2:	18ba      	adds	r2, r7, r2"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, r2, 4, <<
+                        "   0:	0092      	lsls	r2, r2, #2"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, r2, 5, <<
+                        "   0:	0097      	lsls	r7, r2, #2\n"
+                        "   2:	18ba      	adds	r2, r7, r2"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, r2, 6, <<
+                        "   0:	0057      	lsls	r7, r2, #1\n"
+                        "   2:	18ba      	adds	r2, r7, r2\n"
+                        "   4:	0052      	lsls	r2, r2, #1"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, r2, 7, <<
+                        "   0:	00d7      	lsls	r7, r2, #3\n"
+                        "   2:	1aba      	subs	r2, r7, r2"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, r2, 8, <<
+                        "   0:	00d2      	lsls	r2, r2, #3"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, r2, 9, <<
+                        "   0:	00d7      	lsls	r7, r2, #3\n"
+                        "   2:	18ba      	adds	r2, r7, r2"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, r2, 10, <<
+                        "   0:	0097      	lsls	r7, r2, #2\n"
+                        "   2:	18ba      	adds	r2, r7, r2\n"
+                        "   4:	0052      	lsls	r2, r2, #1"
+                    >>)
+                ),
+                ?_test(
+                    mul_test0(Fresh, r2, 11, <<
+                        "   0:	270b      	movs	r7, #11\n"
+                        "   2:	437a      	muls	r2, r7"
+                    >>)
+                )
+            ]
+        end}.
+
+%% Test set_args1 with a {y_reg, N} argument
+set_args1_y_reg_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    % Call a primitive with a y_reg argument to trigger the {y_reg, X} pattern in set_args1.
+    % This mirrors: {MSt2, Value} = MMod:call_primitive(MSt1, ?PRIM_BITSTRING_GET_UTF8, [{free, Src}])
+    % but with {y_reg, 5} instead of {free, Src}
+    {State1, _ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_BITSTRING_GET_UTF8, [
+        {y_reg, 5}
+    ]),
+
+    Stream = ?BACKEND:stream(State1),
+    % Expected: r0 and r2 are saved around the call and y_reg 5 is loaded into r0 through r6
+    Dump = <<
+        "   0:	2743      	movs	r7, #67	; 0x43\n"
+        "   2:	00bf      	lsls	r7, r7, #2\n"
+        "   4:	59d7      	ldr	r7, [r2, r7]\n"
+        "   6:	b405      	push	{r0, r2}\n"
+        "   8:	6946      	ldr	r6, [r0, #20]\n"
+        "   a:	6970      	ldr	r0, [r6, #20]\n"
+        "   c:	47b8      	blx	r7\n"
+        "   e:	4607      	mov	r7, r0\n"
+        "  10:	bc05      	pop	{r0, r2}"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Reading Y register 32 needs offset 128, which exceeds the 124-byte immediate limit
+large_y_reg_read_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % 32 * 4 = 128 bytes is too large for an immediate load offset
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {y_reg, 32}),
+    Emitted = ?BACKEND:stream(Loaded),
+    % Expected: the offset is built in a register and added to the base before loading
+    Expected = <<
+        "   0:	6946      	ldr	r6, [r0, #20]\n"
+        "   2:	2780      	movs	r7, #128	; 0x80\n"
+        "   4:	4437      	add	r7, r6\n"
+        "   6:	683f      	ldr	r7, [r7, #0]"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted),
+    ?assertEqual(r7, Reg).
+
+%% Test large Y register write while temporary registers are still available
+large_y_reg_write_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Load x_reg 0 into a native register; it is the value stored below
+    {State1, SrcReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    % Move to a large Y register (40 * 4 = 160 bytes)
+    State2 = ?BACKEND:move_to_vm_register(State1, SrcReg, {y_reg, 40}),
+    Stream = ?BACKEND:stream(State2),
+    % Expected: uses helper with two temp registers (r6 and r5) since registers are available
+    Dump = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	6946      	ldr	r6, [r0, #20]\n"
+        "   4:	25a0      	movs	r5, #160	; 0xa0\n"
+        "   6:	4435      	add	r5, r6\n"
+        "   8:	602f      	str	r7, [r5, #0]"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test large Y register read with a single free register (uses IP_REG fallback)
+large_y_reg_read_register_exhaustion_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Allocate five registers (r7 down to r3 in the expected dump) to simulate near-exhaustion
+    {State1, _} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, _} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, _} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, _} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+    {State5, _} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+    % Only r1 is left for the y_reg helper, so the base address has to be parked in IP_REG
+    {StateFinal, ResultReg} = ?BACKEND:move_to_native_register(State5, {y_reg, 35}),
+    Stream = ?BACKEND:stream(StateFinal),
+    % Expected: uses IP_REG (r12) fallback sequence, 35 * 4 = 140 bytes
+    Dump = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+        "   4:	6a05      	ldr	r5, [r0, #32]\n"
+        "   6:	6a44      	ldr	r4, [r0, #36]	; 0x24\n"
+        "   8:	6a83      	ldr	r3, [r0, #40]	; 0x28\n"
+        "   a:	6941      	ldr	r1, [r0, #20]\n"
+        "   c:	468c      	mov	ip, r1\n"
+        "   e:	218c      	movs	r1, #140	; 0x8c\n"
+        "  10:	4461      	add	r1, ip\n"
+        "  12:	6809      	ldr	r1, [r1, #0]"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream),
+    ?assertEqual(r1, ResultReg).
+
+%% Test large Y register write with a single free register (uses IP_REG fallback)
+large_y_reg_write_register_exhaustion_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Load the value to store into a native register (r7 in the expected dump)
+    {State1, SrcReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    % Allocate four more registers (r6 down to r3) to simulate near-exhaustion
+    {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, r4} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+    {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+    % Write to a large Y register (50 * 4 = 200 bytes) with only one temp register available
+    StateFinal = ?BACKEND:move_to_vm_register(State5, SrcReg, {y_reg, 50}),
+    Stream = ?BACKEND:stream(StateFinal),
+    % Expected: uses IP_REG (r12) fallback sequence with r1 as the only temp
+    Dump = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+        "   4:	6a05      	ldr	r5, [r0, #32]\n"
+        "   6:	6a44      	ldr	r4, [r0, #36]	; 0x24\n"
+        "   8:	6a83      	ldr	r3, [r0, #40]	; 0x28\n"
+        "   a:	6941      	ldr	r1, [r0, #20]\n"
+        "   c:	468c      	mov	ip, r1\n"
+        "   e:	21c8      	movs	r1, #200	; 0xc8\n"
+        "  10:	4461      	add	r1, ip\n"
+        "  12:	600f      	str	r7, [r1, #0]"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Boundary case: Y=31 maps to 124 bytes, exactly at the limit for direct addressing
+y_reg_boundary_direct_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {y_reg, 31}),
+    Emitted = ?BACKEND:stream(Loaded),
+    % 31 * 4 = 124 <= 124, so the expected load uses an immediate offset
+    Expected = <<
+        "   0:	6946      	ldr	r6, [r0, #20]\n"
+        "   2:	6ff7      	ldr	r7, [r6, #124]	; 0x7c"
+    >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted),
+    ?assertEqual(r7, Reg).
+
+%% debugger/1 is expected to emit a single breakpoint instruction
+debugger_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Halted = ?BACKEND:debugger(Fresh),
+    Emitted = ?BACKEND:stream(Halted),
+    Expected = dump_to_bin(<<
+        "   0:	be00      	bkpt	0x0000"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+and_register_exhaustion_negative_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Allocate all six available registers (r7..r3 and r1) to simulate register exhaustion
+    {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, r4} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+    {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+    {StateNoRegs, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}),
+    % Negative immediate (-4) should use BICS with 3; r0 is the temp, saved in ip and restored
+    StateResult = ?BACKEND:and_(StateNoRegs, r7, -4),
+    Stream = ?BACKEND:stream(StateResult),
+    ExpectedDump = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+        "   4:	6a05      	ldr	r5, [r0, #32]\n"
+        "   6:	6a44      	ldr	r4, [r0, #36]	; 0x24\n"
+        "   8:	6a83      	ldr	r3, [r0, #40]	; 0x28\n"
+        "   a:	6ac1      	ldr	r1, [r0, #44]	; 0x2c\n"
+        "   c:	4684      	mov	ip, r0\n"
+        "   e:	2003      	movs	r0, #3\n"
+        "  10:	4387      	bics	r7, r0\n"
+        "  12:	4660      	mov	r0, ip"
+    >>,
+    ?assertEqual(dump_to_bin(ExpectedDump), Stream).
+
+and_register_exhaustion_positive_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Allocate all six available registers (r7..r3 and r1) to simulate register exhaustion
+    {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, r4} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+    {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+    {StateNoRegs, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}),
+    % Positive immediate (0x3F) should use ANDS; r0 is the temp, saved in ip and restored
+    StateResult = ?BACKEND:and_(StateNoRegs, r7, 16#3F),
+    Stream = ?BACKEND:stream(StateResult),
+    ExpectedDump = <<
+        "   0:	6987      	ldr	r7, [r0, #24]\n"
+        "   2:	69c6      	ldr	r6, [r0, #28]\n"
+        "   4:	6a05      	ldr	r5, [r0, #32]\n"
+        "   6:	6a44      	ldr	r4, [r0, #36]	; 0x24\n"
+        "   8:	6a83      	ldr	r3, [r0, #40]	; 0x28\n"
+        "   a:	6ac1      	ldr	r1, [r0, #44]	; 0x2c\n"
+        "   c:	4684      	mov	ip, r0\n"
+        "   e:	203f      	movs	r0, #63	; 0x3f\n"
+        "  10:	4007      	ands	r7, r0\n"
+        "  12:	4660      	mov	r0, ip"
+    >>,
+    ?assertEqual(dump_to_bin(ExpectedDump), Stream).
+
+jump_table_large_labels_test() ->
+    LabelsCount = 512,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Table = ?BACKEND:stream(?BACKEND:jump_table(Fresh, LabelsCount)),
+    ?assertEqual((LabelsCount + 1) * 12, byte_size(Table)).
+
+alloc_boxed_integer_fragment_small_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Called, ResultReg} = ?BACKEND:call_primitive(Fresh, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [
+        ctx, {avm_int64_t, 42}
+    ]),
+    ?assertEqual(r7, ResultReg),
+    Emitted = ?BACKEND:stream(Called),
+    Expected =
+        <<
+            "   0:	6bd7      	ldr	r7, [r2, #60]	; 0x3c\n"
+            "   2:	b405      	push	{r0, r2}\n"
+            "   4:	222a      	movs	r2, #42	; 0x2a\n"
+            "   6:	2300      	movs	r3, #0\n"
+            "   8:	47b8      	blx	r7\n"
+            "   a:	4607      	mov	r7, r0\n"
+            "   c:	bc05      	pop	{r0, r2}"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+alloc_boxed_integer_fragment_large_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, ResultReg} = ?BACKEND:call_primitive(State0, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, [
+        ctx, {avm_int64_t, 16#123456789ABCDEF0}
+    ]),
+    % Finish with call_primitive_last so that the pending literal pool gets emitted
+    State2 = ?BACKEND:call_primitive_last(State1, ?PRIM_RAISE_ERROR_TUPLE, [
+        ctx, jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg}
+    ]),
+    ?assertEqual(r7, ResultReg),
+    Stream = ?BACKEND:stream(State2),
+    Dump =
+        <<
+            "   0:	6bd7      	ldr	r7, [r2, #60]	@ 0x3c\n"
+            "   2:	b405      	push	{r0, r2}\n"
+            "   4:	4a06      	ldr	r2, [pc, #24]	@ (0x20)\n"
+            "   6:	4b07      	ldr	r3, [pc, #28]	@ (0x24)\n"
+            "   8:	47b8      	blx	r7\n"
+            "   a:	4607      	mov	r7, r0\n"
+            "   c:	bc05      	pop	{r0, r2}\n"
+            "   e:	6cd6      	ldr	r6, [r2, #76]	@ 0x4c\n"
+            "  10:	b082      	sub	sp, #8\n"
+            "  12:	9700      	str	r7, [sp, #0]\n"
+            "  14:	9902      	ldr	r1, [sp, #8]\n"
+            "  16:	2210      	movs	r2, #16\n"
+            "  18:	4b03      	ldr	r3, [pc, #12]	@ (0x28)\n"
+            "  1a:	47b0      	blx	r6\n"
+            "  1c:	b002      	add	sp, #8\n"
+            "  1e:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+            "  20:	def0      	udf	#240	@ 0xf0\n"
+            "  22:	9abc      	ldr	r2, [sp, #752]	@ 0x2f0\n"
+            "  24:	5678      	ldrsb	r0, [r7, r1]\n"
+            "  26:	1234      	asrs	r4, r6, #8\n"
+            "  28:	028b      	lsls	r3, r1, #10\n"
+            "  2a:	0000      	movs	r0, r0"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test for a stack alignment issue in call_func_ptr.
+%% With an odd number of saved registers the stack would be misaligned before the call,
+%% violating the ARM AAPCS 8-byte alignment rule; the expected push saves six registers.
+call_func_ptr_stack_alignment_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+    {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+    {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+    {State4, r4} = ?BACKEND:call_func_ptr(State3, {free, r3}, [42]),
+    Stream = ?BACKEND:stream(State4),
+    Dump =
+        <<
+            "   0:	6987      	ldr	r7, [r0, #24]\n"
+            "   2:	69c6      	ldr	r6, [r0, #28]\n"
+            "   4:	6a05      	ldr	r5, [r0, #32]\n"
+            "   6:	b4ed      	push	{r0, r2, r3, r5, r6, r7}\n"
+            "   8:	202a      	movs	r0, #42	; 0x2a\n"
+            "   a:	4798      	blx	r3\n"
+            "   c:	4604      	mov	r4, r0\n"
+            "   e:	bced      	pop	{r0, r2, r3, r5, r6, r7}"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test for register exhaustion issue in call_func_ptr with 5+ arguments
+%% When all registers are used and we call a function with 5+ args,
+%% set_args needs temporary registers but none are available
+call_func_ptr_register_exhaustion_test_() ->
+    {setup,
+        fun() ->
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+            % Allocate all available registers to simulate register pressure
+            {State1, r7} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, r6} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+            {State3, r5} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+            {State4, r4} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+            {State5, r3} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+            {State6, r1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}),
+            State6
+        end,
+        fun(State6) ->
+            [
+                ?_test(begin
+                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, r6},
+                        [ctx, jit_state, {free, r3}, 3, 1]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "   0:	6987      	ldr	r7, [r0, #24]\n"
+                            "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                            "   4:	6a05      	ldr	r5, [r0, #32]\n"
+                            "   6:	6a44      	ldr	r4, [r0, #36]	; 0x24\n"
+                            "   8:	6a83      	ldr	r3, [r0, #40]	; 0x28\n"
+                            "   a:	6ac1      	ldr	r1, [r0, #44]	; 0x2c\n"
+                            "   c:	b4b7      	push	{r0, r1, r2, r4, r5, r7}\n"
+                            "   e:	b082      	sub	sp, #8\n"
+                            "  10:	2101      	movs	r1, #1\n"
+                            "  12:	9100      	str	r1, [sp, #0]\n"
+                            "  14:	9908      	ldr	r1, [sp, #32]\n"
+                            "  16:	461a      	mov	r2, r3\n"
+                            "  18:	2303      	movs	r3, #3\n"
+                            "  1a:	47b0      	blx	r6\n"
+                            "  1c:	4606      	mov	r6, r0\n"
+                            "  1e:	b002      	add	sp, #8\n"
+                            "  20:	bcb7      	pop	{r0, r1, r2, r4, r5, r7}"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, r6},
+                        [ctx, jit_state, {free, r3}, 1, r1]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "   0:	6987      	ldr	r7, [r0, #24]\n"
+                            "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                            "   4:	6a05      	ldr	r5, [r0, #32]\n"
+                            "   6:	6a44      	ldr	r4, [r0, #36]	; 0x24\n"
+                            "   8:	6a83      	ldr	r3, [r0, #40]	; 0x28\n"
+                            "   a:	6ac1      	ldr	r1, [r0, #44]	; 0x2c\n"
+                            "   c:	b4b7      	push	{r0, r1, r2, r4, r5, r7}\n"
+                            "   e:	b082      	sub	sp, #8\n"
+                            "  10:	9100      	str	r1, [sp, #0]\n"
+                            "  12:	9908      	ldr	r1, [sp, #32]\n"
+                            "  14:	461a      	mov	r2, r3\n"
+                            "  16:	2301      	movs	r3, #1\n"
+                            "  18:	47b0      	blx	r6\n"
+                            "  1a:	4606      	mov	r6, r0\n"
+                            "  1c:	b002      	add	sp, #8\n"
+                            "  1e:	bcb7      	pop	{r0, r1, r2, r4, r5, r7}"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    {State7, ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, r6},
+                        [ctx, jit_state, {free, r3}, r1, 1]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "   0:	6987      	ldr	r7, [r0, #24]\n"
+                            "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                            "   4:	6a05      	ldr	r5, [r0, #32]\n"
+                            "   6:	6a44      	ldr	r4, [r0, #36]	; 0x24\n"
+                            "   8:	6a83      	ldr	r3, [r0, #40]	; 0x28\n"
+                            "   a:	6ac1      	ldr	r1, [r0, #44]	; 0x2c\n"
+                            "   c:	b4b7      	push	{r0, r1, r2, r4, r5, r7}\n"
+                            "   e:	b082      	sub	sp, #8\n"
+                            "  10:	2401      	movs	r4, #1\n"
+                            "  12:	9400      	str	r4, [sp, #0]\n"
+                            "  14:	460f      	mov	r7, r1\n"
+                            "  16:	9908      	ldr	r1, [sp, #32]\n"
+                            "  18:	461a      	mov	r2, r3\n"
+                            "  1a:	463b      	mov	r3, r7\n"
+                            "  1c:	47b0      	blx	r6\n"
+                            "  1e:	4606      	mov	r6, r0\n"
+                            "  20:	b002      	add	sp, #8\n"
+                            "  22:	bcb7      	pop	{r0, r1, r2, r4, r5, r7}"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual(r6, ResultReg)
+                end),
+                ?_test(begin
+                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, r1},
+                        [r6, r3]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "   0:	6987      	ldr	r7, [r0, #24]\n"
+                            "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                            "   4:	6a05      	ldr	r5, [r0, #32]\n"
+                            "   6:	6a44      	ldr	r4, [r0, #36]	; 0x24\n"
+                            "   8:	6a83      	ldr	r3, [r0, #40]	; 0x28\n"
+                            "   a:	6ac1      	ldr	r1, [r0, #44]	; 0x2c\n"
+                            "   c:	b4ff      	push	{r0, r1, r2, r3, r4, r5, r6, r7}\n"
+                            "   e:	460c      	mov	r4, r1\n"
+                            "  10:	4630      	mov	r0, r6\n"
+                            "  12:	4619      	mov	r1, r3\n"
+                            "  14:	47a0      	blx	r4\n"
+                            "  16:	9001      	str	r0, [sp, #4]\n"
+                            "  18:	bcff      	pop	{r0, r1, r2, r3, r4, r5, r6, r7}"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    {State7, ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {primitive, 2},
+                        [{free, r6}, r3]
+                    ),
+                    ?assertEqual(ResultReg, r6),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "   0:	6987      	ldr	r7, [r0, #24]\n"
+                            "   2:	69c6      	ldr	r6, [r0, #28]\n"
+                            "   4:	6a05      	ldr	r5, [r0, #32]\n"
+                            "   6:	6a44      	ldr	r4, [r0, #36]	; 0x24\n"
+                            "   8:	6a83      	ldr	r3, [r0, #40]	; 0x28\n"
+                            "   a:	6ac1      	ldr	r1, [r0, #44]	; 0x2c\n"
+                            "   c:	b4ff      	push	{r0, r1, r2, r3, r4, r5, r6, r7}\n"
+                            "   e:	6894      	ldr	r4, [r2, #8]\n"
+                            "  10:	4630      	mov	r0, r6\n"
+                            "  12:	4619      	mov	r1, r3\n"
+                            "  14:	47a0      	blx	r4\n"
+                            "  16:	9006      	str	r0, [sp, #24]\n"
+                            "  18:	bcff      	pop	{r0, r1, r2, r3, r4, r5, r6, r7}"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+%% jump_to_continuation/2 in the PIC variant, with the target taken from r0 (intra-module return)
+jump_to_continuation_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Jumped = ?BACKEND:jump_to_continuation(Fresh, {free, r0}),
+    Emitted = ?BACKEND:stream(Jumped),
+    % Golden listing taken from the generator's actual output: a pc-relative
+    % base is added to r0, then the sequence ends with a pop that loads pc
+    Expected =
+        <<
+            "   0:	a700      	add	r7, pc, #0	; (adr r7, 0x4)\n"
+            "   2:	19c0      	adds	r0, r0, r7\n"
+            "   4:	2703      	movs	r7, #3\n"
+            "   6:	427f      	negs	r7, r7\n"
+            "   8:	19c0      	adds	r0, r0, r7\n"
+            "   a:	9f05      	ldr	r7, [sp, #20]\n"
+            "   c:	9005      	str	r0, [sp, #20]\n"
+            "   e:	46be      	mov	lr, r7\n"
+            "  10:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}"
+        >>,
+    ?assertEqual(dump_to_bin(Expected), Emitted).
+
+%% Mimic part of add.beam: emit labels 1, 2, 3 and 0, then compare with a golden listing
+add_beam_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 3),
+    State2 = ?BACKEND:add_label(State1, 1),
+    State3 = ?BACKEND:move_to_vm_register(State2, 16#9f, {x_reg, 1}),
+    State4 = ?BACKEND:move_to_vm_register(State3, 16#8f, {x_reg, 0}),
+    State5 = ?BACKEND:call_only_or_schedule_next(State4, 2),
+    State6 = ?BACKEND:add_label(State5, 2),
+    {State7, ResultReg} = ?BACKEND:call_primitive(State6, ?PRIM_ALLOCATE, [
+        ctx, jit_state, 1, 0, 1
+    ]),
+    State8 = ?BACKEND:if_block(State7, {'(bool)', {free, ResultReg}, '==', false}, fun(BSt0) ->
+        ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
+    end),
+    State9 = ?BACKEND:move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}),
+    State10 = ?BACKEND:call_or_schedule_next(State9, 3),
+    State11 = ?BACKEND:add_label(State10, 3),
+    State12 = ?BACKEND:call_primitive_last(State11, ?PRIM_RETURN, [
+        ctx, jit_state
+    ]),
+    % OP_INT_CALL_END (label 0 is added last)
+    State13 = ?BACKEND:add_label(State12, 0),
+    State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]),
+    State15 = ?BACKEND:update_branches(State14),
+    Stream = ?BACKEND:stream(State15),
+    Dump =
+        <<
+            % jump table: four 12-byte entries, for labels 0 to 3
+            "   0:	4b01      	ldr	r3, [pc, #4]	; (0x8)\n"
+            "   2:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+            "   4:	449f      	add	pc, r3\n"
+            "   6:	46c0      	nop			; (mov r8, r8)\n"
+            "   8:	00d8      	lsls	r0, r3, #3\n"
+            "   a:	0000      	movs	r0, r0\n"
+            "   c:	4b01      	ldr	r3, [pc, #4]	; (0x14)\n"
+            "   e:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+            "  10:	449f      	add	pc, r3\n"
+            "  12:	46c0      	nop			; (mov r8, r8)\n"
+            "  14:	001c      	movs	r4, r3\n"
+            "  16:	0000      	movs	r0, r0\n"
+            "  18:	4b01      	ldr	r3, [pc, #4]	; (0x20)\n"
+            "  1a:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+            "  1c:	449f      	add	pc, r3\n"
+            "  1e:	46c0      	nop			; (mov r8, r8)\n"
+            "  20:	0044      	lsls	r4, r0, #1\n"
+            "  22:	0000      	movs	r0, r0\n"
+            "  24:	4b01      	ldr	r3, [pc, #4]	; (0x2c)\n"
+            "  26:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+            "  28:	449f      	add	pc, r3\n"
+            "  2a:	46c0      	nop			; (mov r8, r8)\n"
+            "  2c:	00a8      	lsls	r0, r5, #2\n"
+            "  2e:	0000      	movs	r0, r0\n"
+            % label 1
+            % {move,{integer,9},{x,1}}. (the value passed to the backend is 16#9f)
+            "  30:	279f      	movs	r7, #159	; 0x9f\n"
+            "  32:	61c7      	str	r7, [r0, #28]\n"
+            % {move,{integer,8},{x,0}}. (the value passed to the backend is 16#8f)
+            "  34:	278f      	movs	r7, #143	; 0x8f\n"
+            "  36:	6187      	str	r7, [r0, #24]\n"
+            % {call_only,2,{f,2}}.
+            "  38:	9e00      	ldr	r6, [sp, #0]\n"
+            "  3a:	68b7      	ldr	r7, [r6, #8]\n"
+            "  3c:	3f01      	subs	r7, #1\n"
+            "  3e:	60b7      	str	r7, [r6, #8]\n"
+            "  40:	d004      	beq.n	0x4c\n"
+            "  42:	e00f      	b.n	0x64\n"
+            "  44:	46c0      	nop			; (mov r8, r8)\n"
+            "  46:	46c0      	nop			; (mov r8, r8)\n"
+            "  48:	46c0      	nop			; (mov r8, r8)\n"
+            "  4a:	46c0      	nop			; (mov r8, r8)\n"
+            "  4c:	a700      	add	r7, pc, #0	; (adr r7, 0x50)\n"
+            "  4e:	2637      	movs	r6, #55	; 0x37\n"
+            "  50:	4276      	negs	r6, r6\n"
+            "  52:	19f6      	adds	r6, r6, r7\n"
+            "  54:	9f00      	ldr	r7, [sp, #0]\n"
+            "  56:	607e      	str	r6, [r7, #4]\n"
+            "  58:	6897      	ldr	r7, [r2, #8]\n"
+            "  5a:	9e05      	ldr	r6, [sp, #20]\n"
+            "  5c:	9705      	str	r7, [sp, #20]\n"
+            "  5e:	46b6      	mov	lr, r6\n"
+            "  60:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+            "  62:	46c0      	nop			; (mov r8, r8)\n"
+            % label 2
+            % {allocate,1,1}.
+            "  64:	6957      	ldr	r7, [r2, #20]\n"
+            "  66:	b405      	push	{r0, r2}\n"
+            "  68:	b082      	sub	sp, #8\n"
+            "  6a:	2601      	movs	r6, #1\n"
+            "  6c:	9600      	str	r6, [sp, #0]\n"
+            "  6e:	9904      	ldr	r1, [sp, #16]\n"
+            "  70:	2201      	movs	r2, #1\n"
+            "  72:	2300      	movs	r3, #0\n"
+            "  74:	47b8      	blx	r7\n"
+            "  76:	4607      	mov	r7, r0\n"
+            "  78:	b002      	add	sp, #8\n"
+            "  7a:	bc05      	pop	{r0, r2}\n"
+            "  7c:	07fe      	lsls	r6, r7, #31\n"
+            "  7e:	d405      	bmi.n	0x8c\n"
+            "  80:	6997      	ldr	r7, [r2, #24]\n"
+            "  82:	2282      	movs	r2, #130	; 0x82\n"
+            "  84:	9e05      	ldr	r6, [sp, #20]\n"
+            "  86:	9705      	str	r7, [sp, #20]\n"
+            "  88:	46b6      	mov	lr, r6\n"
+            "  8a:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+            % {init_yregs,{list,[{y,0}]}}.
+            % emitted by move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0})
+            "  8c:	263b      	movs	r6, #59	; 0x3b\n"
+            "  8e:	6947      	ldr	r7, [r0, #20]\n"
+            "  90:	603e      	str	r6, [r7, #0]\n"
+            % {call,1,{f,3}}.
+            % emitted by call_or_schedule_next(State9, 3)
+            "  92:	9e00      	ldr	r6, [sp, #0]\n"
+            "  94:	6837      	ldr	r7, [r6, #0]\n"
+            "  96:	683f      	ldr	r7, [r7, #0]\n"
+            "  98:	063f      	lsls	r7, r7, #24\n"
+            "  9a:	4e0c      	ldr	r6, [pc, #48]	; (0xcc)\n"
+            "  9c:	4337      	orrs	r7, r6\n"
+            "  9e:	65c7      	str	r7, [r0, #92]	; 0x5c\n"
+            "  a0:	9e00      	ldr	r6, [sp, #0]\n"
+            "  a2:	68b7      	ldr	r7, [r6, #8]\n"
+            "  a4:	3f01      	subs	r7, #1\n"
+            "  a6:	60b7      	str	r7, [r6, #8]\n"
+            "  a8:	d004      	beq.n	0xb4\n"
+            "  aa:	e013      	b.n	0xd4\n"
+            "  ac:	46c0      	nop			; (mov r8, r8)\n"
+            "  ae:	46c0      	nop			; (mov r8, r8)\n"
+            "  b0:	46c0      	nop			; (mov r8, r8)\n"
+            "  b2:	46c0      	nop			; (mov r8, r8)\n"
+            "  b4:	a700      	add	r7, pc, #0	; (adr r7, 0xb8)\n"
+            "  b6:	2693      	movs	r6, #147	; 0x93\n"
+            "  b8:	4276      	negs	r6, r6\n"
+            "  ba:	19f6      	adds	r6, r6, r7\n"
+            "  bc:	9f00      	ldr	r7, [sp, #0]\n"
+            "  be:	607e      	str	r6, [r7, #4]\n"
+            "  c0:	6897      	ldr	r7, [r2, #8]\n"
+            "  c2:	9e05      	ldr	r6, [sp, #20]\n"
+            "  c4:	9705      	str	r7, [sp, #20]\n"
+            "  c6:	46b6      	mov	lr, r6\n"
+            "  c8:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+            "  ca:	0000      	movs	r0, r0\n"
+            "  cc:	0340      	lsls	r0, r0, #13\n"
+            "  ce:	0000      	movs	r0, r0\n"
+            % (continuation) starts with the same push as the jump table entries
+            "  d0:	b5f2      	push	{r1, r4, r5, r6, r7, lr}\n"
+            "  d2:	46c0      	nop			; (mov r8, r8)\n"
+            % label 3: call_primitive_last(?PRIM_RETURN)
+            "  d4:	6857      	ldr	r7, [r2, #4]\n"
+            "  d6:	9e05      	ldr	r6, [sp, #20]\n"
+            "  d8:	9705      	str	r7, [sp, #20]\n"
+            "  da:	46b6      	mov	lr, r6\n"
+            "  dc:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+            "  de:	46c0      	nop			; (mov r8, r8)\n"
+            % label 0: OP_INT_CALL_END, call_primitive_last with primitive 1
+            "  e0:	6857      	ldr	r7, [r2, #4]\n"
+            "  e2:	9e05      	ldr	r6, [sp, #20]\n"
+            "  e4:	9705      	str	r7, [sp, #20]\n"
+            "  e6:	46b6      	mov	lr, r6\n"
+            "  e8:	bdf2      	pop	{r1, r4, r5, r6, r7, pc}\n"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Turn an objdump-style listing ("<addr>: <hex> <disassembly>" per line) into
+%% the bytes of its hex column, as little-endian 16-bit halfwords in listing order.
+dump_to_bin(Listing) ->
+    dump_to_bin0(Listing, addr, []).
+
+-define(IS_HEX_DIGIT(C),
+    ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F))
+).
+
+%% Character scanner with three states:
+%%   addr  - skipping blanks and the address digits, up to the colon
+%%   hex   - expecting the encoded instruction (one or two groups of 4 hex digits)
+%%   instr - discarding the disassembly text until the end of the line
+%% Halfwords are accumulated in reverse order and flipped once the input is exhausted.
+dump_to_bin0(<<>>, _State, RevChunks) ->
+    list_to_binary(lists:reverse(RevChunks));
+dump_to_bin0(<<Digit, $:, More/binary>>, addr, RevChunks) when ?IS_HEX_DIGIT(Digit) ->
+    dump_to_bin0(More, hex, RevChunks);
+dump_to_bin0(<<Ch, More/binary>>, addr, RevChunks) when
+    ?IS_HEX_DIGIT(Ch); Ch =:= $\n; Ch =:= $\s; Ch =:= $\t
+->
+    dump_to_bin0(More, addr, RevChunks);
+dump_to_bin0(<<Ch, More/binary>>, hex, RevChunks) when Ch =:= $\s; Ch =:= $\t ->
+    dump_to_bin0(More, hex, RevChunks);
+%% 32-bit encoding printed as two groups ("xxxx xxxx"): first group is emitted first
+dump_to_bin0(<<A1, A2, A3, A4, $\s, B1, B2, B3, B4, Sep, More/binary>>, hex, RevChunks) when
+    (Sep =:= $\t orelse Sep =:= $\s) andalso
+        ?IS_HEX_DIGIT(A1) andalso
+        ?IS_HEX_DIGIT(A2) andalso
+        ?IS_HEX_DIGIT(A3) andalso
+        ?IS_HEX_DIGIT(A4) andalso
+        ?IS_HEX_DIGIT(B1) andalso
+        ?IS_HEX_DIGIT(B2) andalso
+        ?IS_HEX_DIGIT(B3) andalso
+        ?IS_HEX_DIGIT(B4)
+->
+    First = list_to_integer([A1, A2, A3, A4], 16),
+    Second = list_to_integer([B1, B2, B3, B4], 16),
+    dump_to_bin0(More, instr, [<<Second:16/little>>, <<First:16/little>> | RevChunks]);
+%% 16-bit Thumb encoding: a single group of 4 hex digits
+dump_to_bin0(<<D1, D2, D3, D4, Sep, More/binary>>, hex, RevChunks) when
+    (Sep =:= $\t orelse Sep =:= $\s) andalso
+        ?IS_HEX_DIGIT(D1) andalso
+        ?IS_HEX_DIGIT(D2) andalso
+        ?IS_HEX_DIGIT(D3) andalso
+        ?IS_HEX_DIGIT(D4)
+->
+    Halfword = list_to_integer([D1, D2, D3, D4], 16),
+    dump_to_bin0(More, instr, [<<Halfword:16/little>> | RevChunks]);
+dump_to_bin0(<<$\n, More/binary>>, hex, RevChunks) ->
+    dump_to_bin0(More, addr, RevChunks);
+dump_to_bin0(<<$\n, More/binary>>, instr, RevChunks) ->
+    dump_to_bin0(More, addr, RevChunks);
+dump_to_bin0(<<_Skipped, More/binary>>, instr, RevChunks) ->
+    dump_to_bin0(More, instr, RevChunks).
diff --git a/tests/libs/jit/jit_dwarf_tests.erl b/tests/libs/jit/jit_dwarf_tests.erl
new file mode 100644
index 0000000000..0588ae25f2
--- /dev/null
+++ b/tests/libs/jit/jit_dwarf_tests.erl
@@ -0,0 +1,276 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_dwarf_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+-include("../../../libs/jit/src/opcodes.hrl").
+
+basic_dwarf_state_test() ->
+    % Start from a new DWARF state created for the jit_armv6m backend
+    Empty = jit_dwarf:new(jit_armv6m, test_module, jit_stream_binary, 1024),
+
+    % Feed it an opcode, a function and a line number, in that order
+    WithOpcode = jit_dwarf:opcode(Empty, ?OP_FUNC_INFO),
+    WithFunction = jit_dwarf:function(WithOpcode, get_value, 2),
+    WithLine = jit_dwarf:line(WithFunction, 42),
+
+    % Only the outer shape is checked here: the state is still a tuple
+    ?assert(is_tuple(WithLine)),
+
+    % jit_dwarf:stream/1 must hand back a binary
+    Emitted = jit_dwarf:stream(WithLine),
+    ?assert(is_binary(Emitted)).
+
+elf_generation_test() ->
+    % State carrying one opcode, one function and one line entry
+    Empty = jit_dwarf:new(jit_armv6m, test_module, jit_stream_binary, 1024),
+    % The opcode is passed as a binary label here
+    WithOpcode = jit_dwarf:opcode(Empty, <<"test_opcode/2">>),
+    WithFunction = jit_dwarf:function(WithOpcode, test_func, 1),
+    WithLine = jit_dwarf:line(WithFunction, 100),
+
+    % When elf/2 answers false there is nothing to inspect and the test passes
+    case jit_dwarf:elf(WithLine, <<>>) of
+        false ->
+            ok;
+        {ok, Elf, _ElfWithText} ->
+            % Magic number: 0x7F followed by "ELF"
+            <<127, $E, $L, $F, _AfterMagic/binary>> = Elf,
+
+            % At least a full 52-byte header must be present
+            ?assert(byte_size(Elf) >= 52),
+
+            % Destructure the header; only the fields asserted below are bound
+            <<_Magic:4/binary, Class, Endian, _Version, _OSABI, _IdentTail:8/binary,
+                Type:16/little, Machine:16/little, _ElfVersion:32/little, _Entry:32/little,
+                _PHOff:32/little, SHOff:32/little, _Flags:32/little, EHSize:16/little,
+                _PHEntSize:16/little, _PHNum:16/little, _SHEntSize:16/little, SHNum:16/little,
+                _SHStrNdx:16/little, _/binary>> = Elf,
+
+            % Identification and file-type fields
+
+            % ELFCLASS32
+            ?assertEqual(1, Class),
+            % ELFDATA2LSB
+            ?assertEqual(1, Endian),
+            % ET_REL
+            ?assertEqual(1, Type),
+            % EM_ARM
+            ?assertEqual(40, Machine),
+            % e_ehsize
+            ?assertEqual(52, EHSize),
+
+            % Section table bookkeeping
+
+            % null + 4 debug sections + shstrtab
+            ?assert(SHNum >= 6),
+            % The section header table is located after the ELF header
+            ?assert(SHOff > 52),
+
+            % The file must be long enough to hold every section header
+
+            % 40 bytes per section header entry
+            TableEnd = SHOff + (SHNum * 40),
+            ?assert(byte_size(Elf) >= TableEnd)
+    end.
+
+section_header_test() ->
+    Empty = jit_dwarf:new(jit_armv6m, test_module, jit_stream_binary, 1024),
+    WithMain = jit_dwarf:function(Empty, main, 0),
+
+    case jit_dwarf:elf(WithMain, <<>>) of
+        false ->
+            ok;
+        {ok, Elf, _ElfWithText} ->
+            % Walk the whole ELF header to pick up the table offset and entry count
+            <<_Ident:16/binary, _Type:16/little, _Machine:16/little, _Version:32/little,
+                _Entry:32/little, _PHOff:32/little, TableStart:32/little, _Flags:32/little,
+                _EHSize:16/little, _PHEntSize:16/little, _PHNum:16/little, _SHEntSize:16/little,
+                EntryCount:16/little, _SHStrNdx:16/little, _Rest/binary>> = Elf,
+
+            % The complete table (40 bytes per entry) must lie inside the file
+            TableSize = EntryCount * 40,
+            TableEnd = TableStart + TableSize,
+
+            ?assert(byte_size(Elf) >= TableEnd),
+
+            % Entry 0 is expected to be the null section header
+            <<_:TableStart/binary, FirstEntry:40/binary, _/binary>> = Elf,
+            % i.e. 40 zero bytes
+            ?assertEqual(<<0:320>>, FirstEntry)
+    end.
+
+string_table_test() ->
+    Empty = jit_dwarf:new(jit_armv6m, string_test, jit_stream_binary, 1024),
+
+    case jit_dwarf:elf(Empty, <<>>) of
+        false ->
+            ok;
+        {ok, Elf, _ElfWithText} ->
+            % From the ELF header: where the section table starts and which entry names sections
+            <<_Ident:16/binary, _Type:16/little, _Machine:16/little, _Version:32/little,
+                _Entry:32/little, _PHOff:32/little, TableStart:32/little, _Flags:32/little,
+                _EHSize:16/little, _PHEntSize:16/little, _PHNum:16/little, _SHEntSize:16/little,
+                _EntryCount:16/little, NamesIndex:16/little, _AfterHeader/binary>> = Elf,
+
+            % Read offset and size out of that section header (40 bytes per entry)
+            NamesHeaderAt = TableStart + (NamesIndex * 40),
+            <<_:NamesHeaderAt/binary, _ShName:32/little, _ShType:32/little,
+                _ShFlags:32/little, _ShAddr:32/little, NamesAt:32/little, NamesLen:32/little,
+                _/binary>> = Elf,
+
+            % Slice the raw string table out of the file
+            <<_:NamesAt/binary, RawNames:NamesLen/binary, _/binary>> = Elf,
+
+            % Names are NUL-separated; both of these must be among them
+            Names = binary:split(RawNames, <<0>>, [global]),
+
+            ?assert(lists:member(<<".debug_info">>, Names)),
+            ?assert(lists:member(<<".shstrtab">>, Names))
+    end.
+
+elf_with_text_test() ->
+    % jit_dwarf:elf/2 given native code: its third result must embed that code
+    Fresh = jit_dwarf:new(jit_x86_64, test_module, jit_stream_binary, 1024),
+
+    % Dummy x86_64 code (mov rax, 1; ret)
+    Code = <<16#48, 16#c7, 16#c0, 16#01, 16#00, 16#00, 16#00, 16#c3>>,
+
+    % When elf/2 answers false there is nothing to inspect and the test passes
+    case jit_dwarf:elf(Fresh, Code) of
+        false ->
+            ok;
+        {ok, _DebugOnly, WithText} ->
+            % Magic number: 0x7F followed by "ELF"
+            <<127, $E, $L, $F, _AfterMagic/binary>> = WithText,
+
+            % Walk the ELF header to reach the section count
+            <<_Ident:16/binary, _Type:16/little, _Machine:16/little, _Version:32/little,
+                _Entry:32/little, _PHOff:32/little, _SHOff:32/little, _Flags:32/little,
+                _EHSize:16/little, _PHEntSize:16/little, _PHNum:16/little, _SHEntSize:16/little,
+                SectionCount:16/little, _SHStrNdx:16/little, _/binary>> = WithText,
+
+            % 9 sections: null + 6 debug sections + .text + shstrtab
+            ?assertEqual(9, SectionCount),
+
+            % The code bytes must appear verbatim somewhere in the file
+            ?assert(binary:match(WithText, Code) =/= nomatch),
+
+            % And the file must be bigger than the one produced with no code at all
+            {ok, WithoutText, _} = jit_dwarf:elf(Fresh, <<>>),
+            ?assert(byte_size(WithText) > byte_size(WithoutText))
+    end.
+
+text_section_properties_test() ->
+    % Check type, flags, size and address of the .text section header
+    State = jit_dwarf:new(jit_aarch64, test_module, jit_stream_binary, 1024),
+    % AArch64 native code (mov x0, #42; ret). Each word needs an explicit :32
+    % size: a bare integer segment defaults to 8 bits and would be truncated.
+    NativeCode = <<16#d2800540:32/little, 16#d65f03c0:32/little>>,
+
+    case jit_dwarf:elf(State, NativeCode) of
+        false ->
+            ok;
+        {ok, _DebugOnlyELF, CombinedELF} ->
+            % Parse the 52-byte ELF header; RestOfFile is everything after it
+            <<_ElfMagic:16/binary, _Type:16/little, _Machine:16/little, _Version:32/little,
+                _Entry:32/little, _PHOff:32/little, SHOff:32/little, _Flags:32/little,
+                _EHSize:16/little, _PHEntSize:16/little, _PHNum:16/little, _SHEntSize:16/little,
+                SHNum:16/little, SHStrNdx:16/little, RestOfFile/binary>> = CombinedELF,
+
+            % File offsets are rebased by 52 because RestOfFile starts after the header
+            SectionHeadersStart = SHOff - 52,
+            <<SectionData:SectionHeadersStart/binary, SectionHeaders/binary>> = RestOfFile,
+
+            % Read offset and size of the section-name string table (40 bytes per entry)
+            StringTableHeaderOffset = SHStrNdx * 40,
+            <<_:StringTableHeaderOffset/binary, _StrName:32/little, _StrType:32/little,
+                _StrFlags:32/little, _StrAddr:32/little, StrOffset:32/little, StrSize:32/little,
+                _/binary>> = SectionHeaders,
+
+            % The string table is looked up in the data preceding the section headers
+            StrTableFileOffset = StrOffset - 52,
+            <<_:StrTableFileOffset/binary, StringTable:StrSize/binary, _/binary>> = SectionData,
+
+            % Find .text section by scanning all section headers
+            TextSectionFound = find_text_section(SectionHeaders, StringTable, SHNum, 0),
+            ?assert(TextSectionFound =/= not_found),
+
+            {TextType, TextFlags, TextSize, TextAddr} = TextSectionFound,
+
+            % Verify .text section properties
+            SHT_PROGBITS = 1,
+            SHF_ALLOC = 2,
+            SHF_EXECINSTR = 4,
+            ExpectedFlags = SHF_ALLOC bor SHF_EXECINSTR,
+
+            ?assertEqual(SHT_PROGBITS, TextType),
+            ?assertEqual(ExpectedFlags, TextFlags),
+            ?assertEqual(byte_size(NativeCode), TextSize),
+            % Should be 0 for relocatable
+            ?assertEqual(0, TextAddr)
+    end.
+
+different_architectures_test() ->
+    % One placeholder byte (0x90, an x86 nop) is embedded unchanged for each backend
+    Payload = <<16#90>>,
+    % Backends exercised by this test
+    Targets = [jit_x86_64, jit_aarch64, jit_armv6m],
+
+    lists:foreach(
+        fun(Target) ->
+            Fresh = jit_dwarf:new(Target, test_module, jit_stream_binary, 1024),
+            case jit_dwarf:elf(Fresh, Payload) of
+                false ->
+                    ok;
+                {ok, _DebugOnly, WithText} ->
+                    % The result must start with the ELF magic number
+                    <<127, $E, $L, $F, _AfterMagic/binary>> = WithText,
+                    % and must contain the payload byte somewhere
+                    ?assert(binary:match(WithText, Payload) =/= nomatch)
+            end
+        end,
+        Targets
+    ).
+
+% Scan Remaining section headers, starting at entry Index, for the one named ".text".
+% Returns {Type, Flags, Size, Addr} of that entry, or not_found when none matches.
+find_text_section(_Headers, _StringTable, 0, _Index) ->
+    not_found;
+find_text_section(Headers, StringTable, Remaining, Index) ->
+    % Entries are 40 bytes each; skip the ones before Index
+    Skip = Index * 40,
+    <<_:Skip/binary, NameOffset:32/little, Type:32/little, Flags:32/little, Addr:32/little,
+        _Offset:32/little, Size:32/little, _/binary>> = Headers,
+
+    % The first field of an entry is the offset of its name in the string table
+    case extract_string_at_offset(StringTable, NameOffset) of
+        <<".text">> ->
+            {Type, Flags, Size, Addr};
+        _OtherName ->
+            find_text_section(Headers, StringTable, Remaining - 1, Index + 1)
+    end.
+
+% Return the bytes of StringTable from Offset up to, but not including, the next NUL
+extract_string_at_offset(StringTable, Offset) ->
+    <<_Skipped:Offset/binary, FromOffset/binary>> = StringTable,
+    % Without the global option the split happens at the first NUL only
+    hd(binary:split(FromOffset, <<0>>)).
diff --git a/tests/libs/jit/jit_riscv32_asm_tests.erl b/tests/libs/jit/jit_riscv32_asm_tests.erl
new file mode 100644
index 0000000000..28a0f4fa58
--- /dev/null
+++ b/tests/libs/jit/jit_riscv32_asm_tests.erl
@@ -0,0 +1,900 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32_asm_tests).
+
+-include_lib("eunit/include/eunit.hrl").
+
+-define(_assertAsmEqual(Bin, Str, Value),
+    ?_assertEqual(jit_tests_common:asm(riscv32, Bin, Str), Value)
+).
+
+%%-----------------------------------------------------------------------------
+%% R-type arithmetic and logical instruction tests
+%%-----------------------------------------------------------------------------
+
+add_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#00628533:32/little>>, "add a0, t0, t1", jit_riscv32_asm:add(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#95aa:16/little>>, "add a1, a1, a0", jit_riscv32_asm:add(a1, a1, a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#97fa:16/little>>, "add a5, a5, t5", jit_riscv32_asm:add(a5, a5, t5)
+        )
+    ].
+
+sub_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#40628533:32/little>>, "sub a0, t0, t1", jit_riscv32_asm:sub(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8d89:16/little>>, "sub a1, a1, a0", jit_riscv32_asm:sub(a1, a1, a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#41e787b3:32/little>>, "sub a5, a5, t5", jit_riscv32_asm:sub(a5, a5, t5)
+        )
+    ].
+
+and_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062f533:32/little>>, "and a0, t0, t1", jit_riscv32_asm:and_(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8df1:16/little>>, "and a1, a1, a2", jit_riscv32_asm:and_(a1, a1, a2)
+        )
+    ].
+
+or_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062e533:32/little>>, "or a0, t0, t1", jit_riscv32_asm:or_(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8dd1:16/little>>, "or a1, a1, a2", jit_riscv32_asm:or_(a1, a1, a2)
+        )
+    ].
+
+xor_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062c533:32/little>>, "xor a0, t0, t1", jit_riscv32_asm:xor_(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8db1:16/little>>, "xor a1, a1, a2", jit_riscv32_asm:xor_(a1, a1, a2)
+        )
+    ].
+
+sll_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#00629533:32/little>>, "sll a0, t0, t1", jit_riscv32_asm:sll(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#00c59633:32/little>>, "sll a2, a1, a2", jit_riscv32_asm:sll(a2, a1, a2)
+        )
+    ].
+
+srl_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062d533:32/little>>, "srl a0, t0, t1", jit_riscv32_asm:srl(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#00c5d633:32/little>>, "srl a2, a1, a2", jit_riscv32_asm:srl(a2, a1, a2)
+        )
+    ].
+
+sra_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#4062d533:32/little>>, "sra a0, t0, t1", jit_riscv32_asm:sra(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#40c5d633:32/little>>, "sra a2, a1, a2", jit_riscv32_asm:sra(a2, a1, a2)
+        )
+    ].
+
+slt_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062a533:32/little>>, "slt a0, t0, t1", jit_riscv32_asm:slt(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#00c5a633:32/little>>, "slt a2, a1, a2", jit_riscv32_asm:slt(a2, a1, a2)
+        )
+    ].
+
+sltu_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0062b533:32/little>>, "sltu a0, t0, t1", jit_riscv32_asm:sltu(a0, t0, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#00c5b633:32/little>>, "sltu a2, a1, a2", jit_riscv32_asm:sltu(a2, a1, a2)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% I-type immediate instruction tests
+%%-----------------------------------------------------------------------------
+
+addi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#01428513:32/little>>, "addi a0, t0, 20", jit_riscv32_asm:addi(a0, t0, 20)
+        ),
+        ?_assertAsmEqual(
+            <<16#15fd:16/little>>, "addi a1, a1, -1", jit_riscv32_asm:addi(a1, a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#7ff00513:32/little>>, "addi a0, zero, 2047", jit_riscv32_asm:addi(a0, zero, 2047)
+        ),
+        ?_assertAsmEqual(
+            <<16#80000593:32/little>>, "addi a1, zero, -2048", jit_riscv32_asm:addi(a1, zero, -2048)
+        )
+    ].
+
+andi_test_() ->
+    [
+        ?_assertAsmEqual(
+            <<16#0ff2f513:32/little>>, "andi a0, t0, 255", jit_riscv32_asm:andi(a0, t0, 255)
+        ),
+        ?_assertAsmEqual(
+            <<16#89bd:16/little>>, "andi a1, a1, 15", jit_riscv32_asm:andi(a1, a1, 15)
+        )
+    ].
+
+ori_test_() -> % ori/3 with 255 and 15; both expectations are 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#0ff2e513:32/little>>, "ori a0, t0, 255", jit_riscv32_asm:ori(a0, t0, 255)
+        ),
+        ?_assertAsmEqual(
+            <<16#00f5e593:32/little>>, "ori a1, a1, 15", jit_riscv32_asm:ori(a1, a1, 15)
+        )
+    ].
+
+xori_test_() -> % xori/3 with 255 and -1; both expectations are 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#0ff2c513:32/little>>, "xori a0, t0, 255", jit_riscv32_asm:xori(a0, t0, 255)
+        ),
+        ?_assertAsmEqual(
+            <<16#fff5c593:32/little>>, "xori a1, a1, -1", jit_riscv32_asm:xori(a1, a1, -1)
+        )
+    ].
+
+slli_test_() -> % slli/3 with shifts 3, 31, 0; only slli a1, a1, 31 expects 16 bits
+    [
+        ?_assertAsmEqual(
+            <<16#00329513:32/little>>, "slli a0, t0, 3", jit_riscv32_asm:slli(a0, t0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#05fe:16/little>>, "slli a1, a1, 31", jit_riscv32_asm:slli(a1, a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#00051513:32/little>>, "slli a0, a0, 0", jit_riscv32_asm:slli(a0, a0, 0)
+        )
+    ].
+
+srli_test_() -> % srli/3 with shifts 3 and 31; srli a1, a1, 31 expects 16 bits
+    [
+        ?_assertAsmEqual(
+            <<16#0032d513:32/little>>, "srli a0, t0, 3", jit_riscv32_asm:srli(a0, t0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#81fd:16/little>>, "srli a1, a1, 31", jit_riscv32_asm:srli(a1, a1, 31)
+        )
+    ].
+
+srai_test_() -> % srai/3 with shifts 3 and 31; srai a1, a1, 31 expects 16 bits
+    [
+        ?_assertAsmEqual(
+            <<16#4032d513:32/little>>, "srai a0, t0, 3", jit_riscv32_asm:srai(a0, t0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#85fd:16/little>>, "srai a1, a1, 31", jit_riscv32_asm:srai(a1, a1, 31)
+        )
+    ].
+
+slti_test_() -> % slti/3 with 20 and -1; both expectations are 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#0142a513:32/little>>, "slti a0, t0, 20", jit_riscv32_asm:slti(a0, t0, 20)
+        ),
+        ?_assertAsmEqual(
+            <<16#fff5a593:32/little>>, "slti a1, a1, -1", jit_riscv32_asm:slti(a1, a1, -1)
+        )
+    ].
+
+sltiu_test_() -> % sltiu/3 with 20 and 1; both expectations are 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#0142b513:32/little>>, "sltiu a0, t0, 20", jit_riscv32_asm:sltiu(a0, t0, 20)
+        ),
+        ?_assertAsmEqual(
+            <<16#00153513:32/little>>, "sltiu a0, a0, 1", jit_riscv32_asm:sltiu(a0, a0, 1)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Load instruction tests
+%%-----------------------------------------------------------------------------
+
+lw_test_() -> % lw/2,3: offsets 0 and 4 expect 16 bits; -4 and 2047 expect 32 bits
+    [
+        ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0, 0)),
+        ?_assertAsmEqual(<<16#4108:16/little>>, "lw a0, 0(a0)", jit_riscv32_asm:lw(a0, a0)),
+        ?_assertAsmEqual(<<16#414c:16/little>>, "lw a1, 4(a0)", jit_riscv32_asm:lw(a1, a0, 4)),
+        ?_assertAsmEqual(
+            <<16#ffc52503:32/little>>, "lw a0, -4(a0)", jit_riscv32_asm:lw(a0, a0, -4)
+        ),
+        ?_assertAsmEqual(
+            <<16#7ff52503:32/little>>, "lw a0, 2047(a0)", jit_riscv32_asm:lw(a0, a0, 2047)
+        )
+    ].
+
+lh_test_() -> % lh/2,3: lh/2 is expected to match lh/3 with offset 0; all 32-bit words
+    [
+        ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0, 0)),
+        ?_assertAsmEqual(<<16#00051503:32/little>>, "lh a0, 0(a0)", jit_riscv32_asm:lh(a0, a0)),
+        ?_assertAsmEqual(<<16#00251583:32/little>>, "lh a1, 2(a0)", jit_riscv32_asm:lh(a1, a0, 2))
+    ].
+
+lhu_test_() -> % lhu/2,3: lhu/2 is expected to match lhu/3 with offset 0; all 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0, 0)
+        ),
+        ?_assertAsmEqual(<<16#00055503:32/little>>, "lhu a0, 0(a0)", jit_riscv32_asm:lhu(a0, a0)),
+        ?_assertAsmEqual(<<16#00255583:32/little>>, "lhu a1, 2(a0)", jit_riscv32_asm:lhu(a1, a0, 2))
+    ].
+
+lb_test_() -> % lb/2,3: lb/2 is expected to match lb/3 with offset 0; all 32-bit words
+    [
+        ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0, 0)),
+        ?_assertAsmEqual(<<16#00050503:32/little>>, "lb a0, 0(a0)", jit_riscv32_asm:lb(a0, a0)),
+        ?_assertAsmEqual(<<16#00150583:32/little>>, "lb a1, 1(a0)", jit_riscv32_asm:lb(a1, a0, 1))
+    ].
+
+lbu_test_() -> % lbu/2,3: lbu/2 is expected to match lbu/3 with offset 0; all 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0, 0)
+        ),
+        ?_assertAsmEqual(<<16#00054503:32/little>>, "lbu a0, 0(a0)", jit_riscv32_asm:lbu(a0, a0)),
+        ?_assertAsmEqual(<<16#00154583:32/little>>, "lbu a1, 1(a0)", jit_riscv32_asm:lbu(a1, a0, 1))
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Store instruction tests
+%%-----------------------------------------------------------------------------
+
+sw_test_() -> % NOTE(review): sw/3 is called (Base, Src, Off) but sw/2 (Src, Base) - confirm
+    [
+        ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a0, a1, 0)),
+        ?_assertAsmEqual(<<16#c10c:16/little>>, "sw a1, 0(a0)", jit_riscv32_asm:sw(a1, a0)),
+        ?_assertAsmEqual(<<16#c14c:16/little>>, "sw a1, 4(a0)", jit_riscv32_asm:sw(a0, a1, 4)),
+        ?_assertAsmEqual(<<16#feb52e23:32/little>>, "sw a1, -4(a0)", jit_riscv32_asm:sw(a0, a1, -4))
+    ].
+
+sh_test_() -> % NOTE(review): sh/3 is called (Base, Src, Off) but sh/2 (Src, Base) - confirm
+    [
+        ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a0, a1, 0)),
+        ?_assertAsmEqual(<<16#00b51023:32/little>>, "sh a1, 0(a0)", jit_riscv32_asm:sh(a1, a0)),
+        ?_assertAsmEqual(<<16#00b51123:32/little>>, "sh a1, 2(a0)", jit_riscv32_asm:sh(a0, a1, 2))
+    ].
+
+sb_test_() -> % NOTE(review): sb/3 is called (Base, Src, Off) but sb/2 (Src, Base) - confirm
+    [
+        ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a0, a1, 0)),
+        ?_assertAsmEqual(<<16#00b50023:32/little>>, "sb a1, 0(a0)", jit_riscv32_asm:sb(a1, a0)),
+        ?_assertAsmEqual(<<16#00b500a3:32/little>>, "sb a1, 1(a0)", jit_riscv32_asm:sb(a0, a1, 1))
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Branch instruction tests
+%%-----------------------------------------------------------------------------
+
+beq_test_() -> % beq/3 with offsets 8, -4, 0; only beq a0, zero, 0 expects 16 bits
+    [
+        ?_assertAsmEqual(
+            <<16#00628463:32/little>>, "beq t0, t1, .+8", jit_riscv32_asm:beq(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb50ee3:32/little>>, "beq a0, a1, .-4", jit_riscv32_asm:beq(a0, a1, -4)
+        ),
+        ?_assertAsmEqual(
+            <<16#c101:16/little>>, "beq a0, zero, .", jit_riscv32_asm:beq(a0, zero, 0)
+        )
+    ].
+
+bne_test_() -> % bne/3 with offsets 8 and -4; both expectations are 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#00629463:32/little>>, "bne t0, t1, .+8", jit_riscv32_asm:bne(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb51ee3:32/little>>, "bne a0, a1, .-4", jit_riscv32_asm:bne(a0, a1, -4)
+        )
+    ].
+
+blt_test_() -> % blt/3 with offsets 8 and -4; both expectations are 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#0062c463:32/little>>, "blt t0, t1, .+8", jit_riscv32_asm:blt(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb54ee3:32/little>>, "blt a0, a1, .-4", jit_riscv32_asm:blt(a0, a1, -4)
+        )
+    ].
+
+bge_test_() -> % bge/3 with offsets 8 and -4; both expectations are 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#0062d463:32/little>>, "bge t0, t1, .+8", jit_riscv32_asm:bge(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb55ee3:32/little>>, "bge a0, a1, .-4", jit_riscv32_asm:bge(a0, a1, -4)
+        )
+    ].
+
+bltu_test_() -> % bltu/3 with offsets 8 and -4; both expectations are 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#0062e463:32/little>>, "bltu t0, t1, .+8", jit_riscv32_asm:bltu(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb56ee3:32/little>>, "bltu a0, a1, .-4", jit_riscv32_asm:bltu(a0, a1, -4)
+        )
+    ].
+
+bgeu_test_() -> % bgeu/3 with offsets 8 and -4; both expectations are 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#0062f463:32/little>>, "bgeu t0, t1, .+8", jit_riscv32_asm:bgeu(t0, t1, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#feb57ee3:32/little>>, "bgeu a0, a1, .-4", jit_riscv32_asm:bgeu(a0, a1, -4)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Jump instruction tests
+%%-----------------------------------------------------------------------------
+
+jal_test_() -> % jal/2 through ra at offsets 8 and -4, then call/2 with 0 and 16#1800
+    [
+        ?_assertAsmEqual(
+            <<16#2021:16/little>>, "jal .+8", jit_riscv32_asm:jal(ra, 8)
+        ),
+        ?_assertAsmEqual(
+            <<16#3ff5:16/little>>, "jal .-4", jit_riscv32_asm:jal(ra, -4)
+        ),
+        ?_assertAsmEqual(
+            <<16#00000517:32/little, 16#9502:16/little>>,
+            "auipc a0, 0\njalr a0",
+            jit_riscv32_asm:call(a0, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#00002517:32/little, 16#800500e7:32/little>>,
+            "auipc a0, 0x2\njalr -2048(a0)",
+            jit_riscv32_asm:call(a0, 16#1800)
+        )
+    ].
+
+jalr_test_() -> % jalr/2,3 through ra: offset 0 expects 16 bits, offset 4 expects 32 bits
+    [
+        ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0, 0)),
+        ?_assertAsmEqual(<<16#9502:16/little>>, "jalr a0", jit_riscv32_asm:jalr(ra, a0)),
+        ?_assertAsmEqual(<<16#004500e7:32/little>>, "jalr 4(a0)", jit_riscv32_asm:jalr(ra, a0, 4))
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Upper immediate instruction tests
+%%-----------------------------------------------------------------------------
+
+lui_test_() -> % lui/2 with 18, 1, -1 (listed as 0xfffff); all expectations are 16 bits
+    [
+        ?_assertAsmEqual(<<16#65c9:16/little>>, "lui a1, 18", jit_riscv32_asm:lui(a1, 18)),
+        ?_assertAsmEqual(<<16#6505:16/little>>, "lui a0, 1", jit_riscv32_asm:lui(a0, 1)),
+        ?_assertAsmEqual(<<16#75fd:16/little>>, "lui a1, 0xfffff", jit_riscv32_asm:lui(a1, -1))
+    ].
+
+auipc_test_() -> % auipc/2 with 18 and 1; both expectations are 32-bit words
+    [
+        ?_assertAsmEqual(<<16#00012597:32/little>>, "auipc a1, 18", jit_riscv32_asm:auipc(a1, 18)),
+        ?_assertAsmEqual(<<16#00001517:32/little>>, "auipc a0, 1", jit_riscv32_asm:auipc(a0, 1))
+    ].
+
+%%-----------------------------------------------------------------------------
+%% Pseudo-instruction tests
+%%-----------------------------------------------------------------------------
+
+nop_test_() -> % nop/0: expects the single 32-bit word 16#00000013
+    [
+        % We want a 4-byte NOP for padding, so use .option norvc to force non-compressed
+        ?_assertAsmEqual(<<16#00000013:32/little>>, ".option norvc\nnop", jit_riscv32_asm:nop())
+    ].
+
+li_small_test_() -> % li/2 with 10 and -1 (16 bits expected) and 2047 (32 bits expected)
+    [
+        ?_assertAsmEqual(<<16#4529:16/little>>, "li a0, 10", jit_riscv32_asm:li(a0, 10)),
+        ?_assertAsmEqual(<<16#557d:16/little>>, "li a0, -1", jit_riscv32_asm:li(a0, -1)),
+        ?_assertAsmEqual(<<16#7ff00513:32/little>>, "li a0, 2047", jit_riscv32_asm:li(a0, 2047))
+    ].
+
+li_large_test_() -> % li/2 for 16#12345, -16#80000000, 16#7FFFFFFF; each expects a lui-led pair
+    [
+        % 0x12345 = 74565 - requires lui + addi
+        ?_assertAsmEqual(
+            <<16#6549:16/little, 16#34550513:32/little>>,
+            "lui a0, 0x12\naddi a0, a0, 0x345",
+            jit_riscv32_asm:li(a0, 16#12345)
+        ),
+        % 0x80000000 = -2147483648 (minimum 32-bit signed)
+        ?_assertAsmEqual(
+            <<16#800005b7:32/little, 16#0581:16/little>>,
+            "lui a1, 0x80000\nc.addi a1, 0",
+            jit_riscv32_asm:li(a1, -16#80000000)
+        ),
+        % 0x7FFFFFFF = 2147483647 (maximum 32-bit signed)
+        ?_assertAsmEqual(
+            <<16#80000537:32/little, 16#157d:16/little>>,
+            "lui a0, 0x80000\naddi a0, a0, -1",
+            jit_riscv32_asm:li(a0, 16#7FFFFFFF)
+        )
+    ].
+
+mv_test_() -> % mv/2; NOTE(review): both cases use rd == rs, so operand order is unchecked
+    [
+        ?_assertAsmEqual(<<16#852a:16/little>>, "mv a0, a0", jit_riscv32_asm:mv(a0, a0)),
+        ?_assertAsmEqual(<<16#85ae:16/little>>, "mv a1, a1", jit_riscv32_asm:mv(a1, a1))
+    ].
+
+not_test_() -> % not_/2; NOTE(review): both cases use rd == rs, so operand order is unchecked
+    [
+        ?_assertAsmEqual(<<16#fff54513:32/little>>, "not a0, a0", jit_riscv32_asm:not_(a0, a0)),
+        ?_assertAsmEqual(<<16#fff5c593:32/little>>, "not a1, a1", jit_riscv32_asm:not_(a1, a1))
+    ].
+
+neg_test_() -> % neg/2; NOTE(review): both cases use rd == rs, so operand order is unchecked
+    [
+        ?_assertAsmEqual(<<16#40a00533:32/little>>, "neg a0, a0", jit_riscv32_asm:neg(a0, a0)),
+        ?_assertAsmEqual(<<16#40b005b3:32/little>>, "neg a1, a1", jit_riscv32_asm:neg(a1, a1))
+    ].
+
+j_test_() -> % j/1 with offsets 8 and -4; both expectations are 16 bits
+    [
+        ?_assertAsmEqual(
+            <<16#a021:16/little>>, "j .+8", jit_riscv32_asm:j(8)
+        ),
+        ?_assertAsmEqual(
+            <<16#bff5:16/little>>, "j .-4", jit_riscv32_asm:j(-4)
+        )
+    ].
+
+jr_test_() -> % jr/1 for a0 and t0; both expectations are 16 bits
+    [
+        ?_assertAsmEqual(<<16#8502:16/little>>, "jr a0", jit_riscv32_asm:jr(a0)),
+        ?_assertAsmEqual(<<16#8282:16/little>>, "jr t0", jit_riscv32_asm:jr(t0))
+    ].
+
+ret_test_() -> % ret/0: expects the 16-bit value 16#8082
+    [
+        ?_assertAsmEqual(<<16#8082:16/little>>, "ret", jit_riscv32_asm:ret())
+    ].
+
+%%-----------------------------------------------------------------------------
+%% M Extension (Multiply/Divide) instruction tests
+%%-----------------------------------------------------------------------------
+
+mul_test_() -> % mul/3 with four register triples; all expectations are 32-bit words
+    [
+        ?_assertAsmEqual(
+            <<16#02f50533:32/little>>, "mul a0, a0, a5", jit_riscv32_asm:mul(a0, a0, a5)
+        ),
+        ?_assertAsmEqual(
+            <<16#03f60633:32/little>>, "mul a2, a2, t6", jit_riscv32_asm:mul(a2, a2, t6)
+        ),
+        ?_assertAsmEqual(
+            <<16#026585b3:32/little>>, "mul a1, a1, t1", jit_riscv32_asm:mul(a1, a1, t1)
+        ),
+        ?_assertAsmEqual(
+            <<16#02d282b3:32/little>>, "mul t0, t0, a3", jit_riscv32_asm:mul(t0, t0, a3)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% System instruction tests
+%%-----------------------------------------------------------------------------
+
+c_ebreak_test_() -> % c_ebreak/0: expects the 16-bit value 16#9002
+    [
+        ?_assertAsmEqual(
+            <<16#9002:16/little>>, "c.ebreak", jit_riscv32_asm:c_ebreak()
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Arithmetic and Logical instruction tests
+%%-----------------------------------------------------------------------------
+
+c_add_test_() -> % c_add/2 for (a0,a2), (a1,a5), (a0,s0); 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#9532:16/little>>, "c.add a0, a2", jit_riscv32_asm:c_add(a0, a2)
+        ),
+        ?_assertAsmEqual(
+            <<16#95be:16/little>>, "c.add a1, a5", jit_riscv32_asm:c_add(a1, a5)
+        ),
+        ?_assertAsmEqual(
+            <<16#9522:16/little>>, "c.add a0, s0", jit_riscv32_asm:c_add(a0, s0)
+        )
+    ].
+
+c_mv_test_() -> % c_mv/2 for (a0,a2), (a1,a5), (s0,a0); 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#8532:16/little>>, "c.mv a0, a2", jit_riscv32_asm:c_mv(a0, a2)
+        ),
+        ?_assertAsmEqual(
+            <<16#85be:16/little>>, "c.mv a1, a5", jit_riscv32_asm:c_mv(a1, a5)
+        ),
+        ?_assertAsmEqual(
+            <<16#842a:16/little>>, "c.mv s0, a0", jit_riscv32_asm:c_mv(s0, a0)
+        )
+    ].
+
+c_sub_test_() -> % c_sub/2 for (a0,a0), (a1,a1), (s0,a1); 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#8d09:16/little>>, "c.sub a0, a0", jit_riscv32_asm:c_sub(a0, a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8d8d:16/little>>, "c.sub a1, a1", jit_riscv32_asm:c_sub(a1, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c0d:16/little>>, "c.sub s0, a1", jit_riscv32_asm:c_sub(s0, a1)
+        )
+    ].
+
+c_and_test_() -> % c_and/2 for (a0,a1), (a5,a1), (s0,a1); 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#8d6d:16/little>>, "c.and a0, a1", jit_riscv32_asm:c_and(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fed:16/little>>, "c.and a5, a1", jit_riscv32_asm:c_and(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c6d:16/little>>, "c.and s0, a1", jit_riscv32_asm:c_and(s0, a1)
+        )
+    ].
+
+c_or_test_() -> % c_or/2 for (a0,a1), (a5,a1), (s0,a1); 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#8d4d:16/little>>, "c.or a0, a1", jit_riscv32_asm:c_or(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fcd:16/little>>, "c.or a5, a1", jit_riscv32_asm:c_or(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c4d:16/little>>, "c.or s0, a1", jit_riscv32_asm:c_or(s0, a1)
+        )
+    ].
+
+c_xor_test_() -> % c_xor/2 for (a0,a1), (a5,a1), (s0,a1); 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#8d2d:16/little>>, "c.xor a0, a1", jit_riscv32_asm:c_xor(a0, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8fad:16/little>>, "c.xor a5, a1", jit_riscv32_asm:c_xor(a5, a1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8c2d:16/little>>, "c.xor s0, a1", jit_riscv32_asm:c_xor(s0, a1)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Immediate instruction tests
+%%-----------------------------------------------------------------------------
+
+c_addi_test_() -> % c_addi/2 with immediates 4, -1, 16, -8; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#0511:16/little>>, "c.addi a0, 4", jit_riscv32_asm:c_addi(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#15fd:16/little>>, "c.addi a1, -1", jit_riscv32_asm:c_addi(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#0541:16/little>>, "c.addi a0, 16", jit_riscv32_asm:c_addi(a0, 16)
+        ),
+        ?_assertAsmEqual(
+            <<16#1561:16/little>>, "c.addi a0, -8", jit_riscv32_asm:c_addi(a0, -8)
+        )
+    ].
+
+c_andi_test_() -> % c_andi/2 with immediates 10, -1, 16; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#8929:16/little>>, "c.andi a0, 10", jit_riscv32_asm:c_andi(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#99fd:16/little>>, "c.andi a1, -1", jit_riscv32_asm:c_andi(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#8941:16/little>>, "c.andi a0, 16", jit_riscv32_asm:c_andi(a0, 16)
+        )
+    ].
+
+c_li_test_() -> % c_li/2 with immediates 10, -1, 1, -32; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#4529:16/little>>, "c.li a0, 10", jit_riscv32_asm:c_li(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#55fd:16/little>>, "c.li a1, -1", jit_riscv32_asm:c_li(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#4505:16/little>>, "c.li a0, 1", jit_riscv32_asm:c_li(a0, 1)
+        ),
+        ?_assertAsmEqual(
+            <<16#5501:16/little>>, "c.li a0, -32", jit_riscv32_asm:c_li(a0, -32)
+        )
+    ].
+
+c_lui_test_() -> % c_lui/2 with 10, -1 (listed as 0xfffff), 1; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#6529:16/little>>, "c.lui a0, 10", jit_riscv32_asm:c_lui(a0, 10)
+        ),
+        ?_assertAsmEqual(
+            <<16#75fd:16/little>>, "c.lui a1, 0xfffff", jit_riscv32_asm:c_lui(a1, -1)
+        ),
+        ?_assertAsmEqual(
+            <<16#6505:16/little>>, "c.lui a0, 1", jit_riscv32_asm:c_lui(a0, 1)
+        )
+    ].
+
+c_addi16sp_test_() -> % c_addi16sp/1 with 16, -512, 80; sp is not passed in the call
+    [
+        ?_assertAsmEqual(
+            <<16#6141:16/little>>, "c.addi16sp sp, 16", jit_riscv32_asm:c_addi16sp(16)
+        ),
+        ?_assertAsmEqual(
+            <<16#7101:16/little>>, "c.addi16sp sp, -512", jit_riscv32_asm:c_addi16sp(-512)
+        ),
+        ?_assertAsmEqual(
+            <<16#6161:16/little>>, "c.addi16sp sp, 80", jit_riscv32_asm:c_addi16sp(80)
+        )
+    ].
+
+c_addi4spn_test_() -> % c_addi4spn/2 with 4, 32, 1020; sp is not passed in the call
+    [
+        ?_assertAsmEqual(
+            <<16#0048:16/little>>, "c.addi4spn a0, sp, 4", jit_riscv32_asm:c_addi4spn(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#1010:16/little>>, "c.addi4spn a2, sp, 32", jit_riscv32_asm:c_addi4spn(a2, 32)
+        ),
+        ?_assertAsmEqual(
+            <<16#1ffc:16/little>>,
+            "c.addi4spn a5, sp, 1020",
+            jit_riscv32_asm:c_addi4spn(a5, 1020)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Shift instruction tests
+%%-----------------------------------------------------------------------------
+
+c_slli_test_() -> % c_slli/2 with shift amounts 3, 31, 16; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#050e:16/little>>, "c.slli a0, 3", jit_riscv32_asm:c_slli(a0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#05fe:16/little>>, "c.slli a1, 31", jit_riscv32_asm:c_slli(a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#0542:16/little>>, "c.slli a0, 16", jit_riscv32_asm:c_slli(a0, 16)
+        )
+    ].
+
+c_srli_test_() -> % c_srli/2 with shift amounts 3, 31, 16; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#810d:16/little>>, "c.srli a0, 3", jit_riscv32_asm:c_srli(a0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#81fd:16/little>>, "c.srli a1, 31", jit_riscv32_asm:c_srli(a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#8141:16/little>>, "c.srli a0, 16", jit_riscv32_asm:c_srli(a0, 16)
+        )
+    ].
+
+c_srai_test_() -> % c_srai/2 with shift amounts 3, 31, 16; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#850d:16/little>>, "c.srai a0, 3", jit_riscv32_asm:c_srai(a0, 3)
+        ),
+        ?_assertAsmEqual(
+            <<16#85fd:16/little>>, "c.srai a1, 31", jit_riscv32_asm:c_srai(a1, 31)
+        ),
+        ?_assertAsmEqual(
+            <<16#8541:16/little>>, "c.srai a0, 16", jit_riscv32_asm:c_srai(a0, 16)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Load/Store instruction tests
+%%-----------------------------------------------------------------------------
+
+c_lw_test_() -> % c_lw/2 is called with {Base, Offset} tuples; offsets 0, 4, 124
+    [
+        ?_assertAsmEqual(
+            <<16#4188:16/little>>, "c.lw a0, 0(a1)", jit_riscv32_asm:c_lw(a0, {a1, 0})
+        ),
+        ?_assertAsmEqual(
+            <<16#41d8:16/little>>, "c.lw a4, 4(a1)", jit_riscv32_asm:c_lw(a4, {a1, 4})
+        ),
+        ?_assertAsmEqual(
+            <<16#5ffc:16/little>>, "c.lw a5, 124(a5)", jit_riscv32_asm:c_lw(a5, {a5, 124})
+        )
+    ].
+
+c_sw_test_() -> % c_sw/2 is called with {Base, Offset} tuples; offsets 0, 4, 124
+    [
+        ?_assertAsmEqual(
+            <<16#c188:16/little>>, "c.sw a0, 0(a1)", jit_riscv32_asm:c_sw(a0, {a1, 0})
+        ),
+        ?_assertAsmEqual(
+            <<16#c1d8:16/little>>, "c.sw a4, 4(a1)", jit_riscv32_asm:c_sw(a4, {a1, 4})
+        ),
+        ?_assertAsmEqual(
+            <<16#dffc:16/little>>, "c.sw a5, 124(a5)", jit_riscv32_asm:c_sw(a5, {a5, 124})
+        )
+    ].
+
+c_lwsp_test_() -> % c_lwsp/2 with sp-relative offsets 0, 4, 252; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#4502:16/little>>, "c.lwsp a0, 0(sp)", jit_riscv32_asm:c_lwsp(a0, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#4512:16/little>>, "c.lwsp a0, 4(sp)", jit_riscv32_asm:c_lwsp(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#50fe:16/little>>, "c.lwsp ra, 252(sp)", jit_riscv32_asm:c_lwsp(ra, 252)
+        )
+    ].
+
+c_swsp_test_() -> % c_swsp/2 with sp-relative offsets 0, 4, 252; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#c02a:16/little>>, "c.swsp a0, 0(sp)", jit_riscv32_asm:c_swsp(a0, 0)
+        ),
+        ?_assertAsmEqual(
+            <<16#c22a:16/little>>, "c.swsp a0, 4(sp)", jit_riscv32_asm:c_swsp(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#dfe6:16/little>>, "c.swsp s9, 252(sp)", jit_riscv32_asm:c_swsp(s9, 252)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Branch and Jump instruction tests
+%%-----------------------------------------------------------------------------
+
+c_beqz_test_() -> % c_beqz/2 with offsets 4, -6, 0; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#c111:16/little>>, "c.beqz a0, .+4", jit_riscv32_asm:c_beqz(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#dced:16/little>>, "c.beqz s1, .-6", jit_riscv32_asm:c_beqz(s1, -6)
+        ),
+        ?_assertAsmEqual(
+            <<16#c101:16/little>>, "c.beqz a0, .", jit_riscv32_asm:c_beqz(a0, 0)
+        )
+    ].
+
+c_bnez_test_() -> % c_bnez/2 with offsets 4, -6, 0; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#e111:16/little>>, "c.bnez a0, .+4", jit_riscv32_asm:c_bnez(a0, 4)
+        ),
+        ?_assertAsmEqual(
+            <<16#fced:16/little>>, "c.bnez s1, .-6", jit_riscv32_asm:c_bnez(s1, -6)
+        ),
+        ?_assertAsmEqual(
+            <<16#e101:16/little>>, "c.bnez a0, .", jit_riscv32_asm:c_bnez(a0, 0)
+        )
+    ].
+
+c_j_test_() -> % c_j/1 with offsets 4, -6, 0; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#a011:16/little>>, "c.j .+4", jit_riscv32_asm:c_j(4)
+        ),
+        ?_assertAsmEqual(
+            <<16#bfed:16/little>>, "c.j .-6", jit_riscv32_asm:c_j(-6)
+        ),
+        ?_assertAsmEqual(
+            <<16#a001:16/little>>, "c.j .", jit_riscv32_asm:c_j(0)
+        )
+    ].
+
+c_jal_test_() -> % c_jal/1 with offsets 8, -4, 0; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#2021:16/little>>, "c.jal .+8", jit_riscv32_asm:c_jal(8)
+        ),
+        ?_assertAsmEqual(
+            <<16#3ff5:16/little>>, "c.jal .-4", jit_riscv32_asm:c_jal(-4)
+        ),
+        ?_assertAsmEqual(
+            <<16#2001:16/little>>, "c.jal .", jit_riscv32_asm:c_jal(0)
+        )
+    ].
+
+c_jr_test_() -> % c_jr/1 for a0, s0, ra; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#8502:16/little>>, "c.jr a0", jit_riscv32_asm:c_jr(a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8402:16/little>>, "c.jr s0", jit_riscv32_asm:c_jr(s0)
+        ),
+        ?_assertAsmEqual(
+            <<16#8082:16/little>>, "c.jr ra", jit_riscv32_asm:c_jr(ra)
+        )
+    ].
+
+c_jalr_test_() -> % c_jalr/1 for a0 and s0; 16 bits each
+    [
+        ?_assertAsmEqual(
+            <<16#9502:16/little>>, "c.jalr a0", jit_riscv32_asm:c_jalr(a0)
+        ),
+        ?_assertAsmEqual(
+            <<16#9402:16/little>>, "c.jalr s0", jit_riscv32_asm:c_jalr(s0)
+        )
+    ].
+
+%%-----------------------------------------------------------------------------
+%% C Extension - Pseudo-instruction tests
+%%-----------------------------------------------------------------------------
+
+c_nop_test_() -> % c_nop/0: expects the 16-bit value 16#0001
+    [
+        ?_assertAsmEqual(
+            <<16#0001:16/little>>, "c.nop", jit_riscv32_asm:c_nop()
+        )
+    ].
diff --git a/tests/libs/jit/jit_riscv32_tests.erl b/tests/libs/jit/jit_riscv32_tests.erl
new file mode 100644
index 0000000000..f398cb3f49
--- /dev/null
+++ b/tests/libs/jit/jit_riscv32_tests.erl
@@ -0,0 +1,3432 @@
+%
+% This file is part of AtomVM.
+%
+% Copyright 2025 Paul Guyot <pguyot@kallisys.net>
+%
+% Licensed under the Apache License, Version 2.0 (the "License");
+% you may not use this file except in compliance with the License.
+% You may obtain a copy of the License at
+%
+%    http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+% See the License for the specific language governing permissions and
+% limitations under the License.
+%
+% SPDX-License-Identifier: Apache-2.0 OR LGPL-2.1-or-later
+%
+
+-module(jit_riscv32_tests).
+
+-ifdef(TEST).
+-include_lib("eunit/include/eunit.hrl").
+-endif.
+
+-include("jit/include/jit.hrl").
+-include("jit/src/term.hrl").
+-include("jit/src/default_atoms.hrl").
+-include("jit/src/primitives.hrl").
+
+-define(BACKEND, jit_riscv32).
+
+% disassembly obtained with:
+% riscv32-unknown-elf-objdump -b binary -m riscv:rv32 -D dump.bin
+
+call_primitive_0_test() ->
+    % Calls primitive index 0 with [ctx, jit_state]: the result register must be
+    % t6 and the emitted stream must equal the bytes of the listing below.
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Emitted, Reg} = ?BACKEND:call_primitive(Fresh, 0, [ctx, jit_state]),
+    ?assertEqual(t6, Reg),
+    Expected = dump_to_bin(<<
+        "   0:  00062f83            lw  t6,0(a2)\n"
+        "   4:  1141                addi    sp,sp,-16\n"
+        "   6:  c006                sw  ra,0(sp)\n"
+        "   8:  c22a                sw  a0,4(sp)\n"
+        "   a:  c42e                sw  a1,8(sp)\n"
+        "   c:  c632                sw  a2,12(sp)\n"
+        "   e:  9f82                jalr    t6\n"
+        "  10:  8faa                mv  t6,a0\n"
+        "  12:  4082                lw  ra,0(sp)\n"
+        "  14:  4512                lw  a0,4(sp)\n"
+        "  16:  45a2                lw  a1,8(sp)\n"
+        "  18:  4632                lw  a2,12(sp)\n"
+        "  1a:  0141                addi    sp,sp,16"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Emitted)).
+
+call_primitive_1_test() ->
+    % Calls primitive index 1 with [ctx, jit_state]: the result register must be
+    % t6 and the emitted stream must equal the bytes of the listing below.
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Emitted, Reg} = ?BACKEND:call_primitive(Fresh, 1, [ctx, jit_state]),
+    ?assertEqual(t6, Reg),
+    Expected = dump_to_bin(<<
+        "   0:  00462f83            lw  t6,4(a2)\n"
+        "   4:  1141                addi    sp,sp,-16\n"
+        "   6:  c006                sw  ra,0(sp)\n"
+        "   8:  c22a                sw  a0,4(sp)\n"
+        "   a:  c42e                sw  a1,8(sp)\n"
+        "   c:  c632                sw  a2,12(sp)\n"
+        "   e:  9f82                jalr    t6\n"
+        "  10:  8faa                mv  t6,a0\n"
+        "  12:  4082                lw  ra,0(sp)\n"
+        "  14:  4512                lw  a0,4(sp)\n"
+        "  16:  45a2                lw  a1,8(sp)\n"
+        "  18:  4632                lw  a2,12(sp)\n"
+        "  1a:  0141                addi    sp,sp,16"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Emitted)).
+
+call_primitive_2_args_test() ->
+    % Calls primitive index 2 with ctx and the integers 42, 43, 44: the result
+    % register must be t6 and the stream must equal the listing's bytes.
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Emitted, Reg} = ?BACKEND:call_primitive(Fresh, 2, [ctx, 42, 43, 44]),
+    ?assertEqual(t6, Reg),
+    Expected = dump_to_bin(<<
+        "   0:  00862f83            lw  t6,8(a2)\n"
+        "   4:  1141                addi    sp,sp,-16\n"
+        "   6:  c006                sw  ra,0(sp)\n"
+        "   8:  c22a                sw  a0,4(sp)\n"
+        "   a:  c42e                sw  a1,8(sp)\n"
+        "   c:  c632                sw  a2,12(sp)\n"
+        "   e:  02a00593            li  a1,42\n"
+        "  12:  02b00613            li  a2,43\n"
+        "  16:  02c00693            li  a3,44\n"
+        "  1a:  9f82                jalr    t6\n"
+        "  1c:  8faa                mv  t6,a0\n"
+        "  1e:  4082                lw  ra,0(sp)\n"
+        "  20:  4512                lw  a0,4(sp)\n"
+        "  22:  45a2                lw  a1,8(sp)\n"
+        "  24:  4632                lw  a2,12(sp)\n"
+        "  26:  0141                addi    sp,sp,16"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Emitted)).
+
+call_primitive_5_args_test() ->
+    % Uses call_primitive_last/3 on ?PRIM_ALLOCATE with five arguments; the
+    % expected listing ends with `jr t6`.
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Emitted = ?BACKEND:call_primitive_last(Fresh, ?PRIM_ALLOCATE, [ctx, jit_state, 16, 32, 2]),
+    Expected = dump_to_bin(<<
+        "   0:  01462f83            lw  t6,20(a2)\n"
+        "   4:  4641                li  a2,16\n"
+        "   6:  02000693            li  a3,32\n"
+        "   a:  4709                li  a4,2\n"
+        "   c:  8f82                jr  t6"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Emitted)).
+
+call_primitive_6_args_test() ->
+    % Six-argument call to ?PRIM_BITSTRING_EXTRACT_INTEGER in which two of the
+    % arguments are native registers handed over as {free, Reg}.
+    S0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % bin_ptr comes from x_reg 0 and is masked (same shape as the get_list_test pattern)
+    {S1, BinPtrReg} = ?BACKEND:move_to_native_register(S0, {x_reg, 0}),
+    S2 = ?BACKEND:and_(S1, BinPtrReg, ?TERM_PRIMARY_CLEAR_MASK),
+    % A second native register, passed as the last parameter, covers {free, Reg} handling
+    {S3, LastReg} = ?BACKEND:move_to_native_register(S2, {x_reg, 1}),
+    Args = [ctx, jit_state, {free, BinPtrReg}, 64, 8, {free, LastReg}],
+    {S4, _} = ?BACKEND:call_primitive(S3, ?PRIM_BITSTRING_EXTRACT_INTEGER, Args),
+    Actual = ?BACKEND:stream(S4),
+    Expected = dump_to_bin(<<
+        "   0:  01852f83            lw  t6,24(a0)\n"
+        "   4:  4f0d                li  t5,3\n"
+        "   6:  ffff4f13            not t5,t5\n"
+        "   a:  01efffb3            and t6,t6,t5\n"
+        "   e:  01c52f03            lw  t5,28(a0)\n"
+        "  12:  0b800e93            li  t4,184\n"
+        "  16:  9eb2                add t4,t4,a2\n"
+        "  18:  000eae83            lw  t4,0(t4)\n"
+        "  1c:  1141                addi    sp,sp,-16\n"
+        "  1e:  c006                sw  ra,0(sp)\n"
+        "  20:  c22a                sw  a0,4(sp)\n"
+        "  22:  c42e                sw  a1,8(sp)\n"
+        "  24:  c632                sw  a2,12(sp)\n"
+        "  26:  867e                mv  a2,t6\n"
+        "  28:  04000693            li  a3,64\n"
+        "  2c:  4721                li  a4,8\n"
+        "  2e:  87fa                mv  a5,t5\n"
+        "  30:  9e82                jalr    t4\n"
+        "  32:  8eaa                mv  t4,a0\n"
+        "  34:  4082                lw  ra,0(sp)\n"
+        "  36:  4512                lw  a0,4(sp)\n"
+        "  38:  45a2                lw  a1,8(sp)\n"
+        "  3a:  4632                lw  a2,12(sp)\n"
+        "  3c:  0141                addi    sp,sp,16"
+    >>),
+    ?assertEqual(Expected, Actual).
+
+call_primitive_extended_regs_test() ->
+    % Three PRIM_EXTENDED_REGISTER_PTR results (19, 20, 19); the first two feed PRIM_PUT_LIST
+    Dump = <<
+        "   0:  04862f83            lw  t6,72(a2)\n"
+        "   4:  1141                addi    sp,sp,-16\n"
+        "   6:  c006                sw  ra,0(sp)\n"
+        "   8:  c22a                sw  a0,4(sp)\n"
+        "   a:  c42e                sw  a1,8(sp)\n"
+        "   c:  c632                sw  a2,12(sp)\n"
+        "   e:  45cd                li  a1,19\n"
+        "  10:  9f82                jalr    t6\n"
+        "  12:  8faa                mv  t6,a0\n"
+        "  14:  4082                lw  ra,0(sp)\n"
+        "  16:  4512                lw  a0,4(sp)\n"
+        "  18:  45a2                lw  a1,8(sp)\n"
+        "  1a:  4632                lw  a2,12(sp)\n"
+        "  1c:  0141                addi    sp,sp,16\n"
+        "  1e:  04862f03            lw  t5,72(a2)\n"
+        "  22:  1101                addi    sp,sp,-32\n"
+        "  24:  c006                sw  ra,0(sp)\n"
+        "  26:  c22a                sw  a0,4(sp)\n"
+        "  28:  c42e                sw  a1,8(sp)\n"
+        "  2a:  c632                sw  a2,12(sp)\n"
+        "  2c:  c87e                sw  t6,16(sp)\n"
+        "  2e:  45d1                li  a1,20\n"
+        "  30:  9f02                jalr    t5\n"
+        "  32:  8f2a                mv  t5,a0\n"
+        "  34:  4082                lw  ra,0(sp)\n"
+        "  36:  4512                lw  a0,4(sp)\n"
+        "  38:  45a2                lw  a1,8(sp)\n"
+        "  3a:  4632                lw  a2,12(sp)\n"
+        "  3c:  4fc2                lw  t6,16(sp)\n"
+        "  3e:  02010113            addi    sp,sp,32\n"
+        "  42:  04862e83            lw  t4,72(a2)\n"
+        "  46:  1101                addi    sp,sp,-32\n"
+        "  48:  c006                sw  ra,0(sp)\n"
+        "  4a:  c22a                sw  a0,4(sp)\n"
+        "  4c:  c42e                sw  a1,8(sp)\n"
+        "  4e:  c632                sw  a2,12(sp)\n"
+        "  50:  c87a                sw  t5,16(sp)\n"
+        "  52:  ca7e                sw  t6,20(sp)\n"
+        "  54:  45cd                li  a1,19\n"
+        "  56:  9e82                jalr    t4\n"
+        "  58:  8eaa                mv  t4,a0\n"
+        "  5a:  4082                lw  ra,0(sp)\n"
+        "  5c:  4512                lw  a0,4(sp)\n"
+        "  5e:  45a2                lw  a1,8(sp)\n"
+        "  60:  4632                lw  a2,12(sp)\n"
+        "  62:  4f42                lw  t5,16(sp)\n"
+        "  64:  4fd2                lw  t6,20(sp)\n"
+        "  66:  02010113            addi    sp,sp,32\n"
+        "  6a:  03462e03            lw  t3,52(a2)\n"
+        "  6e:  1101                addi    sp,sp,-32\n"
+        "  70:  c006                sw  ra,0(sp)\n"
+        "  72:  c22a                sw  a0,4(sp)\n"
+        "  74:  c42e                sw  a1,8(sp)\n"
+        "  76:  c632                sw  a2,12(sp)\n"
+        "  78:  c876                sw  t4,16(sp)\n"
+        "  7a:  000fa583            lw  a1,0(t6)\n"
+        "  7e:  000f2603            lw  a2,0(t5)\n"
+        "  82:  9e02                jalr    t3\n"
+        "  84:  8e2a                mv  t3,a0\n"
+        "  86:  4082                lw  ra,0(sp)\n"
+        "  88:  4512                lw  a0,4(sp)\n"
+        "  8a:  45a2                lw  a1,8(sp)\n"
+        "  8c:  4632                lw  a2,12(sp)\n"
+        "  8e:  4ec2                lw  t4,16(sp)\n"
+        "  90:  02010113            addi    sp,sp,32\n"
+        "  94:  01cea023            sw  t3,0(t4)"
+    >>,
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, PtrA} = ?BACKEND:call_primitive(St0, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    {St2, PtrB} = ?BACKEND:call_primitive(St1, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 20]),
+    {St3, PtrC} = ?BACKEND:call_primitive(St2, ?PRIM_EXTENDED_REGISTER_PTR, [ctx, 19]),
+    PutListArgs = [ctx, {free, {ptr, PtrA}}, {free, {ptr, PtrB}}],
+    {St4, ListReg} = ?BACKEND:call_primitive(St3, ?PRIM_PUT_LIST, PutListArgs),
+    St5 = ?BACKEND:move_to_vm_register(St4, ListReg, {ptr, PtrC}),
+    St6 = ?BACKEND:free_native_registers(St5, [ListReg, {ptr, PtrC}]),
+    ?BACKEND:assert_all_native_free(St6),
+    Stream = ?BACKEND:stream(St6),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_few_free_regs_test() ->
+    % Five immediates must land in t6 down to t2, leaving few native registers for the call
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Load = fun({Imm, Reg}, St) ->
+        {Next, Reg} = ?BACKEND:move_to_native_register(St, Imm),
+        Next
+    end,
+    St5 = lists:foldl(Load, St0, [{1, t6}, {2, t5}, {3, t4}, {4, t3}, {5, t2}]),
+    Args = [t5, t6, {free, t3}, t4, {free, t2}],
+    {St6, ResultReg} = ?BACKEND:call_primitive(St5, ?PRIM_BITSTRING_INSERT_INTEGER, Args),
+    St7 = ?BACKEND:free_native_registers(St6, [ResultReg, t5, t6, t4]),
+    ?BACKEND:assert_all_native_free(St7),
+    Stream = ?BACKEND:stream(St7),
+    Dump = <<
+        "   0:  4f85                li  t6,1\n"
+        "   2:  4f09                li  t5,2\n"
+        "   4:  4e8d                li  t4,3\n"
+        "   6:  4e11                li  t3,4\n"
+        "   8:  4395                li  t2,5\n"
+        "   a:  0e400313            li  t1,228\n"
+        "   e:  9332                add t1,t1,a2\n"
+        "  10:  00032303            lw  t1,0(t1)\n"
+        "  14:  1101                addi    sp,sp,-32\n"
+        "  16:  c006                sw  ra,0(sp)\n"
+        "  18:  c22a                sw  a0,4(sp)\n"
+        "  1a:  c42e                sw  a1,8(sp)\n"
+        "  1c:  c632                sw  a2,12(sp)\n"
+        "  1e:  c876                sw  t4,16(sp)\n"
+        "  20:  ca7a                sw  t5,20(sp)\n"
+        "  22:  cc7e                sw  t6,24(sp)\n"
+        "  24:  857a                mv  a0,t5\n"
+        "  26:  85fe                mv  a1,t6\n"
+        "  28:  8672                mv  a2,t3\n"
+        "  2a:  86f6                mv  a3,t4\n"
+        "  2c:  871e                mv  a4,t2\n"
+        "  2e:  9302                jalr    t1\n"
+        "  30:  832a                mv  t1,a0\n"
+        "  32:  4082                lw  ra,0(sp)\n"
+        "  34:  4512                lw  a0,4(sp)\n"
+        "  36:  45a2                lw  a1,8(sp)\n"
+        "  38:  4632                lw  a2,12(sp)\n"
+        "  3a:  4ec2                lw  t4,16(sp)\n"
+        "  3c:  4f52                lw  t5,20(sp)\n"
+        "  3e:  4fe2                lw  t6,24(sp)\n"
+        "  40:  02010113            addi    sp,sp,32"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_ext_only_test() ->
+    Dump = <<
+        "   0:  0085af83            lw  t6,8(a1)\n"
+        "   4:  1ffd                addi    t6,t6,-1\n"
+        "   6:  01f5a423            sw  t6,8(a1)\n"
+        "   a:  000f9b63            bnez    t6,0x20\n"
+        "   e:  00000f97            auipc   t6,0x0\n"
+        "  12:  0fc9                addi    t6,t6,18 # 0x20\n"
+        "  14:  0001                nop\n"
+        "  16:  01f5a223            sw  t6,4(a1)\n"
+        "  1a:  00862f83            lw  t6,8(a2)\n"
+        "  1e:  8f82                jr  t6\n"
+        "  20:  01062f83            lw  t6,16(a2)\n"
+        "  24:  02400613            li  a2,36\n"
+        "  28:  4689                li  a3,2\n"
+        "  2a:  4709                li  a4,2\n"
+        "  2c:  57fd                li  a5,-1\n"
+        "  2e:  8f82                jr  t6"
+    >>,
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(St0),
+    St2 = ?BACKEND:call_primitive_last(St1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, -1]),
+    Stream = ?BACKEND:stream(St2),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_last_5_args_test() ->
+    % Tail call with five arguments; the last one is a native register passed as {free, Reg}
+    Dump = <<
+        "   0: 01852f83            lw  t6,24(a0)\n"
+        "   4: 04c62f03            lw  t5,76(a2)\n"
+        "   8: 4621                li  a2,8\n"
+        "   a: 2cb00693            li  a3,715\n"
+        "   e: 877e                mv  a4,t6\n"
+        "  10: 8f02                jr  t5"
+    >>,
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {St1, ValueReg} = ?BACKEND:move_to_native_register(St0, {x_reg, 0}),
+    Args = [ctx, jit_state, offset, ?CASE_CLAUSE_ATOM, {free, ValueReg}],
+    St2 = ?BACKEND:call_primitive_last(St1, ?PRIM_RAISE_ERROR_TUPLE, Args),
+    Stream = ?BACKEND:stream(St2),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_ext_last_test() ->
+    Dump = <<
+        "   0: 0085af83            lw  t6,8(a1)\n"
+        "   4: 1ffd                    addi    t6,t6,-1\n"
+        "   6: 01f5a423            sw  t6,8(a1)\n"
+        "   a: 000f9b63            bnez    t6,0x20\n"
+        "   e: 00000f97            auipc   t6,0x0\n"
+        "   12:    0fc9                    addi    t6,t6,18 # 0x20\n"
+        "   14:    0001                    nop\n"
+        "   16:    01f5a223            sw  t6,4(a1)\n"
+        "   1a:    00862f83            lw  t6,8(a2)\n"
+        "   1e:    8f82                    jr  t6\n"
+        "   20:    01062f83            lw  t6,16(a2)\n"
+        "   24:    02400613            li  a2,36\n"
+        "   28:    4689                    li  a3,2\n"
+        "   2a:    4709                    li  a4,2\n"
+        "   2c:    47a9                    li  a5,10\n"
+        "   2e:    8f82                    jr  t6"
+    >>,
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(St0),
+    St2 = ?BACKEND:call_primitive_last(St1, ?PRIM_CALL_EXT, [ctx, jit_state, offset, 2, 2, 10]),
+    Stream = ?BACKEND:stream(St2),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+call_primitive_last_test() ->
+    % Tail call to primitive 0 with one immediate (42) following ctx and jit_state
+    Dump = <<
+        "   0: 00062f83            lw  t6,0(a2)\n"
+        "   4: 02a00613            li  a2,42\n"
+        "   8: 8f82                    jr  t6"
+    >>,
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:call_primitive_last(St0, 0, [ctx, jit_state, 42]),
+    Stream = ?BACKEND:stream(St1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+return_if_not_equal_to_ctx_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(Fresh) ->
+            [
+                ?_test(begin
+                    % The primitive's result (t6) is handed over as {free, Reg}; the expected
+                    % code skips the early return only when it equals a0.
+                    {St1, ResultReg} = ?BACKEND:call_primitive(
+                        Fresh, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ctx, jit_state]
+                    ),
+                    ?assertEqual(t6, ResultReg),
+                    St2 = ?BACKEND:return_if_not_equal_to_ctx(St1, {free, ResultReg}),
+                    Stream = ?BACKEND:stream(St2),
+                    % Call sequence, then a beq that jumps over "mv a0,t6; ret"
+                    Dump = <<
+                        "   0:  05462f83            lw  t6,84(a2)\n"
+                        "   4:  1141                addi    sp,sp,-16\n"
+                        "   6:  c006                sw  ra,0(sp)\n"
+                        "   8:  c22a                sw  a0,4(sp)\n"
+                        "   a:  c42e                sw  a1,8(sp)\n"
+                        "   c:  c632                sw  a2,12(sp)\n"
+                        "   e:  9f82                jalr    t6\n"
+                        "  10:  8faa                mv  t6,a0\n"
+                        "  12:  4082                lw  ra,0(sp)\n"
+                        "  14:  4512                lw  a0,4(sp)\n"
+                        "  16:  45a2                lw  a1,8(sp)\n"
+                        "  18:  4632                lw  a2,12(sp)\n"
+                        "  1a:  0141                addi    sp,sp,16\n"
+                        "  1c:  00af8463            beq t6,a0,0x24\n"
+                        "  20:  857e                mv  a0,t6\n"
+                        "  22:  8082                ret"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    % Same check, but performed on a copy of the result: the copy is
+                    % expected in t5 and is the register that gets compared and returned.
+                    {St1, ResultReg} = ?BACKEND:call_primitive(
+                        Fresh, ?PRIM_PROCESS_SIGNAL_MESSAGES, [ctx, jit_state]
+                    ),
+                    ?assertEqual(t6, ResultReg),
+                    {St2, OtherReg} = ?BACKEND:copy_to_native_register(St1, ResultReg),
+                    ?assertEqual(t5, OtherReg),
+                    St3 = ?BACKEND:return_if_not_equal_to_ctx(St2, {free, OtherReg}),
+                    Stream = ?BACKEND:stream(St3),
+                    % Identical call sequence plus "mv t5,t6" ahead of the comparison
+                    Dump = <<
+                        "   0:  05462f83            lw  t6,84(a2)\n"
+                        "   4:  1141                addi    sp,sp,-16\n"
+                        "   6:  c006                sw  ra,0(sp)\n"
+                        "   8:  c22a                sw  a0,4(sp)\n"
+                        "   a:  c42e                sw  a1,8(sp)\n"
+                        "   c:  c632                sw  a2,12(sp)\n"
+                        "   e:  9f82                jalr    t6\n"
+                        "  10:  8faa                mv  t6,a0\n"
+                        "  12:  4082                lw  ra,0(sp)\n"
+                        "  14:  4512                lw  a0,4(sp)\n"
+                        "  16:  45a2                lw  a1,8(sp)\n"
+                        "  18:  4632                lw  a2,12(sp)\n"
+                        "  1a:  0141                addi    sp,sp,16\n"
+                        "  1c:  8f7e                mv  t5,t6\n"
+                        "  1e:  00af0463            beq t5,a0,0x26\n"
+                        "  22:  857a                mv  a0,t5\n"
+                        "  24:  8082                ret"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+move_to_cp_test() ->
+    % move_to_cp on {y_reg, 0}: expect a load through 20(a0), then a store to 92(a0)
+    Dump = <<
+        "   0:  01452f03            lw  t5,20(a0)\n"
+        "   4:  000f2f83            lw  t6,0(t5)\n"
+        "   8:  05f52e23            sw  t6,92(a0)"
+    >>,
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:move_to_cp(St0, {y_reg, 0}),
+    Stream = ?BACKEND:stream(St1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+increment_sp_test() ->
+    % An increment of 7 shows up as +28 (7 * 4) on the word kept at 20(a0)
+    Dump = <<
+        "   0: 01452f83            lw  t6,20(a0)\n"
+        "   4: 0ff1                addi    t6,t6,28\n"
+        "   6: 01f52a23            sw  t6,20(a0)"
+    >>,
+    St0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    St1 = ?BACKEND:increment_sp(St0, 7),
+    Stream = ?BACKEND:stream(St1),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+if_block_test_() ->
+    {setup,
+        fun() ->
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, RegB} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+            {State2, RegA, RegB}
+        end,
+        fun({State0, RegA, RegB}) ->
+            [
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  000fd363            bgez    t6,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', RegB},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  01efd363            bge t6,t5,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  02a00e93            li  t4,42\n"
+                        "   c:  01dfd363            bge t6,t4,0x12\n"
+                        "  10:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '<', 1024},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  40000e93            li  t4,1024\n"
+                        "   c:  01dfd363            bge t6,t4,0x12\n"
+                        "  10:  0f09                addi    t5,t5,2\n"
+                        "  12:  a0fd                j   0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  000f9363            bnez    t6,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  000f9363            bnez    t6,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', -1},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  5efd                li  t4,-1\n"
+                        "   a:  01df9363            bne t6,t4,0x10\n"
+                        "   e:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  000f9363            bnez    t6,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '==', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  000f9363            bnez    t6,0xe\n"
+                        "   c:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '!=', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  03b00e93            li  t4,59\n"
+                        "   c:  01df8363            beq t6,t4,0x12\n"
+                        "  10:  0f09                addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '!=', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   03b00e93            li  t4,59\n"
+                        "      c:   01df8363            beq t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '!=', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   02a00e93            li  t4,42\n"
+                        "      c:   01df8363            beq t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    % Test large immediate (1995) that requires temporary register
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '!=', 1995},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 1)
+                        end
+                    ),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   7cb00e93            li  t4,1995\n"
+                        "      c:   01df8363            beq t6,t4,0x12\n"
+                        "     10:   0f05                    addi    t5,t5,1\n"
+                        "     12:   a0fd                    j   0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '!=', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   02a00e93            li  t4,42\n"
+                        "      c:   01df8363            beq t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '==', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   03b00e93            li  t4,59\n"
+                        "      c:   01df9363            bne t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '==', ?TERM_NIL},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   03b00e93            li  t4,59\n"
+                        "      c:   01df9363            bne t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', RegA, '==', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   02a00e93            li  t4,42\n"
+                        "      c:   01df9363            bne t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(int)', {free, RegA}, '==', 42},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   02a00e93            li  t4,42\n"
+                        "      c:   01df9363            bne t6,t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', RegA, '==', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   01ff9e93            slli    t4,t6,0x1f\n"
+                        "      c:   000ec363            bltz    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', {free, RegA}, '==', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   01ff9e93            slli    t4,t6,0x1f\n"
+                        "      c:   000ec363            bltz    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', RegA, '!=', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   01ff9e93            slli    t4,t6,0x1f\n"
+                        "      c:   000ed363            bgez    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {'(bool)', {free, RegA}, '!=', false},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   01ff9e93            slli    t4,t6,0x1f\n"
+                        "      c:   000ed363            bgez    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#7, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   007ffe93            andi    t4,t6,7\n"
+                        "      c:   000e8363            beqz    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#5, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   005ffe93            andi    t4,t6,5\n"
+                        "      c:   000e8363            beqz    t4,0x12\n"
+                        "     10:   0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '&', 16#7, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  007ffe93            andi    t4,t6,7\n"
+                        "   c:  000e8363            beqz    t4,0x12\n"
+                        "  10:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   ffffce93            not t4,t6\n"
+                        "      c:   0ef2                    slli    t4,t4,0x1c\n"
+                        "      e:   000e8363            beqz    t4,0x14\n"
+                        "      12:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  ffffcf93            not t6,t6\n"
+                        "   c:  0ff2                    slli    t6,t6,0x1c\n"
+                        "   e:  000f8363            beqz    t6,0x14\n"
+                        "  12:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  8efe                    mv  t4,t6\n"
+                        "   a:  03f00e13            li  t3,63\n"
+                        "   e:  01cefeb3            and t4,t4,t3\n"
+                        "  12:  4e21                    li  t3,8\n"
+                        "  14:  01ce8363            beq t4,t3,0x1a\n"
+                        "  18:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {{free, RegA}, '<', RegB},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01c52f03            lw  t5,28(a0)\n"
+                        "   8:  01efd363            bge t6,t5,0xe\n"
+                        "   c:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {
+                            {free, RegA},
+                            '&',
+                            ?TERM_BOXED_TAG_MASK,
+                            '!=',
+                            ?TERM_BOXED_POSITIVE_INTEGER
+                        },
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   03f00e93            li  t4,63\n"
+                        "      c:   01dfffb3            and t6,t6,t4\n"
+                        "      10:  4ea1                    li  t4,8\n"
+                        "      12:  01df8363            beq t6,t4,0x18\n"
+                        "      16:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB], ?BACKEND:used_regs(State1))
+                end),
+                %% Test {RegA, '&', 16#3, '!=', 0} using ANDI instruction
+                ?_test(begin
+                    State1 = ?BACKEND:if_block(
+                        State0,
+                        {RegA, '&', 16#3, '!=', 0},
+                        fun(BSt0) ->
+                            ?BACKEND:add(BSt0, RegB, 2)
+                        end
+                    ),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01c52f03            lw  t5,28(a0)\n"
+                        "      8:   003ffe93            andi    t4,t6,3\n"
+                        "      c:   000e8363            beqz    t4,0x12\n"
+                        "      10:  0f09                    addi    t5,t5,2"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual([RegB, RegA], ?BACKEND:used_regs(State1))
+                end)
+            ]
+        end}.
+
+%% Checks the native code emitted by if_else_block/4 when the condition compares
+%% a register against ?TERM_NIL: the first fun adds 2 to the second register, the
+%% second fun adds 4 to it. The stream must match the reference disassembly.
+if_else_block_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {WithCond, CondReg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    {WithBoth, AccReg} = ?BACKEND:move_to_native_register(WithCond, {x_reg, 1}),
+    %% Both branch bodies are named up front to keep the if_else_block call compact.
+    AddTwo = fun(ThenSt) -> ?BACKEND:add(ThenSt, AccReg, 2) end,
+    AddFour = fun(ElseSt) -> ?BACKEND:add(ElseSt, AccReg, 4) end,
+    Final = ?BACKEND:if_else_block(
+        WithBoth, {CondReg, '==', ?TERM_NIL}, AddTwo, AddFour
+    ),
+    Emitted = ?BACKEND:stream(Final),
+    %% Reference output: two loads, the comparison against an immediate, the first
+    %% branch followed by a jump over the second branch.
+    Reference = <<
+        "0: 01852f83            lw  t6,24(a0)\n"
+        "4: 01c52f03            lw  t5,28(a0)\n"
+        "8: 03b00e93            li  t4,59\n"
+        "c: 01df9463            bne t6,t4,0x14\n"
+        "10:    0f09                    addi    t5,t5,2\n"
+        "12:    a011                    j   0x16\n"
+        "14:    0f11                    addi    t5,t5,4"
+    >>,
+    ?assertEqual(dump_to_bin(Reference), Emitted).
+
+%% Two shift_right/3 scenarios: with {free, Reg} the source register is reused for
+%% the result; with a plain Reg the result must come back in a different register.
+shift_right_test_() ->
+    NewState = fun() ->
+        ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+    end,
+    [
+        ?_test(begin
+            {Loaded, Src} = ?BACKEND:move_to_native_register(NewState(), {x_reg, 0}),
+            %% Matching on the already bound Src asserts that the register is reused.
+            {Shifted, Src} = ?BACKEND:shift_right(Loaded, {free, Src}, 3),
+            Expected = dump_to_bin(<<
+                "   0:  01852f83            lw  t6,24(a0)\n"
+                "   4:  003fdf93            srli    t6,t6,0x3"
+            >>),
+            ?assertEqual(Expected, ?BACKEND:stream(Shifted))
+        end),
+        ?_test(begin
+            {Loaded, Src} = ?BACKEND:move_to_native_register(NewState(), {x_reg, 0}),
+            {Shifted, Dst} = ?BACKEND:shift_right(Loaded, Src, 3),
+            ?assertNotEqual(Dst, Src),
+            Expected = dump_to_bin(<<
+                "   0:  01852f83            lw  t6,24(a0)\n"
+                "   4:  003fdf13            srli    t5,t6,0x3"
+            >>),
+            ?assertEqual(Expected, ?BACKEND:stream(Shifted))
+        end)
+    ].
+
+%% shift_left/3 applied to a freshly loaded register: the reference dump expects the
+%% load followed by a single slli that shifts the register in place.
+shift_left_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    Shifted = ?BACKEND:shift_left(Loaded, Reg, 3),
+    Expected = dump_to_bin(<<
+        "0: 01852f83            lw  t6,24(a0)\n"
+        "4: 0f8e                    slli    t6,t6,0x3"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Shifted)).
+
+%% Emits a jump table, labels 1, 2 and 0 with their bodies, then resolves the branches.
+call_only_or_schedule_next_and_label_relocation_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Steps = [
+        fun(S) -> ?BACKEND:jump_table(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 1) end,
+        fun(S) -> ?BACKEND:call_only_or_schedule_next(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 2) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 0, [ctx, jit_state]) end,
+        % OP_INT_CALL_END
+        fun(S) -> ?BACKEND:add_label(S, 0) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 1, [ctx, jit_state]) end
+    ],
+    Final = ?BACKEND:update_branches(lists:foldl(fun(Step, S) -> Step(S) end, Init, Steps)),
+    ?assertEqual(dump_to_bin(<<
+        "   0:  00000697            auipc   a3,0x0\n"
+        "   4:  04668067            jr  70(a3) # 0x46\n"
+        "   8:  00000697            auipc   a3,0x0\n"
+        "   c:  01068067            jr  16(a3) # 0x18\n"
+        "  10:  00000697            auipc   a3,0x0\n"
+        "  14:  03068067            jr  48(a3) # 0x40\n"
+        "  18:  0085af83            lw  t6,8(a1)\n"
+        "  1c:  1ffd                    addi    t6,t6,-1\n"
+        "  1e:  01f5a423            sw  t6,8(a1)\n"
+        "  22:  000f8663            beqz    t6,0x2e\n"
+        "  26:  a829                    j   0x40\n"
+        "  28:  0001                    nop\n"
+        "  2a:  00000013            nop\n"
+        "  2e:  00000f97            auipc   t6,0x0\n"
+        "  32:  0fd1                    addi    t6,t6,20 # 0x42\n"
+        "  34:  0001                    nop\n"
+        "  36:  01f5a223            sw  t6,4(a1)\n"
+        "  3a:  00862f83            lw  t6,8(a2)\n"
+        "  3e:  8f82                    jr  t6\n"
+        "  40:  00062f83            lw  t6,0(a2)\n"
+        "  44:  8f82                    jr  t6\n"
+        "  46:  00462f83            lw  t6,4(a2)\n"
+        "  4a:  8f82                    jr  t6"
+    >>), ?BACKEND:stream(Final)).
+
+%% Test with large gap (256+ bytes) to force mov_immediate path
+call_only_or_schedule_next_and_label_relocation_large_gap_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    WithTable = ?BACKEND:jump_table(Init, 2),
+    % Pad the stream with repeated move_to_native_register operations so that a
+    % large gap separates the jump table from the actual function bodies.
+    % Each operation emits ~2 bytes, so 128 operations = ~256 bytes
+    Pad = fun(_, S) -> ?BACKEND:move_to_native_register(S, {x_reg, 2}, a3) end,
+    Padded = lists:foldl(Pad, WithTable, lists:seq(1, 128)),
+    % Same label/body sequence as in the small-gap relocation test.
+    Steps = [
+        fun(S) -> ?BACKEND:add_label(S, 1) end,
+        fun(S) -> ?BACKEND:call_only_or_schedule_next(S, 2) end,
+        fun(S) -> ?BACKEND:add_label(S, 2) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 0, [ctx, jit_state]) end,
+        % OP_INT_CALL_END
+        fun(S) -> ?BACKEND:add_label(S, 0) end,
+        fun(S) -> ?BACKEND:call_primitive_last(S, 1, [ctx, jit_state]) end
+    ],
+    Final = ?BACKEND:update_branches(
+        lists:foldl(fun(Step, S) -> Step(S) end, Padded, Steps)
+    ),
+    % Extract the final section starting at 0x118 (after jump table 24 bytes + 128 loads 256 bytes)
+    % RISC-V: Jump table is 3×8=24 bytes, loads are 2 bytes each (compressed)
+    {_TableAndPadding, Tail} = split_binary(?BACKEND:stream(Final), 16#118),
+    % Only that tail is compared against the reference disassembly.
+    Expected = dump_to_bin(<<
+        "   0:  0085af83            lw  t6,8(a1)\n"
+        "   4:  1ffd                    addi    t6,t6,-1\n"
+        "   6:  01f5a423            sw  t6,8(a1)\n"
+        "   a:  000f8663            beqz    t6,0x16\n"
+        "   e:  a829                    j   0x28\n"
+        "  10:  0001                    nop\n"
+        "  12:  00000013            nop\n"
+        "  16:  00000f97            auipc   t6,0x0\n"
+        "  1a:  0fd1                    addi    t6,t6,20 # 0x2a\n"
+        "  1c:  0001                    nop\n"
+        "  1e:  01f5a223            sw  t6,4(a1)\n"
+        "  22:  00862f83            lw  t6,8(a2)\n"
+        "  26:  8f82                    jr  t6\n"
+        "  28:  00062f83            lw  t6,0(a2)\n"
+        "  2c:  8f82                    jr  t6\n"
+        "  2e:  00462f83            lw  t6,4(a2)\n"
+        "  32:  8f82                    jr  t6"
+    >>),
+    ?assertEqual(Expected, Tail).
+
+%% Obtains a function pointer through primitive 8 and an argument through primitive 15
+%% (called with a large integer literal), calls the pointer and stores the result.
+call_bif_with_large_literal_integer_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {WithFuncPtr, FuncPtr} = ?BACKEND:call_primitive(Init, 8, [jit_state, 2]),
+    {WithArg, ArgReg} = ?BACKEND:call_primitive(WithFuncPtr, 15, [ctx, 998238357]),
+    CallArgs = [ctx, 0, 1, {free, {x_reg, 0}}, {free, ArgReg}],
+    {Called, ResultReg} = ?BACKEND:call_func_ptr(WithArg, {free, FuncPtr}, CallArgs),
+    HandleError = fun(ErrSt) ->
+        ?BACKEND:call_primitive_last(ErrSt, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
+    end,
+    Checked = ?BACKEND:if_block(Called, {ResultReg, '==', 0}, HandleError),
+    Stored = ?BACKEND:move_to_vm_register(Checked, ResultReg, {x_reg, 0}),
+    Released = ?BACKEND:free_native_registers(Stored, [ResultReg]),
+    ?BACKEND:assert_all_native_free(Released),
+    Expected = dump_to_bin(<<
+        "      0:   02062f83            lw  t6,32(a2)\n"
+        "      4:   1141                    addi    sp,sp,-16\n"
+        "      6:   c006                    sw  ra,0(sp)\n"
+        "      8:   c22a                    sw  a0,4(sp)\n"
+        "      a:   c42e                    sw  a1,8(sp)\n"
+        "      c:   c632                    sw  a2,12(sp)\n"
+        "      e:   852e                    mv  a0,a1\n"
+        "     10:   4589                    li  a1,2\n"
+        "     12:   9f82                    jalr    t6\n"
+        "     14:   8faa                    mv  t6,a0\n"
+        "     16:   4082                    lw  ra,0(sp)\n"
+        "     18:   4512                    lw  a0,4(sp)\n"
+        "     1a:   45a2                    lw  a1,8(sp)\n"
+        "     1c:   4632                    lw  a2,12(sp)\n"
+        "     1e:   0141                    addi    sp,sp,16\n"
+        "     20:   03c62f03            lw  t5,60(a2)\n"
+        "     24:   1101                    addi    sp,sp,-32\n"
+        "     26:   c006                    sw  ra,0(sp)\n"
+        "     28:   c22a                    sw  a0,4(sp)\n"
+        "     2a:   c42e                    sw  a1,8(sp)\n"
+        "     2c:   c632                    sw  a2,12(sp)\n"
+        "     2e:   c87e                    sw  t6,16(sp)\n"
+        "     30:   3b7ff5b7            lui a1,0x3b7ff\n"
+        "     34:   89558593            addi    a1,a1,-1899 # 0x3b7fe895\n"
+        "     38:   9f02                    jalr    t5\n"
+        "     3a:   8f2a                    mv  t5,a0\n"
+        "     3c:   4082                    lw  ra,0(sp)\n"
+        "     3e:   4512                    lw  a0,4(sp)\n"
+        "     40:   45a2                    lw  a1,8(sp)\n"
+        "     42:   4632                    lw  a2,12(sp)\n"
+        "     44:   4fc2                    lw  t6,16(sp)\n"
+        "     46:   02010113            addi    sp,sp,32\n"
+        "     4a:   1141                    addi    sp,sp,-16\n"
+        "     4c:   c006                    sw  ra,0(sp)\n"
+        "     4e:   c22a                    sw  a0,4(sp)\n"
+        "     50:   c42e                    sw  a1,8(sp)\n"
+        "     52:   c632                    sw  a2,12(sp)\n"
+        "     54:   4581                    li  a1,0\n"
+        "     56:   4605                    li  a2,1\n"
+        "     58:   4d14                    lw  a3,24(a0)\n"
+        "     5a:   877a                    mv  a4,t5\n"
+        "     5c:   9f82                    jalr    t6\n"
+        "     5e:   8faa                    mv  t6,a0\n"
+        "     60:   4082                    lw  ra,0(sp)\n"
+        "     62:   4512                    lw  a0,4(sp)\n"
+        "     64:   45a2                    lw  a1,8(sp)\n"
+        "     66:   4632                    lw  a2,12(sp)\n"
+        "     68:   0141                    addi    sp,sp,16\n"
+        "     6a:   000f9763            bnez    t6,0x78\n"
+        "     6e:   01862f83            lw  t6,24(a2)\n"
+        "     72:   07200613            li  a2,114\n"
+        "     76:   8f82                    jr  t6\n"
+        "     78:   01f52c23            sw  t6,24(a0)"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Released)).
+
+%% Masks x_reg 0 with ?TERM_PRIMARY_CLEAR_MASK, then copies array elements 1 and 0
+%% read through that register into y_reg 1 and y_reg 0.
+get_list_test() ->
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    Masked = ?BACKEND:and_(Loaded, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+    Elem1Stored = ?BACKEND:move_array_element(Masked, Reg, 1, {y_reg, 1}),
+    Elem0Stored = ?BACKEND:move_array_element(Elem1Stored, Reg, 0, {y_reg, 0}),
+    Released = ?BACKEND:free_native_registers(Elem0Stored, [Reg]),
+    ?BACKEND:assert_all_native_free(Released),
+    Expected = dump_to_bin(<<
+        "0: 01852f83            lw  t6,24(a0)\n"
+        "4: 4f0d                    li  t5,3\n"
+        "6: ffff4f13            not t5,t5\n"
+        "a: 01efffb3            and t6,t6,t5\n"
+        "e: 004fae83            lw  t4,4(t6)\n"
+        "12:    01452f03            lw  t5,20(a0)\n"
+        "16:    01df2223            sw  t4,4(t5)\n"
+        "1a:    000fae83            lw  t4,0(t6)\n"
+        "1e:    01452f03            lw  t5,20(a0)\n"
+        "22:    01df2023            sw  t4,0(t5)"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Released)).
+
+%% Emits an "is integer" check on x_reg 0 that jumps to Label on failure: the value
+%% passes when it carries the immediate integer tag, or when it is boxed and the first
+%% word it points to matches ?TERM_BOXED_POSITIVE_INTEGER under ?TERM_BOXED_TAG_MASK.
+is_integer_test() ->
+    Label = 1,
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    JumpOut = fun(St) -> ?BACKEND:jump_to_label(St, Label) end,
+    %% Each condition describes one way for the value NOT to be an integer.
+    NotImmediateInt = {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+    NotBoxed = {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED},
+    %% {free, Reg} lets this last test reuse Reg itself (the dump masks t6 in place).
+    NotBoxedInt = {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+    %% Slow path: reject non-boxed values, strip the tag bits with
+    %% ?TERM_PRIMARY_CLEAR_MASK, load word 0 into Reg and test its boxed tag.
+    SlowPath = fun(St0) ->
+        St1 = ?BACKEND:if_block(St0, NotBoxed, JumpOut),
+        St2 = ?BACKEND:and_(St1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+        St3 = ?BACKEND:move_array_element(St2, Reg, 0, Reg),
+        ?BACKEND:if_block(St3, NotBoxedInt, JumpOut)
+    end,
+    %% Only values without the immediate integer tag enter the slow path.
+    Tested = ?BACKEND:if_block(Loaded, NotImmediateInt, SlowPath),
+    Released = ?BACKEND:free_native_registers(Tested, [Reg]),
+    ?BACKEND:assert_all_native_free(Released),
+    %% The label is added at 16#100, which is the target of the jumps in the dump.
+    Labelled = ?BACKEND:add_label(Released, Label, 16#100),
+    Final = ?BACKEND:update_branches(Labelled),
+    %% Reference disassembly of the expected stream.
+    Expected = dump_to_bin(<<
+        "   0:  01852f83            lw  t6,24(a0)\n"
+        "   4:  ffffcf13            not t5,t6\n"
+        "   8:  0f72                    slli    t5,t5,0x1c\n"
+        "   a:  020f0f63            beqz    t5,0x48\n"
+        "   e:  8f7e                    mv  t5,t6\n"
+        "  10:  4e8d                    li  t4,3\n"
+        "  12:  01df7f33            and t5,t5,t4\n"
+        "  16:  4e89                    li  t4,2\n"
+        "  18:  01df0663            beq t5,t4,0x24\n"
+        "  1c:  a0d5                    j   0x100\n"
+        "  1e:  0001                    nop\n"
+        "  20:  00000013            nop\n"
+        "  24:  4f0d                    li  t5,3\n"
+        "  26:  ffff4f13            not t5,t5\n"
+        "  2a:  01efffb3            and t6,t6,t5\n"
+        "  2e:  000faf83            lw  t6,0(t6)\n"
+        "  32:  03f00f13            li  t5,63\n"
+        "  36:  01efffb3            and t6,t6,t5\n"
+        "  3a:  4f21                    li  t5,8\n"
+        "  3c:  01ef8663            beq t6,t5,0x48\n"
+        "  40:  a0c1                    j   0x100\n"
+        "  42:  0001                    nop\n"
+        "  44:  00000013            nop"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Final)).
+
+%% Emits, through backend module MMod, a jump to Label that is guarded by Cond.
+cond_jump_to_label(Cond, Label, MMod, MSt0) ->
+    JumpToLabel = fun(BlockSt) -> MMod:jump_to_label(BlockSt, Label) end,
+    MMod:if_block(MSt0, Cond, JumpToLabel).
+
+%% Keep the unoptimized version to test the and case.
+%% The emitted check on x_reg 0 jumps to Label unless the value carries the immediate
+%% integer tag, or is boxed with a first word tagged as positive integer or float.
+is_number_test() ->
+    Label = 1,
+    Init = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, Reg} = ?BACKEND:move_to_native_register(Init, {x_reg, 0}),
+    %% Each condition describes one way for the value NOT to be a number.
+    NotImmediateInt = {Reg, '&', ?TERM_IMMED_TAG_MASK, '!=', ?TERM_INTEGER_TAG},
+    NotBoxed = {Reg, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED},
+    NotBoxedInt = {Reg, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_POSITIVE_INTEGER},
+    %% Only the last condition of the 'and' list uses {free, Reg}.
+    NotBoxedFloat = {{free, Reg}, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FLOAT},
+    %% Slow path: reject non-boxed values, strip the tag bits with
+    %% ?TERM_PRIMARY_CLEAR_MASK, load word 0 into Reg and test its boxed tag.
+    SlowPath = fun(St0) ->
+        St1 = cond_jump_to_label(NotBoxed, Label, ?BACKEND, St0),
+        St2 = ?BACKEND:and_(St1, Reg, ?TERM_PRIMARY_CLEAR_MASK),
+        St3 = ?BACKEND:move_array_element(St2, Reg, 0, Reg),
+        %% The jump is taken only when both conditions of the 'and' list hold.
+        cond_jump_to_label({'and', [NotBoxedInt, NotBoxedFloat]}, Label, ?BACKEND, St3)
+    end,
+    %% Only values without the immediate integer tag enter the slow path.
+    Tested = ?BACKEND:if_block(Loaded, NotImmediateInt, SlowPath),
+    Released = ?BACKEND:free_native_registers(Tested, [Reg]),
+    ?BACKEND:assert_all_native_free(Released),
+    %% The label is added at 16#100, which is the target of the jumps in the dump.
+    Labelled = ?BACKEND:add_label(Released, Label, 16#100),
+    Final = ?BACKEND:update_branches(Labelled),
+    %% Reference disassembly of the expected stream.
+    Expected = dump_to_bin(<<
+        "   0:  01852f83            lw  t6,24(a0)\n"
+        "   4:  ffffcf13            not t5,t6\n"
+        "   8:  0f72                    slli    t5,t5,0x1c\n"
+        "   a:  040f0763            beqz    t5,0x58\n"
+        "   e:  8f7e                    mv  t5,t6\n"
+        "  10:  4e8d                    li  t4,3\n"
+        "  12:  01df7f33            and t5,t5,t4\n"
+        "  16:  4e89                    li  t4,2\n"
+        "  18:  01df0663            beq t5,t4,0x24\n"
+        "  1c:  a0d5                    j   0x100\n"
+        "  1e:  0001                    nop\n"
+        "  20:  00000013            nop\n"
+        "  24:  4f0d                    li  t5,3\n"
+        "  26:  ffff4f13            not t5,t5\n"
+        "  2a:  01efffb3            and t6,t6,t5\n"
+        "  2e:  000faf83            lw  t6,0(t6)\n"
+        "  32:  8f7e                    mv  t5,t6\n"
+        "  34:  03f00e93            li  t4,63\n"
+        "  38:  01df7f33            and t5,t5,t4\n"
+        "  3c:  4ea1                    li  t4,8\n"
+        "  3e:  01df0d63            beq t5,t4,0x58\n"
+        "  42:  03f00f13            li  t5,63\n"
+        "  46:  01efffb3            and t6,t6,t5\n"
+        "  4a:  4f61                    li  t5,24\n"
+        "  4c:  01ef8663            beq t6,t5,0x58\n"
+        "  50:  a845                    j   0x100\n"
+        "  52:  0001                    nop\n"
+        "  54:  00000013            nop"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Final)).
+
+%% is_boolean-style check on x0: two compares against ?TRUE_ATOM and ?FALSE_ATOM, and a
+%% jump to FailLabel when neither matches. FailLabel is only added (at 16#100) after the
+%% jump has been emitted; the expected dump shows a compressed `j` followed by two nops.
+is_boolean_test() ->
+    FailLabel = 1,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, XReg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    % Innermost action first: the unconditional jump taken when both compares fail.
+    JumpToFail = fun(St) -> ?BACKEND:jump_to_label(St, FailLabel) end,
+    UnlessFalse = fun(St) -> ?BACKEND:if_block(St, {XReg, '!=', ?FALSE_ATOM}, JumpToFail) end,
+    Guarded = ?BACKEND:if_block(Loaded, {XReg, '!=', ?TRUE_ATOM}, UnlessFalse),
+    Released = ?BACKEND:free_native_registers(Guarded, [XReg]),
+    ?BACKEND:assert_all_native_free(Released),
+    Labelled = ?BACKEND:add_label(Released, FailLabel, 16#100),
+    Resolved = ?BACKEND:update_branches(Labelled),
+    Stream = ?BACKEND:stream(Resolved),
+    Dump =
+        <<
+            "   0:  01852f83            lw  t6,24(a0)\n"
+            "   4:  04b00f13            li  t5,75\n"
+            "   8:  01ef8963            beq t6,t5,0x1a\n"
+            "   c:  4f2d                    li  t5,11\n"
+            "   e:  01ef8663            beq t6,t5,0x1a\n"
+            "  12:  a0fd                    j   0x100\n"
+            "  14:  0001                    nop\n"
+            "  16:  00000013            nop"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Same is_boolean-style check on x0, but FailLabel is added at 16#1000 after the jump
+%% was emitted. The expected dump encodes the jump as a 4-byte `j 0x1000` followed by a
+%% single 4-byte nop.
+is_boolean_far_test() ->
+    FailLabel = 1,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, XReg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    JumpToFail = fun(St) -> ?BACKEND:jump_to_label(St, FailLabel) end,
+    UnlessFalse = fun(St) -> ?BACKEND:if_block(St, {XReg, '!=', ?FALSE_ATOM}, JumpToFail) end,
+    Guarded = ?BACKEND:if_block(Loaded, {XReg, '!=', ?TRUE_ATOM}, UnlessFalse),
+    Released = ?BACKEND:free_native_registers(Guarded, [XReg]),
+    ?BACKEND:assert_all_native_free(Released),
+    % The label is bound only now, well past the code emitted so far.
+    Labelled = ?BACKEND:add_label(Released, FailLabel, 16#1000),
+    Resolved = ?BACKEND:update_branches(Labelled),
+    Stream = ?BACKEND:stream(Resolved),
+    Dump = <<
+        "   0:  01852f83            lw  t6,24(a0)\n"
+        "   4:  04b00f13            li  t5,75\n"
+        "   8:  01ef8963            beq t6,t5,0x1a\n"
+        "   c:  4f2d                    li  t5,11\n"
+        "   e:  01ef8663            beq t6,t5,0x1a\n"
+        "  12:  7ef0006f            j   0x1000\n"
+        "  16:  00000013            nop"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% is_boolean-style check on x0 where FailLabel (16#1000) is already registered before
+%% any code is emitted. The expected dump reaches it with an `auipc` + `jr` pair.
+is_boolean_far_known_test() ->
+    FailLabel = 1,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Label first, code second: the jump target is known when the jump is generated.
+    Labelled = ?BACKEND:add_label(Fresh, FailLabel, 16#1000),
+    {Loaded, XReg} = ?BACKEND:move_to_native_register(Labelled, {x_reg, 0}),
+    JumpToFail = fun(St) -> ?BACKEND:jump_to_label(St, FailLabel) end,
+    UnlessFalse = fun(St) -> ?BACKEND:if_block(St, {XReg, '!=', ?FALSE_ATOM}, JumpToFail) end,
+    Guarded = ?BACKEND:if_block(Loaded, {XReg, '!=', ?TRUE_ATOM}, UnlessFalse),
+    Released = ?BACKEND:free_native_registers(Guarded, [XReg]),
+    ?BACKEND:assert_all_native_free(Released),
+    Resolved = ?BACKEND:update_branches(Released),
+    Stream = ?BACKEND:stream(Resolved),
+    Dump = <<
+        "   0:  01852f83            lw  t6,24(a0)\n"
+        "   4:  04b00f13            li  t5,75\n"
+        "   8:  01ef8963            beq t6,t5,0x1a\n"
+        "   c:  4f2d                    li  t5,11\n"
+        "   e:  01ef8663            beq t6,t5,0x1a\n"
+        "  12:  00001f17            auipc   t5,0x1\n"
+        "  16:  feef0067            jr  -18(t5) # 0x1000"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT_TIMEOUT pattern that uses set_continuation_to_offset and continuation_entry_point
+%%
+%% Sequence exercised: set the continuation to a not-yet-known offset, tail-call
+%% ?PRIM_WAIT_TIMEOUT with a 5000 timeout, bind the continuation offset, then emit the
+%% resume path (process signal messages, read context flags, optionally tail-call the
+%% timeout trap handler).
+wait_timeout_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    Label = 42,
+    % OffsetRef0 is a reference to an offset that is only bound by add_label/2 below.
+    {State1, OffsetRef0} = ?BACKEND:set_continuation_to_offset(State0),
+    % 5000 = 16#1388, which the expected dump builds with lui + addi.
+    {State2, TimeoutReg} = ?BACKEND:move_to_native_register(State1, 5000),
+    State3 = ?BACKEND:call_primitive_last(State2, ?PRIM_WAIT_TIMEOUT, [
+        ctx, jit_state, {free, TimeoutReg}, Label
+    ]),
+    % Bind the continuation reference to the current position in the stream.
+    State4 = ?BACKEND:add_label(State3, OffsetRef0),
+    State5 = ?BACKEND:continuation_entry_point(State4),
+    {State6, ResultReg0} = ?BACKEND:call_primitive(State5, ?PRIM_PROCESS_SIGNAL_MESSAGES, [
+        ctx, jit_state
+    ]),
+    % In the dump: `beq t6,a0` skips over a `mv a0,t6; ret` pair.
+    State7 = ?BACKEND:return_if_not_equal_to_ctx(State6, {free, ResultReg0}),
+    % ?WAITING_TIMEOUT_EXPIRED
+    % (presumably the meaning of the literal 2 passed below -- confirm against the headers)
+    {State8, ResultReg1} = ?BACKEND:call_primitive(State7, ?PRIM_CONTEXT_GET_FLAGS, [ctx, 2]),
+    % When the returned flags value is 0, tail-call the trap handler with the same label.
+    State9 = ?BACKEND:if_block(State8, {{free, ResultReg1}, '==', 0}, fun(BlockSt) ->
+        ?BACKEND:call_primitive_last(BlockSt, ?PRIM_WAIT_TIMEOUT_TRAP_HANDLER, [
+            ctx, jit_state, Label
+        ])
+    end),
+    State10 = ?BACKEND:update_branches(State9),
+
+    Stream = ?BACKEND:stream(State10),
+    % NOTE(review): the first two instructions materialise 0x20 as the continuation
+    % address, while the instruction following `jr t5` is listed at 0x1e. Confirm whether
+    % the 2-byte difference is intended by the backend.
+    Dump =
+        <<
+            "   0:  00000f97            auipc   t6,0x0\n"
+            "   4:  020f8f93            addi    t6,t6,32 # 0x20\n"
+            "   8:  01f5a223            sw  t6,4(a1)\n"
+            "   c:  6f85                    lui t6,0x1\n"
+            "   e:  388f8f93            addi    t6,t6,904 # 0x1388\n"
+            "  12:  07862f03            lw  t5,120(a2)\n"
+            "  16:  867e                    mv  a2,t6\n"
+            "  18:  02a00693            li  a3,42\n"
+            "  1c:  8f02                    jr  t5\n"
+            "  1e:  05462f83            lw  t6,84(a2)\n"
+            "  22:  1141                    addi    sp,sp,-16\n"
+            "  24:  c006                    sw  ra,0(sp)\n"
+            "  26:  c22a                    sw  a0,4(sp)\n"
+            "  28:  c42e                    sw  a1,8(sp)\n"
+            "  2a:  c632                    sw  a2,12(sp)\n"
+            "  2c:  9f82                    jalr    t6\n"
+            "  2e:  8faa                    mv  t6,a0\n"
+            "  30:  4082                    lw  ra,0(sp)\n"
+            "  32:  4512                    lw  a0,4(sp)\n"
+            "  34:  45a2                    lw  a1,8(sp)\n"
+            "  36:  4632                    lw  a2,12(sp)\n"
+            "  38:  0141                    addi    sp,sp,16\n"
+            "  3a:  00af8463            beq t6,a0,0x42\n"
+            "  3e:  857e                    mv  a0,t6\n"
+            "  40:  8082                    ret\n"
+            "  42:  08400f93            li  t6,132\n"
+            "  46:  9fb2                    add t6,t6,a2\n"
+            "  48:  000faf83            lw  t6,0(t6)\n"
+            "  4c:  1141                    addi    sp,sp,-16\n"
+            "  4e:  c006                    sw  ra,0(sp)\n"
+            "  50:  c22a                    sw  a0,4(sp)\n"
+            "  52:  c42e                    sw  a1,8(sp)\n"
+            "  54:  c632                    sw  a2,12(sp)\n"
+            "  56:  4589                    li  a1,2\n"
+            "  58:  9f82                    jalr    t6\n"
+            "  5a:  8faa                    mv  t6,a0\n"
+            "  5c:  4082                    lw  ra,0(sp)\n"
+            "  5e:  4512                    lw  a0,4(sp)\n"
+            "  60:  45a2                    lw  a1,8(sp)\n"
+            "  62:  4632                    lw  a2,12(sp)\n"
+            "  64:  0141                    addi    sp,sp,16\n"
+            "  66:  000f9763            bnez    t6,0x74\n"
+            "  6a:  07c62f83            lw  t6,124(a2)\n"
+            "  6e:  02a00613            li  a2,42\n"
+            "  72:  8f82                    jr  t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test OP_WAIT pattern that uses set_continuation_to_label
+%%
+%% Emits a jump table for 5 labels, binds label 1, points the continuation at label 2
+%% and tail-calls ?PRIM_SCHEDULE_WAIT_CP. update_branches/1 is not called here.
+wait_test() ->
+    ContLabel = 2,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    WithTable = ?BACKEND:jump_table(Fresh, 5),
+    AtLabel1 = ?BACKEND:add_label(WithTable, 1),
+    WithCont = ?BACKEND:set_continuation_to_label(AtLabel1, ContLabel),
+    Final = ?BACKEND:call_primitive_last(WithCont, ?PRIM_SCHEDULE_WAIT_CP, [ctx, jit_state]),
+
+    Stream = ?BACKEND:stream(Final),
+    % NOTE(review): the address column of the 0xffffffff rows below does not advance in
+    % regular 4-byte steps (0, 4, 6, a, c, ...). Presumably dump_to_bin/1 only reads the
+    % hex words and ignores the addresses -- confirm.
+    Dump = <<
+        "   0:  ffffffff            .insn   4, 0xffffffff\n"
+        "   4:  ffffffff            .insn   4, 0xffffffff\n"
+        "   6:  ffffffff            .insn   4, 0xffffffff\n"
+        "   a:  ffffffff            .insn   4, 0xffffffff\n"
+        "   c:  ffffffff            .insn   4, 0xffffffff\n"
+        "  10:  ffffffff            .insn   4, 0xffffffff\n"
+        "  12:  ffffffff            .insn   4, 0xffffffff\n"
+        "  16:  ffffffff            .insn   4, 0xffffffff\n"
+        "  18:  ffffffff            .insn   4, 0xffffffff\n"
+        "  1c:  ffffffff            .insn   4, 0xffffffff\n"
+        "  1e:  ffffffff            .insn   4, 0xffffffff\n"
+        "  22:  ffffffff            .insn   4, 0xffffffff\n"
+        "  24:  ffffffff            .insn   4, 0xffffffff\n"
+        "  28:  ffffffff            .insn   4, 0xffffffff\n"
+        "  2c:  01f5a223            sw  t6,4(a1)\n"
+        "  30:  07462f83            lw  t6,116(a2)\n"
+        "  34:  8f82                    jr  t6"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test return_labels_and_lines/2 function
+%%
+%% Two labels and two line entries go in; the expected stream is a short routine
+%% (auipc + addi + ret) followed by the labels data and then the lines data.
+return_labels_and_lines_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+    % Labels are added as 2 then 1; the expected data below lists label 1 before label 2.
+    WithLabel2 = ?BACKEND:add_label(Fresh, 2, 32),
+    WithLabels = ?BACKEND:add_label(WithLabel2, 1, 16),
+
+    % {Line, Offset} pairs
+    SortedLines = [{10, 16}, {20, 32}],
+
+    Emitted = ?BACKEND:return_labels_and_lines(WithLabels, SortedLines),
+    Stream = ?BACKEND:stream(Emitted),
+
+    % Coarse lower bound only; the exact comparison at the end is the strict check.
+    % Going by the expected dump: auipc (4) + compressed addi (2) + ret (2) = 8 bytes of
+    % code, then each table reads as a 2-byte count plus 2 entries of 2 + 4 bytes, i.e.
+    % 14 bytes per table and 8 + 14 + 14 = 36 bytes overall.
+    ?assert(byte_size(Stream) >= 32),
+
+    % Expected: auipc a0, 0 + addi a0, a0, 12 + ret + labels table + lines table
+    % NOTE(review): the data starts at offset 0x8 in the dump while addi adds 12 (0xc).
+    % An earlier comment attributed the 0xc to alignment -- confirm against the backend.
+    Dump = <<
+        "   0:  00000517            auipc   a0,0x0\n"
+        "   4:  0531                    addi    a0,a0,12 # 0xc\n"
+        "   6:  8082                    ret\n"
+        "   8:  0200                    addi    s0,sp,256\n"
+        "   a:  0100                    addi    s0,sp,128\n"
+        "   c:  0000                    unimp\n"
+        "   e:  1000                    addi    s0,sp,32\n"
+        "  10:  0200                    addi    s0,sp,256\n"
+        "  12:  0000                    unimp\n"
+        "  14:  2000                    fld fs0,0(s0)\n"
+        "  16:  0200                    addi    s0,sp,256\n"
+        "  18:  0a00                    addi    s0,sp,272\n"
+        "  1a:  0000                    unimp\n"
+        "  1c:  1000                    addi    s0,sp,32\n"
+        "  1e:  1400                    addi    s0,sp,544\n"
+        "  20:  0000                    unimp\n"
+        "  22:  2000                    fld fs0,0(s0)"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test call_func_ptr with a {free, {x_reg, X}} argument, using a function pointer
+%% obtained from call_primitive(?PRIM_GET_IMPORTED_BIF)
+gc_bif2_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % First call: fetch the function pointer into a native register.
+    LookupArgs = [jit_state, 42],
+    {Fetched, BifPtr} = ?BACKEND:call_primitive(Fresh, ?PRIM_GET_IMPORTED_BIF, LookupArgs),
+    % Second call: invoke it; the pointer register and x0 are both passed as {free, _}.
+    BifArgs = [ctx, 0, 3, {y_reg, 0}, {free, {x_reg, 0}}],
+    {Called, _Result} = ?BACKEND:call_func_ptr(Fetched, {free, BifPtr}, BifArgs),
+
+    Stream = ?BACKEND:stream(Called),
+    Dump = <<
+        "   0:  02062f83            lw  t6,32(a2)\n"
+        "   4:  1141                    addi    sp,sp,-16\n"
+        "   6:  c006                    sw  ra,0(sp)\n"
+        "   8:  c22a                    sw  a0,4(sp)\n"
+        "   a:  c42e                    sw  a1,8(sp)\n"
+        "   c:  c632                    sw  a2,12(sp)\n"
+        "   e:  852e                    mv  a0,a1\n"
+        "  10:  02a00593            li  a1,42\n"
+        "  14:  9f82                    jalr    t6\n"
+        "  16:  8faa                    mv  t6,a0\n"
+        "  18:  4082                    lw  ra,0(sp)\n"
+        "  1a:  4512                    lw  a0,4(sp)\n"
+        "  1c:  45a2                    lw  a1,8(sp)\n"
+        "  1e:  4632                    lw  a2,12(sp)\n"
+        "  20:  0141                    addi    sp,sp,16\n"
+        "  22:  1141                    addi    sp,sp,-16\n"
+        "  24:  c006                    sw  ra,0(sp)\n"
+        "  26:  c22a                    sw  a0,4(sp)\n"
+        "  28:  c42e                    sw  a1,8(sp)\n"
+        "  2a:  c632                    sw  a2,12(sp)\n"
+        "  2c:  4581                    li  a1,0\n"
+        "  2e:  460d                    li  a2,3\n"
+        "  30:  01452f03            lw  t5,20(a0)\n"
+        "  34:  000f2683            lw  a3,0(t5)\n"
+        "  38:  4d18                    lw  a4,24(a0)\n"
+        "  3a:  9f82                    jalr    t6\n"
+        "  3c:  8faa                    mv  t6,a0\n"
+        "  3e:  4082                    lw  ra,0(sp)\n"
+        "  40:  4512                    lw  a0,4(sp)\n"
+        "  42:  45a2                    lw  a1,8(sp)\n"
+        "  44:  4632                    lw  a2,12(sp)\n"
+        "  46:  0141                    addi    sp,sp,16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test case where parameter value is in a1
+%%
+%% The third argument is {free, a1}. In the expected dump its value is copied to t5 and
+%% from there to a2 before the call.
+memory_ensure_free_with_roots_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    PrimArgs = [ctx, jit_state, {free, a1}, 4, 1],
+    {Called, _Result} =
+        ?BACKEND:call_primitive(Fresh, ?PRIM_MEMORY_ENSURE_FREE_WITH_ROOTS, PrimArgs),
+
+    Stream = ?BACKEND:stream(Called),
+    Dump = <<
+        "   0:  0b000f93            li  t6,176\n"
+        "   4:  9fb2                    add t6,t6,a2\n"
+        "   6:  000faf83            lw  t6,0(t6)\n"
+        "   a:  1141                    addi    sp,sp,-16\n"
+        "   c:  c006                    sw  ra,0(sp)\n"
+        "   e:  c22a                    sw  a0,4(sp)\n"
+        "  10:  c42e                    sw  a1,8(sp)\n"
+        "  12:  c632                    sw  a2,12(sp)\n"
+        "  14:  8f2e                    mv  t5,a1\n"
+        "  16:  867a                    mv  a2,t5\n"
+        "  18:  4691                    li  a3,4\n"
+        "  1a:  4705                    li  a4,1\n"
+        "  1c:  9f82                    jalr    t6\n"
+        "  1e:  8faa                    mv  t6,a0\n"
+        "  20:  4082                    lw  ra,0(sp)\n"
+        "  22:  4512                    lw  a0,4(sp)\n"
+        "  24:  45a2                    lw  a1,8(sp)\n"
+        "  26:  4632                    lw  a2,12(sp)\n"
+        "  28:  0141                    addi    sp,sp,16"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Reductions check followed by call_primitive_with_cp/3 on primitive index 4 with
+%% arguments 2, 5 and -1. No native register may remain allocated afterwards.
+call_ext_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Checked = ?BACKEND:decrement_reductions_and_maybe_schedule_next(Fresh),
+    % Primitive index 4 is loaded from offset 16 of a2 in the expected dump.
+    CallArgs = [ctx, jit_state, 2, 5, -1],
+    Called = ?BACKEND:call_primitive_with_cp(Checked, 4, CallArgs),
+    ?BACKEND:assert_all_native_free(Called),
+    Stream = ?BACKEND:stream(Called),
+    Dump = <<
+        "   0:  0085af83            lw  t6,8(a1)\n"
+        "   4:  1ffd                    addi    t6,t6,-1\n"
+        "   6:  01f5a423            sw  t6,8(a1)\n"
+        "   a:  000f9b63            bnez    t6,0x20\n"
+        "   e:  00000f97            auipc   t6,0x0\n"
+        "  12:  0fc9                    addi    t6,t6,18 # 0x20\n"
+        "  14:  0001                    nop\n"
+        "  16:  01f5a223            sw  t6,4(a1)\n"
+        "  1a:  00862f83            lw  t6,8(a2)\n"
+        "  1e:  8f82                    jr  t6\n"
+        "  20:  0005af03            lw  t5,0(a1)\n"
+        "  24:  000f2f03            lw  t5,0(t5)\n"
+        "  28:  0f62                    slli    t5,t5,0x18\n"
+        "  2a:  11800f93            li  t6,280\n"
+        "  2e:  00000013            nop\n"
+        "  32:  01ff6f33            or  t5,t5,t6\n"
+        "  36:  05e52e23            sw  t5,92(a0)\n"
+        "  3a:  01062f83            lw  t6,16(a2)\n"
+        "  3e:  4609                    li  a2,2\n"
+        "  40:  4695                    li  a3,5\n"
+        "  42:  577d                    li  a4,-1\n"
+        "  44:  8f82                    jr  t6"
+    >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Test the call_fun pattern: reductions check, two badfun checks on a scratch copy of
+%% x0 (primary tag must be boxed, boxed tag must be ?TERM_BOXED_FUN), then a
+%% call-with-CP to ?PRIM_CALL_FUN passing the untouched original register.
+call_fun_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:decrement_reductions_and_maybe_schedule_next(State0),
+    FuncReg = {x_reg, 0},
+    ArgsCount = 0,
+    % Reg keeps the original term; RegCopy is the scratch register the checks modify.
+    {State2, Reg} = ?BACKEND:move_to_native_register(State1, FuncReg),
+    {State3, RegCopy} = ?BACKEND:copy_to_native_register(State2, Reg),
+    % Primary tag is not boxed: tail-call ?PRIM_RAISE_ERROR_TUPLE with ?BADFUN_ATOM.
+    % In the expected dump the `offset` argument appears as the stream offset of the
+    % `li a2` instruction itself (56 at 0x38 here, 102 at 0x66 in the second check).
+    State4 = ?BACKEND:if_block(
+        State3, {RegCopy, '&', ?TERM_PRIMARY_MASK, '!=', ?TERM_PRIMARY_BOXED}, fun(BSt0) ->
+            ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
+                ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
+            ])
+        end
+    ),
+    % Clear the primary tag bits, then load the word at index 0 of the resulting pointer.
+    State5 = ?BACKEND:and_(State4, RegCopy, ?TERM_PRIMARY_CLEAR_MASK),
+    State6 = ?BACKEND:move_array_element(State5, RegCopy, 0, RegCopy),
+    % Boxed tag is not ?TERM_BOXED_FUN: same error call as above.
+    State7 = ?BACKEND:if_block(
+        State6, {RegCopy, '&', ?TERM_BOXED_TAG_MASK, '!=', ?TERM_BOXED_FUN}, fun(BSt0) ->
+            ?BACKEND:call_primitive_last(BSt0, ?PRIM_RAISE_ERROR_TUPLE, [
+                ctx, jit_state, offset, ?BADFUN_ATOM, RegCopy
+            ])
+        end
+    ),
+    % Only the scratch copy is released explicitly; Reg is passed on to the call below
+    % and assert_all_native_free/1 is checked after that call.
+    State8 = ?BACKEND:free_native_registers(State7, [RegCopy]),
+    State9 = ?BACKEND:call_primitive_with_cp(State8, ?PRIM_CALL_FUN, [
+        ctx, jit_state, Reg, ArgsCount
+    ]),
+    ?BACKEND:assert_all_native_free(State9),
+    Stream = ?BACKEND:stream(State9),
+    Dump =
+        <<
+            "   0:  0085af83            lw  t6,8(a1)\n"
+            "   4:  1ffd                    addi    t6,t6,-1\n"
+            "   6:  01f5a423            sw  t6,8(a1)\n"
+            "   a:  000f9b63            bnez    t6,0x20\n"
+            "   e:  00000f97            auipc   t6,0x0\n"
+            "  12:  0fc9                    addi    t6,t6,18 # 0x20\n"
+            "  14:  0001                    nop\n"
+            "  16:  01f5a223            sw  t6,4(a1)\n"
+            "  1a:  00862f83            lw  t6,8(a2)\n"
+            "  1e:  8f82                    jr  t6\n"
+            "  20:  01852f83            lw  t6,24(a0)\n"
+            "  24:  8f7e                    mv  t5,t6\n"
+            "  26:  8efa                    mv  t4,t5\n"
+            "  28:  4e0d                    li  t3,3\n"
+            "  2a:  01cefeb3            and t4,t4,t3\n"
+            "  2e:  4e09                    li  t3,2\n"
+            "  30:  01ce8a63            beq t4,t3,0x44\n"
+            "  34:  04c62f83            lw  t6,76(a2)\n"
+            "  38:  03800613            li  a2,56\n"
+            "  3c:  18b00693            li  a3,395\n"
+            "  40:  877a                    mv  a4,t5\n"
+            "  42:  8f82                    jr  t6\n"
+            "  44:  4e8d                    li  t4,3\n"
+            "  46:  fffece93            not t4,t4\n"
+            "  4a:  01df7f33            and t5,t5,t4\n"
+            "  4e:  000f2f03            lw  t5,0(t5)\n"
+            "  52:  8efa                    mv  t4,t5\n"
+            "  54:  03f00e13            li  t3,63\n"
+            "  58:  01cefeb3            and t4,t4,t3\n"
+            "  5c:  4e51                    li  t3,20\n"
+            "  5e:  01ce8a63            beq t4,t3,0x72\n"
+            "  62:  04c62f83            lw  t6,76(a2)\n"
+            "  66:  06600613            li  a2,102\n"
+            "  6a:  18b00693            li  a3,395\n"
+            "  6e:  877a                    mv  a4,t5\n"
+            "  70:  8f82                    jr  t6\n"
+            "  72:  0005ae83            lw  t4,0(a1)\n"
+            "  76:  000eae83            lw  t4,0(t4)\n"
+            "  7a:  0ee2                    slli    t4,t4,0x18\n"
+            "  7c:  27000f13            li  t5,624\n"
+            "  80:  00000013            nop\n"
+            "  84:  01eeeeb3            or  t4,t4,t5\n"
+            "  88:  05d52e23            sw  t4,92(a0)\n"
+            "  8c:  08000f13            li  t5,128\n"
+            "  90:  9f32                    add t5,t5,a2\n"
+            "  92:  000f2f03            lw  t5,0(t5)\n"
+            "  96:  867e                    mv  a2,t6\n"
+            "  98:  4681                    li  a3,0\n"
+            "  9a:  8f02                    jr  t5"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+%% Shared driver for the move_to_vm_register cases: emits the move from Source to Dest
+%% on top of State, appends a jump to offset 16#100, and compares the resulting stream
+%% with the binary decoded from Dump.
+move_to_vm_register_test0(State, Source, Dest, Dump) ->
+    Moved = ?BACKEND:move_to_vm_register(State, Source, Dest),
+    WithJump = ?BACKEND:jump_to_offset(Moved, 16#100),
+    Stream = ?BACKEND:stream(WithJump),
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+move_to_vm_register_test_() ->
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {x_reg, 0}, <<
+                        "      0:   4f81                    li  t6,0\n"
+                        "      2:   01f52c23            sw  t6,24(a0)\n"
+                        "      6:   a8ed                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {x_reg, extra}, <<
+                        "      0:   4f81                    li  t6,0\n"
+                        "      2:   05f52c23            sw  t6,88(a0)\n"
+                        "      6:   a8ed                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {ptr, t5}, <<
+                        "      0:   4f81                    li  t6,0\n"
+                        "      2:   01ff2023            sw  t6,0(t5)\n"
+                        "      6:   a8ed                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {y_reg, 2}, <<
+                        "      0:   4f01                    li  t5,0\n"
+                        "      2:   01452f83            lw  t6,20(a0)\n"
+                        "      6:   01efa423            sw  t5,8(t6)\n"
+                        "      a:   a8dd                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 0, {y_reg, 20}, <<
+                        "      0:   4f01                    li  t5,0\n"
+                        "      2:   01452f83            lw  t6,20(a0)\n"
+                        "      6:   05efa823            sw  t5,80(t6)\n"
+                        "      a:   a8dd                    j   0x100"
+                    >>)
+                end),
+                %% Test: Immediate to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {x_reg, 0}, <<
+                        "      0:   02a00f93            li  t6,42\n"
+                        "      4:   01f52c23            sw  t6,24(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {x_reg, extra}, <<
+                        "      0:   02a00f93            li  t6,42\n"
+                        "      4:   05f52c23            sw  t6,88(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 2}, <<
+                        "      0:   02a00f13            li  t5,42\n"
+                        "      4:   01452f83            lw  t6,20(a0)\n"
+                        "      8:   01efa423            sw  t5,8(t6)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 20}, <<
+                        "      0:   02a00f13            li  t5,42\n"
+                        "      4:   01452f83            lw  t6,20(a0)\n"
+                        "      8:   05efa823            sw  t5,80(t6)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Immediate to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 99, {ptr, a3}, <<
+                        "      0:   06300f93            li  t6,99\n"
+                        "      4:   01f6a023            sw  t6,0(a3)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                %% Test: x_reg to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 1}, {x_reg, 2}, <<
+                        "      0:   01c52f83            lw  t6,28(a0)\n"
+                        "      4:   03f52023            sw  t6,32(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                %% Test: x_reg to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 1}, {ptr, a1}, <<
+                        "      0:   01c52f83            lw  t6,28(a0)\n"
+                        "      4:   01f5a023            sw  t6,0(a1)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                %% Test: ptr to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {ptr, t3}, {x_reg, 3}, <<
+                        "      0:   000e2f83            lw  t6,0(t3)\n"
+                        "      4:   03f52223            sw  t6,36(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                %% Test: x_reg to y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 0}, {y_reg, 1}, <<
+                        "      0:   01852f83            lw  t6,24(a0)\n"
+                        "      4:   01452f03            lw  t5,20(a0)\n"
+                        "      8:   01ff2223            sw  t6,4(t5)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 0}, {x_reg, 3}, <<
+                        "      0:   01452f03            lw  t5,20(a0)\n"
+                        "      4:   000f2f83            lw  t6,0(t5)\n"
+                        "      8:   03f52223            sw  t6,36(a0)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg (non-zero y_reg index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 1}, {x_reg, 3}, <<
+                        "      0:   01452f03            lw  t5,20(a0)\n"
+                        "      4:   004f2f83            lw  t6,4(t5)\n"
+                        "      8:   03f52223            sw  t6,36(a0)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Native register to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t4, {x_reg, 0}, <<
+                        "      0:   01d52c23            sw  t4,24(a0)\n"
+                        "      4:   a8f5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t5, {x_reg, extra}, <<
+                        "      0:   05e52c23            sw  t5,88(a0)\n"
+                        "      4:   a8f5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Native register to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, t3, {ptr, a3}, <<
+                        "      0:   01c6a023            sw  t3,0(a3)\n"
+                        "      4:   a8f5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Native register to y_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, a1, {y_reg, 0}, <<
+                        "      0:   01452f83            lw  t6,20(a0)\n"
+                        "      4:   00bfa023            sw  a1,0(t6)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Large immediate to x_reg (uses lui + addi in RISC-V)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {x_reg, 0}, <<
+                        "      0:   12345fb7            lui t6,0x12345\n"
+                        "      4:   678f8f93            addi    t6,t6,1656 # 0x12345678\n"
+                        "      8:   01f52c23            sw  t6,24(a0)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {x_reg, extra}, <<
+                        "      0:   12345fb7            lui t6,0x12345\n"
+                        "      4:   678f8f93            addi    t6,t6,1656 # 0x12345678\n"
+                        "      8:   05f52c23            sw  t6,88(a0)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {y_reg, 2}, <<
+                        "      0:   12345fb7            lui t6,0x12345\n"
+                        "      4:   678f8f93            addi    t6,t6,1656 # 0x12345678\n"
+                        "      8:   01452f03            lw  t5,20(a0)\n"
+                        "      c:   01ff2423            sw  t6,8(t5)\n"
+                        "      10:  a8c5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {y_reg, 20}, <<
+                        "      0:   12345fb7            lui t6,0x12345\n"
+                        "      4:   678f8f93            addi    t6,t6,1656 # 0x12345678\n"
+                        "      8:   01452f03            lw  t5,20(a0)\n"
+                        "      c:   05ff2823            sw  t6,80(t5)\n"
+                        "      10:  a8c5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Large immediate to ptr
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 16#12345678, {ptr, a3}, <<
+                        "      0:   12345fb7            lui t6,0x12345\n"
+                        "      4:   678f8f93            addi    t6,t6,1656 # 0x12345678\n"
+                        "      8:   01f6a023            sw  t6,0(a3)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: x_reg to y_reg (high index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {x_reg, 15}, {y_reg, 31}, <<
+                        "      0:   05452f83            lw  t6,84(a0)\n"
+                        "      4:   01452f03            lw  t5,20(a0)\n"
+                        "      8:   07ff2e23            sw  t6,124(t5)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: y_reg to x_reg (high index)
+                ?_test(begin
+                    move_to_vm_register_test0(State0, {y_reg, 31}, {x_reg, 15}, <<
+                        "      0:   01452f03            lw  t5,20(a0)\n"
+                        "      4:   07cf2f83            lw  t6,124(t5)\n"
+                        "      8:   05f52a23            sw  t6,84(a0)\n"
+                        "      c:   a8d5                    j   0x100"
+                    >>)
+                end),
+                %% Test: Large y_reg index (32) that exceeds str immediate offset limit
+                ?_test(begin
+                    move_to_vm_register_test0(State0, 42, {y_reg, 32}, <<
+                        "   0:  02a00f13            li  t5,42\n"
+                        "   4:  01452f83            lw  t6,20(a0)\n"
+                        "   8:  08000e93            li  t4,128\n"
+                        "   c:  9efe                    add t4,t4,t6\n"
+                        "   e:  01eea023            sw  t5,0(t4)\n"
+                        "  12:  a0fd                    j   0x100"
+                    >>)
+                end),
+                %% Test: Negative immediate to x_reg
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -1, {x_reg, 0}, <<
+                        "      0:   5ffd                    li  t6,-1\n"
+                        "      2:   01f52c23            sw  t6,24(a0)\n"
+                        "      6:   a8ed                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -100, {x_reg, 0}, <<
+                        "      0:   f9c00f93            li  t6,-100\n"
+                        "      4:   01f52c23            sw  t6,24(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    move_to_vm_register_test0(State0, -1000, {x_reg, 0}, <<
+                        "      0:   c1800f93            li  t6,-1000\n"
+                        "      4:   01f52c23            sw  t6,24(a0)\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+%% Emit move_array_element(Reg, Index -> Dest) and compare the stream with Dump.
+move_array_element_test0(State, Reg, Index, Dest, Dump) ->
+    Emitted = ?BACKEND:move_array_element(State, Reg, Index, Dest),
+    ?assertEqual(dump_to_bin(Dump), ?BACKEND:stream(Emitted)).
+
+move_array_element_test_() -> % expected encodings for ?BACKEND:move_array_element/4
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) -> % every case starts from the same State0 built by the setup fun
+            [
+                %% move_array_element: a3[2] (byte offset 8) to x_reg 0
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 2, {x_reg, 0}, <<
+                        "   0:  0086af83            lw  t6,8(a3)\n"
+                        "   4:  01f52c23            sw  t6,24(a0)"
+                    >>)
+                end),
+                %% move_array_element: a3[3] (byte offset 12) to {ptr, t4}
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 3, {ptr, t4}, <<
+                        "   0:  00c6af83            lw  t6,12(a3)\n"
+                        "   4:  01fea023            sw  t6,0(t4)"
+                    >>)
+                end),
+                %% move_array_element: a3[1] to y_reg 2 (stored at 8 off the pointer read from 20(a0))
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 1, {y_reg, 2}, <<
+                        "   0:  0046af03            lw  t5,4(a3)\n"
+                        "   4:  01452f83            lw  t6,20(a0)\n"
+                        "   8:  01efa423            sw  t5,8(t6)"
+                    >>)
+                end),
+                %% move_array_element: a3[1] to native reg t4 (the load targets t4 directly)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 1, t4, <<
+                        "   0:  0046ae83            lw  t4,4(a3)"
+                    >>)
+                end),
+                %% move_array_element: a3[7] to y_reg 31 (high y index, store offset 124)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 7, {y_reg, 31}, <<
+                        "   0:  01c6af03            lw  t5,28(a3)\n"
+                        "   4:  01452f83            lw  t6,20(a0)\n"
+                        "   8:  07efae23            sw  t5,124(t6)"
+                    >>)
+                end),
+                %% move_array_element: a3[7] to x_reg 15 (high x index, store offset 84)
+                ?_test(begin
+                    move_array_element_test0(State0, a3, 7, {x_reg, 15}, <<
+                        "   0:  01c6af83            lw  t6,28(a3)\n"
+                        "   4:  05f52a23            sw  t6,84(a0)"
+                    >>)
+                end),
+                %% move_array_element: index in a register ({free, Reg} from get_array_element) to x_reg 2
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {x_reg, 2}, <<
+                        "   0:  0106af83            lw  t6,16(a3)\n"
+                        "   4:  0f8a                    slli    t6,t6,0x2\n"
+                        "   6:  01f68fb3            add t6,a3,t6\n"
+                        "   a:  000faf83            lw  t6,0(t6)\n"
+                        "   e:  03f52023            sw  t6,32(a0)"
+                    >>)
+                end),
+                %% move_array_element: index in a register ({free, Reg}) to {ptr, t4}
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {ptr, t4}, <<
+                        "   0:  0106af83            lw  t6,16(a3)\n"
+                        "   4:  0f8a                    slli    t6,t6,0x2\n"
+                        "   6:  01f68fb3            add t6,a3,t6\n"
+                        "   a:  000faf83            lw  t6,0(t6)\n"
+                        "   e:  01fea023            sw  t6,0(t4)"
+                    >>)
+                end),
+                %% move_array_element: index in a register ({free, Reg}) to y_reg 31
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, a3, 4),
+                    move_array_element_test0(State1, a3, {free, Reg}, {y_reg, 31}, <<
+                        "   0:  0106af83            lw  t6,16(a3)\n"
+                        "   4:  0f8a                    slli    t6,t6,0x2\n"
+                        "   6:  01f68fb3            add t6,a3,t6\n"
+                        "   a:  000faf83            lw  t6,0(t6)\n"
+                        "   e:  01452f03            lw  t5,20(a0)\n"
+                        "  12:  07ff2e23            sw  t6,124(t5)"
+                    >>)
+                end),
+                %% move_array_element: integer index 2, base register obtained from x_reg 0, to x_reg 5
+                ?_test(begin
+                    {State1, BaseReg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+                    move_array_element_test0(State1, BaseReg, 2, {x_reg, 5}, <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  008faf03            lw  t5,8(t6)\n"
+                        "   8:  03e52623            sw  t5,44(a0)"
+                    >>)
+                end)
+            ]
+        end}.
+
+get_array_element_test_() -> % expected encoding for ?BACKEND:get_array_element/3
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% get_array_element: t3[4] (byte offset 16) loaded into the returned native reg (t6 here)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:get_array_element(State0, t3, 4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  010e2f83            lw  t6,16(t3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual(t6, Reg)
+                end)
+            ]
+        end}.
+
+move_to_array_element_test_() -> % expected encodings for ?BACKEND:move_to_array_element/4 and /5
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_to_array_element/4: x_reg 0 to a3[2] (integer index, store offset 8)
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01f6a423            sw  t6,8(a3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: x_reg 0 to a3[t3] (index copied to t5 and scaled by 4)
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  8f72                    mv  t5,t3\n"
+                        "   6:  0f0a                    slli    t5,t5,0x2\n"
+                        "   8:  01e68f33            add t5,a3,t5\n"
+                        "   c:  01ff2023            sw  t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: {ptr, t6} to a3[t3] (value loaded in place into t6)
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {ptr, t6}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  000faf83            lw  t6,0(t6)\n"
+                        "   4:  8f72                    mv  t5,t3\n"
+                        "   6:  0f0a                    slli    t5,t5,0x2\n"
+                        "   8:  01e68f33            add t5,a3,t5\n"
+                        "   c:  01ff2023            sw  t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/4: y_reg 2 to a3[t3] (t5 is reused for the index after the load)
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {y_reg, 2}, a3, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01452f03            lw  t5,20(a0)\n"
+                        "   4:  008f2f83            lw  t6,8(t5)\n"
+                        "   8:  8f72                    mv  t5,t3\n"
+                        "   a:  0f0a                    slli    t5,t5,0x2\n"
+                        "   c:  01e68f33            add t5,a3,t5\n"
+                        "  10:  01ff2023            sw  t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: x_reg 0 to a3, integer index 2 with offset 1
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_array_element(State0, {x_reg, 0}, a3, 2, 1),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = << % NOTE(review): same 8(a3) as the /4 case with index 2 - confirm
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  01f6a423            sw  t6,8(a3)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: x_reg 0 to a3[t3 + 1] (register index; a3 and t3 marked used)
+                ?_test(begin
+                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]),
+                    State2 = setelement(7, State1, [a3, t3]),
+                    [a3, t3] = ?BACKEND:used_regs(State2), % presumably checks element 7 is the used-regs field
+                    State3 = ?BACKEND:move_to_array_element(State2, {x_reg, 0}, a3, t3, 1),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  001e0f13            addi    t5,t3,1\n"
+                        "   8:  0f0a                    slli    t5,t5,0x2\n"
+                        "   a:  01e68f33            add t5,a3,t5\n"
+                        "   e:  01ff2023            sw  t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_array_element/5: immediate 42 to a3[t3 + 1] (register index; a3 and t3 marked used)
+                ?_test(begin
+                    State1 = setelement(6, State0, ?BACKEND:available_regs(State0) -- [a3, t3]),
+                    State2 = setelement(7, State1, [a3, t3]),
+                    [a3, t3] = ?BACKEND:used_regs(State2), % presumably checks element 7 is the used-regs field
+                    State3 = ?BACKEND:move_to_array_element(State2, 42, a3, t3, 1),
+                    Stream = ?BACKEND:stream(State3),
+                    Dump = <<
+                        "      0:   02a00f93            li  t6,42\n"
+                        "      4:   001e0f13            addi    t5,t3,1\n"
+                        "      8:   0f0a                    slli    t5,t5,0x2\n"
+                        "      a:   01e68f33            add t5,a3,t5\n"
+                        "      e:   01ff2023            sw  t6,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+move_to_native_register_test_() -> % expected encodings for ?BACKEND:move_to_native_register/2 and /3
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                %% move_to_native_register/2: small positive immediate, returned in t6
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, 42),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        "   0:  02a00f93            li  t6,42"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: negative value
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -42),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        "   0:  fd600f93            li  t6,-42"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: -255 (still a single li; "boundary" presumably from another backend)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -255),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        "   0:  f0100f93            li  t6,-255"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: -256 (boundary case, fits in immediate for RISC-V)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, -256),
+                    State2 = ?BACKEND:jump_to_offset(State1, 16#100), % accounts for the trailing j 0x100 below
+                    Stream = ?BACKEND:stream(State2),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        "   0:  f0000f93            li  t6,-256\n"
+                        "   4:  a8f5                    j   0x100"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {ptr, t5} is loaded in place, the returned register is t5 itself
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {ptr, t5}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t5, Reg),
+                    Dump = <<
+                        "   0:  000f2f03            lw  t5,0(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {x_reg, 5} read from 44(a0)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 5}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        "   0:  02c52f83            lw  t6,44(a0)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/2: {y_reg, 3} read at 12 off the pointer loaded from 20(a0)
+                ?_test(begin
+                    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {y_reg, 3}),
+                    Stream = ?BACKEND:stream(State1),
+                    ?assertEqual(t6, Reg),
+                    Dump = <<
+                        "   0:  01452f03            lw  t5,20(a0)\n"
+                        "   4:  00cf2f83            lw  t6,12(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: imm to a caller-chosen reg (t5)
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, 42, t5),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  02a00f13            li  t5,42"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: reg to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, t6, t4),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  8efe                    mv  t4,t6"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {ptr, reg} to reg
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {ptr, t6}, t3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  000fae03            lw  t3,0(t6)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {x_reg, 2} to native reg a3 (expected as a 16-bit encoding)
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {x_reg, 2}, a3),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  5114                    lw  a3,32(a0)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% move_to_native_register/3: {y_reg, 2} to native reg a1 (t6 holds the pointer from 20(a0))
+                ?_test(begin
+                    State1 = ?BACKEND:move_to_native_register(State0, {y_reg, 2}, a1),
+                    Stream = ?BACKEND:stream(State1),
+                    Dump = <<
+                        "   0:  01452f83            lw  t6,20(a0)\n"
+                        "   4:  008fa583            lw  a1,8(t6)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                %% Test: ptr with offset to fp_reg (term_to_float): two words copied, 4/8(RegA) -> 24/28(t5)
+                ?_test(begin
+                    {State1, RegA} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+                    State2 = ?BACKEND:move_to_vm_register(
+                        State1, {free, {ptr, RegA, 1}}, {fp_reg, 3}
+                    ),
+                    Stream = ?BACKEND:stream(State2),
+                    Dump = <<
+                        "   0:  01852f83            lw  t6,24(a0)\n"
+                        "   4:  06052f03            lw  t5,96(a0)\n"
+                        "   8:  004fae83            lw  t4,4(t6)\n"
+                        "   c:  01df2c23            sw  t4,24(t5)\n"
+                        "  10:  008fae83            lw  t4,8(t6)\n"
+                        "  14:  01df2e23            sw  t4,28(t5)"
+                    >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+%% Emit add(Reg, Imm) followed by a jump to 16#100, then compare with Dump.
+%% Imm is an integer or a native register (add_test_ passes 2, 256 and a3).
+add_test0(State0, Reg, Imm, Dump) ->
+    Added = ?BACKEND:add(State0, Reg, Imm),
+    WithJump = ?BACKEND:jump_to_offset(Added, 16#100),
+    ?assertEqual(dump_to_bin(Dump), ?BACKEND:stream(WithJump)).
+
+add_test_() -> % expected encodings for ?BACKEND:add/3, driven through add_test0
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    add_test0(State0, a2, 2, << % small immediate: one 16-bit addi
+                        "   0:  0609                    addi    a2,a2,2\n"
+                        "   2:  a8fd                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    add_test0(State0, a2, 256, << % NOTE(review): 256 goes via t6 although li is one insn - confirm
+                        "   0:  10000f93            li  t6,256\n"
+                        "   4:  967e                    add a2,a2,t6\n"
+                        "   6:  a8ed                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    add_test0(State0, a2, a3, << % register operand
+                        "   0:  9636                    add a2,a2,a3\n"
+                        "   2:  a8fd                    j   0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+%% Emit sub(Reg, Imm) followed by a jump to 16#100, then compare with Dump.
+%% Imm is an integer or a native register (sub_test_ passes 2, 256 and a3).
+sub_test0(State0, Reg, Imm, Dump) ->
+    Subtracted = ?BACKEND:sub(State0, Reg, Imm),
+    WithJump = ?BACKEND:jump_to_offset(Subtracted, 16#100),
+    ?assertEqual(dump_to_bin(Dump), ?BACKEND:stream(WithJump)).
+
+sub_test_() -> % expected encodings for ?BACKEND:sub/3, driven through sub_test0
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    sub_test0(State0, a2, 2, << % small immediate: addi with the negated value
+                        "   0:  1679                    addi    a2,a2,-2\n"
+                        "   2:  a8fd                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    sub_test0(State0, a2, 256, << % 256 is first placed in t6, then subtracted
+                        "      0:   10000f93            li  t6,256\n"
+                        "      4:   41f60633            sub a2,a2,t6\n"
+                        "      8:   a8e5                    j   0x100"
+                    >>)
+                end),
+                ?_test(begin
+                    sub_test0(State0, a2, a3, << % register operand
+                        "      0:   8e15                    sub a2,a2,a3\n"
+                        "      2:   a8fd                    j   0x100"
+                    >>)
+                end)
+            ]
+        end}.
+
+%% Emit mul(Reg, Imm) and compare the resulting stream with Dump.
+mul_test0(State0, Reg, Imm, Dump) ->
+    Scaled = ?BACKEND:mul(State0, Reg, Imm),
+    ?assertEqual(dump_to_bin(Dump), ?BACKEND:stream(Scaled)).
+
+mul_test_() -> % expected encodings for ?BACKEND:mul/3 with constant multipliers 2..11
+    {setup,
+        fun() ->
+            ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0))
+        end,
+        fun(State0) ->
+            [
+                ?_test(begin
+                    mul_test0(State0, a2, 2, << % x2: shift left by 1
+                        "      0:   0606                    slli    a2,a2,0x1"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 3, << % x3: (a2 << 1) + a2, t6 as temporary
+                        "      0:   00161f93            slli    t6,a2,0x1\n"
+                        "      4:   00cf8633            add a2,t6,a2"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 4, << % x4: shift left by 2
+                        "      0:   060a                    slli    a2,a2,0x2"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 5, << % x5: (a2 << 2) + a2
+                        "      0:   00261f93            slli    t6,a2,0x2\n"
+                        "      4:   00cf8633            add a2,t6,a2"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 6, << % x6: the x3 sequence, then shift left by 1
+                        "      0:   00161f93            slli    t6,a2,0x1\n"
+                        "      4:   00cf8633            add a2,t6,a2\n"
+                        "      8:   0606                    slli    a2,a2,0x1"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 7, << % x7: (a2 << 3) - a2
+                        "      0:   00361f93            slli    t6,a2,0x3\n"
+                        "      4:   40cf8633            sub a2,t6,a2"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 8, << % x8: shift left by 3
+                        "      0:   060e                    slli    a2,a2,0x3"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 9, << % x9: (a2 << 3) + a2
+                        "      0:   00361f93            slli    t6,a2,0x3\n"
+                        "      4:   00cf8633            add a2,t6,a2"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 10, << % x10: the x5 sequence, then shift left by 1
+                        "      0:   00261f93            slli    t6,a2,0x2\n"
+                        "      4:   00cf8633            add a2,t6,a2\n"
+                        "      8:   0606                    slli    a2,a2,0x1"
+                    >>)
+                end),
+                ?_test(begin
+                    mul_test0(State0, a2, 11, << % x11: no shift/add form expected; constant in t6 + mul
+                        "      0:   4fad                    li  t6,11\n"
+                        "      2:   03f60633            mul a2,a2,t6"
+                    >>)
+                end)
+            ]
+        end}.
+
+%% set_args1 with a {y_reg, N} argument passed to call_primitive.
+set_args1_y_reg_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Same call shape as
+    %   {MSt2, Value} = MMod:call_primitive(MSt1, ?PRIM_BITSTRING_GET_UTF8, [{free, Src}])
+    % except that the single argument is {y_reg, 5} rather than {free, Src}.
+    Args = [{y_reg, 5}],
+    {Called, _ResultReg} = ?BACKEND:call_primitive(Fresh, ?PRIM_BITSTRING_GET_UTF8, Args),
+    % Expected code, in order:
+    %  - the primitive's address is loaded from entry 67 of the table at a2 into t6;
+    %  - ra, a0, a1 and a2 are saved in a 16-byte stack frame;
+    %  - the argument is read from 20(t5), t5 being the pointer at 20(a0), into a0;
+    %  - after jalr t6 the result is moved from a0 to t6 and the frame is restored.
+    Expected = dump_to_bin(<<
+        "   0:  04300f93            li  t6,67\n"
+        "   4:  0f8a                    slli    t6,t6,0x2\n"
+        "   6:  9fb2                    add t6,t6,a2\n"
+        "   8:  000faf83            lw  t6,0(t6)\n"
+        "   c:  1141                    addi    sp,sp,-16\n"
+        "   e:  c006                    sw  ra,0(sp)\n"
+        "  10:  c22a                    sw  a0,4(sp)\n"
+        "  12:  c42e                    sw  a1,8(sp)\n"
+        "  14:  c632                    sw  a2,12(sp)\n"
+        "  16:  01452f03            lw  t5,20(a0)\n"
+        "  1a:  014f2503            lw  a0,20(t5)\n"
+        "  1e:  9f82                    jalr    t6\n"
+        "  20:  8faa                    mv  t6,a0\n"
+        "  22:  4082                    lw  ra,0(sp)\n"
+        "  24:  4512                    lw  a0,4(sp)\n"
+        "  26:  45a2                    lw  a1,8(sp)\n"
+        "  28:  4632                    lw  a2,12(sp)\n"
+        "  2a:  0141                    addi    sp,sp,16"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Called)).
+
+%% Read of {y_reg, 123}: byte offset 123 * 4 = 492 is not folded into the load;
+%% the expected code builds the slot address in a register first.
+large_y_reg_read_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, ResultReg} = ?BACKEND:move_to_native_register(Fresh, {y_reg, 123}),
+    % t5 receives the pointer read from 20(a0); t6 = 492 + t5 is the slot
+    % address and is then overwritten with the loaded value.
+    Expected = dump_to_bin(<<
+        "   0:  01452f03            lw  t5,20(a0)\n"
+        "   4:  1ec00f93            li  t6,492\n"
+        "   8:  9ffa                    add t6,t6,t5\n"
+        "   a:  000faf83            lw  t6,0(t6)"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Loaded)),
+    ?assertEqual(t6, ResultReg).
+
+%% Write of the immediate 42 to {y_reg, 123} (byte offset 123 * 4 = 492).
+large_y_reg_write_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Stored = ?BACKEND:move_to_vm_register(Fresh, 42, {y_reg, 123}),
+    % Three temporaries appear in the expected code: t5 holds 42, t6 the
+    % pointer read from 20(a0) and t4 the slot address 492 + t6; the store
+    % goes through 0(t4).
+    Expected = dump_to_bin(<<
+        "   0:  02a00f13            li  t5,42\n"
+        "   4:  01452f83            lw  t6,20(a0)\n"
+        "   8:  1ec00e93            li  t4,492\n"
+        "   c:  9efe                    add t4,t4,t6\n"
+        "   e:  01eea023            sw  t5,0(t4)"
+    >>),
+    ?assertEqual(Expected, ?BACKEND:stream(Stored)).
+
+%% Reading Y register 35 (byte offset 35 * 4 = 140) while five native
+%% registers are already holding x_reg 0..4.
+%% The expected code falls back to the t0/t1 pair: t0 receives the pointer
+%% loaded from 20(a0), t1 receives the offset, the sum and finally the value.
+large_y_reg_read_register_exhaustion_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Occupy five native registers with x_reg 0..4; which registers are handed
+    % out is not asserted here, only visible in the expected dump (t6..t2)
+    Busy = lists:foldl(
+        fun(XIndex, Acc) ->
+            {Next, _} = ?BACKEND:move_to_native_register(Acc, {x_reg, XIndex}),
+            Next
+        end,
+        Fresh,
+        lists:seq(0, 4)
+    ),
+    {Loaded, ResultReg} = ?BACKEND:move_to_native_register(Busy, {y_reg, 35}),
+    Emitted = ?BACKEND:stream(Loaded),
+    Expected = dump_to_bin(<<
+        "   0:  01852f83            lw  t6,24(a0)\n"
+        "   4:  01c52f03            lw  t5,28(a0)\n"
+        "   8:  02052e83            lw  t4,32(a0)\n"
+        "   c:  02452e03            lw  t3,36(a0)\n"
+        "  10:  02852383            lw  t2,40(a0)\n"
+        "  14:  01452283            lw  t0,20(a0)\n"
+        "  18:  08c00313            li  t1,140\n"
+        "  1c:  9316                    add t1,t1,t0\n"
+        "  1e:  00032303            lw  t1,0(t1)"
+    >>),
+    ?assertEqual(Expected, Emitted),
+    ?assertEqual(t1, ResultReg).
+
+%% Writing a native register into Y register 50 (byte offset 50 * 4 = 200)
+%% while t6..t2 are all in use.
+%% The expected code uses t1 for the pointer loaded from 20(a0) and t0 for the
+%% computed slot address, then stores the source register through 0(t0).
+large_y_reg_write_register_exhaustion_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % The value to be stored comes from x_reg 0
+    {WithSrc, SrcReg} = ?BACKEND:move_to_native_register(Fresh, {x_reg, 0}),
+    % Occupy four more registers, asserting which one each allocation returns
+    Busy = lists:foldl(
+        fun({XIndex, ExpectedReg}, Acc) ->
+            {Next, ExpectedReg} = ?BACKEND:move_to_native_register(Acc, {x_reg, XIndex}),
+            Next
+        end,
+        WithSrc,
+        [{1, t5}, {2, t4}, {3, t3}, {4, t2}]
+    ),
+    Stored = ?BACKEND:move_to_vm_register(Busy, SrcReg, {y_reg, 50}),
+    Emitted = ?BACKEND:stream(Stored),
+    Expected = dump_to_bin(<<
+        "      0:   01852f83            lw  t6,24(a0)\n"
+        "      4:   01c52f03            lw  t5,28(a0)\n"
+        "      8:   02052e83            lw  t4,32(a0)\n"
+        "      c:   02452e03            lw  t3,36(a0)\n"
+        "     10:   02852383            lw  t2,40(a0)\n"
+        "     14:   01452303            lw  t1,20(a0)\n"
+        "     18:   0c800293            li  t0,200\n"
+        "     1c:   929a                    add t0,t0,t1\n"
+        "     1e:   01f2a023            sw  t6,0(t0)"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+%% Boundary case: Y register 31 (byte offset 31 * 4 = 124) is expected to be
+%% read with a single `lw` carrying the offset as its immediate.
+%% NOTE(review): large_y_reg_read_register_exhaustion_test expects the
+%% indirect li/add sequence already at offset 140 (Y=35), so the cut-off
+%% appears to be 124 rather than the 12-bit `lw` immediate range; confirm
+%% against the backend.
+y_reg_boundary_direct_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    {Loaded, ResultReg} = ?BACKEND:move_to_native_register(Fresh, {y_reg, 31}),
+    Emitted = ?BACKEND:stream(Loaded),
+    % Pointer from 20(a0) into t5, then the value at 124(t5) into t6
+    Expected = dump_to_bin(<<
+        "   0:  01452f03            lw  t5,20(a0)\n"
+        "   4:  07cf2f83            lw  t6,124(t5)"
+    >>),
+    ?assertEqual(Expected, Emitted),
+    ?assertEqual(t6, ResultReg).
+
+%% ?BACKEND:debugger/1 is expected to emit exactly one 16-bit `ebreak` (0x9002).
+debugger_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Emitted = ?BACKEND:stream(?BACKEND:debugger(Fresh)),
+    Expected = dump_to_bin(<<
+        "      0:   9002                    ebreak"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+%% and_/3 with a negative immediate (-4) while t6..t1 are all allocated.
+%% The expected code uses t0 as scratch: it loads 3, inverts it with `not`
+%% and then ANDs it into the target register.
+and_register_exhaustion_negative_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Take six native registers, asserting the order in which they are handed out
+    NoRegsLeft = lists:foldl(
+        fun({XIndex, ExpectedReg}, Acc) ->
+            {Next, ExpectedReg} = ?BACKEND:move_to_native_register(Acc, {x_reg, XIndex}),
+            Next
+        end,
+        Fresh,
+        [{0, t6}, {1, t5}, {2, t4}, {3, t3}, {4, t2}, {5, t1}]
+    ),
+    Masked = ?BACKEND:and_(NoRegsLeft, t6, -4),
+    Emitted = ?BACKEND:stream(Masked),
+    Expected = dump_to_bin(<<
+        "      0:   01852f83            lw  t6,24(a0)\n"
+        "      4:   01c52f03            lw  t5,28(a0)\n"
+        "      8:   02052e83            lw  t4,32(a0)\n"
+        "      c:   02452e03            lw  t3,36(a0)\n"
+        "     10:   02852383            lw  t2,40(a0)\n"
+        "     14:   02c52303            lw  t1,44(a0)\n"
+        "     18:   428d                    li  t0,3\n"
+        "     1a:   fff2c293            not t0,t0\n"
+        "     1e:   005fffb3            and t6,t6,t0"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+%% and_/3 with a positive immediate (16#3F) while t6..t1 are all allocated.
+%% The expected code loads 63 into t0 and ANDs it into the target register.
+and_register_exhaustion_positive_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Take six native registers, asserting the order in which they are handed out
+    NoRegsLeft = lists:foldl(
+        fun({XIndex, ExpectedReg}, Acc) ->
+            {Next, ExpectedReg} = ?BACKEND:move_to_native_register(Acc, {x_reg, XIndex}),
+            Next
+        end,
+        Fresh,
+        [{0, t6}, {1, t5}, {2, t4}, {3, t3}, {4, t2}, {5, t1}]
+    ),
+    Masked = ?BACKEND:and_(NoRegsLeft, t6, 16#3F),
+    Emitted = ?BACKEND:stream(Masked),
+    Expected = dump_to_bin(<<
+        "   0:  01852f83            lw  t6,24(a0)\n"
+        "   4:  01c52f03            lw  t5,28(a0)\n"
+        "   8:  02052e83            lw  t4,32(a0)\n"
+        "   c:  02452e03            lw  t3,36(a0)\n"
+        "  10:  02852383            lw  t2,40(a0)\n"
+        "  14:  02c52303            lw  t1,44(a0)\n"
+        "  18:  03f00293            li  t0,63\n"
+        "  1c:  005fffb3            and t6,t6,t0"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+%% A jump table built for 512 labels must occupy (512 + 1) entries.
+jump_table_large_labels_test() ->
+    LabelCount = 512,
+    % RISC-V: Each jump table entry is 8 bytes (AUIPC + JALR)
+    EntryBytes = 8,
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    WithTable = ?BACKEND:jump_table(Fresh, LabelCount),
+    Emitted = ?BACKEND:stream(WithTable),
+    ?assertEqual((LabelCount + 1) * EntryBytes, byte_size(Emitted)).
+
+%% Calling ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT with a small 64-bit value (42).
+%% In the expected code the value is passed as two words: a1 = 42 (low) and
+%% a2 = 0 (high); the call result is moved from a0 into t6.
+alloc_boxed_integer_fragment_small_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    Args = [ctx, {avm_int64_t, 42}],
+    {Called, ResultReg} = ?BACKEND:call_primitive(
+        Fresh, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, Args
+    ),
+    ?assertEqual(t6, ResultReg),
+    Emitted = ?BACKEND:stream(Called),
+    Expected = dump_to_bin(<<
+        "      0:   03c62f83            lw  t6,60(a2)\n"
+        "      4:   1141                    addi    sp,sp,-16\n"
+        "      6:   c006                    sw  ra,0(sp)\n"
+        "      8:   c22a                    sw  a0,4(sp)\n"
+        "      a:   c42e                    sw  a1,8(sp)\n"
+        "      c:   c632                    sw  a2,12(sp)\n"
+        "      e:   02a00593            li  a1,42\n"
+        "     12:   4601                    li  a2,0\n"
+        "     14:   9f82                    jalr    t6\n"
+        "     16:   8faa                    mv  t6,a0\n"
+        "     18:   4082                    lw  ra,0(sp)\n"
+        "     1a:   4512                    lw  a0,4(sp)\n"
+        "     1c:   45a2                    lw  a1,8(sp)\n"
+        "     1e:   4632                    lw  a2,12(sp)\n"
+        "     20:   0141                    addi    sp,sp,16"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+%% Calling ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT with a 64-bit value that needs
+%% both words (16#123456789ABCDEF0).
+%% In the expected code each half is built with a lui/addi pair:
+%% a1 = 0x9abcdef0 (low word) and a2 = 0x12345678 (high word).
+alloc_boxed_integer_fragment_large_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    AllocArgs = [ctx, {avm_int64_t, 16#123456789ABCDEF0}],
+    {Called, ResultReg} = ?BACKEND:call_primitive(
+        Fresh, ?PRIM_ALLOC_BOXED_INTEGER_FRAGMENT, AllocArgs
+    ),
+    % Finish with a tail call that consumes (and frees) the result register.
+    % NOTE(review): the original comment said this was to "emit literal pool",
+    % yet the expected dump below contains no pool data; confirm whether the
+    % remark still applies to this backend.
+    RaiseArgs = [ctx, jit_state, offset, ?BADMATCH_ATOM, {free, ResultReg}],
+    Finished = ?BACKEND:call_primitive_last(Called, ?PRIM_RAISE_ERROR_TUPLE, RaiseArgs),
+    ?assertEqual(t6, ResultReg),
+    Emitted = ?BACKEND:stream(Finished),
+    Expected = dump_to_bin(<<
+        "      0:   03c62f83            lw  t6,60(a2)\n"
+        "      4:   1141                    addi    sp,sp,-16\n"
+        "      6:   c006                    sw  ra,0(sp)\n"
+        "      8:   c22a                    sw  a0,4(sp)\n"
+        "      a:   c42e                    sw  a1,8(sp)\n"
+        "      c:   c632                    sw  a2,12(sp)\n"
+        "      e:   9abce5b7            lui a1,0x9abce\n"
+        "     12:   ef058593            addi    a1,a1,-272 # 0x9abcdef0\n"
+        "     16:   12345637            lui a2,0x12345\n"
+        "     1a:   67860613            addi    a2,a2,1656 # 0x12345678\n"
+        "     1e:   9f82                    jalr    t6\n"
+        "     20:   8faa                    mv  t6,a0\n"
+        "     22:   4082                    lw  ra,0(sp)\n"
+        "     24:   4512                    lw  a0,4(sp)\n"
+        "     26:   45a2                    lw  a1,8(sp)\n"
+        "     28:   4632                    lw  a2,12(sp)\n"
+        "     2a:   0141                    addi    sp,sp,16\n"
+        "     2c:   04c62f03            lw  t5,76(a2)\n"
+        "     30:   03000613            li  a2,48\n"
+        "     34:   28b00693            li  a3,651\n"
+        "     38:   877e                    mv  a4,t6\n"
+        "     3a:   8f02                    jr  t5"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+%% Stack frame size used by call_func_ptr.
+%% With t6, t5 and t4 live (t3 is the freed function pointer) seven words are
+%% saved (ra, a0, a1, a2, t4, t5, t6 = 28 bytes), and the expected code
+%% reserves 32 bytes, keeping the 16-byte stack alignment of the RISC-V
+%% calling convention.
+call_func_ptr_stack_alignment_test() ->
+    Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    % Load x_reg 0..3, asserting which native register each one lands in
+    Busy = lists:foldl(
+        fun({XIndex, ExpectedReg}, Acc) ->
+            {Next, ExpectedReg} = ?BACKEND:move_to_native_register(Acc, {x_reg, XIndex}),
+            Next
+        end,
+        Fresh,
+        [{0, t6}, {1, t5}, {2, t4}, {3, t3}]
+    ),
+    {Called, _Result} = ?BACKEND:call_func_ptr(Busy, {free, t3}, [42]),
+    Emitted = ?BACKEND:stream(Called),
+    Expected = dump_to_bin(<<
+        "      0:   01852f83            lw  t6,24(a0)\n"
+        "      4:   01c52f03            lw  t5,28(a0)\n"
+        "      8:   02052e83            lw  t4,32(a0)\n"
+        "      c:   02452e03            lw  t3,36(a0)\n"
+        "     10:   1101                    addi    sp,sp,-32\n"
+        "     12:   c006                    sw  ra,0(sp)\n"
+        "     14:   c22a                    sw  a0,4(sp)\n"
+        "     16:   c42e                    sw  a1,8(sp)\n"
+        "     18:   c632                    sw  a2,12(sp)\n"
+        "     1a:   c876                    sw  t4,16(sp)\n"
+        "     1c:   ca7a                    sw  t5,20(sp)\n"
+        "     1e:   cc7e                    sw  t6,24(sp)\n"
+        "     20:   02a00513            li  a0,42\n"
+        "     24:   9e02                    jalr    t3\n"
+        "     26:   8e2a                    mv  t3,a0\n"
+        "     28:   4082                    lw  ra,0(sp)\n"
+        "     2a:   4512                    lw  a0,4(sp)\n"
+        "     2c:   45a2                    lw  a1,8(sp)\n"
+        "     2e:   4632                    lw  a2,12(sp)\n"
+        "     30:   4ec2                    lw  t4,16(sp)\n"
+        "     32:   4f52                    lw  t5,20(sp)\n"
+        "     34:   4fe2                    lw  t6,24(sp)\n"
+        "     36:   02010113            addi    sp,sp,32"
+    >>),
+    ?assertEqual(Expected, Emitted).
+
+%% Tests for call_func_ptr when every allocatable native register is in use.
+%% The setup fun loads x_reg 0..5 into t6..t1 (asserting that order) and hands
+%% the resulting state to five independent sub-tests. Each sub-test compares
+%% the full instruction stream, i.e. the six setup loads followed by the
+%% save / argument set-up / call / restore sequence under test.
+call_func_ptr_register_exhaustion_test_() ->
+    {setup,
+        fun() ->
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+
+            % Allocate all available registers to simulate register pressure
+            {State1, t6} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, t5} = ?BACKEND:move_to_native_register(State1, {x_reg, 1}),
+            {State3, t4} = ?BACKEND:move_to_native_register(State2, {x_reg, 2}),
+            {State4, t3} = ?BACKEND:move_to_native_register(State3, {x_reg, 3}),
+            {State5, t2} = ?BACKEND:move_to_native_register(State4, {x_reg, 4}),
+            {State6, t1} = ?BACKEND:move_to_native_register(State5, {x_reg, 5}),
+            State6
+        end,
+        fun(State6) ->
+            [
+                % Five arguments, the last two immediates. The freed registers
+                % (t5 = function pointer, t2 = third argument) are not saved;
+                % the expected dump sets only a2..a4 for the arguments.
+                ?_test(begin
+                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, t5},
+                        [ctx, jit_state, {free, t2}, 3, 1]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "      0:   01852f83            lw  t6,24(a0)\n"
+                            "      4:   01c52f03            lw  t5,28(a0)\n"
+                            "      8:   02052e83            lw  t4,32(a0)\n"
+                            "      c:   02452e03            lw  t3,36(a0)\n"
+                            "     10:   02852383            lw  t2,40(a0)\n"
+                            "     14:   02c52303            lw  t1,44(a0)\n"
+                            "     18:   1101                    addi    sp,sp,-32\n"
+                            "     1a:   c006                    sw  ra,0(sp)\n"
+                            "     1c:   c22a                    sw  a0,4(sp)\n"
+                            "     1e:   c42e                    sw  a1,8(sp)\n"
+                            "     20:   c632                    sw  a2,12(sp)\n"
+                            "     22:   c81a                    sw  t1,16(sp)\n"
+                            "     24:   ca72                    sw  t3,20(sp)\n"
+                            "     26:   cc76                    sw  t4,24(sp)\n"
+                            "     28:   ce7e                    sw  t6,28(sp)\n"
+                            "     2a:   861e                    mv  a2,t2\n"
+                            "     2c:   468d                    li  a3,3\n"
+                            "     2e:   4705                    li  a4,1\n"
+                            "     30:   9f02                    jalr    t5\n"
+                            "     32:   8f2a                    mv  t5,a0\n"
+                            "     34:   4082                    lw  ra,0(sp)\n"
+                            "     36:   4512                    lw  a0,4(sp)\n"
+                            "     38:   45a2                    lw  a1,8(sp)\n"
+                            "     3a:   4632                    lw  a2,12(sp)\n"
+                            "     3c:   4342                    lw  t1,16(sp)\n"
+                            "     3e:   4e52                    lw  t3,20(sp)\n"
+                            "     40:   4ee2                    lw  t4,24(sp)\n"
+                            "     42:   4ff2                    lw  t6,28(sp)\n"
+                            "     44:   02010113            addi    sp,sp,32"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                % Same call, but the fifth argument is the live register t1
+                % (moved into a4) and the fourth is the immediate 1.
+                ?_test(begin
+                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, t5},
+                        [ctx, jit_state, {free, t2}, 1, t1]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "      0:   01852f83            lw  t6,24(a0)\n"
+                            "      4:   01c52f03            lw  t5,28(a0)\n"
+                            "      8:   02052e83            lw  t4,32(a0)\n"
+                            "      c:   02452e03            lw  t3,36(a0)\n"
+                            "     10:   02852383            lw  t2,40(a0)\n"
+                            "     14:   02c52303            lw  t1,44(a0)\n"
+                            "     18:   1101                    addi    sp,sp,-32\n"
+                            "     1a:   c006                    sw  ra,0(sp)\n"
+                            "     1c:   c22a                    sw  a0,4(sp)\n"
+                            "     1e:   c42e                    sw  a1,8(sp)\n"
+                            "     20:   c632                    sw  a2,12(sp)\n"
+                            "     22:   c81a                    sw  t1,16(sp)\n"
+                            "     24:   ca72                    sw  t3,20(sp)\n"
+                            "     26:   cc76                    sw  t4,24(sp)\n"
+                            "     28:   ce7e                    sw  t6,28(sp)\n"
+                            "     2a:   861e                    mv  a2,t2\n"
+                            "     2c:   4685                    li  a3,1\n"
+                            "     2e:   871a                    mv  a4,t1\n"
+                            "     30:   9f02                    jalr    t5\n"
+                            "     32:   8f2a                    mv  t5,a0\n"
+                            "     34:   4082                    lw  ra,0(sp)\n"
+                            "     36:   4512                    lw  a0,4(sp)\n"
+                            "     38:   45a2                    lw  a1,8(sp)\n"
+                            "     3a:   4632                    lw  a2,12(sp)\n"
+                            "     3c:   4342                    lw  t1,16(sp)\n"
+                            "     3e:   4e52                    lw  t3,20(sp)\n"
+                            "     40:   4ee2                    lw  t4,24(sp)\n"
+                            "     42:   4ff2                    lw  t6,28(sp)\n"
+                            "     44:   02010113            addi    sp,sp,32"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                % Same call with the register and immediate swapped: t1 is the
+                % fourth argument (a3). Also checks that the result comes back
+                % in the freed function-pointer register t5.
+                ?_test(begin
+                    {State7, ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, t5},
+                        [ctx, jit_state, {free, t2}, t1, 1]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "      0:   01852f83            lw  t6,24(a0)\n"
+                            "      4:   01c52f03            lw  t5,28(a0)\n"
+                            "      8:   02052e83            lw  t4,32(a0)\n"
+                            "      c:   02452e03            lw  t3,36(a0)\n"
+                            "     10:   02852383            lw  t2,40(a0)\n"
+                            "     14:   02c52303            lw  t1,44(a0)\n"
+                            "     18:   1101                    addi    sp,sp,-32\n"
+                            "     1a:   c006                    sw  ra,0(sp)\n"
+                            "     1c:   c22a                    sw  a0,4(sp)\n"
+                            "     1e:   c42e                    sw  a1,8(sp)\n"
+                            "     20:   c632                    sw  a2,12(sp)\n"
+                            "     22:   c81a                    sw  t1,16(sp)\n"
+                            "     24:   ca72                    sw  t3,20(sp)\n"
+                            "     26:   cc76                    sw  t4,24(sp)\n"
+                            "     28:   ce7e                    sw  t6,28(sp)\n"
+                            "     2a:   861e                    mv  a2,t2\n"
+                            "     2c:   869a                    mv  a3,t1\n"
+                            "     2e:   4705                    li  a4,1\n"
+                            "     30:   9f02                    jalr    t5\n"
+                            "     32:   8f2a                    mv  t5,a0\n"
+                            "     34:   4082                    lw  ra,0(sp)\n"
+                            "     36:   4512                    lw  a0,4(sp)\n"
+                            "     38:   45a2                    lw  a1,8(sp)\n"
+                            "     3a:   4632                    lw  a2,12(sp)\n"
+                            "     3c:   4342                    lw  t1,16(sp)\n"
+                            "     3e:   4e52                    lw  t3,20(sp)\n"
+                            "     40:   4ee2                    lw  t4,24(sp)\n"
+                            "     42:   4ff2                    lw  t6,28(sp)\n"
+                            "     44:   02010113            addi    sp,sp,32"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream),
+                    ?assertEqual(t5, ResultReg)
+                end),
+                % Function pointer held in the argument register a1, nothing
+                % freed among t1..t6: all six are saved (48-byte frame). The
+                % pointer is copied to t1 before a1 is overwritten, and the
+                % result is written to a1's stack slot (8(sp)) so that the
+                % restore sequence leaves it in a1.
+                ?_test(begin
+                    {State7, _ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {free, a1},
+                        [t5, a3]
+                    ),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "   0:  01852f83            lw  t6,24(a0)\n"
+                            "   4:  01c52f03            lw  t5,28(a0)\n"
+                            "   8:  02052e83            lw  t4,32(a0)\n"
+                            "   c:  02452e03            lw  t3,36(a0)\n"
+                            "  10:  02852383            lw  t2,40(a0)\n"
+                            "  14:  02c52303            lw  t1,44(a0)\n"
+                            "  18:  fd010113            addi    sp,sp,-48\n"
+                            "  1c:  c006                sw  ra,0(sp)\n"
+                            "  1e:  c22a                sw  a0,4(sp)\n"
+                            "  20:  c42e                sw  a1,8(sp)\n"
+                            "  22:  c632                sw  a2,12(sp)\n"
+                            "  24:  c81a                sw  t1,16(sp)\n"
+                            "  26:  ca1e                sw  t2,20(sp)\n"
+                            "  28:  cc72                sw  t3,24(sp)\n"
+                            "  2a:  ce76                sw  t4,28(sp)\n"
+                            "  2c:  d07a                sw  t5,32(sp)\n"
+                            "  2e:  d27e                sw  t6,36(sp)\n"
+                            "  30:  832e                mv  t1,a1\n"
+                            "  32:  857a                mv  a0,t5\n"
+                            "  34:  85b6                mv  a1,a3\n"
+                            "  36:  9302                jalr    t1\n"
+                            "  38:  c42a                sw  a0,8(sp)\n"
+                            "  3a:  4082                lw  ra,0(sp)\n"
+                            "  3c:  4512                lw  a0,4(sp)\n"
+                            "  3e:  45a2                lw  a1,8(sp)\n"
+                            "  40:  4632                lw  a2,12(sp)\n"
+                            "  42:  4342                lw  t1,16(sp)\n"
+                            "  44:  43d2                lw  t2,20(sp)\n"
+                            "  46:  4e62                lw  t3,24(sp)\n"
+                            "  48:  4ef2                lw  t4,28(sp)\n"
+                            "  4a:  5f02                lw  t5,32(sp)\n"
+                            "  4c:  5f92                lw  t6,36(sp)\n"
+                            "  4e:  03010113            addi    sp,sp,48"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end),
+                % Callee given as {primitive, 2}: the expected code loads the
+                % pointer from 8(a2) into t1 after saving it. The freed
+                % argument register t5 is not saved and receives the result.
+                ?_test(begin
+                    {State7, ResultReg} = ?BACKEND:call_func_ptr(
+                        State6,
+                        {primitive, 2},
+                        [{free, t5}, a3]
+                    ),
+                    % Expected value first, as in the other assertions
+                    ?assertEqual(t5, ResultReg),
+                    Stream = ?BACKEND:stream(State7),
+                    Dump =
+                        <<
+                            "   0:  01852f83            lw  t6,24(a0)\n"
+                            "   4:  01c52f03            lw  t5,28(a0)\n"
+                            "   8:  02052e83            lw  t4,32(a0)\n"
+                            "   c:  02452e03            lw  t3,36(a0)\n"
+                            "  10:  02852383            lw  t2,40(a0)\n"
+                            "  14:  02c52303            lw  t1,44(a0)\n"
+                            "  18:  fd010113            addi    sp,sp,-48\n"
+                            "  1c:  c006                sw  ra,0(sp)\n"
+                            "  1e:  c22a                sw  a0,4(sp)\n"
+                            "  20:  c42e                sw  a1,8(sp)\n"
+                            "  22:  c632                sw  a2,12(sp)\n"
+                            "  24:  c81a                sw  t1,16(sp)\n"
+                            "  26:  ca1e                sw  t2,20(sp)\n"
+                            "  28:  cc72                sw  t3,24(sp)\n"
+                            "  2a:  ce76                sw  t4,28(sp)\n"
+                            "  2c:  d07e                sw  t6,32(sp)\n"
+                            "  2e:  00862303            lw  t1,8(a2)\n"
+                            "  32:  857a                mv  a0,t5\n"
+                            "  34:  85b6                mv  a1,a3\n"
+                            "  36:  9302                jalr    t1\n"
+                            "  38:  8f2a                mv  t5,a0\n"
+                            "  3a:  4082                lw  ra,0(sp)\n"
+                            "  3c:  4512                lw  a0,4(sp)\n"
+                            "  3e:  45a2                lw  a1,8(sp)\n"
+                            "  40:  4632                lw  a2,12(sp)\n"
+                            "  42:  4342                lw  t1,16(sp)\n"
+                            "  44:  43d2                lw  t2,20(sp)\n"
+                            "  46:  4e62                lw  t3,24(sp)\n"
+                            "  48:  4ef2                lw  t4,28(sp)\n"
+                            "  4a:  5f82                lw  t6,32(sp)\n"
+                            "  4c:  03010113            addi    sp,sp,48"
+                        >>,
+                    ?assertEqual(dump_to_bin(Dump), Stream)
+                end)
+            ]
+        end}.
+
+%% jump_to_continuation/2: the expected code takes the current PC with
+%% `auipc`, adjusts it by minus the current stream offset when that offset is
+%% non-zero, adds the continuation offset held in a0 and jumps there.
+jump_to_continuation_test_() ->
+    [
+        % Case 1: emitted at stream offset 0, so no adjustment instruction
+        ?_test(begin
+            Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            Jumped = ?BACKEND:jump_to_continuation(Fresh, {free, a0}),
+            Emitted = ?BACKEND:stream(Jumped),
+            Expected = dump_to_bin(<<
+                "   0:  00000f97            auipc   t6,0x0\n"
+                "   4:  9faa                add t6,t6,a0\n"
+                "   6:  8f82                jr  t6"
+            >>),
+            ?assertEqual(Expected, Emitted)
+        end),
+        % Case 2: emitted right after a jump table for 3 labels
+        % (4 entries * 8 bytes = 32 bytes), so the PC is corrected by
+        % 0 - 32 = -32 (0xFFFFFFE0)
+        ?_test(begin
+            Fresh = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            WithTable = ?BACKEND:jump_table(Fresh, 3),
+            Jumped = ?BACKEND:jump_to_continuation(WithTable, {free, a0}),
+            Emitted = ?BACKEND:stream(Jumped),
+            % The first 32 bytes are the jump table entries, all 0xffffffff here
+            Expected = dump_to_bin(<<
+                "   0:  ffffffff            .insn   4, 0xffffffff\n"
+                "   4:  ffffffff            .insn   4, 0xffffffff\n"
+                "   8:  ffffffff            .insn   4, 0xffffffff\n"
+                "   c:  ffffffff            .insn   4, 0xffffffff\n"
+                "  10:  ffffffff            .insn   4, 0xffffffff\n"
+                "  14:  ffffffff            .insn   4, 0xffffffff\n"
+                "  18:  ffffffff            .insn   4, 0xffffffff\n"
+                "  1c:  ffffffff            .insn   4, 0xffffffff\n"
+                "  20:  00000f97            auipc   t6,0x0\n"
+                "  24:  1f81                addi    t6,t6,-32 # 0x0\n"
+                "  26:  9faa                add t6,t6,a0\n"
+                "  28:  8f82                jr  t6"
+            >>),
+            ?assertEqual(Expected, Emitted)
+        end)
+    ].
+
+%% Mimic part of add.beam: replay its backend calls and compare the emitted RISC-V code byte for byte
+add_beam_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_table(State0, 3),
+    State2 = ?BACKEND:add_label(State1, 1),
+    State3 = ?BACKEND:move_to_vm_register(State2, 16#9f, {x_reg, 1}),
+    State4 = ?BACKEND:move_to_vm_register(State3, 16#8f, {x_reg, 0}),
+    State5 = ?BACKEND:call_only_or_schedule_next(State4, 2),
+    State6 = ?BACKEND:add_label(State5, 2),
+    {State7, ResultReg} = ?BACKEND:call_primitive(State6, ?PRIM_ALLOCATE, [
+        ctx, jit_state, 1, 0, 1
+    ]),
+    State8 = ?BACKEND:if_block(State7, {'(bool)', {free, ResultReg}, '==', false}, fun(BSt0) ->
+        ?BACKEND:call_primitive_last(BSt0, ?PRIM_HANDLE_ERROR, [ctx, jit_state, offset])
+    end),
+    State9 = ?BACKEND:move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}),
+    State10 = ?BACKEND:call_or_schedule_next(State9, 3),
+    State11 = ?BACKEND:add_label(State10, 3),
+    State12 = ?BACKEND:call_primitive_last(State11, ?PRIM_RETURN, [
+        ctx, jit_state
+    ]),
+    % OP_INT_CALL_END: label 0 tail-calls primitive 1
+    State13 = ?BACKEND:add_label(State12, 0),
+    State14 = ?BACKEND:call_primitive_last(State13, 1, [ctx, jit_state]),
+    State15 = ?BACKEND:update_branches(State14),
+    Stream = ?BACKEND:stream(State15),
+    Dump =
+        <<
+            % jump table: one 8-byte auipc+jr entry per label, in order 0..3
+            "   0:  00000697            auipc   a3,0x0\n"
+            "   4:  0e068067            jr  224(a3) # 0xe0\n"
+            "   8:  00000697            auipc   a3,0x0\n"
+            "   c:  01868067            jr  24(a3) # 0x20\n"
+            "  10:  00000697            auipc   a3,0x0\n"
+            "  14:  04868067            jr  72(a3) # 0x58\n"
+            "  18:  00000697            auipc   a3,0x0\n"
+            "  1c:  0c268067            jr  194(a3) # 0xda\n"
+            % label 1
+            % {move,{integer,9},{x,1}}: 16#9f is written to 28(a0)
+            "  20:  09f00f93            li  t6,159\n"
+            "  24:  01f52e23            sw  t6,28(a0)\n"
+            % {move,{integer,8},{x,0}}: 16#8f is written to 24(a0)
+            "  28:  08f00f93            li  t6,143\n"
+            "  2c:  01f52c23            sw  t6,24(a0)\n"
+            % {call_only,2,{f,2}}: decrement the counter at 8(a1); jump to label 2 unless it reached zero
+            "  30:  0085af83            lw  t6,8(a1)\n"
+            "  34:  1ffd                addi    t6,t6,-1\n"
+            "  36:  01f5a423            sw  t6,8(a1)\n"
+            "  3a:  000f8663            beqz    t6,0x46\n"
+            "  3e:  a829                j   0x58\n"
+            "  40:  0001                nop\n"
+            "  42:  00000013            nop\n"
+            "  46:  00000f97            auipc   t6,0x0\n"
+            "  4a:  0fd1                addi    t6,t6,20 # 0x5a\n"
+            "  4c:  0001                nop\n"
+            "  4e:  01f5a223            sw  t6,4(a1)\n"
+            "  52:  00862f83            lw  t6,8(a2)\n"
+            "  56:  8f82                jr  t6\n"
+            % label 2
+            % {allocate,1,1}: call the primitive loaded from 20(a2) with extra arguments 1, 0, 1
+            "  58:  01462f83            lw  t6,20(a2)\n"
+            "  5c:  1141                addi    sp,sp,-16\n"
+            "  5e:  c006                sw  ra,0(sp)\n"
+            "  60:  c22a                sw  a0,4(sp)\n"
+            "  62:  c42e                sw  a1,8(sp)\n"
+            "  64:  c632                sw  a2,12(sp)\n"
+            "  66:  4605                li  a2,1\n"
+            "  68:  4681                li  a3,0\n"
+            "  6a:  4705                li  a4,1\n"
+            "  6c:  9f82                jalr    t6\n"
+            "  6e:  8faa                mv  t6,a0\n"
+            "  70:  4082                lw  ra,0(sp)\n"
+            "  72:  4512                lw  a0,4(sp)\n"
+            "  74:  45a2                lw  a1,8(sp)\n"
+            "  76:  4632                lw  a2,12(sp)\n"
+            "  78:  0141                addi    sp,sp,16\n"
+            "  7a:  01ff9f13            slli    t5,t6,0x1f\n"
+            "  7e:  000f4763            bltz    t5,0x8c\n"
+            "  82:  01862f83            lw  t6,24(a2)\n"
+            "  86:  08600613            li  a2,134\n"
+            "  8a:  8f82                jr  t6\n"
+            % {init_yregs,{list,[{y,0}]}}.
+            %% move_to_vm_register(State8, ?TERM_NIL, {y_reg, 0}),
+            "  8c:  03b00f13            li  t5,59\n"
+            "  90:  01452f83            lw  t6,20(a0)\n"
+            "  94:  01efa023            sw  t5,0(t6)\n"
+            % {call,1,{f,3}}
+            %% call_or_schedule_next(State9, 3),
+            "  98:  0005af03            lw  t5,0(a1)\n"
+            "  9c:  000f2f03            lw  t5,0(t5)\n"
+            "  a0:  0f62                slli    t5,t5,0x18\n"
+            "  a2:  36800f93            li  t6,872\n"
+            "  a6:  00000013            nop\n"
+            "  aa:  01ff6f33            or  t5,t5,t6\n"
+            "  ae:  05e52e23            sw  t5,92(a0)\n"
+            "  b2:  0085af83            lw  t6,8(a1)\n"
+            "  b6:  1ffd                addi    t6,t6,-1\n"
+            "  b8:  01f5a423            sw  t6,8(a1)\n"
+            "  bc:  000f8663            beqz    t6,0xc8\n"
+            "  c0:  a829                j   0xda\n"
+            "  c2:  0001                nop\n"
+            "  c4:  00000013            nop\n"
+            "  c8:  00000f97            auipc   t6,0x0\n"
+            "  cc:  0fd1                addi    t6,t6,20 # 0xdc\n"
+            "  ce:  0001                nop\n"
+            "  d0:  01f5a223            sw  t6,4(a1)\n"
+            "  d4:  00862f83            lw  t6,8(a2)\n"
+            "  d8:  8f82                jr  t6\n"
+            %% (continuation)
+            % label 3
+            "  da:  00462f83            lw  t6,4(a2)\n"
+            "  de:  8f82                jr  t6\n"
+            % label 0
+            "  e0:  00462f83            lw  t6,4(a2)\n"
+            "  e4:  8f82                jr  t6"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
+dump_to_bin(Dump) ->
+    dump_to_bin0(Dump, addr, []).
+
+-define(IS_HEX_DIGIT(C),
+    ((C >= $0 andalso C =< $9) orelse (C >= $a andalso C =< $f) orelse (C >= $A andalso C =< $F))
+).
+
+dump_to_bin0(<<N, $:, Tail/binary>>, addr, Acc) when ?IS_HEX_DIGIT(N) ->
+    dump_to_bin0(Tail, hex, Acc);
+dump_to_bin0(<<N, Tail/binary>>, addr, Acc) when ?IS_HEX_DIGIT(N) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\n, Tail/binary>>, addr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\s, Tail/binary>>, addr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\t, Tail/binary>>, addr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\s, Tail/binary>>, hex, Acc) ->
+    dump_to_bin0(Tail, hex, Acc);
+dump_to_bin0(<<$\t, Tail/binary>>, hex, Acc) ->
+    dump_to_bin0(Tail, hex, Acc);
+%% Handle 32-bit RISC-V instructions (8 consecutive hex digits followed by a space or tab)
+dump_to_bin0(<<H1, H2, H3, H4, H5, H6, H7, H8, Sp, Rest/binary>>, hex, Acc) when
+    (Sp =:= $\t orelse Sp =:= $\s) andalso
+        ?IS_HEX_DIGIT(H1) andalso
+        ?IS_HEX_DIGIT(H2) andalso
+        ?IS_HEX_DIGIT(H3) andalso
+        ?IS_HEX_DIGIT(H4) andalso
+        ?IS_HEX_DIGIT(H5) andalso
+        ?IS_HEX_DIGIT(H6) andalso
+        ?IS_HEX_DIGIT(H7) andalso
+        ?IS_HEX_DIGIT(H8)
+->
+    %% Emit the 32-bit word in little-endian byte order, then skip the rest of the line
+    Instr = list_to_integer([H1, H2, H3, H4, H5, H6, H7, H8], 16),
+    dump_to_bin0(Rest, instr, [<<Instr:32/little>> | Acc]);
+%% Handle a 32-bit word printed as two halfwords ("1234 5678"); NOTE(review): looks inherited from the ARM tests
+dump_to_bin0(<<H1, H2, H3, H4, $\s, H5, H6, H7, H8, Sp, Rest/binary>>, hex, Acc) when
+    (Sp =:= $\t orelse Sp =:= $\s) andalso
+        ?IS_HEX_DIGIT(H1) andalso
+        ?IS_HEX_DIGIT(H2) andalso
+        ?IS_HEX_DIGIT(H3) andalso
+        ?IS_HEX_DIGIT(H4) andalso
+        ?IS_HEX_DIGIT(H5) andalso
+        ?IS_HEX_DIGIT(H6) andalso
+        ?IS_HEX_DIGIT(H7) andalso
+        ?IS_HEX_DIGIT(H8)
+->
+    InstrA = list_to_integer([H1, H2, H3, H4], 16),
+    InstrB = list_to_integer([H5, H6, H7, H8], 16),
+    dump_to_bin0(Rest, instr, [<<InstrB:16/little>>, <<InstrA:16/little>> | Acc]);
+%% Handle 16-bit instructions printed as 4 hex digits (compressed RISC-V encodings such as "9faa")
+dump_to_bin0(<<H1, H2, H3, H4, Sp, Rest/binary>>, hex, Acc) when
+    (Sp =:= $\t orelse Sp =:= $\s) andalso
+        ?IS_HEX_DIGIT(H1) andalso
+        ?IS_HEX_DIGIT(H2) andalso
+        ?IS_HEX_DIGIT(H3) andalso
+        ?IS_HEX_DIGIT(H4)
+->
+    %% Parse the 4 hex digits as one 16-bit halfword, emitted little-endian
+    Instr = list_to_integer([H1, H2, H3, H4], 16),
+    dump_to_bin0(Rest, instr, [<<Instr:16/little>> | Acc]);
+dump_to_bin0(<<$\n, Tail/binary>>, hex, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<$\n, Tail/binary>>, instr, Acc) ->
+    dump_to_bin0(Tail, addr, Acc);
+dump_to_bin0(<<_Other, Tail/binary>>, instr, Acc) ->
+    dump_to_bin0(Tail, instr, Acc);
+dump_to_bin0(<<>>, _, Acc) ->
+    list_to_binary(lists:reverse(Acc)).
diff --git a/tests/libs/jit/jit_tests.erl b/tests/libs/jit/jit_tests.erl
index 72a356ae3c..56206f7eac 100644
--- a/tests/libs/jit/jit_tests.erl
+++ b/tests/libs/jit/jit_tests.erl
@@ -62,13 +62,73 @@
     <<0, 0, 0, 3, 0, 0, 0, 2, 15, 255, 0, 16>>
 ).
 
-compile_minimal_x86_64_test() ->
+% Code chunk from bool_min2.erl - tests tail-call cache optimization
+% This module has multiple return opcodes which trigger the tail-call cache:
+% - The first return creates a cached implementation
+% - Subsequent returns use jump_to_offset to jump back to the cached code
+-define(CODE_CHUNK_3,
+    <<16#00, 16#00, 16#00, 16#10, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#B2, 16#00,
+        16#00, 16#00, 16#09, 16#00, 16#00, 16#00, 16#03, 16#01, 16#10, 16#99, 16#10, 16#02, 16#12,
+        16#22, 16#00, 16#01, 16#20, 16#0C, 16#10, 16#00, 16#AC, 16#17, 16#10, 16#04, 16#40, 16#32,
+        16#23, 16#40, 16#32, 16#33, 16#40, 16#32, 16#13, 16#40, 16#42, 16#43, 16#40, 16#32, 16#03,
+        16#99, 16#20, 16#04, 16#50, 16#45, 16#04, 16#10, 16#65, 16#40, 16#03, 16#04, 16#40, 16#42,
+        16#23, 16#40, 16#42, 16#33, 16#40, 16#32, 16#13, 16#40, 16#42, 16#43, 16#40, 16#42, 16#03,
+        16#99, 16#30, 16#04, 16#50, 16#45, 16#04, 16#10, 16#65, 16#99, 16#20, 16#7D, 16#05, 16#10,
+        16#00, 16#57, 16#04, 16#10, 16#57, 16#03, 16#10, 16#03, 16#12, 16#10, 16#13, 16#01, 16#30,
+        16#99, 16#40, 16#02, 16#12, 16#72, 16#50, 16#01, 16#40, 16#99, 16#50, 16#0B, 16#05, 16#10,
+        16#03, 16#13, 16#03, 16#0B, 16#05, 16#10, 16#23, 16#33, 16#13, 16#0B, 16#05, 16#20, 16#57,
+        16#03, 16#20, 16#57, 16#13, 16#20, 16#03, 16#0A, 16#05, 16#30, 16#43, 16#13, 16#0B, 16#05,
+        16#20, 16#57, 16#03, 16#20, 16#57, 16#13, 16#20, 16#03, 16#13, 16#01, 16#50, 16#99, 16#60,
+        16#02, 16#12, 16#B2, 16#10, 16#01, 16#60, 16#3B, 16#03, 16#55, 16#17, 16#40, 16#32, 16#85,
+        16#42, 16#75, 16#01, 16#70, 16#40, 16#11, 16#03, 16#13, 16#01, 16#80, 16#40, 16#01, 16#03,
+        16#13, 16#03>>
+).
+-define(ATU8_CHUNK_3,
+    <<16#FF, 16#FF, 16#FF, 16#F5, 16#90, 16#62, 16#6F, 16#6F, 16#6C, 16#5F, 16#6D, 16#69, 16#6E,
+        16#32, 16#50, 16#73, 16#74, 16#61, 16#72, 16#74, 16#50, 16#66, 16#61, 16#6C, 16#73, 16#65,
+        16#40, 16#74, 16#72, 16#75, 16#65, 16#60, 16#65, 16#72, 16#6C, 16#61, 16#6E, 16#67, 16#10,
+        16#2B, 16#10, 16#66, 16#30, 16#61, 16#6E, 16#64, 16#20, 16#6F, 16#72, 16#30, 16#6E, 16#6F,
+        16#74, 16#B0, 16#6F, 16#6E, 16#65, 16#5F, 16#69, 16#66, 16#5F, 16#74, 16#72, 16#75, 16#65>>
+).
+-define(TYPE_CHUNK_3,
+    <<16#00, 16#00, 16#00, 16#03, 16#00, 16#00, 16#00, 16#03, 16#0F, 16#FF, 16#30, 16#20, 16#00,
+        16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00,
+        16#00, 16#01, 16#00, 16#01>>
+).
+-define(LINE_CHUNK_3,
+    <<16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#00, 16#07, 16#00,
+        16#00, 16#00, 16#06, 16#00, 16#00, 16#00, 16#00, 16#41, 16#51, 16#61, 16#81, 16#91, 16#B1>>
+).
+
+-ifdef(JIT_DWARF).
+%% Build stream + initial state for Backend. NOTE(review): assumes jit_dwarf:new/4 takes the backend module first - confirm.
+compile_stream_setup(Backend, CodeChunk) ->
+    Stream0 = jit_dwarf:new(Backend, test_module, jit_stream_binary, 0),
+    <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk,
+    Stream1 = jit_dwarf:append(
+        Stream0, jit:beam_chunk_header(LabelsCount, backend_to_arch(Backend), ?JIT_VARIANT_PIC)
+    ),
+    {LabelsCount, Backend:new(?JIT_VARIANT_PIC, jit_dwarf, Stream1)}.
+
+%% Extract the generated binary: unwrap the backend state first, then the jit_dwarf stream.
+compile_stream_finalize(Backend, Stream3) ->
+    jit_dwarf:stream(Backend:stream(Stream3)).
+-else.
+compile_stream_setup(Backend, CodeChunk) ->
     Stream0 = jit_stream_binary:new(0),
-    <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_0,
+    <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = CodeChunk,
     Stream1 = jit_stream_binary:append(
-        Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC)
+        Stream0, jit:beam_chunk_header(LabelsCount, backend_to_arch(Backend), ?JIT_VARIANT_PIC)
     ),
-    Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1),
+    Stream2 = Backend:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1),
+    {LabelsCount, Stream2}.
+
+compile_stream_finalize(Backend, Stream3) ->
+    Backend:stream(Stream3).
+-endif.
+
+compile_minimal_x86_64_test() ->
+    {LabelsCount, Stream2} = compile_stream_setup(jit_x86_64, ?CODE_CHUNK_0),
     {_LabelsCount, Stream3} = jit:compile(
         ?CODE_CHUNK_0,
         fun(_) -> undefined end,
@@ -77,7 +137,7 @@ compile_minimal_x86_64_test() ->
         jit_x86_64,
         Stream2
     ),
-    Stream4 = jit_x86_64:stream(Stream3),
+    Stream4 = compile_stream_finalize(jit_x86_64, Stream3),
     <<16:32, LabelsCount:32, ?JIT_FORMAT_VERSION:16, 1:16, ?JIT_ARCH_X86_64:16, ?JIT_VARIANT_PIC:16,
         0:32, Code/binary>> = Stream4,
     {JumpTable, _} = split_binary(Code, (LabelsCount + 1) * 5),
@@ -105,24 +165,27 @@ check_labels_table0(N, <<N:16, _Offset:32, Rest/binary>>) -> check_labels_table0
 
 check_lines_table(<<LinesCount:16, _Lines:(LinesCount * 6)/binary>>) -> ok.
 
-term_to_int_verify_is_match_state_typed_optimization_x86_64_test() ->
-    % Compile CODE_CHUNK_1 which contains a typed register for term_to_int optimization
-    Stream0 = jit_stream_binary:new(0),
-    <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_1,
-    Stream1 = jit_stream_binary:append(
-        Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC)
-    ),
-    Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1),
+backend_to_arch(jit_x86_64) -> ?JIT_ARCH_X86_64;
+backend_to_arch(jit_aarch64) -> ?JIT_ARCH_AARCH64;
+backend_to_arch(jit_armv6m) -> ?JIT_ARCH_ARMV6M.
+
+compile_stream_for_backend(Backend, CodeChunk, AtomChunk, TypeChunk) ->
+    {LabelsCount, Stream2} = compile_stream_setup(Backend, CodeChunk),
 
-    AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_1),
+    AtomResolver = jit_precompile:atom_resolver(AtomChunk),
     LiteralResolver = fun(_) -> test_literal end,
-    TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_1),
+    TypeResolver = jit_precompile:type_resolver(TypeChunk),
 
     % Compile with typed register support
-    {_LabelsCount, Stream3} = jit:compile(
-        ?CODE_CHUNK_1, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2
+    {LabelsCount, Stream3} = jit:compile(
+        CodeChunk, AtomResolver, LiteralResolver, TypeResolver, Backend, Stream2
+    ),
+    compile_stream_finalize(Backend, Stream3).
+
+term_to_int_verify_is_match_state_typed_optimization_x86_64_test() ->
+    CompiledCode = compile_stream_for_backend(
+        jit_x86_64, ?CODE_CHUNK_1, ?ATU8_CHUNK_1, ?TYPE_CHUNK_1
     ),
-    CompiledCode = jit_x86_64:stream(Stream3),
 
     % Check the reading of x[1] is immediatly followed by a shift right.
     % 15c:	4c 8b 5f 38          	mov    0x38(%rdi),%r11
@@ -183,23 +246,9 @@ term_to_int_verify_is_match_state_typed_optimization_x86_64_test() ->
     ok.
 
 verify_is_function_typed_optimization_x86_64_test() ->
-    % Compile CODE_CHUNK_1 which contains a typed register for term_to_int optimization
-    Stream0 = jit_stream_binary:new(0),
-    <<16:32, 0:32, _OpcodeMax:32, LabelsCount:32, _FunctionsCount:32, _Opcodes/binary>> = ?CODE_CHUNK_2,
-    Stream1 = jit_stream_binary:append(
-        Stream0, jit:beam_chunk_header(LabelsCount, ?JIT_ARCH_X86_64, ?JIT_VARIANT_PIC)
-    ),
-    Stream2 = jit_x86_64:new(?JIT_VARIANT_PIC, jit_stream_binary, Stream1),
-
-    AtomResolver = jit_precompile:atom_resolver(?ATU8_CHUNK_2),
-    LiteralResolver = fun(_) -> test_literal end,
-    TypeResolver = jit_precompile:type_resolver(?TYPE_CHUNK_2),
-
-    % Compile with typed register support
-    {_LabelsCount, Stream3} = jit:compile(
-        ?CODE_CHUNK_2, AtomResolver, LiteralResolver, TypeResolver, jit_x86_64, Stream2
+    CompiledCode = compile_stream_for_backend(
+        jit_x86_64, ?CODE_CHUNK_2, ?ATU8_CHUNK_2, ?TYPE_CHUNK_2
     ),
-    CompiledCode = jit_x86_64:stream(Stream3),
 
     % Check that call to allocate is directly followed by the building the cp
     % for call
@@ -250,3 +299,36 @@ verify_is_function_typed_optimization_x86_64_test() ->
         )
     ),
     ok.
+
+tail_call_cache_armv6m_test() ->
+    CompiledCode = compile_stream_for_backend(
+        jit_armv6m, ?CODE_CHUNK_3, ?ATU8_CHUNK_3, ?TYPE_CHUNK_3
+    ),
+
+    % Check that we have the following pattern:
+    %  8c:	278c      	movs	r7, #140	@ 0x8c
+    %  8e:	6816      	ldr	r6, [r2, #0]
+    %  90:	463a      	mov	r2, r7
+    %  92:	4b02      	ldr	r3, [pc, #8]	@ (0x9c)
+    %  94:	9f05      	ldr	r7, [sp, #20]
+    %  96:	9605      	str	r6, [sp, #20]
+    %  98:	46be      	mov	lr, r7
+
+    % Check for the first return implementation (call_primitive_last for PRIM_RETURN)
+    ?assertMatch(
+        {_, _},
+        binary:match(
+            CompiledCode,
+            <<16#278c:16/little, 16#6816:16/little, 16#463a:16/little, 16#4b02:16/little,
+                16#9f05:16/little, 16#9605:16/little, 16#46be:16/little>>
+        )
+    ),
+
+    % Check for tail-call cache jump: ldr r7, [pc, #0] followed by b.n (backward branch)
+    %   29c:	4f00      	ldr	r7, [pc, #0]	@ (0x2a0)
+    %   29e:	e6f5      	b.n	0x8c
+    ?assertMatch(
+        {_, _},
+        binary:match(CompiledCode, <<16#4f00:16/little, 16#e6f5:16/little>>)
+    ),
+    ok.
diff --git a/tests/libs/jit/jit_tests_common.erl b/tests/libs/jit/jit_tests_common.erl
index cfabfcf15f..cf989e746d 100644
--- a/tests/libs/jit/jit_tests_common.erl
+++ b/tests/libs/jit/jit_tests_common.erl
@@ -77,6 +77,8 @@ asm(Arch, Bin, Str) ->
 find_binutils(Arch) ->
     ArchStr = atom_to_list(Arch),
     BinutilsList = [
+        {ArchStr ++ "-esp-elf-as", ArchStr ++ "-esp-elf-objdump"},
+        {ArchStr ++ "-unknown-elf-as", ArchStr ++ "-unknown-elf-objdump"},
         {ArchStr ++ "-elf-as", ArchStr ++ "-elf-objdump"},
         {ArchStr ++ "-none-eabi-as", ArchStr ++ "-none-eabi-objdump"},
         {ArchStr ++ "-linux-gnu-as", ArchStr ++ "-linux-gnu-objdump"}
@@ -104,6 +106,8 @@ get_asm_header(arm) ->
 get_asm_header(aarch64) ->
     ".text\n";
 get_asm_header(x86_64) ->
+    ".text\n";
+get_asm_header(riscv32) ->
     ".text\n".
 
 %% Get architecture-specific assembler flags
@@ -113,7 +117,9 @@ get_as_flags(arm) ->
 get_as_flags(aarch64) ->
     "";
 get_as_flags(x86_64) ->
-    "--64".
+    "--64";
+get_as_flags(riscv32) ->
+    "-march=rv32imac".
 
 %% Parse objdump output lines and extract binary data
 -spec asm_lines([binary()], binary(), atom()) -> binary().
diff --git a/tests/libs/jit/jit_x86_64_tests.erl b/tests/libs/jit/jit_x86_64_tests.erl
index abdb0d6773..9aa86b6427 100644
--- a/tests/libs/jit/jit_x86_64_tests.erl
+++ b/tests/libs/jit/jit_x86_64_tests.erl
@@ -820,17 +820,35 @@ if_else_block_test() ->
         >>,
     ?assertEqual(dump_to_bin(Dump), Stream).
 
-shift_right_test() ->
-    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
-    {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
-    State2 = ?BACKEND:shift_right(State1, Reg, 3),
-    Stream = ?BACKEND:stream(State2),
-    Dump =
-        <<
-            "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
-            "   4:	48 c1 e8 03          	shr    $0x3,%rax"
-        >>,
-    ?assertEqual(dump_to_bin(Dump), Stream).
+shift_right_test_() ->
+    [
+        ?_test(begin
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, Reg} = ?BACKEND:shift_right(State1, {free, Reg}, 3),
+            Stream = ?BACKEND:stream(State2),
+            Dump =
+                <<
+                    "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+                    "   4:	48 c1 e8 03          	shr    $0x3,%rax"
+                >>,
+            ?assertEqual(dump_to_bin(Dump), Stream)
+        end),
+        ?_test(begin
+            State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+            {State1, Reg} = ?BACKEND:move_to_native_register(State0, {x_reg, 0}),
+            {State2, OtherReg} = ?BACKEND:shift_right(State1, Reg, 3),
+            ?assertNotEqual(OtherReg, Reg),
+            Stream = ?BACKEND:stream(State2),
+            Dump =
+                <<
+                    "   0:	48 8b 47 30          	mov    0x30(%rdi),%rax\n"
+                    "   4:	49 89 c3             	mov    %rax,%r11\n"
+                    "   7:	49 c1 eb 03          	shr    $0x3,%r11"
+                >>,
+            ?assertEqual(dump_to_bin(Dump), Stream)
+        end)
+    ].
 
 shift_left_test() ->
     State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
@@ -1559,6 +1577,21 @@ move_to_array_element_test_() ->
             ]
         end}.
 
+%% Test jump_to_continuation optimization for intra-module returns
+jump_to_continuation_test() ->
+    State0 = ?BACKEND:new(?JIT_VARIANT_PIC, jit_stream_binary, jit_stream_binary:new(0)),
+    State1 = ?BACKEND:jump_to_continuation(State0, {free, rax}),
+    Stream = ?BACKEND:stream(State1),
+    % Expected: leaq -0x7(%rip), %rax; addq %rax, %rax; jmpq *%rax
+    % With default offset 0, NetOffset = 0 - 0 = 0; RIP already points past the 7-byte lea, hence the -0x7 displacement
+    Dump =
+        <<
+            "   0:	48 8d 05 f9 ff ff ff 	lea    -0x7(%rip),%rax\n"
+            "   7:	48 01 c0             	add    %rax,%rax\n"
+            "   a:	ff e0                	jmpq   *%rax"
+        >>,
+    ?assertEqual(dump_to_bin(Dump), Stream).
+
 dump_to_bin(Dump) ->
     dump_to_bin0(Dump, addr, []).
 
diff --git a/tests/libs/jit/tests.erl b/tests/libs/jit/tests.erl
index a435ab17e0..5411862592 100644
--- a/tests/libs/jit/tests.erl
+++ b/tests/libs/jit/tests.erl
@@ -27,8 +27,13 @@
 start() ->
     etest:test([
         jit_tests,
+        jit_dwarf_tests,
         jit_aarch64_tests,
         jit_aarch64_asm_tests,
+        jit_armv6m_tests,
+        jit_armv6m_asm_tests,
+        jit_riscv32_tests,
+        jit_riscv32_asm_tests,
         jit_x86_64_tests,
         jit_x86_64_asm_tests
     ]).
diff --git a/tests/test.c b/tests/test.c
index 79aa2ec121..5db01bfff0 100644
--- a/tests/test.c
+++ b/tests/test.c
@@ -708,6 +708,16 @@ int test_modules_execution(bool beam, bool skip, int count, char **item)
             perror("Error: cannot find aarch64 directory");
             return EXIT_FAILURE;
         }
+#elif JIT_ARCH_TARGET == JIT_ARCH_ARMV6M
+        if (chdir("armv6m") != 0) {
+            perror("Error: cannot find armv6m directory");
+            return EXIT_FAILURE;
+        }
+#elif JIT_ARCH_TARGET == JIT_ARCH_RISCV32
+        if (chdir("riscv32") != 0) {
+            perror("Error: cannot find riscv32 directory");
+            return EXIT_FAILURE;
+        }
 #else
 #error Unknown JIT target
 #endif