From f570e025938fbdbc2b3c20d9648f2086bc06d157 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Wed, 7 May 2025 19:44:22 +0200
Subject: [PATCH 01/12] Interpreter to JIT/AOT calls

This change adds support for making calls from the interpreter to
JIT/AOT generated code. For each target method, it parses the signature
and creates a list of hand-written asm routines that transfer the
arguments from the interpreter stack to the CPU registers / stack based
on the native calling convention, call the target method, and then
place the return value on the interpreter stack. This list is cached in
the MethodDescData so that it doesn't need to be re-generated for
repeated calls to the same method.
---
 src/coreclr/vm/CMakeLists.txt        |   2 +
 src/coreclr/vm/amd64/AsmHelpers.asm  | 261 ++++++++
 src/coreclr/vm/amd64/asmhelpers.S    | 725 ++++++++++++++++++++
 src/coreclr/vm/arm64/asmhelpers.S    | 671 +++++++++++++++++++
 src/coreclr/vm/arm64/asmhelpers.asm  | 671 +++++++++++++++++++
 src/coreclr/vm/callstubgenerator.cpp | 952 +++++++++++++++++++++++++++
 src/coreclr/vm/callstubgenerator.h   |  44 ++
 src/coreclr/vm/interpexec.cpp        |  41 +-
 src/coreclr/vm/method.cpp            |  26 +
 src/coreclr/vm/method.hpp            |  12 +-
 10 files changed, 3401 insertions(+), 4 deletions(-)
 create mode 100644 src/coreclr/vm/callstubgenerator.cpp
 create mode 100644 src/coreclr/vm/callstubgenerator.h

diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt
index c3eb6e1c8bb778..54c8ebd5dcb1bc 100644
--- a/src/coreclr/vm/CMakeLists.txt
+++ b/src/coreclr/vm/CMakeLists.txt
@@ -304,6 +304,7 @@ set(VM_SOURCES_WKS
     callconvbuilder.cpp
     callhelpers.cpp
     callsiteinspect.cpp
+    callstubgenerator.cpp
     clrconfignative.cpp
     clrex.cpp
     clrvarargs.cpp
@@ -449,6 +450,7 @@ set(VM_HEADERS_WKS
     interoputil.inl
     interpexec.h
     interpframeallocator.h
+    callstubgenerator.h
     invokeutil.h
     managedmdimport.hpp
     marshalnative.h
diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm
index b80a2e51c69809..29fbe4a2d465a6 100644
--- a/src/coreclr/vm/amd64/AsmHelpers.asm
+++ b/src/coreclr/vm/amd64/AsmHelpers.asm
@@ -628,5 +628,266 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT
         FUNCLET_CALL_EPILOGUE
         ret
 NESTED_END CallEHFilterFunclet, _TEXT
+
+LEAF_ENTRY Load_Stack, _TEXT
+        push rdi
+        push rsi
+        push rcx
+        mov edi, dword ptr [r11 + 8] ; SP offset
+        mov ecx, dword ptr [r11 + 12] ; stack args size in bytes
+        add edi, 20h ; the 3 pushes above plus return address
+        add rdi, rsp
+        mov rsi, r10
+        shr rcx, 3 ; convert byte size to number of qword slots
+        rep movsq
+        mov r10, rsi
+        pop rcx
+        pop rsi
+        pop rdi
+        add r11, 16
+        jmp qword ptr [r11]
+LEAF_END Load_Stack, _TEXT
+
+LEAF_ENTRY Load_Ref_RCX, _TEXT
+        mov rcx, r10
+        add r10, [r11 + 8] ; size of the value type
+        add r11, 16
+        jmp qword ptr [r11]
+LEAF_END Load_Ref_RCX, _TEXT
+
+LEAF_ENTRY Load_Ref_RDX, _TEXT
+        mov rdx, r10
+        add r10, [r11 + 8] ; size of the value type
+        add r11, 16
+        jmp qword ptr [r11]
+LEAF_END Load_Ref_RDX, _TEXT
+
+LEAF_ENTRY Load_Ref_R8, _TEXT
+        mov r8, r10
+        add r10, [r11 + 8] ; size of the value type
+        add r11, 16
+        jmp qword ptr [r11]
+LEAF_END Load_Ref_R8, _TEXT
+
+LEAF_ENTRY Load_Ref_R9, _TEXT
+        mov r9, r10
+        add r10, [r11 + 8] ; size of the value type
+        add r11, 16
+        jmp qword ptr [r11]
+LEAF_END Load_Ref_R9, _TEXT
+
+LEAF_ENTRY Load_RCX, _TEXT
+        mov rcx, [r10]
+        add r10, 8
+        add r11, 8
+        jmp qword ptr [r11]
+LEAF_END Load_RCX, _TEXT
+
+LEAF_ENTRY Load_RCX_RDX, _TEXT
+        mov rcx, [r10]
+        mov rdx, [r10 + 8]
+        add r10, 16
+        add r11, 8
+        jmp qword ptr [r11]
+LEAF_END Load_RCX_RDX, _TEXT
+
+LEAF_ENTRY Load_RCX_RDX_R8, _TEXT
+        mov rcx, [r10]
+ mov rdx, [r10 + 8] + mov r8, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX_RDX_R8, _TEXT + +LEAF_ENTRY Load_RCX_RDX_R8_R9, _TEXT + mov rcx, [r10] + mov rdx, [r10 + 8] + mov r8, [r10 + 16] + mov r9, [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX_RDX_R8_R9, _TEXT + +LEAF_ENTRY Load_RDX, _TEXT + mov rdx, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX, _TEXT + +LEAF_ENTRY Load_RDX_R8, _TEXT + mov rdx, [r10] + mov r8, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_R8, _TEXT + +LEAF_ENTRY Load_RDX_R8_R9, _TEXT + mov rdx, [r10] + mov r8, [r10 + 8] + mov r9, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_R8_R9, _TEXT + +LEAF_ENTRY Load_R8, _TEXT + mov r8, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R8, _TEXT + +LEAF_ENTRY Load_R8_R9, _TEXT + mov r8, [r10] + mov r9, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R8_R9, _TEXT + +LEAF_ENTRY Load_R9, _TEXT + mov r9, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R9, _TEXT + +LEAF_ENTRY Load_XMM0, _TEXT + movsd xmm0, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + add r10, 10h + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM1, _TEXT + movsd xmm1, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM2, _TEXT + movsd xmm2, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM3, _TEXT + movsd xmm3, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3, _TEXT + +NESTED_ENTRY CallJittedMethodRetVoid, _TEXT + push_vol_reg rbp + mov rbp, rsp +END_PROLOGUE + add r9, 20h ; argument save area + alignment + sub rsp, r9 ; total stack space + mov r11, rcx ; The routines list + mov r10, rdx ; interpreter stack args + call qword ptr [r11] + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetVoid, _TEXT + +NESTED_ENTRY CallJittedMethodRetBuff, _TEXT + push_vol_reg rbp + mov rbp, rsp +END_PROLOGUE + add r9, 20h ; argument save area + alignment + sub rsp, r9 ; total stack space + mov r11, rcx ; The routines list + mov r10, rdx ; interpreter 
stack args + mov rcx, r8 ; return buffer + call qword ptr [r11] + mov rsp, rbp + pop rbp + ret +NESTED_END CallJittedMethodRetBuff, _TEXT + +NESTED_ENTRY CallJittedMethodRetDouble, _TEXT + push_vol_reg r8 + alloc_stack 20h +END_PROLOGUE + mov r11, rcx ; The routines list + mov r10, rdx ; interpreter stack args + call qword ptr [r11] + add rsp, 20h + pop r8 + movsd real8 ptr [r8], xmm0 + ret +NESTED_END CallJittedMethodRetDouble, _TEXT + +NESTED_ENTRY CallJittedMethodRetI8, _TEXT + push_vol_reg r8 + alloc_stack 20h +END_PROLOGUE + mov r11, rcx ; The routines list + mov r10, rdx ; interpreter stack args + call qword ptr [r11] + add rsp, 20h + pop r8 + mov qword ptr [r8], rax + ret +NESTED_END CallJittedMethodRetI8, _TEXT end diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index c90c535a493840..87528c031f9776 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -452,3 +452,728 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler FUNCLET_CALL_EPILOGUE ret NESTED_END CallEHFilterFunclet, _TEXT +LEAF_ENTRY Load_Stack, _TEXT + push rdi + push rsi + push rcx + mov edi, dword ptr [r11 + 8] // SP offset + mov ecx, dword ptr [r11 + 12] // number of stack slots + add edi, 0x20 // the 3 pushes above plus return address + add rdi, rsp + mov rsi, r10 + shr rcx, 3 + rep movsq + mov r10, rsi + pop rcx + pop rsi + pop rdi + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Stack, _TEXT + +LEAF_ENTRY Load_Ref_RDI, _TEXT + mov rdi, r10 + add r10, [r11 + 8] + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Ref_RDI, _TEXT + +LEAF_ENTRY Load_Ref_RSI, _TEXT + mov rsi, r10 + add r10, [r11 + 8] + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Ref_RSI, _TEXT + +LEAF_ENTRY Load_Ref_RDX, _TEXT + mov rdx, r10 + add r10, [r11 + 8] + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Ref_RDX, _TEXT + +LEAF_ENTRY Load_Ref_RCX, _TEXT + mov rcx, r10 + add r10, [r11 + 8] + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Ref_RCX, _TEXT + +LEAF_ENTRY Load_Ref_R8, _TEXT + mov r8, r10 + add r10, [r11 + 8] + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Ref_R8, _TEXT + +LEAF_ENTRY Load_Ref_R9, _TEXT + mov r9, r10 + add r10, [r11 + 8] + add r11, 16 + jmp qword ptr [r11] +LEAF_END Load_Ref_R9, _TEXT + +LEAF_ENTRY Load_RDI, _TEXT + mov rdi, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI, _TEXT + +LEAF_ENTRY Load_RDI_RSI, _TEXT + mov rdi, [r10] + mov rsi, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI_RSI, _TEXT + +LEAF_ENTRY Load_RDI_RSI_RDX, _TEXT + mov rdi, [r10] + mov rsi, [r10 + 8] + mov rdx, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI_RSI_RDX, _TEXT + +LEAF_ENTRY Load_RDI_RSI_RDX_RCX, _TEXT + mov rdi, [r10] + mov rsi, [r10 + 8] + mov rdx, [r10 + 16] + mov rcx, [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI_RSI_RDX_RCX, _TEXT + +LEAF_ENTRY Load_RDI_RSI_RDX_RCX_R8, _TEXT + mov rdi, [r10] + mov rsi, [r10 + 8] + mov rdx, [r10 + 16] + mov rcx, [r10 + 24] + mov r8, [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI_RSI_RDX_RCX_R8, _TEXT + +LEAF_ENTRY Load_RDI_RSI_RDX_RCX_R8_R9, _TEXT + mov rdi, [r10] + mov rsi, [r10 + 8] + mov rdx, [r10 + 16] + mov rcx, [r10 + 24] + mov r8, [r10 + 32] + mov r9, [r10 + 40] + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDI_RSI_RDX_RCX_R8_R9, _TEXT + +LEAF_ENTRY Load_RSI, _TEXT + mov rsi, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RSI, 
_TEXT + +LEAF_ENTRY Load_RSI_RDX, _TEXT + mov rsi, [r10] + mov rdx, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RSI_RDX, _TEXT + +LEAF_ENTRY Load_RSI_RDX_RCX, _TEXT + mov rsi, [r10] + mov rdx, [r10 + 8] + mov rcx, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RSI_RDX_RCX, _TEXT + +LEAF_ENTRY Load_RSI_RDX_RCX_R8, _TEXT + mov rsi, [r10] + mov rdx, [r10 + 8] + mov rcx, [r10 + 16] + mov r8, [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RSI_RDX_RCX_R8, _TEXT + +LEAF_ENTRY Load_RSI_RDX_RCX_R8_R9, _TEXT + mov rsi, [r10] + mov rdx, [r10 + 8] + mov rcx, [r10 + 16] + mov r8, [r10 + 24] + mov r9, [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RSI_RDX_RCX_R8_R9, _TEXT + +LEAF_ENTRY Load_RDX, _TEXT + mov rdx, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX, _TEXT + +LEAF_ENTRY Load_RDX_RCX, _TEXT + mov rdx, [r10] + mov rcx, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_RCX, _TEXT + +LEAF_ENTRY Load_RDX_RCX_R8, _TEXT + mov rdx, [r10] + mov rcx, [r10 + 8] + mov r8, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_RCX_R8, _TEXT + +LEAF_ENTRY Load_RDX_RCX_R8_R9, _TEXT + mov rdx, [r10] + mov rcx, [r10 + 8] + mov r8, [r10 + 16] + mov r9, [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_RCX_R8_R9, _TEXT + +LEAF_ENTRY Load_RCX, _TEXT + mov rcx, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX, _TEXT + +LEAF_ENTRY Load_RCX_R8, _TEXT + mov rcx, [r10] + mov r8, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX_R8, _TEXT + +LEAF_ENTRY Load_RCX_R8_R9, _TEXT + mov rcx, [r10] + mov r8, [r10 + 8] + mov r9, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RCX_R8_R9, _TEXT + +LEAF_ENTRY Load_RDX_R8, _TEXT + mov rdx, [r10] + mov r8, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_R8, _TEXT + +LEAF_ENTRY Load_RDX_R8_R9, _TEXT + mov rdx, [r10] + mov r8, [r10 + 8] + mov r9, [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_RDX_R8_R9, _TEXT + +LEAF_ENTRY Load_R8, _TEXT + mov r8, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R8, _TEXT + +LEAF_ENTRY Load_R8_R9, _TEXT + mov r8, [r10] + mov r9, [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R8_R9, _TEXT + +LEAF_ENTRY Load_R9, _TEXT + mov r9, [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_R9, _TEXT + +LEAF_ENTRY Load_XMM0, _TEXT + movsd xmm0, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3_XMM4, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + 
movsd xmm3, real8 ptr [r10 + 24] + movsd xmm4, real8 ptr [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3_XMM4, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + movsd xmm4, real8 ptr [r10 + 32] + movsd xmm5, real8 ptr [r10 + 40] + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + movsd xmm4, real8 ptr [r10 + 32] + movsd xmm5, real8 ptr [r10 + 40] + movsd xmm6, real8 ptr [r10 + 48] + add r10, 56 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd xmm0, real8 ptr [r10] + movsd xmm1, real8 ptr [r10 + 8] + movsd xmm2, real8 ptr [r10 + 16] + movsd xmm3, real8 ptr [r10 + 24] + movsd xmm4, real8 ptr [r10 + 32] + movsd xmm5, real8 ptr [r10 + 40] + movsd xmm6, real8 ptr [r10 + 48] + movsd xmm7, real8 ptr [r10 + 56] + add r10, 64 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM1, _TEXT + movsd xmm1, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3_XMM4, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + movsd xmm4, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3_XMM4, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + movsd xmm4, real8 ptr [r10 + 24] + movsd xmm5, real8 ptr [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + movsd xmm4, real8 ptr [r10 + 24] + movsd xmm5, real8 ptr [r10 + 32] + movsd xmm6, real8 ptr [r10 + 40] + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd xmm1, real8 ptr [r10] + movsd xmm2, real8 ptr [r10 + 8] + movsd xmm3, real8 ptr [r10 + 16] + movsd xmm4, real8 ptr [r10 + 24] + movsd xmm5, real8 ptr [r10 + 32] + movsd xmm6, real8 ptr [r10 + 40] + movsd xmm7, real8 ptr [r10 + 48] + add r10, 56 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM2, _TEXT + movsd xmm2, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword 
ptr [r11] +LEAF_END Load_XMM2_XMM3, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3_XMM4, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + movsd xmm4, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3_XMM4, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3_XMM4_XMM5, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + movsd xmm4, real8 ptr [r10 + 16] + movsd xmm5, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + movsd xmm4, real8 ptr [r10 + 16] + movsd xmm5, real8 ptr [r10 + 24] + movsd xmm6, real8 ptr [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd xmm2, real8 ptr [r10] + movsd xmm3, real8 ptr [r10 + 8] + movsd xmm4, real8 ptr [r10 + 16] + movsd xmm5, real8 ptr [r10 + 24] + movsd xmm6, real8 ptr [r10 + 32] + movsd xmm7, real8 ptr [r10 + 40] + add r10, 48 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM3, _TEXT + movsd xmm3, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3, _TEXT + +LEAF_ENTRY Load_XMM3_XMM4, _TEXT + movsd xmm3, real8 ptr [r10] + movsd xmm4, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3_XMM4, _TEXT + +LEAF_ENTRY Load_XMM3_XMM4_XMM5, _TEXT + movsd xmm3, real8 ptr [r10] + movsd xmm4, real8 ptr [r10 + 8] + movsd xmm5, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3_XMM4_XMM5, _TEXT + +LEAF_ENTRY Load_XMM3_XMM4_XMM5_XMM6, _TEXT + movsd xmm3, real8 ptr [r10] + movsd xmm4, real8 ptr [r10 + 8] + movsd xmm5, real8 ptr [r10 + 16] + movsd xmm6, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd xmm3, real8 ptr [r10] + movsd xmm4, real8 ptr [r10 + 8] + movsd xmm5, real8 ptr [r10 + 16] + movsd xmm6, real8 ptr [r10 + 24] + movsd xmm7, real8 ptr [r10 + 32] + add r10, 40 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM3_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM4, _TEXT + movsd xmm4, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM4, _TEXT + +LEAF_ENTRY Load_XMM4_XMM5, _TEXT + movsd xmm4, real8 ptr [r10] + movsd xmm5, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM4_XMM5, _TEXT + +LEAF_ENTRY Load_XMM4_XMM5_XMM6, _TEXT + movsd xmm4, real8 ptr [r10] + movsd xmm5, real8 ptr [r10 + 8] + movsd xmm6, real8 ptr [r10 + 16] + add r10, 24 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM4_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM4_XMM5_XMM6_XMM7, _TEXT + movsd xmm4, real8 ptr [r10] + movsd xmm5, real8 ptr [r10 + 8] + movsd xmm6, real8 ptr [r10 + 16] + movsd xmm7, real8 ptr [r10 + 24] + add r10, 32 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM4_XMM5_XMM6_XMM7, _TEXT + +LEAF_ENTRY Load_XMM5, _TEXT + movsd xmm5, real8 ptr [r10] + add r10, 8 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM5, _TEXT + +LEAF_ENTRY Load_XMM5_XMM6, _TEXT + movsd xmm5, real8 ptr [r10] + movsd xmm6, real8 ptr [r10 + 8] + add r10, 16 + add r11, 8 + jmp qword ptr [r11] +LEAF_END Load_XMM5_XMM6, _TEXT + +LEAF_ENTRY Load_XMM5_XMM6_XMM7, _TEXT + movsd xmm5, real8 ptr [r10] + 
movsd xmm6, real8 ptr [r10 + 8]
+    movsd xmm7, real8 ptr [r10 + 16]
+    add r10, 24
+    add r11, 8
+    jmp qword ptr [r11]
+LEAF_END Load_XMM5_XMM6_XMM7, _TEXT
+
+LEAF_ENTRY Load_XMM6, _TEXT
+    movsd xmm6, real8 ptr [r10]
+    add r10, 8
+    add r11, 8
+    jmp qword ptr [r11]
+LEAF_END Load_XMM6, _TEXT
+
+LEAF_ENTRY Load_XMM6_XMM7, _TEXT
+    movsd xmm6, real8 ptr [r10]
+    movsd xmm7, real8 ptr [r10 + 8]
+    add r10, 16
+    add r11, 8
+    jmp qword ptr [r11]
+LEAF_END Load_XMM6_XMM7, _TEXT
+
+LEAF_ENTRY Load_XMM7, _TEXT
+    movsd xmm7, real8 ptr [r10]
+    add r10, 8
+    add r11, 8
+    jmp qword ptr [r11]
+LEAF_END Load_XMM7, _TEXT
+
+
+NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler
+// TODO: decide whether to create RBP frame everywhere or not
+    push_nonvol_reg rbp
+    mov rbp, rsp
+    alloc_stack 0x10
+    save_reg_postrsp r10, 0
+END_PROLOGUE
+    sub rsp, rcx // total stack space
+    mov r11, rdi // The routines list
+    mov r10, rsi // interpreter stack args
+    call qword ptr [r11]
+    mov r10, [rsp]
+    mov rsp, rbp
+    pop rbp
+    ret
+NESTED_END CallJittedMethodRetVoid, _TEXT
+
+NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler
+    push_nonvol_reg rbp
+    mov rbp, rsp
+    alloc_stack 0x10
+    save_reg_postrsp r10, 0
+END_PROLOGUE
+    sub rsp, rcx // total stack space
+    mov r11, rdi // The routines list
+    mov r10, rsi // interpreter stack args
+    mov rdi, rdx // return buffer
+    call qword ptr [r11]
+    mov r10, [rsp]
+    mov rsp, rbp
+    pop rbp
+    ret
+NESTED_END CallJittedMethodRetBuff, _TEXT
+
+NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler
+    push_nonvol_reg rbp
+    mov rbp, rsp
+    push_register rdx
+    push_register rax // align
+END_PROLOGUE
+    sub rsp, rcx // total stack space
+    mov r11, rdi // The routines list
+    mov r10, rsi // interpreter stack args
+    call qword ptr [r11]
+    mov rdx, [rbp - 8]
+    movsd real8 ptr [rdx], xmm0
+    mov rsp, rbp
+    pop rbp
+    ret
+NESTED_END CallJittedMethodRetDouble, _TEXT
+
+NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler
+    push_nonvol_reg rbp
+    mov rbp, rsp
+    push_register rdx
+    push_register rax // align
+END_PROLOGUE
+    sub rsp, rcx // total stack space
+    mov r11, rdi // The routines list
+    mov r10, rsi // interpreter stack args
+    call qword ptr [r11]
+    mov rdx, [rbp - 8]
+    mov qword ptr [rdx], rax
+    mov rsp, rbp
+    pop rbp
+    ret
+NESTED_END CallJittedMethodRetI8, _TEXT
+
+NESTED_ENTRY CallJittedMethodRetI8I8, _TEXT, NoHandler
+    push_nonvol_reg rbp
+    mov rbp, rsp
+    push_register rdx
+    push_register rax // align
+END_PROLOGUE
+    sub rsp, rcx // total stack space
+    mov r11, rdi // The routines list
+    mov r10, rsi // interpreter stack args
+    call qword ptr [r11]
+    mov rcx, [rbp - 8]
+    mov qword ptr [rcx], rax
+    mov qword ptr [rcx + 8], rdx
+    mov rsp, rbp
+    pop rbp
+    ret
+NESTED_END CallJittedMethodRetI8I8, _TEXT
+
+NESTED_ENTRY CallJittedMethodRetI8Double, _TEXT, NoHandler
+    push_nonvol_reg rbp
+    mov rbp, rsp
+    push_register rdx
+    push_register rax // align
+END_PROLOGUE
+    sub rsp, rcx // total stack space
+    mov r11, rdi // The routines list
+    mov r10, rsi // interpreter stack args
+    call qword ptr [r11]
+    mov rcx, [rbp - 8]
+    mov qword ptr [rcx], rax
+    movsd real8 ptr [rcx + 8], xmm0
+    mov rsp, rbp
+    pop rbp
+    ret
+NESTED_END CallJittedMethodRetI8Double, _TEXT
+
+NESTED_ENTRY CallJittedMethodRetDoubleI8, _TEXT, NoHandler
+    push_nonvol_reg rbp
+    mov rbp, rsp
+    push_register rdx
+    push_register rax // align
+END_PROLOGUE
+    sub rsp, rcx // total stack space
+    mov r11, rdi // The routines list
+    mov r10, rsi // interpreter stack args
+    call qword ptr [r11]
+    mov rcx, [rbp - 8]
+    movsd real8 ptr [rcx], xmm0
+    mov qword ptr [rcx + 8], rax
+    mov rsp, rbp
+    pop rbp
+    ret
+NESTED_END CallJittedMethodRetDoubleI8, _TEXT
+
+NESTED_ENTRY CallJittedMethodRetDoubleDouble, _TEXT, NoHandler
+    push_nonvol_reg rbp
+    mov rbp, rsp
+    push_register rdx
+    push_register rax // align
+END_PROLOGUE
+    sub rsp, rcx // total stack space
+    mov r11, rdi // The routines list
+    mov r10, rsi // interpreter stack args
+    call qword ptr [r11]
+    mov rcx, [rbp - 8]
+    movsd real8 ptr [rcx], xmm0
+    movsd real8 ptr [rcx + 8], xmm1
+    mov rsp, rbp
+    pop rbp
+    ret
+NESTED_END CallJittedMethodRetDoubleDouble, _TEXT
diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S
index f974a29352bbf7..adadab377f4743 100644
--- a/src/coreclr/vm/arm64/asmhelpers.S
+++ b/src/coreclr/vm/arm64/asmhelpers.S
@@ -869,3 +869,674 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT
     mov x1, x11 // Move temp register to first arg register for static method with return buffer
     EPILOG_BRANCH_REG x12
 LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT
+
+LEAF_ENTRY Load_Stack
+    ldr w14, [x10], #4 // SP offset
+    ldr w12, [x10], #4 // stack args size in bytes
+    add x14, sp, x14
+LOCAL_LABEL(CopyLoop):
+    ldr x13, [x9], #8
+    str x13, [x14], #8
+    subs x12, x12, #8
+    bne LOCAL_LABEL(CopyLoop)
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Stack
+
+LEAF_ENTRY Load_Ref_X0
+    mov x0, x9
+    ldr x12, [x10], #8
+    add x9, x9, x12
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Ref_X0
+
+LEAF_ENTRY Load_Ref_X1
+    mov x1, x9
+    ldr x12, [x10], #8
+    add x9, x9, x12
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Ref_X1
+
+LEAF_ENTRY Load_Ref_X2
+    mov x2, x9
+    ldr x12, [x10], #8
+    add x9, x9, x12
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Ref_X2
+
+LEAF_ENTRY Load_Ref_X3
+    mov x3, x9
+    ldr x12, [x10], #8
+    add x9, x9, x12
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Ref_X3
+
+LEAF_ENTRY Load_Ref_X4
+    mov x4, x9
+    ldr x12, [x10], #8
+    add x9, x9, x12
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Ref_X4
+
+LEAF_ENTRY Load_Ref_X5
+    mov x5, x9
+    ldr x12, [x10], #8
+    add x9, x9, x12
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Ref_X5
+
+LEAF_ENTRY Load_Ref_X6
+    mov x6, x9
+    ldr x12, [x10], #8
+    add x9, x9, x12
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Ref_X6
+
+LEAF_ENTRY Load_Ref_X7
+    mov x7, x9
+    ldr x12, [x10], #8
+    add x9, x9, x12
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Ref_X7
+
+LEAF_ENTRY Load_X0
+    ldr x0, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_X0
+
+LEAF_ENTRY Load_X0_X1
+    ldp x0, x1, [x9], #16
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_X0_X1
+
+LEAF_ENTRY Load_X0_X1_X2
+    ldp x0, x1, [x9], #16
+    ldr x2, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_X0_X1_X2
+
+LEAF_ENTRY Load_X0_X1_X2_X3
+    ldp x0, x1, [x9], #16
+    ldp x2, x3, [x9], #16
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_X0_X1_X2_X3
+
+LEAF_ENTRY Load_X0_X1_X2_X3_X4
+    ldp x0, x1, [x9], #16
+    ldp x2, x3, [x9], #16
+    ldr x4, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_X0_X1_X2_X3_X4
+
+LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5
+    ldp x0, x1, [x9], #16
+    ldp x2, x3, [x9], #16
+    ldp x4, x5, [x9], #16
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_X0_X1_X2_X3_X4_X5
+
+LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5_X6
+    ldp x0, x1, [x9], #16
+    ldp x2, x3, [x9], #16
+    ldp x4, x5, [x9], #16
+    ldr x6, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_X0_X1_X2_X3_X4_X5_X6
+
+LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5_X6_X7
+    ldp x0, x1, [x9], #16
+    ldp x2, x3, [x9], #16
+    ldp x4, x5, [x9], #16
+    ldp x6, x7, [x9], #16
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_X0_X1_X2_X3_X4_X5_X6_X7
+
+LEAF_ENTRY Load_X1
+    ldr x1, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_X1
+
+LEAF_ENTRY Load_X1_X2
+    ldp x1, x2, [x9], #16
+    ldr x11, [x10], #8
+    
EPILOG_BRANCH_REG x11 +LEAF_END Load_X1_X2 + +LEAF_ENTRY Load_X1_X2_X3 + ldp x1, x2, [x9], #16 + ldr x3, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X1_X2_X3 + +LEAF_ENTRY Load_X1_X2_X3_X4 + ldp x1, x2, [x9], #16 + ldp x3, x4, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X1_X2_X3_X4 + +LEAF_ENTRY Load_X1_X2_X3_X4_X5 + ldp x1, x2, [x9], #16 + ldp x3, x4, [x9], #16 + ldr x5, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X1_X2_X3_X4_X5 + +LEAF_ENTRY Load_X1_X2_X3_X4_X5_X6 + ldp x1, x2, [x9], #16 + ldp x3, x4, [x9], #16 + ldp x5, x6, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X1_X2_X3_X4_X5_X6 + +LEAF_ENTRY Load_X1_X2_X3_X4_X5_X6_X7 + ldp x1, x2, [x9], #16 + ldp x3, x4, [x9], #16 + ldp x5, x6, [x9], #16 + ldr x7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X1_X2_X3_X4_X5_X6_X7 + +LEAF_ENTRY Load_X2 + ldr x2, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X2 +LEAF_ENTRY Load_X2_X3 + ldp x2, x3, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X2_X3 + +LEAF_ENTRY Load_X2_X3_X4 + ldp x2, x3, [x9], #16 + ldr x4, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X2_X3_X4 + +LEAF_ENTRY Load_X2_X3_X4_X5 + ldp x2, x3, [x9], #16 + ldp x4, x5, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X2_X3_X4_X5 + +LEAF_ENTRY Load_X2_X3_X4_X5_X6 + ldp x2, x3, [x9], #16 + ldp x4, x5, [x9], #16 + ldr x6, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X2_X3_X4_X5_X6 + +LEAF_ENTRY Load_X2_X3_X4_X5_X6_X7 + ldp x2, x3, [x9], #16 + ldp x4, x5, [x9], #16 + ldp x6, x7, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X2_X3_X4_X5_X6_X7 + +LEAF_ENTRY Load_X3 + ldr x3, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X3 +LEAF_ENTRY Load_X3_X4 + ldp x3, x4, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X3_X4 + +LEAF_ENTRY Load_X3_X4_X5 + ldp x3, x4, [x9], #16 + ldr x5, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X3_X4_X5 + +LEAF_ENTRY Load_X3_X4_X5_X6 + ldp x3, x4, [x9], #16 + ldp x5, x6, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X3_X4_X5_X6 + +LEAF_ENTRY Load_X3_X4_X5_X6_X7 + ldp x3, x4, [x9], #16 + ldp x5, x6, [x9], #16 + ldr x7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X3_X4_X5_X6_X7 + +LEAF_ENTRY Load_X4 + ldr x4, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X4 + +LEAF_ENTRY Load_X4_X5 + ldp x4, x5, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X4_X5 + +LEAF_ENTRY Load_X4_X5_X6 + ldp x4, x5, [x9], #16 + ldr x6, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X4_X5_X6 + +LEAF_ENTRY Load_X4_X5_X6_X7 + ldp x4, x5, [x9], #16 + ldp x6, x7, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X4_X5_X6_X7 + +LEAF_ENTRY Load_X5 + ldr x5, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X5 + +LEAF_ENTRY Load_X5_X6 + ldp x5, x6, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X5_X6 + +LEAF_ENTRY Load_X5_X6_X7 + ldp x5, x6, [x9], #16 + ldr x7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X5_X6_X7 + +LEAF_ENTRY Load_X6 + ldr x6, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X6 + +LEAF_ENTRY Load_X6_X7 + ldp x6, x7, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 +LEAF_END Load_X6_X7 + 
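+// Editorial note on the convention shared by the Load_* routines in this
+// file (inferred from the code and the CallJittedMethodRet* helpers below):
+// x9 points at the next argument on the interpreter stack, x10 points at the
+// next entry in the routine list, and x11 is a scratch register used for
+// chaining. Each routine advances x9 past the arguments it transferred and
+// tail-branches to the next list entry; the final entry is the target method
+// itself, so its return lands back in the CallJittedMethodRet* helper that
+// started walking the list.
+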
+LEAF_ENTRY Load_X7
+    ldr x7, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_X7
+
+LEAF_ENTRY Load_D0
+    ldr d0, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D0
+
+LEAF_ENTRY Load_D1
+    ldr d1, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D1
+
+LEAF_ENTRY Load_D0_D1
+    ldp d0, d1, [x9], #16
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D0_D1
+
+LEAF_ENTRY Load_D0_D1_D2
+    ldr d0, [x9], #8
+ALTERNATE_ENTRY Load_D1_D2
+    ldr d1, [x9], #8
+ALTERNATE_ENTRY Load_D2
+    ldr d2, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D0_D1_D2
+
+LEAF_ENTRY Load_D0_D1_D2_D3
+    ldr d0, [x9], #8
+ALTERNATE_ENTRY Load_D1_D2_D3
+    ldr d1, [x9], #8
+ALTERNATE_ENTRY Load_D2_D3
+    ldr d2, [x9], #8
+ALTERNATE_ENTRY Load_D3
+    ldr d3, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D0_D1_D2_D3
+
+LEAF_ENTRY Load_D0_D1_D2_D3_D4
+    ldr d0, [x9], #8
+ALTERNATE_ENTRY Load_D1_D2_D3_D4
+    ldr d1, [x9], #8
+ALTERNATE_ENTRY Load_D2_D3_D4
+    ldr d2, [x9], #8
+ALTERNATE_ENTRY Load_D3_D4
+    ldr d3, [x9], #8
+ALTERNATE_ENTRY Load_D4
+    ldr d4, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D0_D1_D2_D3_D4
+
+LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5
+    ldr d0, [x9], #8
+ALTERNATE_ENTRY Load_D1_D2_D3_D4_D5
+    ldr d1, [x9], #8
+ALTERNATE_ENTRY Load_D2_D3_D4_D5
+    ldr d2, [x9], #8
+ALTERNATE_ENTRY Load_D3_D4_D5
+    ldr d3, [x9], #8
+ALTERNATE_ENTRY Load_D4_D5
+    ldr d4, [x9], #8
+ALTERNATE_ENTRY Load_D5
+    ldr d5, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D0_D1_D2_D3_D4_D5
+
+LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5_D6
+    ldr d0, [x9], #8
+ALTERNATE_ENTRY Load_D1_D2_D3_D4_D5_D6
+    ldr d1, [x9], #8
+ALTERNATE_ENTRY Load_D2_D3_D4_D5_D6
+    ldr d2, [x9], #8
+ALTERNATE_ENTRY Load_D3_D4_D5_D6
+    ldr d3, [x9], #8
+ALTERNATE_ENTRY Load_D4_D5_D6
+    ldr d4, [x9], #8
+ALTERNATE_ENTRY Load_D5_D6
+    ldr d5, [x9], #8
+ALTERNATE_ENTRY Load_D6
+    ldr d6, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D0_D1_D2_D3_D4_D5_D6
+/*
+LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5_D6_D7
+    ldr d0, [x9], #8
+ALTERNATE_ENTRY Load_D1_D2_D3_D4_D5_D6_D7
+    ldr d1, [x9], #8
+ALTERNATE_ENTRY Load_D2_D3_D4_D5_D6_D7
+    ldr d2, [x9], #8
+ALTERNATE_ENTRY Load_D3_D4_D5_D6_D7
+    ldr d3, [x9], #8
+ALTERNATE_ENTRY Load_D4_D5_D6_D7
+    ldr d4, [x9], #8
+ALTERNATE_ENTRY Load_D5_D6_D7
+    ldr d5, [x9], #8
+ALTERNATE_ENTRY Load_D6_D7
+    ldr d6, [x9], #8
+ALTERNATE_ENTRY Load_D7
+    ldr d7, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D0_D1_D2_D3_D4_D5_D6_D7
+*/
+LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5_D6_D7
+    ldp d0, d1, [x9], #16
+ALTERNATE_ENTRY Load_D2_D3_D4_D5_D6_D7
+    ldp d2, d3, [x9], #16
+ALTERNATE_ENTRY Load_D4_D5_D6_D7
+    ldp d4, d5, [x9], #16
+ALTERNATE_ENTRY Load_D6_D7
+    ldp d6, d7, [x9], #16
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D0_D1_D2_D3_D4_D5_D6_D7
+
+LEAF_ENTRY Load_D1_D2_D3_D4_D5_D6_D7
+    ldp d1, d2, [x9], #16
+ALTERNATE_ENTRY Load_D3_D4_D5_D6_D7
+    ldp d3, d4, [x9], #16
+ALTERNATE_ENTRY Load_D5_D6_D7
+    ldp d5, d6, [x9], #16
+ALTERNATE_ENTRY Load_D7
+    ldr d7, [x9], #8
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_D1_D2_D3_D4_D5_D6_D7
+
+// X0 - routines array
+// X1 - interpreter stack args location
+// X3 - stack arguments size (properly aligned)
+NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler
+    PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -16
+    sub sp, sp, x3
+    mov x10, x0
+    mov x9, x1
+    ldr x11, [x10], #8
+    blr x11
+    EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16
+    ret lr
+NESTED_END CallJittedMethodRetVoid, _TEXT + +// X0 - routines array +// X1 - interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -16 + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + mov x8, x2 + ldr x11, [x10], #8 + blr x11 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + ret lr +NESTED_END CallJittedMethodRetBuff, _TEXT + +// X0 - routines array +// X1 - interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + str x0, [x2] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + ret lr +NESTED_END CallJittedMethodRetI8, _TEXT + +// X0 - routines array +// X1 - interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRet2I8, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp x0, x1, [x2] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + ret lr +NESTED_END CallJittedMethodRet2I8, _TEXT + +// X0 - routines array +// X1 - interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + str d0, [x2] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + ret lr +NESTED_END CallJittedMethodRetDouble, _TEXT + +// X0 - routines array +// X1 - interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRet2Double, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp d0, d1, [x2] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + ret lr +NESTED_END CallJittedMethodRet2Double, _TEXT + +// X0 - routines array +// X1 - interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRet3Double, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp d0, d1, [x2], #16 + str d2, [x2] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + ret lr +NESTED_END CallJittedMethodRet3Double, _TEXT + +// X0 - routines array +// X1 - interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRet4Double, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp d0, d1, [x2], #16 + stp d2, d3, [x2] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + ret lr +NESTED_END CallJittedMethodRet4Double, _TEXT + +// X0 - routines array +// X1 - 
interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRetFloat, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + str s0, [x2] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + ret lr +NESTED_END CallJittedMethodRetFloat, _TEXT + +// X0 - routines array +// X1 - interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRet2Float, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp s0, s1, [x2] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + ret lr +NESTED_END CallJittedMethodRet2Float, _TEXT + +// X0 - routines array +// X1 - interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRet3Float, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp s0, s1, [x2], #8 + str s2, [x2] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + ret lr +NESTED_END CallJittedMethodRet3Float, _TEXT + +// X0 - routines array +// X1 - interpreter stack args location +// X2 - interpreter stack return value location +// X3 - stack arguments size (properly aligned) +NESTED_ENTRY CallJittedMethodRet4Float, _TEXT, NoHandler + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp s0, s1, [x2], #8 + stp s2, s3, [x2] + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + ret lr +NESTED_END CallJittedMethodRet4Float, _TEXT diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 9d57250de9953c..b15a1017f880a5 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -1253,5 +1253,676 @@ JIT_PollGCRarePath EPILOG_BRANCH_REG x12 LEAF_END + LEAF_ENTRY Load_Stack + ldr w14, [x10, #4]! // SP offset + ldr w12, [x10, #4]! 
// stack args size in bytes
+        add x14, sp, x14
+CopyLoop
+        ldr x13, [x9], #8
+        str x13, [x14], #8
+        subs x12, x12, #8
+        bne CopyLoop
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_Stack
+
+    LEAF_ENTRY Load_Ref_X0
+        mov x0, x9
+        ldr x12, [x10], #8
+        add x9, x9, x12
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_Ref_X0
+
+    LEAF_ENTRY Load_Ref_X1
+        mov x1, x9
+        ldr x12, [x10], #8
+        add x9, x9, x12
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_Ref_X1
+
+    LEAF_ENTRY Load_Ref_X2
+        mov x2, x9
+        ldr x12, [x10], #8
+        add x9, x9, x12
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_Ref_X2
+
+    LEAF_ENTRY Load_Ref_X3
+        mov x3, x9
+        ldr x12, [x10], #8
+        add x9, x9, x12
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_Ref_X3
+
+    LEAF_ENTRY Load_Ref_X4
+        mov x4, x9
+        ldr x12, [x10], #8
+        add x9, x9, x12
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_Ref_X4
+
+    LEAF_ENTRY Load_Ref_X5
+        mov x5, x9
+        ldr x12, [x10], #8
+        add x9, x9, x12
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_Ref_X5
+
+    LEAF_ENTRY Load_Ref_X6
+        mov x6, x9
+        ldr x12, [x10], #8
+        add x9, x9, x12
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_Ref_X6
+
+    LEAF_ENTRY Load_Ref_X7
+        mov x7, x9
+        ldr x12, [x10], #8
+        add x9, x9, x12
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_Ref_X7
+
+    LEAF_ENTRY Load_X0
+        ldr x0, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X0
+
+    LEAF_ENTRY Load_X0_X1
+        ldp x0, x1, [x9], #16
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X0_X1
+
+    LEAF_ENTRY Load_X0_X1_X2
+        ldp x0, x1, [x9], #16
+        ldr x2, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X0_X1_X2
+
+    LEAF_ENTRY Load_X0_X1_X2_X3
+        ldp x0, x1, [x9], #16
+        ldp x2, x3, [x9], #16
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X0_X1_X2_X3
+
+    LEAF_ENTRY Load_X0_X1_X2_X3_X4
+        ldp x0, x1, [x9], #16
+        ldp x2, x3, [x9], #16
+        ldr x4, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X0_X1_X2_X3_X4
+
+    LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5
+        ldp x0, x1, [x9], #16
+        ldp x2, x3, [x9], #16
+        ldp x4, x5, [x9], #16
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X0_X1_X2_X3_X4_X5
+
+    LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5_X6
+        ldp x0, x1, [x9], #16
+        ldp x2, x3, [x9], #16
+        ldp x4, x5, [x9], #16
+        ldr x6, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X0_X1_X2_X3_X4_X5_X6
+
+    LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5_X6_X7
+        ldp x0, x1, [x9], #16
+        ldp x2, x3, [x9], #16
+        ldp x4, x5, [x9], #16
+        ldp x6, x7, [x9], #16
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X0_X1_X2_X3_X4_X5_X6_X7
+
+    LEAF_ENTRY Load_X1
+        ldr x1, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X1
+
+    LEAF_ENTRY Load_X1_X2
+        ldp x1, x2, [x9], #16
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X1_X2
+
+    LEAF_ENTRY Load_X1_X2_X3
+        ldp x1, x2, [x9], #16
+        ldr x3, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X1_X2_X3
+
+    LEAF_ENTRY Load_X1_X2_X3_X4
+        ldp x1, x2, [x9], #16
+        ldp x3, x4, [x9], #16
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X1_X2_X3_X4
+
+    LEAF_ENTRY Load_X1_X2_X3_X4_X5
+        ldp x1, x2, [x9], #16
+        ldp x3, x4, [x9], #16
+        ldr x5, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_X1_X2_X3_X4_X5
+
+    LEAF_ENTRY Load_X1_X2_X3_X4_X5_X6
+        ldp x1, x2, [x9], #16
+        ldp x3, x4, [x9], #16
+        ldp x5, x6, [x9], #16
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END 
Load_X1_X2_X3_X4_X5_X6 + + LEAF_ENTRY Load_X1_X2_X3_X4_X5_X6_X7 + ldp x1, x2, [x9], #16 + ldp x3, x4, [x9], #16 + ldp x5, x6, [x9], #16 + ldr x7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X1_X2_X3_X4_X5_X6_X7 + + LEAF_ENTRY Load_X2 + ldr x2, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X2 + LEAF_ENTRY Load_X2_X3 + ldp x2, x3, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X2_X3 + + LEAF_ENTRY Load_X2_X3_X4 + ldp x2, x3, [x9], #16 + ldr x4, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X2_X3_X4 + + LEAF_ENTRY Load_X2_X3_X4_X5 + ldp x2, x3, [x9], #16 + ldp x4, x5, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X2_X3_X4_X5 + + LEAF_ENTRY Load_X2_X3_X4_X5_X6 + ldp x2, x3, [x9], #16 + ldp x4, x5, [x9], #16 + ldr x6, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X2_X3_X4_X5_X6 + + LEAF_ENTRY Load_X2_X3_X4_X5_X6_X7 + ldp x2, x3, [x9], #16 + ldp x4, x5, [x9], #16 + ldp x6, x7, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X2_X3_X4_X5_X6_X7 + + LEAF_ENTRY Load_X3 + ldr x3, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X3 + LEAF_ENTRY Load_X3_X4 + ldp x3, x4, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X3_X4 + + LEAF_ENTRY Load_X3_X4_X5 + ldp x3, x4, [x9], #16 + ldr x5, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X3_X4_X5 + + LEAF_ENTRY Load_X3_X4_X5_X6 + ldp x3, x4, [x9], #16 + ldp x5, x6, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X3_X4_X5_X6 + + LEAF_ENTRY Load_X3_X4_X5_X6_X7 + ldp x3, x4, [x9], #16 + ldp x5, x6, [x9], #16 + ldr x7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X3_X4_X5_X6_X7 + + LEAF_ENTRY Load_X4 + ldr x4, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X4 + + LEAF_ENTRY Load_X4_X5 + ldp x4, x5, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X4_X5 + + LEAF_ENTRY Load_X4_X5_X6 + ldp x4, x5, [x9], #16 + ldr x6, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X4_X5_X6 + + LEAF_ENTRY Load_X4_X5_X6_X7 + ldp x4, x5, [x9], #16 + ldp x6, x7, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X4_X5_X6_X7 + + LEAF_ENTRY Load_X5 + ldr x5, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X5 + + LEAF_ENTRY Load_X5_X6 + ldp x5, x6, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X5_X6 + + LEAF_ENTRY Load_X5_X6_X7 + ldp x5, x6, [x9], #16 + ldr x7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X5_X6_X7 + + LEAF_ENTRY Load_X6 + ldr x6, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X6 + + LEAF_ENTRY Load_X6_X7 + ldp x6, x7, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X6_X7 + + LEAF_ENTRY Load_X7 + ldr x7, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_X7 + + LEAF_ENTRY Load_D0 + ldr d0, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_D0 + + LEAF_ENTRY Load_D1 + ldr d1, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_D1 + + LEAF_ENTRY Load_D0_D1 + ldp d0, d1, [x9], #16 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END Load_D0_D1 + + LEAF_ENTRY Load_D0_D1_D2 + ldr d0, [x9], #8 + ALTERNATE_ENTRY Load_D1_D2 + ldr d1, [x9], #8 + ALTERNATE_ENTRY Load_D2 + ldr d2, [x9], #8 + ldr x11, [x10], #8 + EPILOG_BRANCH_REG x11 + LEAF_END 
Load_D0_D1_D2
+
+    LEAF_ENTRY Load_D0_D1_D2_D3
+        ldr d0, [x9], #8
+    ALTERNATE_ENTRY Load_D1_D2_D3
+        ldr d1, [x9], #8
+    ALTERNATE_ENTRY Load_D2_D3
+        ldr d2, [x9], #8
+    ALTERNATE_ENTRY Load_D3
+        ldr d3, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_D0_D1_D2_D3
+
+    LEAF_ENTRY Load_D0_D1_D2_D3_D4
+        ldr d0, [x9], #8
+    ALTERNATE_ENTRY Load_D1_D2_D3_D4
+        ldr d1, [x9], #8
+    ALTERNATE_ENTRY Load_D2_D3_D4
+        ldr d2, [x9], #8
+    ALTERNATE_ENTRY Load_D3_D4
+        ldr d3, [x9], #8
+    ALTERNATE_ENTRY Load_D4
+        ldr d4, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_D0_D1_D2_D3_D4
+
+    LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5
+        ldr d0, [x9], #8
+    ALTERNATE_ENTRY Load_D1_D2_D3_D4_D5
+        ldr d1, [x9], #8
+    ALTERNATE_ENTRY Load_D2_D3_D4_D5
+        ldr d2, [x9], #8
+    ALTERNATE_ENTRY Load_D3_D4_D5
+        ldr d3, [x9], #8
+    ALTERNATE_ENTRY Load_D4_D5
+        ldr d4, [x9], #8
+    ALTERNATE_ENTRY Load_D5
+        ldr d5, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_D0_D1_D2_D3_D4_D5
+
+    LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5_D6
+        ldr d0, [x9], #8
+    ALTERNATE_ENTRY Load_D1_D2_D3_D4_D5_D6
+        ldr d1, [x9], #8
+    ALTERNATE_ENTRY Load_D2_D3_D4_D5_D6
+        ldr d2, [x9], #8
+    ALTERNATE_ENTRY Load_D3_D4_D5_D6
+        ldr d3, [x9], #8
+    ALTERNATE_ENTRY Load_D4_D5_D6
+        ldr d4, [x9], #8
+    ALTERNATE_ENTRY Load_D5_D6
+        ldr d5, [x9], #8
+    ALTERNATE_ENTRY Load_D6
+        ldr d6, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_D0_D1_D2_D3_D4_D5_D6
+    /*
+    LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5_D6_D7
+        ldr d0, [x9], #8
+    ALTERNATE_ENTRY Load_D1_D2_D3_D4_D5_D6_D7
+        ldr d1, [x9], #8
+    ALTERNATE_ENTRY Load_D2_D3_D4_D5_D6_D7
+        ldr d2, [x9], #8
+    ALTERNATE_ENTRY Load_D3_D4_D5_D6_D7
+        ldr d3, [x9], #8
+    ALTERNATE_ENTRY Load_D4_D5_D6_D7
+        ldr d4, [x9], #8
+    ALTERNATE_ENTRY Load_D5_D6_D7
+        ldr d5, [x9], #8
+    ALTERNATE_ENTRY Load_D6_D7
+        ldr d6, [x9], #8
+    ALTERNATE_ENTRY Load_D7
+        ldr d7, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_D0_D1_D2_D3_D4_D5_D6_D7
+    */
+    LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5_D6_D7
+        ldp d0, d1, [x9], #16
+    ALTERNATE_ENTRY Load_D2_D3_D4_D5_D6_D7
+        ldp d2, d3, [x9], #16
+    ALTERNATE_ENTRY Load_D4_D5_D6_D7
+        ldp d4, d5, [x9], #16
+    ALTERNATE_ENTRY Load_D6_D7
+        ldp d6, d7, [x9], #16
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_D0_D1_D2_D3_D4_D5_D6_D7
+
+    LEAF_ENTRY Load_D1_D2_D3_D4_D5_D6_D7
+        ldp d1, d2, [x9], #16
+    ALTERNATE_ENTRY Load_D3_D4_D5_D6_D7
+        ldp d3, d4, [x9], #16
+    ALTERNATE_ENTRY Load_D5_D6_D7
+        ldp d5, d6, [x9], #16
+    ALTERNATE_ENTRY Load_D7
+        ldr d7, [x9], #8
+        ldr x11, [x10], #8
+        EPILOG_BRANCH_REG x11
+    LEAF_END Load_D1_D2_D3_D4_D5_D6_D7
+
+    // X0 - routines array
+    // X1 - interpreter stack args location
+    // X3 - stack arguments size (properly aligned)
+    NESTED_ENTRY CallJittedMethodRetVoid
+        PROLOG_SAVE_REG_PAIR fp, lr, #-16!
+        sub sp, sp, x3
+        mov x10, x0
+        mov x9, x1
+        ldr x11, [x10], #8
+        blr x11
+        EPILOG_RESTORE_REG_PAIR fp, lr, #16!
+        ret lr
+    NESTED_END CallJittedMethodRetVoid
+
+    // X0 - routines array
+    // X1 - interpreter stack args location
+    // X2 - interpreter stack return value location
+    // X3 - stack arguments size (properly aligned)
+    NESTED_ENTRY CallJittedMethodRetBuff
+        PROLOG_SAVE_REG_PAIR fp, lr, #-16!
+        sub sp, sp, x3
+        mov x10, x0
+        mov x9, x1
+        mov x8, x2
+        ldr x11, [x10], #8
+        blr x11
+        EPILOG_RESTORE_REG_PAIR fp, lr, #16!
+ ret lr + NESTED_END CallJittedMethodRetBuff + + // X0 - routines array + // X1 - interpreter stack args location + // X2 - interpreter stack return value location + // X3 - stack arguments size (properly aligned) + NESTED_ENTRY CallJittedMethodRetI8 + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + str x0, [x2] + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + ret lr + NESTED_END CallJittedMethodRetI8 + + // X0 - routines array + // X1 - interpreter stack args location + // X2 - interpreter stack return value location + // X3 - stack arguments size (properly aligned) + NESTED_ENTRY CallJittedMethodRet2I8 + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp x0, x1, [x2] + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + ret lr + NESTED_END CallJittedMethodRet2I8 + + // X0 - routines array + // X1 - interpreter stack args location + // X2 - interpreter stack return value location + // X3 - stack arguments size (properly aligned) + NESTED_ENTRY CallJittedMethodRetDouble + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + str d0, [x2] + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + ret lr + NESTED_END CallJittedMethodRetDouble + + // X0 - routines array + // X1 - interpreter stack args location + // X2 - interpreter stack return value location + // X3 - stack arguments size (properly aligned) + NESTED_ENTRY CallJittedMethodRet2Double + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp d0, d1, [x2] + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + ret lr + NESTED_END CallJittedMethodRet2Double + + // X0 - routines array + // X1 - interpreter stack args location + // X2 - interpreter stack return value location + // X3 - stack arguments size (properly aligned) + NESTED_ENTRY CallJittedMethodRet3Double + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp d0, d1, [x2], #16 + str d2, [x2] + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + ret lr + NESTED_END CallJittedMethodRet3Double + + // X0 - routines array + // X1 - interpreter stack args location + // X2 - interpreter stack return value location + // X3 - stack arguments size (properly aligned) + NESTED_ENTRY CallJittedMethodRet4Double + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp d0, d1, [x2], #16 + stp d2, d3, [x2] + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + ret lr + NESTED_END CallJittedMethodRet4Double + + // X0 - routines array + // X1 - interpreter stack args location + // X2 - interpreter stack return value location + // X3 - stack arguments size (properly aligned) + NESTED_ENTRY CallJittedMethodRetFloat + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + str s0, [x2] + EPILOG_RESTORE_REG_PAIR fp, lr, #32! 
+ ret lr + NESTED_END CallJittedMethodRetFloat + + // X0 - routines array + // X1 - interpreter stack args location + // X2 - interpreter stack return value location + // X3 - stack arguments size (properly aligned) + NESTED_ENTRY CallJittedMethodRet2Float + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp s0, s1, [x2] + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + ret lr + NESTED_END CallJittedMethodRet2Float + + // X0 - routines array + // X1 - interpreter stack args location + // X2 - interpreter stack return value location + // X3 - stack arguments size (properly aligned) + NESTED_ENTRY CallJittedMethodRet3Float + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp s0, s1, [x2], #8 + str s2, [x2] + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + ret lr + NESTED_END CallJittedMethodRet3Float + + // X0 - routines array + // X1 - interpreter stack args location + // X2 - interpreter stack return value location + // X3 - stack arguments size (properly aligned) + NESTED_ENTRY CallJittedMethodRet4Float + PROLOG_SAVE_REG_PAIR fp, lr, #-32! + str x2, [sp, #16] + sub sp, sp, x3 + mov x10, x0 + mov x9, x1 + ldr x11, [x10], #8 + blr x11 + ldr x2, [sp, #16] + stp s0, s1, [x2], #8 + stp s2, s3, [x2] + EPILOG_RESTORE_REG_PAIR fp, lr, #32! + ret lr + NESTED_END CallJittedMethodRet4Float + ; Must be at very end of file END diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp new file mode 100644 index 00000000000000..352c4b51f52448 --- /dev/null +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -0,0 +1,952 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
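+// Overview (editorial sketch based on the commit description and the asm
+// helpers): the call stub generator builds, for a given method signature, a
+// list of routine addresses (with inline operands for some routines) that
+// the CallJittedMethodRet* asm helpers walk in order to move arguments from
+// the interpreter stack into the native calling convention, call the target
+// method, and store its return value back on the interpreter stack. One
+// possible list for the Unix amd64 target could look like this (illustrative
+// only, not the output for any particular signature shown here):
+//
+//   routines[0] = (PCODE)Load_RDI_RSI      // two GP register arguments
+//   routines[1] = (PCODE)Load_Stack        // stack arguments; followed by...
+//   routines[2] = SP offset | (size << 32) // ...its two 32-bit operands
+//   routines[3] = <target method entry point>
+//
+// Each Load_* routine consumes its operands, advances the interpreter stack
+// pointer, and tail-branches to the next list entry; the list is cached so
+// that repeated calls to the same method reuse it.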
+ +#ifdef FEATURE_INTERPRETER + +#include "callstubgenerator.h" + +extern "C" void Load_Stack(); + +#ifdef TARGET_AMD64 + +#ifdef TARGET_WINDOWS +extern "C" void Load_RCX(); +extern "C" void Load_RCX_RDX(); +extern "C" void Load_RCX_RDX_R8(); +extern "C" void Load_RCX_RDX_R8_R9(); +extern "C" void Load_RDX(); +extern "C" void Load_RDX_R8(); +extern "C" void Load_RDX_R8_R9(); +extern "C" void Load_R8(); +extern "C" void Load_R8_R9(); +extern "C" void Load_R9(); +extern "C" void Load_XMM0(); +extern "C" void Load_XMM0_XMM1(); +extern "C" void Load_XMM0_XMM1_XMM2(); +extern "C" void Load_XMM0_XMM1_XMM2_XMM3(); +extern "C" void Load_XMM1(); +extern "C" void Load_XMM1_XMM2(); +extern "C" void Load_XMM1_XMM2_XMM3(); +extern "C" void Load_XMM2(); +extern "C" void Load_XMM2_XMM3(); +extern "C" void Load_XMM3(); +extern "C" void Load_Ref_RCX(); +extern "C" void Load_Ref_RDX(); +extern "C" void Load_Ref_R8(); +extern "C" void Load_Ref_R9(); + +PCODE GPRegsRoutines[] = +{ + (PCODE)Load_RCX, // 00 + (PCODE)Load_RCX_RDX, // 01 + (PCODE)Load_RCX_RDX_R8, // 02 + (PCODE)Load_RCX_RDX_R8_R9, // 03 + (PCODE)0, // 10 + (PCODE)Load_RDX, // 11 + (PCODE)Load_RDX_R8, // 12 + (PCODE)Load_RDX_R8_R9, // 13 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Load_R8, // 22 + (PCODE)Load_R8_R9, // 23 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Load_R9 // 33 +}; + +PCODE GPRegsRefRoutines[] = +{ + (PCODE)Load_Ref_RCX, // 0 + (PCODE)Load_Ref_RDX, // 1 + (PCODE)Load_Ref_R8, // 2 + (PCODE)Load_Ref_R9, // 3 +}; + +PCODE FPRegsRoutines[] = +{ + (PCODE)Load_XMM0, // 00 + (PCODE)Load_XMM0_XMM1, // 01 + (PCODE)Load_XMM0_XMM1_XMM2, // 02 + (PCODE)Load_XMM0_XMM1_XMM2_XMM3, // 03 + (PCODE)0, // 10 + (PCODE)Load_XMM1, // 11 + (PCODE)Load_XMM1_XMM2, // 12 + (PCODE)Load_XMM1_XMM2_XMM3, // 13 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Load_XMM2, // 22 + (PCODE)Load_XMM2_XMM3, // 23 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Load_XMM3 // 33 +}; + +#else // TARGET_WINDOWS + +extern "C" void Load_RDI(); +extern "C" void Load_RDI_RSI(); +extern "C" void Load_RDI_RSI_RDX(); +extern "C" void Load_RDI_RSI_RDX_RCX(); +extern "C" void Load_RDI_RSI_RDX_RCX_R8(); +extern "C" void Load_RDI_RSI_RDX_RCX_R8_R9(); +extern "C" void Load_RSI(); +extern "C" void Load_RSI_RDX(); +extern "C" void Load_RSI_RDX_RCX(); +extern "C" void Load_RSI_RDX_RCX_R8(); +extern "C" void Load_RSI_RDX_RCX_R8_R9(); +extern "C" void Load_RDX(); +extern "C" void Load_RDX_RCX(); +extern "C" void Load_RDX_RCX_R8(); +extern "C" void Load_RDX_RCX_R8_R9(); +extern "C" void Load_RCX(); +extern "C" void Load_RCX_R8(); +extern "C" void Load_RCX_R8_R9(); +extern "C" void Load_R8(); +extern "C" void Load_R8_R9(); +extern "C" void Load_R9(); + +extern "C" void Load_Ref_RDI(); +extern "C" void Load_Ref_RSI(); +extern "C" void Load_Ref_RDX(); +extern "C" void Load_Ref_RCX(); +extern "C" void Load_Ref_R8(); +extern "C" void Load_Ref_R9(); + +PCODE GPRegsRoutines[] = +{ + (PCODE)Load_RDI, // 00 + (PCODE)Load_RDI_RSI, // 01 + (PCODE)Load_RDI_RSI_RDX, // 02 + (PCODE)Load_RDI_RSI_RDX_RCX, // 03 + (PCODE)Load_RDI_RSI_RDX_RCX_R8, // 04 + (PCODE)Load_RDI_RSI_RDX_RCX_R8_R9, // 05 + (PCODE)0, // 10 + (PCODE)Load_RSI, // 11 + (PCODE)Load_RSI_RDX, // 12 + (PCODE)Load_RSI_RDX_RCX, // 13 + (PCODE)Load_RSI_RDX_RCX_R8, // 14 + (PCODE)Load_RSI_RDX_RCX_R8_R9, // 15 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Load_RDX, // 22 + (PCODE)Load_RDX_RCX, // 23 + (PCODE)Load_RDX_RCX_R8, // 24 + (PCODE)Load_RDX_RCX_R8_R9, // 25 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 
32 + (PCODE)Load_RCX, // 33 + (PCODE)Load_RCX_R8, // 34 + (PCODE)Load_RCX_R8_R9, // 35 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Load_R8, // 44 + (PCODE)Load_R8_R9, // 45 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Load_R9 // 55 +}; + +extern "C" void Load_XMM0(); +extern "C" void Load_XMM0_XMM1(); +extern "C" void Load_XMM0_XMM1_XMM2(); +extern "C" void Load_XMM0_XMM1_XMM2_XMM3(); +extern "C" void Load_XMM0_XMM1_XMM2_XMM3_XMM4(); +extern "C" void Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5(); +extern "C" void Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6(); +extern "C" void Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7(); +extern "C" void Load_XMM1(); +extern "C" void Load_XMM1_XMM2(); +extern "C" void Load_XMM1_XMM2_XMM3(); +extern "C" void Load_XMM1_XMM2_XMM3_XMM4(); +extern "C" void Load_XMM1_XMM2_XMM3_XMM4_XMM5(); +extern "C" void Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6(); +extern "C" void Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7(); +extern "C" void Load_XMM2(); +extern "C" void Load_XMM2_XMM3(); +extern "C" void Load_XMM2_XMM3_XMM4(); +extern "C" void Load_XMM2_XMM3_XMM4_XMM5(); +extern "C" void Load_XMM2_XMM3_XMM4_XMM5_XMM6(); +extern "C" void Load_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7(); +extern "C" void Load_XMM3(); +extern "C" void Load_XMM3_XMM4(); +extern "C" void Load_XMM3_XMM4_XMM5(); +extern "C" void Load_XMM3_XMM4_XMM5_XMM6(); +extern "C" void Load_XMM3_XMM4_XMM5_XMM6_XMM7(); +extern "C" void Load_XMM4(); +extern "C" void Load_XMM4_XMM5(); +extern "C" void Load_XMM4_XMM5_XMM6(); +extern "C" void Load_XMM4_XMM5_XMM6_XMM7(); +extern "C" void Load_XMM5(); +extern "C" void Load_XMM5_XMM6(); +extern "C" void Load_XMM5_XMM6_XMM7(); +extern "C" void Load_XMM6(); +extern "C" void Load_XMM6_XMM7(); +extern "C" void Load_XMM7(); + +PCODE FPRegsRoutines[] = +{ + (PCODE)Load_XMM0, // 00 + (PCODE)Load_XMM0_XMM1, // 01 + (PCODE)Load_XMM0_XMM1_XMM2, // 02 + (PCODE)Load_XMM0_XMM1_XMM2_XMM3, // 03 + (PCODE)Load_XMM0_XMM1_XMM2_XMM3_XMM4, // 04 + (PCODE)Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5, // 05 + (PCODE)Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, // 06 + (PCODE)Load_XMM0_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7,// 07 + (PCODE)0, // 10 + (PCODE)Load_XMM1, // 11 + (PCODE)Load_XMM1_XMM2, // 12 + (PCODE)Load_XMM1_XMM2_XMM3, // 13 + (PCODE)Load_XMM1_XMM2_XMM3_XMM4, // 14 + (PCODE)Load_XMM1_XMM2_XMM3_XMM4_XMM5, // 15 + (PCODE)Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6, // 16 + (PCODE)Load_XMM1_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, // 17 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Load_XMM2, // 22 + (PCODE)Load_XMM2_XMM3, // 23 + (PCODE)Load_XMM2_XMM3_XMM4, // 24 + (PCODE)Load_XMM2_XMM3_XMM4_XMM5, // 25 + (PCODE)Load_XMM2_XMM3_XMM4_XMM5_XMM6, // 26 + (PCODE)Load_XMM2_XMM3_XMM4_XMM5_XMM6_XMM7, // 27 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Load_XMM3, // 33 + (PCODE)Load_XMM3_XMM4, // 34 + (PCODE)Load_XMM3_XMM4_XMM5, // 35 + (PCODE)Load_XMM3_XMM4_XMM5_XMM6, // 36 + (PCODE)Load_XMM3_XMM4_XMM5_XMM6_XMM7, // 37 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Load_XMM4, // 44 + (PCODE)Load_XMM4_XMM5, // 45 + (PCODE)Load_XMM4_XMM5_XMM6, // 46 + (PCODE)Load_XMM4_XMM5_XMM6_XMM7, // 47 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Load_XMM5, // 55 + (PCODE)Load_XMM5_XMM6, // 56 + (PCODE)Load_XMM5_XMM6_XMM7, // 57 + (PCODE)0, // 60 + (PCODE)0, // 61 + (PCODE)0, // 62 + (PCODE)0, // 63 + (PCODE)0, // 64 + (PCODE)0, // 65 + (PCODE)Load_XMM6, // 66 + 
(PCODE)Load_XMM6_XMM7, // 67 + (PCODE)0, // 70 + (PCODE)0, // 71 + (PCODE)0, // 72 + (PCODE)0, // 73 + (PCODE)0, // 74 + (PCODE)0, // 75 + (PCODE)0, // 76 + (PCODE)Load_XMM7 // 77 +}; + +#endif // TARGET_WINDOWS + +#endif // TARGET_AMD64 + +#ifdef TARGET_ARM64 + +extern "C" void Load_X0(); +extern "C" void Load_X0_X1(); +extern "C" void Load_X0_X1_X2(); +extern "C" void Load_X0_X1_X2_X3(); +extern "C" void Load_X0_X1_X2_X3_X4(); +extern "C" void Load_X0_X1_X2_X3_X4_X5(); +extern "C" void Load_X0_X1_X2_X3_X4_X5_X6(); +extern "C" void Load_X0_X1_X2_X3_X4_X5_X6_X7(); +extern "C" void Load_X1(); +extern "C" void Load_X1_X2(); +extern "C" void Load_X1_X2_X3(); +extern "C" void Load_X1_X2_X3_X4(); +extern "C" void Load_X1_X2_X3_X4_X5(); +extern "C" void Load_X1_X2_X3_X4_X5_X6(); +extern "C" void Load_X1_X2_X3_X4_X5_X6_X7(); +extern "C" void Load_X2(); +extern "C" void Load_X2_X3(); +extern "C" void Load_X2_X3_X4(); +extern "C" void Load_X2_X3_X4_X5(); +extern "C" void Load_X2_X3_X4_X5_X6(); +extern "C" void Load_X2_X3_X4_X5_X6_X7(); +extern "C" void Load_X3(); +extern "C" void Load_X3_X4(); +extern "C" void Load_X3_X4_X5(); +extern "C" void Load_X3_X4_X5_X6(); +extern "C" void Load_X3_X4_X5_X6_X7(); +extern "C" void Load_X4(); +extern "C" void Load_X4_X5(); +extern "C" void Load_X4_X5_X6(); +extern "C" void Load_X4_X5_X6_X7(); +extern "C" void Load_X5(); +extern "C" void Load_X5_X6(); +extern "C" void Load_X5_X6_X7(); +extern "C" void Load_X6(); +extern "C" void Load_X6_X7(); +extern "C" void Load_X7(); + +extern "C" void Load_Ref_X0(); +extern "C" void Load_Ref_X1(); +extern "C" void Load_Ref_X2(); +extern "C" void Load_Ref_X3(); +extern "C" void Load_Ref_X4(); +extern "C" void Load_Ref_X5(); +extern "C" void Load_Ref_X6(); +extern "C" void Load_Ref_X7(); + + +PCODE GPRegsRoutines[] = +{ + (PCODE)Load_X0, // 00 + (PCODE)Load_X0_X1, // 01 + (PCODE)Load_X0_X1_X2, // 02 + (PCODE)Load_X0_X1_X2_X3, // 03 + (PCODE)Load_X0_X1_X2_X3_X4, // 04 + (PCODE)Load_X0_X1_X2_X3_X4_X5, // 05 + (PCODE)Load_X0_X1_X2_X3_X4_X5_X6, // 06 + (PCODE)Load_X0_X1_X2_X3_X4_X5_X6_X7, // 07 + (PCODE)0, // 10 + (PCODE)Load_X1, // 11 + (PCODE)Load_X1_X2, // 12 + (PCODE)Load_X1_X2_X3, // 13 + (PCODE)Load_X1_X2_X3_X4, // 14 + (PCODE)Load_X1_X2_X3_X4_X5, // 15 + (PCODE)Load_X1_X2_X3_X4_X5_X6, // 16 + (PCODE)Load_X1_X2_X3_X4_X5_X6_X7, // 17 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Load_X2, // 22 + (PCODE)Load_X2_X3, // 23 + (PCODE)Load_X2_X3_X4, // 24 + (PCODE)Load_X2_X3_X4_X5, // 25 + (PCODE)Load_X2_X3_X4_X5_X6, // 26 + (PCODE)Load_X2_X3_X4_X5_X6_X7, // 27 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Load_X3, // 33 + (PCODE)Load_X3_X4, // 34 + (PCODE)Load_X3_X4_X5, // 35 + (PCODE)Load_X3_X4_X5_X6, // 36 + (PCODE)Load_X3_X4_X5_X6_X7, // 37 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Load_X4, // 44 + (PCODE)Load_X4_X5, // 45 + (PCODE)Load_X4_X5_X6, // 46 + (PCODE)Load_X4_X5_X6_X7, // 47 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Load_X5, // 55 + (PCODE)Load_X5_X6, // 56 + (PCODE)Load_X5_X6_X7, // 57 + (PCODE)0, // 60 + (PCODE)0, // 61 + (PCODE)0, // 62 + (PCODE)0, // 63 + (PCODE)0, // 64 + (PCODE)0, // 65 + (PCODE)Load_X6, // 66 + (PCODE)Load_X6_X7, // 67 + (PCODE)0, // 70 + (PCODE)0, // 71 + (PCODE)0, // 72 + (PCODE)0, // 73 + (PCODE)0, // 74 + (PCODE)0, // 75 + (PCODE)0, // 76 + (PCODE)Load_X7 // 77 +}; + +PCODE GPRegsRefRoutines[] = +{ + (PCODE)Load_Ref_X0, // 0 + (PCODE)Load_Ref_X1, // 1 + (PCODE)Load_Ref_X2, // 2 + 
(PCODE)Load_Ref_X3, // 3 + (PCODE)Load_Ref_X4, // 4 + (PCODE)Load_Ref_X5, // 5 + (PCODE)Load_Ref_X6, // 6 + (PCODE)Load_Ref_X7 // 7 +}; + +extern "C" void Load_D0(); +extern "C" void Load_D0_D1(); +extern "C" void Load_D0_D1_D2(); +extern "C" void Load_D0_D1_D2_D3(); +extern "C" void Load_D0_D1_D2_D3_D4(); +extern "C" void Load_D0_D1_D2_D3_D4_D5(); +extern "C" void Load_D0_D1_D2_D3_D4_D5_D6(); +extern "C" void Load_D0_D1_D2_D3_D4_D5_D6_D7(); +extern "C" void Load_D1(); +extern "C" void Load_D1_D2(); +extern "C" void Load_D1_D2_D3(); +extern "C" void Load_D1_D2_D3_D4(); +extern "C" void Load_D1_D2_D3_D4_D5(); +extern "C" void Load_D1_D2_D3_D4_D5_D6(); +extern "C" void Load_D1_D2_D3_D4_D5_D6_D7(); +extern "C" void Load_D2(); +extern "C" void Load_D2_D3(); +extern "C" void Load_D2_D3_D4(); +extern "C" void Load_D2_D3_D4_D5(); +extern "C" void Load_D2_D3_D4_D5_D6(); +extern "C" void Load_D2_D3_D4_D5_D6_D7(); +extern "C" void Load_D3(); +extern "C" void Load_D3_D4(); +extern "C" void Load_D3_D4_D5(); +extern "C" void Load_D3_D4_D5_D6(); +extern "C" void Load_D3_D4_D5_D6_D7(); +extern "C" void Load_D4(); +extern "C" void Load_D4_D5(); +extern "C" void Load_D4_D5_D6(); +extern "C" void Load_D4_D5_D6_D7(); +extern "C" void Load_D5(); +extern "C" void Load_D5_D6(); +extern "C" void Load_D5_D6_D7(); +extern "C" void Load_D6(); +extern "C" void Load_D6_D7(); +extern "C" void Load_D7(); + +PCODE FPRegsRoutines[] = +{ + (PCODE)Load_D0, // 00 + (PCODE)Load_D0_D1, // 01 + (PCODE)Load_D0_D1_D2, // 02 + (PCODE)Load_D0_D1_D2_D3, // 03 + (PCODE)Load_D0_D1_D2_D3_D4, // 04 + (PCODE)Load_D0_D1_D2_D3_D4_D5, // 05 + (PCODE)Load_D0_D1_D2_D3_D4_D5_D6, // 06 + (PCODE)Load_D0_D1_D2_D3_D4_D5_D6_D7, // 07 + (PCODE)0, // 10 + (PCODE)Load_D1, // 11 + (PCODE)Load_D1_D2, // 12 + (PCODE)Load_D1_D2_D3, // 13 + (PCODE)Load_D1_D2_D3_D4, // 14 + (PCODE)Load_D1_D2_D3_D4_D5, // 15 + (PCODE)Load_D1_D2_D3_D4_D5_D6, // 16 + (PCODE)Load_D1_D2_D3_D4_D5_D6_D7, // 17 + (PCODE)0, // 20 + (PCODE)0, // 21 + (PCODE)Load_D2, // 22 + (PCODE)Load_D2_D3, // 23 + (PCODE)Load_D2_D3_D4, // 24 + (PCODE)Load_D2_D3_D4_D5, // 25 + (PCODE)Load_D2_D3_D4_D5_D6, // 26 + (PCODE)Load_D2_D3_D4_D5_D6_D7, // 27 + (PCODE)0, // 30 + (PCODE)0, // 31 + (PCODE)0, // 32 + (PCODE)Load_D3, // 33 + (PCODE)Load_D3_D4, // 34 + (PCODE)Load_D3_D4_D5, // 35 + (PCODE)Load_D3_D4_D5_D6, // 36 + (PCODE)Load_D3_D4_D5_D6_D7, // 37 + (PCODE)0, // 40 + (PCODE)0, // 41 + (PCODE)0, // 42 + (PCODE)0, // 43 + (PCODE)Load_D4, // 44 + (PCODE)Load_D4_D5, // 45 + (PCODE)Load_D4_D5_D6, // 46 + (PCODE)Load_D4_D5_D6_D7, // 47 + (PCODE)0, // 50 + (PCODE)0, // 51 + (PCODE)0, // 52 + (PCODE)0, // 53 + (PCODE)0, // 54 + (PCODE)Load_D5, // 55 + (PCODE)Load_D5_D6, // 56 + (PCODE)Load_D5_D6_D7, // 57 + (PCODE)0, // 60 + (PCODE)0, // 61 + (PCODE)0, // 62 + (PCODE)0, // 63 + (PCODE)0, // 64 + (PCODE)0, // 65 + (PCODE)Load_D6, // 66 + (PCODE)Load_D6_D7, // 67 + (PCODE)0, // 70 + (PCODE)0, // 71 + (PCODE)0, // 72 + (PCODE)0, // 73 + (PCODE)0, // 74 + (PCODE)0, // 75 + (PCODE)0, // 76 + (PCODE)Load_D7 // 77 +}; +#endif // TARGET_ARM64 + +PCODE GetGPRegRangeLoadRoutine(int r1, int r2) +{ + int index = r1 * NUM_ARGUMENT_REGISTERS + r2; + return GPRegsRoutines[index]; +} + +#ifndef UNIX_AMD64_ABI +PCODE GetGPRegRefLoadRoutine(int r) +{ + return GPRegsRefRoutines[r]; +} +#endif // UNIX_AMD64_ABI + +PCODE GetFPRegRangeLoadRoutine(int x1, int x2) +{ + int index = x1 * NUM_FLOAT_ARGUMENT_REGISTERS + x2; + return FPRegsRoutines[index]; +} + +PCODE GetStackRangeLoadRoutine(int s1, int s2) +{ + // Stack range is 
not supported yet + assert(!"Stack range is not supported yet"); + return NULL; +} + +extern "C" void CallJittedMethodRetVoid(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetDouble(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetI8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetBuff(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); + +#ifdef UNIX_AMD64_ABI +extern "C" void CallJittedMethodRetI8I8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetI8Double(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetDoubleI8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetDoubleDouble(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +#endif + +#ifdef TARGET_ARM64 +extern "C" void CallJittedMethodRet2I8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRet2Double(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRet3Double(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRet4Double(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetFloat(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRet2Float(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRet3Float(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRet4Float(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +#endif // TARGET_ARM64 + +CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) +{ + MetaSig sig(pMD); + ArgIterator argIt(&sig); + int ofs = 0; + DWORD arg = 0; + m_r1 = argIt.HasThis() ? 0 : NO_RANGE; // The "this" argument register is not enumerated by the arg iterator. + m_r2 = 0; + m_x1 = NO_RANGE; // indicates that there is no active range of FP registers + m_x2 = 0; + m_s1 = NO_RANGE; // indicates that there is no active range of stack arguments + m_s2 = 0; + m_routineIndex = 0; + m_totalStackSize = 0; + int numArgs = sig.NumFixedArgs() + (sig.HasThis() ? 1 : 0); + + // Allocate space for the routines. The size of the array is conservatively set to twice the number of arguments + // plus one slot for the target pointer and reallocated to the real size at the end. 
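+    // For illustration, on Windows x64 a (hypothetical) signature like
+    //     static double F(int i, double d, SomeLargeStruct s)
+    // would produce { Load_RCX, Load_XMM1, Load_Ref_R8, <size of s>, <target> }:
+    // at most one routine plus one extra data slot per argument, hence the
+    // factor of two.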
+
+    // Interpreter-TODO: handle OOM here and at the realloc
+    m_pHeader = (CallStubHeader*)malloc(sizeof(CallStubHeader) + (2 * numArgs + 1) * sizeof(PCODE));
+    PCODE *routines = m_pHeader->Routines;
+
+    for (; TransitionBlock::InvalidOffset != (ofs = argIt.GetNextOffset()); arg++)
+    {
+        ArgLocDesc argLocDesc;
+        argIt.GetArgLoc(ofs, &argLocDesc);
+
+#ifdef UNIX_AMD64_ABI
+        if (argIt.GetArgLocDescForStructInRegs() != NULL)
+        {
+            TypeHandle argTypeHandle;
+            CorElementType corType = argIt.GetArgType(&argTypeHandle);
+            _ASSERTE(corType == ELEMENT_TYPE_VALUETYPE);
+
+            MethodTable *pMT = argTypeHandle.AsMethodTable();
+            EEClass *pEEClass = pMT->GetClass();
+            int numEightBytes = pEEClass->GetNumberEightBytes();
+            for (int i = 0; i < numEightBytes; i++)
+            {
+                ArgLocDesc argLocDescEightByte = {};
+                SystemVClassificationType eightByteType = pEEClass->GetEightByteClassification(i);
+                if (eightByteType == SystemVClassificationTypeInteger)
+                {
+                    if (argLocDesc.m_cGenReg != 0)
+                    {
+                        argLocDescEightByte.m_cGenReg = 1;
+                        argLocDescEightByte.m_idxGenReg = argLocDesc.m_idxGenReg++;
+                    }
+                    else
+                    {
+                        argLocDescEightByte.m_byteStackSize = 8;
+                        argLocDescEightByte.m_byteStackIndex = argLocDesc.m_byteStackIndex;
+                        argLocDesc.m_byteStackIndex += 8;
+                    }
+                }
+                else if (eightByteType == SystemVClassificationTypeSSE)
+                {
+                    if (argLocDesc.m_cFloatReg != 0)
+                    {
+                        argLocDescEightByte.m_cFloatReg = 1;
+                        argLocDescEightByte.m_idxFloatReg = argLocDesc.m_idxFloatReg++;
+                    }
+                    else
+                    {
+                        argLocDescEightByte.m_byteStackSize = 8;
+                        argLocDescEightByte.m_byteStackIndex = argLocDesc.m_byteStackIndex;
+                        argLocDesc.m_byteStackIndex += 8;
+                    }
+                }
+                ProcessArgument(argIt, argLocDescEightByte);
+            }
+        }
+        else
+#endif // UNIX_AMD64_ABI
+        {
+            ProcessArgument(argIt, argLocDesc);
+        }
+    }
+
+    // All arguments were processed, but there may still be a pending range to store.
+    if (m_r1 != NO_RANGE)
+    {
+        routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2);
+    }
+    else if (m_x1 != NO_RANGE)
+    {
+        routines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2);
+    }
+    else if (m_s1 != NO_RANGE)
+    {
+        m_totalStackSize += m_s2 - m_s1 + 1;
+        routines[m_routineIndex++] = (PCODE)Load_Stack;
+        routines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1;
+    }
+
+    m_totalStackSize = ALIGN_UP(m_totalStackSize, 16); // Align the stack to 16 bytes
+
+    if (argIt.HasRetBuffArg())
+    {
+        m_pHeader->Invoke = CallJittedMethodRetBuff;
+    }
+    else
+    {
+        TypeHandle thReturnValueType;
+        CorElementType thReturnType = sig.GetReturnTypeNormalized(&thReturnValueType);
+
+        switch (thReturnType)
+        {
+            case ELEMENT_TYPE_BOOLEAN:
+            case ELEMENT_TYPE_CHAR:
+            case ELEMENT_TYPE_I1:
+            case ELEMENT_TYPE_U1:
+            case ELEMENT_TYPE_I2:
+            case ELEMENT_TYPE_U2:
+            case ELEMENT_TYPE_I4:
+            case ELEMENT_TYPE_U4:
+            case ELEMENT_TYPE_I8:
+            case ELEMENT_TYPE_U8:
+            case ELEMENT_TYPE_I:
+            case ELEMENT_TYPE_U:
+            case ELEMENT_TYPE_CLASS:
+            case ELEMENT_TYPE_OBJECT:
+            case ELEMENT_TYPE_STRING:
+            case ELEMENT_TYPE_PTR:
+            case ELEMENT_TYPE_BYREF:
+            case ELEMENT_TYPE_TYPEDBYREF:
+            case ELEMENT_TYPE_ARRAY:
+            case ELEMENT_TYPE_SZARRAY:
+            case ELEMENT_TYPE_FNPTR:
+                m_pHeader->Invoke = CallJittedMethodRetI8;
+                break;
+            case ELEMENT_TYPE_R4:
+            case ELEMENT_TYPE_R8:
+                m_pHeader->Invoke = CallJittedMethodRetDouble;
+                break;
+            case ELEMENT_TYPE_VOID:
+                m_pHeader->Invoke = CallJittedMethodRetVoid;
+                break;
+            case ELEMENT_TYPE_VALUETYPE:
+#ifdef TARGET_AMD64
+#ifdef TARGET_WINDOWS
+                if (thReturnValueType.AsMethodTable()->IsIntrinsicType())
+                {
+                    // E.g.
Vector2 + m_pHeader->Invoke = CallJittedMethodRetDouble; + } + else + { + // POD structs smaller than 64 bits are returned in rax + m_pHeader->Invoke = CallJittedMethodRetI8; + } +#else // TARGET_WINDOWS + if (thReturnValueType.AsMethodTable()->IsRegPassedStruct()) + { + UINT fpReturnSize = argIt.GetFPReturnSize(); + if (fpReturnSize == 0) + { + m_pHeader->Invoke = CallJittedMethodRetI8; + } + else if (fpReturnSize == 8) + { + m_pHeader->Invoke = CallJittedMethodRetDouble; + } + else + { + _ASSERTE((fpReturnSize & 16) != 0); + // The fpReturnSize bits 0..1 have the following meaning: + // Bit 0 - the first 8 bytes of the struct is integer (0) or floating point (1) + // Bit 1 - the second 8 bytes of the struct is integer (0) or floating point (1) + switch (fpReturnSize & 0x3) + { + case 0: + m_pHeader->Invoke = CallJittedMethodRetI8I8; + break; + case 1: + m_pHeader->Invoke = CallJittedMethodRetDoubleI8; + break; + case 2: + m_pHeader->Invoke = CallJittedMethodRetI8Double; + break; + case 3: + m_pHeader->Invoke = CallJittedMethodRetDoubleDouble; + break; + } + } + } + else + { + _ASSERTE(!"All value types that are not returnable structs in registers should be returned using return buffer"); + } +#endif // TARGET_WINDOWS +#elif TARGET_ARM64 + // HFA, HVA, POD structs smaller than 128 bits + if (thReturnValueType.IsHFA()) + { + switch (thReturnValueType.GetHFAType()) + { + case CORINFO_HFA_ELEM_FLOAT: + switch (thReturnValueType.GetSize()) + { + case 4: + m_pHeader->Invoke = CallJittedMethodRetFloat; + break; + case 8: + m_pHeader->Invoke = CallJittedMethodRet2Float; + break; + case 12: + m_pHeader->Invoke = CallJittedMethodRet3Float; + break; + case 16: + m_pHeader->Invoke = CallJittedMethodRet4Float; + break; + default: + _ASSERTE(!"Should not get here"); + break; + } + break; + case CORINFO_HFA_ELEM_DOUBLE: + switch (thReturnValueType.GetSize()) + { + case 8: + m_pHeader->Invoke = CallJittedMethodRetDouble; + break; + case 16: + m_pHeader->Invoke = CallJittedMethodRet2Double; + break; + case 24: + m_pHeader->Invoke = CallJittedMethodRet3Double; + break; + case 32: + m_pHeader->Invoke = CallJittedMethodRet4Double; + break; + default: + _ASSERTE(!"Should not get here"); + break; + } + break; + default: + _ASSERTE(!"HFA types other than float and double are not supported yet"); + break; + } + } + else + { + switch (thReturnValueType.GetSize()) + { + case 1: + case 2: + case 4: + case 8: + m_pHeader->Invoke = CallJittedMethodRetI8; + break; + case 16: + m_pHeader->Invoke = CallJittedMethodRet2I8; + break; + default: + _ASSERTE(!"The return types that are not HFA should be <= 16 bytes in size"); + break; + } + } +#else + _ASSERTE(!"Struct returns by value are not supported yet"); +#endif + break; + default: + _ASSERTE(!"Unexpected return type"); + break; + } + } + + m_pHeader->NumRoutines = m_routineIndex + 1; // Reserve one extra slot for the target method pointer + m_pHeader->TotalStackSize = m_totalStackSize; + + // resize the structure to its actually used size + return (CallStubHeader*)realloc(m_pHeader, sizeof(CallStubHeader) + m_pHeader->NumRoutines * sizeof(PCODE)); +} + +void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDesc) +{ + PCODE *routines = m_pHeader->Routines; + + // Check if we have a range of registers or stack arguments that we need to store because the current argument + // terminates it. 
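+    // For example, an int passed in a GP register followed by a double passed
+    // in an FP register terminates the pending GP range, so the GP load
+    // routine is emitted here before the FP range is started below.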
+ if ((argLocDesc.m_cGenReg == 0) && (m_r1 != NO_RANGE)) + { + // No GP register is used to pass the current argument, but we already have a range of GP registers, + // store the routine for the range + routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2); + m_r1 = NO_RANGE; + } + else if (((argLocDesc.m_cFloatReg == 0)) && (m_x1 != NO_RANGE)) + { + // No floating point register is used to pass the current argument, but we already have a range of FP registers, + // store the routine for the range + routines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2); + m_x1 = NO_RANGE; + } + else if ((argLocDesc.m_byteStackSize == 0) && (m_s1 != NO_RANGE)) + { + // No stack argument is used to pass the current argument, but we already have a range of stack arguments, + // store the routine for the range + m_totalStackSize += m_s2 - m_s1 + 1; + routines[m_routineIndex++] = (PCODE)Load_Stack; + routines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; + m_s1 = NO_RANGE; + } + + if (argLocDesc.m_cGenReg != 0) + { +#ifndef UNIX_AMD64_ABI + if (argIt.IsArgPassedByRef()) + { + if (m_r1 != NO_RANGE) + { + // The args passed by reference use a separate routine, so we need to flush the existing range + // of general purpose registers if we have one. + routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2); + m_r1 = NO_RANGE; + } + // Arguments passed by reference are handled separately, because the interpreter stores the value types on its stack by value. + // So the argument loading routine needs to load the address of the argument. To avoid explosion of number of the routines, + // we always process single argument passed by reference using single routine. + routines[m_routineIndex++] = GetGPRegRefLoadRoutine(argLocDesc.m_idxGenReg); + routines[m_routineIndex++] = argIt.GetArgSize(); + } + else +#endif // UNIX_AMD64_ABI + { + if (m_r1 == NO_RANGE) // No active range yet + { + // Start a new range + m_r1 = argLocDesc.m_idxGenReg; + m_r2 = m_r1 + argLocDesc.m_cGenReg - 1; + } + else if (argLocDesc.m_idxGenReg == m_r2 + 1) + { + // Extend an existing range + m_r2 += argLocDesc.m_cGenReg; + } + else + { + // Discontinuous range - store a routine for the current and start a new one + routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2); + m_r1 = argLocDesc.m_idxGenReg; + m_r2 = m_r1 + argLocDesc.m_cGenReg - 1; + } + } + } + + if (argLocDesc.m_cFloatReg != 0) + { + if (m_x1 == NO_RANGE) // No active range yet + { + // Start a new range + m_x1 = argLocDesc.m_idxFloatReg; + m_x2 = m_x1 + argLocDesc.m_cFloatReg - 1; + } + else if (argLocDesc.m_idxFloatReg == m_x2 + 1) + { + // Extend an existing range + m_x2 += argLocDesc.m_cFloatReg; + } + else + { + // Discontinuous range - store a routine for the current and start a new one + routines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2); + m_x1 = argLocDesc.m_idxFloatReg; + m_x2 = m_x1 + argLocDesc.m_cFloatReg - 1; + } + } + + if (argLocDesc.m_byteStackSize != 0) + { + if (m_s1 == NO_RANGE) // No active range yet + { + // Start a new range + m_s1 = argLocDesc.m_byteStackIndex; + m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1; + } + else if (argLocDesc.m_byteStackIndex == m_s2 + 1) + { + // Extend an existing range + m_s2 += argLocDesc.m_byteStackSize; + } + else + { + // Discontinuous range - store a routine for the current and start a new one + m_totalStackSize += m_s2 - m_s1 + 1; + routines[m_routineIndex++] = (PCODE)Load_Stack; + routines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; 
+            m_s1 = argLocDesc.m_byteStackIndex;
+            m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1;
+        }
+    }
+}
+
+void CallStubGenerator::FreeCallStub(CallStubHeader *pHeader)
+{
+    free(pHeader);
+}
+
+#endif // FEATURE_INTERPRETER
\ No newline at end of file
diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h
new file mode 100644
index 00000000000000..9266c9ec5ebe76
--- /dev/null
+++ b/src/coreclr/vm/callstubgenerator.h
@@ -0,0 +1,44 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#ifndef CALLSTUBGENERATOR_H
+#define CALLSTUBGENERATOR_H
+
+#include "callingconvention.h"
+
+class MethodDesc;
+
+struct CallStubHeader
+{
+    int NumRoutines;
+    int TotalStackSize;
+    void (*Invoke)(PCODE*, int8_t*, int8_t*, int);
+    PCODE Routines[0];
+
+    void SetTarget(PCODE target)
+    {
+        Routines[NumRoutines - 1] = target;
+    }
+};
+
+class CallStubGenerator
+{
+    static const int NO_RANGE = -1;
+
+    int m_r1;
+    int m_r2;
+    int m_x1;
+    int m_x2;
+    int m_s1;
+    int m_s2;
+    int m_routineIndex;
+    int m_totalStackSize;
+    CallStubHeader *m_pHeader;
+
+    void ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDesc);
+public:
+    CallStubHeader *GenerateCallStub(MethodDesc *pMD);
+    static void FreeCallStub(CallStubHeader *pHeader);
+};
+
+#endif // CALLSTUBGENERATOR_H
diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp
index 883d12e336e22c..e8888e9e3a1a3d 100644
--- a/src/coreclr/vm/interpexec.cpp
+++ b/src/coreclr/vm/interpexec.cpp
@@ -6,6 +6,29 @@
 #include "threads.h"
 #include "gcenv.h"
 #include "interpexec.h"
+#include "callstubgenerator.h"
+
+void InvokeCompiledMethod(MethodDesc *pMD, int8_t *pArgs, int8_t *pRet)
+{
+    CallStubGenerator callStubGenerator;
+    CallStubHeader *pHeader = pMD->GetCallStubHeader();
+    if (pHeader == NULL)
+    {
+        pHeader = callStubGenerator.GenerateCallStub(pMD);
+        HRESULT hr = pMD->SetCallStubHeader(pHeader);
+        if (hr == S_FALSE)
+        {
+            // We have lost the race for generating the header, so we need to free the one we generated
+            // and use the one that was generated by another thread.
+            CallStubGenerator::FreeCallStub(pHeader);
+            pHeader = pMD->GetCallStubHeader();
+        }
+    }
+
+    pHeader->SetTarget(pMD->GetNativeCode()); // The method to call
+
+    pHeader->Invoke(pHeader->Routines, pArgs, pRet, pHeader->TotalStackSize);
+}
 
 typedef void* (*HELPER_FTN_PP)(void*);
 
@@ -1081,13 +1104,25 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
             }
             else
             {
-                // At this stage in the implementation, we assume this is pointer to
-                // interpreter code. In the future, this should probably be tagged pointer
-                // for interpreter call or normal pointer for JIT/R2R call.
                 targetIp = (const int32_t*)targetMethod;
             }
         }
 CALL_TARGET_IP:
+    // HACK: we need a fast way to check whether targetIp points to interpreter code or not.
+    // Probably use a tagged pointer for interpreter code and a normal pointer for JIT/R2R code.
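+    // Note: constructing an EECodeInfo performs a code manager / range section
+    // lookup on every call, which is the overhead the tagged pointer scheme
+    // mentioned above would avoid.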
+ EECodeInfo codeInfo((PCODE)targetIp); + if (!codeInfo.IsValid()) + { + printf("Attempted to execute native code from interpreter.\n"); + assert(0); + } + else if (codeInfo.GetCodeManager() != ExecutionManager::GetInterpreterCodeManager()) + { + MethodDesc *pMD = codeInfo.GetMethodDesc(); + InvokeCompiledMethod(pMD, stack + callArgsOffset, stack + returnOffset); + break; + } + // Save current execution state for when we return from called method pFrame->ip = ip; diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index b4b6c5185a0941..3e1e9697827165 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -253,6 +253,32 @@ HRESULT MethodDesc::SetMethodDescVersionState(PTR_MethodDescVersioningState stat return S_OK; } +#ifdef FEATURE_INTERPRETER +HRESULT MethodDesc::SetCallStubHeader(CallStubHeader *pHeader) +{ + WRAPPER_NO_CONTRACT; + + HRESULT hr; + IfFailRet(EnsureCodeDataExists(NULL)); + + _ASSERTE(m_codeData != NULL); + if (InterlockedCompareExchangeT(&m_codeData->CallStubHeader, pHeader, NULL) != NULL) + return S_FALSE; + + return S_OK; +} + +CallStubHeader *MethodDesc::GetCallStubHeader() +{ + WRAPPER_NO_CONTRACT; + + PTR_MethodDescCodeData codeData = VolatileLoadWithoutBarrier(&m_codeData); + if (codeData == NULL) + return NULL; + return VolatileLoadWithoutBarrier(&codeData->CallStubHeader); +} +#endif // FEATURE_INTERPRETER + #endif //!DACCESS_COMPILE PTR_MethodDescVersioningState MethodDesc::GetMethodDescVersionState() diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp index de0f28698f1f4b..4b2ddeb3f161f0 100644 --- a/src/coreclr/vm/method.hpp +++ b/src/coreclr/vm/method.hpp @@ -22,7 +22,9 @@ #include #include "eeconfig.h" #include "precode.h" - +#ifdef FEATURE_INTERPRETER +#include "callstubgenerator.h" +#endif // FEATURE_INTERPRETER class Stub; class FCallMethodDesc; class FieldDesc; @@ -231,6 +233,9 @@ struct MethodDescCodeData final { PTR_MethodDescVersioningState VersioningState; PCODE TemporaryEntryPoint; +#ifdef FEATURE_INTERPRETER + CallStubHeader *CallStubHeader; +#endif // FEATURE_INTERPRETER }; using PTR_MethodDescCodeData = DPTR(MethodDescCodeData); @@ -1815,6 +1820,11 @@ class MethodDesc HRESULT EnsureCodeDataExists(AllocMemTracker *pamTracker); HRESULT SetMethodDescVersionState(PTR_MethodDescVersioningState state); +#ifdef FEATURE_INTERPRETER + HRESULT SetCallStubHeader(CallStubHeader *pHeader); + CallStubHeader *GetCallStubHeader(); +#endif // FEATURE_INTERPRETER + #endif //!DACCESS_COMPILE PTR_MethodDescVersioningState GetMethodDescVersionState(); From 27bac16e401697e16f37641d52523635582f32b6 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Wed, 7 May 2025 20:53:09 +0200 Subject: [PATCH 02/12] Calling convention testing --- src/tests/JIT/interpreter/Interpreter.cs | 275 +++++++++++++++++++++++ 1 file changed, 275 insertions(+) diff --git a/src/tests/JIT/interpreter/Interpreter.cs b/src/tests/JIT/interpreter/Interpreter.cs index a0a5cd621353ec..273735f596f45f 100644 --- a/src/tests/JIT/interpreter/Interpreter.cs +++ b/src/tests/JIT/interpreter/Interpreter.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. 
using System; +using System.Numerics; using System.Runtime.CompilerServices; public interface ITest @@ -74,14 +75,209 @@ public StructWithRefs(int val1, int val2) o1 = new MyObj(val1); o2 = new MyObj(val2); } +public struct TestStruct +{ + public int a; + public int b; + public int c; + public int d; + public int e; + public int f; +} + +public struct TestStruct2 +{ + public int a; + public int b; +} + +public struct TestStruct4ii +{ + public int a; + public int b; + public int c; + public int d; +} + +public struct TestStruct4if +{ + public int a; + public int b; + public float c; + public float d; +} + +public struct TestStruct4fi +{ + public float a; + public float b; + public int c; + public int d; +} + +public struct TestStruct4ff +{ + public float a; + public float b; + public float c; + public float d; +} + + +public struct TestStruct3d +{ + public double a; + public double b; + public double c; } public class InterpreterTest { + static void TestCallingConvention0(int a, float b, int c, double d, int e, double f) + { + Console.WriteLine("TestCallingConvention0: a = {0}, b = {1}, c = {2}, d = {3}, e = {4}, f = {5}", a, b, c, d, e, f); + } + + static void TestCallingConvention1(TestStruct s) + { + Console.WriteLine("TestCallingConvention1: a = {0}, b = {1}, c = {2}, d = {3}, e = {4}, f = {5}", s.a, s.b, s.c, s.d, s.e, s.f); + } + + static TestStruct2 TestCallingConvention2() + { + TestStruct2 s; + s.a = 1; + s.b = 2; + return s; + } + + static Vector2 TestCallingConvention3() + { + Vector2 v = new Vector2(1, 2); + return v; + } + + static TestStruct TestCallingConvention4() + { + TestStruct s; + s.a = 1; + s.b = 2; + s.c = 3; + s.d = 4; + s.e = 5; + s.f = 6; + return s; + } + + static TestStruct4ii TestCallingConvention5() + { + TestStruct4ii s; + s.a = 1; + s.b = 2; + s.c = 3; + s.d = 4; + return s; + } + + static TestStruct4if TestCallingConvention6() + { + TestStruct4if s; + s.a = 1; + s.b = 2; + s.c = 3.0f; + s.d = 4.0f; + return s; + } + + static TestStruct4fi TestCallingConvention7() + { + TestStruct4fi s; + s.a = 1.0f; + s.b = 2.0f; + s.c = 3; + s.d = 4; + return s; + } + + static TestStruct4ff TestCallingConvention8() + { + TestStruct4ff s; + s.a = 1.0f; + s.b = 2.0f; + s.c = 3.0f; + s.d = 4.0f; + return s; + } + + static void TestCallingConvention9(TestStruct4fi s) + { + Console.WriteLine("TestCallingConvention9: a = {0}, b = {1}, c = {2}, d = {3}", s.a, s.b, s.c, s.d); + } + + static void TestCallingConvention10(TestStruct3d s) + { + Console.WriteLine("TestCallingConvention10: a = {0}, b = {1}, c = {2}", s.a, s.b, s.c); + } + + static TestStruct3d TestCallingConvention11() + { + TestStruct3d s; + s.a = 1.0f; + s.b = 2.0f; + s.c = 3.0f; + return s; + } + + static int Main(string[] args) { jitField1 = 42; jitField2 = 43; + + TestCallingConvention0(1, 2.0f, 3, 4.0, 5, 6.0); + + TestStruct s = new TestStruct(); + s.a = 1; + s.b = 2; + s.c = 3; + s.d = 4; + s.e = 5; + s.f = 6; + TestCallingConvention1(s); + + TestStruct2 s2 = TestCallingConvention2(); + + Vector2 v = TestCallingConvention3(); + + TestStruct s4 = TestCallingConvention4(); + + TestStruct4ii s5 = TestCallingConvention5(); + + TestStruct4if s6 = TestCallingConvention6(); + + TestStruct4fi s7 = TestCallingConvention7(); + + TestStruct4ff s8 = TestCallingConvention8(); + + TestStruct4fi s9 = new TestStruct4fi(); + s9.a = 1.0f; + s9.b = 2.0f; + s9.c = 3; + s9.d = 4; + TestCallingConvention9(s9); + + TestStruct3d s10 = new TestStruct3d(); + s10.a = 1.0f; + s10.b = 2.0f; + s10.c = 3.0f; + TestCallingConvention10(s10); + + 
TestStruct3d s11 = TestCallingConvention11(); + Console.WriteLine("TestCallingConvention11: s = "); + Console.WriteLine(s11.a); + Console.WriteLine(s11.b); + Console.WriteLine(s11.c); + RunInterpreterTests(); return 100; } @@ -89,6 +285,85 @@ static int Main(string[] args) [MethodImpl(MethodImplOptions.NoInlining)] public static void RunInterpreterTests() { + TestCallingConvention0(1, 2.0f, 3, 4.0, 5, 6.0); + + TestStruct s = new TestStruct(); + s.a = 1; + s.b = 2; + s.c = 3; + s.d = 4; + s.e = 5; + s.f = 6; + TestCallingConvention1(s); + + TestStruct2 s2 = TestCallingConvention2(); + Console.WriteLine("TestCallingConvention: s = "); + Console.WriteLine(s2.a); + Console.WriteLine(s2.b); + +#if VECTOR_ALIGNMENT_WORKS + // TODO: enable this again after fixing the alignment for the Vector2 struct and similar ones + Vector2 v = TestCallingConvention3(); + Console.WriteLine("TestCallingConvention: v = "); + Console.WriteLine(v[0]); + Console.WriteLine(v[1]); +#endif + TestStruct s4 = TestCallingConvention4(); + Console.WriteLine("TestCallingConvention: s = "); + Console.WriteLine(s4.a); + Console.WriteLine(s4.b); + Console.WriteLine(s4.c); + Console.WriteLine(s4.d); + Console.WriteLine(s4.e); + Console.WriteLine(s4.f); + + TestStruct4ii s5 = TestCallingConvention5(); + Console.WriteLine("TestCallingConvention: s = "); + Console.WriteLine(s5.a); + Console.WriteLine(s5.b); + Console.WriteLine(s5.c); + Console.WriteLine(s5.d); + + TestStruct4if s6 = TestCallingConvention6(); + Console.WriteLine("TestCallingConvention: s = "); + Console.WriteLine(s6.a); + Console.WriteLine(s6.b); + Console.WriteLine(s6.c); + Console.WriteLine(s6.d); + + TestStruct4fi s7 = TestCallingConvention7(); + Console.WriteLine("TestCallingConvention: s = "); + Console.WriteLine(s7.a); + Console.WriteLine(s7.b); + Console.WriteLine(s7.c); + Console.WriteLine(s7.d); + + TestStruct4ff s8 = TestCallingConvention8(); + Console.WriteLine("TestCallingConvention: s = "); + Console.WriteLine(s8.a); + Console.WriteLine(s8.b); + Console.WriteLine(s8.c); + Console.WriteLine(s8.d); + + TestStruct4fi s9 = new TestStruct4fi(); + s9.a = 1.0f; + s9.b = 2.0f; + s9.c = 3; + s9.d = 4; + TestCallingConvention9(s9); + + TestStruct3d s10 = new TestStruct3d(); + s10.a = 1.0f; + s10.b = 2.0f; + s10.c = 3.0f; + TestCallingConvention10(s10); + + TestStruct3d s11 = TestCallingConvention11(); + Console.WriteLine("TestCallingConvention: s = "); + Console.WriteLine(s11.a); + Console.WriteLine(s11.b); + Console.WriteLine(s11.c); + // Console.WriteLine("Run interp tests"); if (SumN(50) != 1275) Environment.FailFast(null); From 53806f84648721600b76e445dfac5c6f5310c776 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Tue, 13 May 2025 00:12:03 +0200 Subject: [PATCH 03/12] Cleanup, comments and apple arm64 fix --- src/coreclr/vm/amd64/AsmHelpers.asm | 29 ++- src/coreclr/vm/amd64/asmhelpers.S | 60 ++--- src/coreclr/vm/arm64/asmhelpers.S | 257 ++++++------------ src/coreclr/vm/arm64/asmhelpers.asm | 318 +++++++---------------- src/coreclr/vm/callstubgenerator.cpp | 128 ++++++--- src/coreclr/vm/callstubgenerator.h | 29 ++- src/coreclr/vm/interpexec.cpp | 29 ++- src/coreclr/vm/method.cpp | 8 +- src/coreclr/vm/method.hpp | 6 +- src/tests/JIT/interpreter/Interpreter.cs | 26 +- 10 files changed, 395 insertions(+), 495 deletions(-) diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 29fbe4a2d465a6..7aecce31694688 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -628,6 +628,9 @@ 
NESTED_ENTRY CallEHFilterFunclet, _TEXT FUNCLET_CALL_EPILOGUE ret NESTED_END CallEHFilterFunclet, _TEXT + +; Copy arguments from the interpreter stack to the processor stack. +; The CPU stack slots are aligned to pointer size. LEAF_ENTRY Load_Stack, _TEXT push rdi push rsi @@ -647,6 +650,8 @@ LEAF_ENTRY Load_Stack, _TEXT jmp qword ptr [r11] LEAF_END Load_Stack, _TEXT +; Routines for passing value type arguments by reference in general purpose registers RCX, RDX, R8, R9 + LEAF_ENTRY Load_Ref_RCX, _TEXT mov rcx, r10 add r10, [r11 + 8] ; size of the value type @@ -675,6 +680,8 @@ LEAF_ENTRY Load_Ref_R9, _TEXT jmp qword ptr [r11] LEAF_END Load_Ref_R9, _TEXT +; Routines for passing arguments by value in general purpose registers RCX, RDX, R8, R9 + LEAF_ENTRY Load_RCX, _TEXT mov rcx, [r10] add r10, 8 @@ -755,6 +762,8 @@ LEAF_ENTRY Load_R9, _TEXT jmp qword ptr [r11] LEAF_END Load_R9, _TEXT +; Routines for passing arguments in floating point registers XMM0..XMM3 + LEAF_ENTRY Load_XMM0, _TEXT movsd xmm0, real8 ptr [r10] add r10, 8 @@ -865,28 +874,40 @@ END_PROLOGUE NESTED_END CallJittedMethodRetBuff, _TEXT NESTED_ENTRY CallJittedMethodRetDouble, _TEXT + push_nonvol_reg rbp + mov rbp, rsp push_vol_reg r8 - alloc_stack 20h + push_vol_reg rax ; align END_PROLOGUE + add r9, 20h ; argument save area + alignment + sub rsp, r9 ; total stack space mov r11, rcx ; The routines list mov r10, rdx ; interpreter stack args call qword ptr [r11] add rsp, 20h - pop r8 + mov r8, [rbp - 8] movsd real8 ptr [r8], xmm0 + mov rsp, rbp + pop rbp ret NESTED_END CallJittedMethodRetDouble, _TEXT NESTED_ENTRY CallJittedMethodRetI8, _TEXT + push_nonvol_reg rbp + mov rbp, rsp push_vol_reg r8 - alloc_stack 20h + push_vol_reg rax ; align END_PROLOGUE + add r9, 20h ; argument save area + alignment + sub rsp, r9 ; total stack space mov r11, rcx ; The routines list mov r10, rdx ; interpreter stack args call qword ptr [r11] add rsp, 20h - pop r8 + mov r8, [rbp - 8] mov qword ptr [r8], rax + mov rsp, rbp + pop rbp ret NESTED_END CallJittedMethodRetI8, _TEXT diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 87528c031f9776..57071d23a62fd6 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -452,6 +452,9 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler FUNCLET_CALL_EPILOGUE ret NESTED_END CallEHFilterFunclet, _TEXT + +// Copy arguments from the interpreter stack to the processor stack. +// The CPU stack slots are aligned to pointer size. 
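+//
+// Register protocol shared by all Load_* routines in this file:
+//   r10 - current position in the interpreter stack arguments
+//   r11 - current position in the routines list; each routine advances it
+//         past its entry (plus any extra data slot) and jumps to the next one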
LEAF_ENTRY Load_Stack, _TEXT push rdi push rsi @@ -471,47 +474,7 @@ LEAF_ENTRY Load_Stack, _TEXT jmp qword ptr [r11] LEAF_END Load_Stack, _TEXT -LEAF_ENTRY Load_Ref_RDI, _TEXT - mov rdi, r10 - add r10, [r11 + 8] - add r11, 16 - jmp qword ptr [r11] -LEAF_END Load_Ref_RDI, _TEXT - -LEAF_ENTRY Load_Ref_RSI, _TEXT - mov rsi, r10 - add r10, [r11 + 8] - add r11, 16 - jmp qword ptr [r11] -LEAF_END Load_Ref_RSI, _TEXT - -LEAF_ENTRY Load_Ref_RDX, _TEXT - mov rdx, r10 - add r10, [r11 + 8] - add r11, 16 - jmp qword ptr [r11] -LEAF_END Load_Ref_RDX, _TEXT - -LEAF_ENTRY Load_Ref_RCX, _TEXT - mov rcx, r10 - add r10, [r11 + 8] - add r11, 16 - jmp qword ptr [r11] -LEAF_END Load_Ref_RCX, _TEXT - -LEAF_ENTRY Load_Ref_R8, _TEXT - mov r8, r10 - add r10, [r11 + 8] - add r11, 16 - jmp qword ptr [r11] -LEAF_END Load_Ref_R8, _TEXT - -LEAF_ENTRY Load_Ref_R9, _TEXT - mov r9, r10 - add r10, [r11 + 8] - add r11, 16 - jmp qword ptr [r11] -LEAF_END Load_Ref_R9, _TEXT +// Routines for passing arguments by value in general purpose registers RDI, RSI, RDX, RCX, R8, R9 LEAF_ENTRY Load_RDI, _TEXT mov rdi, [r10] @@ -712,6 +675,8 @@ LEAF_ENTRY Load_R9, _TEXT jmp qword ptr [r11] LEAF_END Load_R9, _TEXT +// Routines for passing arguments in floating point registers XMM0..XMM7 + LEAF_ENTRY Load_XMM0, _TEXT movsd xmm0, real8 ptr [r10] add r10, 8 @@ -1050,7 +1015,6 @@ LEAF_END Load_XMM7, _TEXT NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler -// TODO: decide whether to create RBP frame everywhere or not push_nonvol_reg rbp mov rbp, rsp alloc_stack 0x10 @@ -1142,6 +1106,10 @@ NESTED_ENTRY CallJittedMethodRetI8Double, _TEXT, NoHandler push_register rdx push_register rax // align END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + call qword ptr [r11] mov rcx, [rbp - 8] mov qword ptr [rcx], rax movsd real8 ptr [rcx + 8], xmm0 @@ -1156,6 +1124,10 @@ NESTED_ENTRY CallJittedMethodRetDoubleI8, _TEXT, NoHandler push_register rdx push_register rax // align END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + call qword ptr [r11] mov rcx, [rbp - 8] movsd real8 ptr [rcx], xmm0 mov qword ptr [rcx + 8], rax @@ -1170,6 +1142,10 @@ NESTED_ENTRY CallJittedMethodRetDoubleDouble, _TEXT, NoHandler push_register rdx push_register rax // align END_PROLOGUE + sub rsp, rcx // total stack space + mov r11, rdi // The routines list + mov r10, rsi // interpreter stack args + call qword ptr [r11] mov rcx, [rbp - 8] movsd real8 ptr [rcx], xmm0 movsd real8 ptr [rcx + 8], xmm1 diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index adadab377f4743..80192a5089a29e 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -870,19 +870,47 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT EPILOG_BRANCH_REG x12 LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT +// Copy arguments from the interpreter stack to the processor stack +// Except for Apple target, the CPU stack slots are aligned to +// pointer size. LEAF_ENTRY Load_Stack - ldr w14, [x10, #4]! // SP offset - ldr w12, [x10, #4]! 
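+// On Apple targets stack arguments are packed to their natural alignment, so
+// the copy below also handles a 4, 2 or 1 byte remainder.
+//
+//   x9  - current position in the interpreter stack arguments
+//   x10 - current position in the routines list (x11 holds the next routine)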
// number of stack slots + ldr w14, [x10], #4 // SP offset + ldr w12, [x10], #4 // number of stack slots add x14, sp, x14 +#ifdef TARGET_APPLE + cmp x12, #8 + blt LOCAL_LABEL(LessThan8Bytes) +#endif // TARGET_APPLE LOCAL_LABEL(CopyLoop): ldr x13, [x9], #8 str x13, [x14], #8 subs x12, x12, #8 bne LOCAL_LABEL(CopyLoop) +#ifdef TARGET_APPLE + b LOCAL_LABEL(DoneLoop) +LOCAL_LABEL(LessThan8Bytes): + cmp x12, #4 + blt LOCAL_LABEL(LessThan4Bytes) + ldr w13, [x9], #8 + str w13, [x14], #4 + b LOCAL_LABEL(DoneLoop) +LOCAL_LABEL(LessThan4Bytes): + cmp x12, #2 + blt LOCAL_LABEL(LessThan2Bytes) + ldrh w13, [x9], #8 + strh w13, [x14], #2 + b LOCAL_LABEL(DoneLoop) +LOCAL_LABEL(LessThan2Bytes): + ldrb w13, [x9], #8 + strb w13, [x14], #1 +LOCAL_LABEL(DoneLoop): +#endif // TARGET_APPLE ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 LEAF_END Load_Stack +// Routines for passing value type arguments by reference in general purpose registers X0..X7 + LEAF_ENTRY Load_Ref_X0 mov x0, x9 ldr x12, [x10], #8 @@ -947,6 +975,8 @@ LEAF_ENTRY Load_Ref_X7 EPILOG_BRANCH_REG x11 LEAF_END Load_Ref_X7 +// Routines for passing arguments by value in general purpose registers X0..X7 + LEAF_ENTRY Load_X0 ldr x0, [x9], #8 ldr x11, [x10], #8 @@ -961,6 +991,7 @@ LEAF_END Load_X0_X1 LEAF_ENTRY Load_X0_X1_X2 ldp x0, x1, [x9], #16 +ALTERNATE_ENTRY Load_X2 ldr x2, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -968,6 +999,7 @@ LEAF_END Load_X0_X1_X2 LEAF_ENTRY Load_X0_X1_X2_X3 ldp x0, x1, [x9], #16 +ALTERNATE_ENTRY Load_X2_X3 ldp x2, x3, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -975,7 +1007,9 @@ LEAF_END Load_X0_X1_X2_X3 LEAF_ENTRY Load_X0_X1_X2_X3_X4 ldp x0, x1, [x9], #16 +ALTERNATE_ENTRY Load_X2_X3_X4 ldp x2, x3, [x9], #16 +ALTERNATE_ENTRY Load_X4 ldr x4, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -983,7 +1017,9 @@ LEAF_END Load_X0_X1_X2_X3_X4 LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5 ldp x0, x1, [x9], #16 +ALTERNATE_ENTRY Load_X2_X3_X4_X5 ldp x2, x3, [x9], #16 +ALTERNATE_ENTRY Load_X4_X5 ldp x4, x5, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -991,8 +1027,11 @@ LEAF_END Load_X0_X1_X2_X3_X4_X5 LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5_X6 ldp x0, x1, [x9], #16 +ALTERNATE_ENTRY Load_X2_X3_X4_X5_X6 ldp x2, x3, [x9], #16 +ALTERNATE_ENTRY Load_X4_X5_X6 ldp x4, x5, [x9], #16 +ALTERNATE_ENTRY Load_X6 ldr x6, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1000,8 +1039,11 @@ LEAF_END Load_X0_X1_X2_X3_X4_X5_X6 LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5_X6_X7 ldp x0, x1, [x9], #16 +ALTERNATE_ENTRY Load_X2_X3_X4_X5_X6_X7 ldp x2, x3, [x9], #16 +ALTERNATE_ENTRY Load_X4_X5_X6_X7 ldp x4, x5, [x9], #16 +ALTERNATE_ENTRY Load_X6_X7 ldp x6, x7, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1021,6 +1063,7 @@ LEAF_END Load_X1_X2 LEAF_ENTRY Load_X1_X2_X3 ldp x1, x2, [x9], #16 +ALTERNATE_ENTRY Load_X3 ldr x3, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1028,6 +1071,7 @@ LEAF_END Load_X1_X2_X3 LEAF_ENTRY Load_X1_X2_X3_X4 ldp x1, x2, [x9], #16 +ALTERNATE_ENTRY Load_X3_X4 ldp x3, x4, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1035,7 +1079,9 @@ LEAF_END Load_X1_X2_X3_X4 LEAF_ENTRY Load_X1_X2_X3_X4_X5 ldp x1, x2, [x9], #16 +ALTERNATE_ENTRY Load_X3_X4_X5 ldp x3, x4, [x9], #16 +ALTERNATE_ENTRY Load_X5 ldr x5, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1043,7 +1089,9 @@ LEAF_END Load_X1_X2_X3_X4_X5 LEAF_ENTRY Load_X1_X2_X3_X4_X5_X6 ldp x1, x2, [x9], #16 +ALTERNATE_ENTRY Load_X3_X4_X5_X6 ldp x3, x4, [x9], #16 +ALTERNATE_ENTRY Load_X5_X6 ldp x5, x6, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1051,149 
+1099,17 @@ LEAF_END Load_X1_X2_X3_X4_X5_X6 LEAF_ENTRY Load_X1_X2_X3_X4_X5_X6_X7 ldp x1, x2, [x9], #16 +ALTERNATE_ENTRY Load_X3_X4_X5_X6_X7 ldp x3, x4, [x9], #16 +ALTERNATE_ENTRY Load_X5_X6_X7 ldp x5, x6, [x9], #16 +ALTERNATE_ENTRY Load_X7 ldr x7, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 LEAF_END Load_X1_X2_X3_X4_X5_X6_X7 -LEAF_ENTRY Load_X2 - ldr x2, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X2 -LEAF_ENTRY Load_X2_X3 - ldp x2, x3, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X2_X3 - -LEAF_ENTRY Load_X2_X3_X4 - ldp x2, x3, [x9], #16 - ldr x4, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X2_X3_X4 - -LEAF_ENTRY Load_X2_X3_X4_X5 - ldp x2, x3, [x9], #16 - ldp x4, x5, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X2_X3_X4_X5 - -LEAF_ENTRY Load_X2_X3_X4_X5_X6 - ldp x2, x3, [x9], #16 - ldp x4, x5, [x9], #16 - ldr x6, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X2_X3_X4_X5_X6 - -LEAF_ENTRY Load_X2_X3_X4_X5_X6_X7 - ldp x2, x3, [x9], #16 - ldp x4, x5, [x9], #16 - ldp x6, x7, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X2_X3_X4_X5_X6_X7 - -LEAF_ENTRY Load_X3 - ldr x3, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X3 -LEAF_ENTRY Load_X3_X4 - ldp x3, x4, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X3_X4 - -LEAF_ENTRY Load_X3_X4_X5 - ldp x3, x4, [x9], #16 - ldr x5, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X3_X4_X5 - -LEAF_ENTRY Load_X3_X4_X5_X6 - ldp x3, x4, [x9], #16 - ldp x5, x6, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X3_X4_X5_X6 - -LEAF_ENTRY Load_X3_X4_X5_X6_X7 - ldp x3, x4, [x9], #16 - ldp x5, x6, [x9], #16 - ldr x7, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X3_X4_X5_X6_X7 - -LEAF_ENTRY Load_X4 - ldr x4, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X4 - -LEAF_ENTRY Load_X4_X5 - ldp x4, x5, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X4_X5 - -LEAF_ENTRY Load_X4_X5_X6 - ldp x4, x5, [x9], #16 - ldr x6, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X4_X5_X6 - -LEAF_ENTRY Load_X4_X5_X6_X7 - ldp x4, x5, [x9], #16 - ldp x6, x7, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X4_X5_X6_X7 - -LEAF_ENTRY Load_X5 - ldr x5, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X5 - -LEAF_ENTRY Load_X5_X6 - ldp x5, x6, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X5_X6 - -LEAF_ENTRY Load_X5_X6_X7 - ldp x5, x6, [x9], #16 - ldr x7, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X5_X6_X7 - -LEAF_ENTRY Load_X6 - ldr x6, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X6 - -LEAF_ENTRY Load_X6_X7 - ldp x6, x7, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X6_X7 - -LEAF_ENTRY Load_X7 - ldr x7, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_X7 +// Routines for passing arguments in floating point registers D0..D7 LEAF_ENTRY Load_D0 ldr d0, [x9], #8 @@ -1282,27 +1198,7 @@ ALTERNATE_ENTRY Load_D6 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 LEAF_END Load_D0_D1_D2_D3_D4_D5_D6 -/* -LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5_D6_D7 - ldr d0, [x9], #8 -ALTERNATE_ENTRY Load_D1_D2_D3_D4_D5_D6_D7 - ldr d1, [x9], #8 -ALTERNATE_ENTRY Load_D2_D3_D4_D5_D6_D7 - ldr d2, [x9], #8 -ALTERNATE_ENTRY Load_D3_D4_D5_D6_D7 - ldr d3, [x9], #8 
-ALTERNATE_ENTRY Load_D4_D5_D6_D7 - ldr d4, [x9], #8 -ALTERNATE_ENTRY Load_D5_D6_D7 - ldr d5, [x9], #8 -ALTERNATE_ENTRY Load_D6_D7 - ldr d6, [x9], #8 -ALTERNATE_ENTRY Load_D7 - ldr d7, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 -LEAF_END Load_D0_D1_D2_D3_D4_D5_D6_D7 -*/ + LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5_D6_D7 ldp d0, d1, [x9], #16 ALTERNATE_ENTRY Load_D2_D3_D4_D5_D6_D7 @@ -1325,7 +1221,12 @@ ALTERNATE_ENTRY Load_D7 ldr d7, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 -LEAF_END Load_D0_D1_D2_D3_D4_D5_D6_D7 +LEAF_END Load_D1_D2_D3_D4_D5_D6_D7 + +// Functions to invoke a sequence of routines to: +// 1. load arguments from the interpreter stack to registers / stack based on the calling convention +// 2. call the target method +// 3. put the return value of the target method to the interpreter stack // X0 - routines array // X1 - interpreter stack args location @@ -1337,8 +1238,9 @@ NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler mov x9, x1 ldr x11, [x10], #8 blr x11 + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRetVoid, _TEXT // X0 - routines array @@ -1353,8 +1255,9 @@ NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler mov x8, x2 ldr x11, [x10], #8 blr x11 + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRetBuff, _TEXT // X0 - routines array @@ -1371,8 +1274,9 @@ NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler blr x11 ldr x2, [sp, #16] str x0, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRetI8, _TEXT // X0 - routines array @@ -1389,8 +1293,9 @@ NESTED_ENTRY CallJittedMethodRet2I8, _TEXT, NoHandler blr x11 ldr x2, [sp, #16] stp x0, x1, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet2I8, _TEXT // X0 - routines array @@ -1407,8 +1312,9 @@ NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler blr x11 ldr x2, [sp, #16] str d0, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRetDouble, _TEXT // X0 - routines array @@ -1425,8 +1331,9 @@ NESTED_ENTRY CallJittedMethodRet2Double, _TEXT, NoHandler blr x11 ldr x2, [sp, #16] stp d0, d1, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet2Double, _TEXT // X0 - routines array @@ -1444,8 +1351,9 @@ NESTED_ENTRY CallJittedMethodRet3Double, _TEXT, NoHandler ldr x2, [sp, #16] stp d0, d1, [x2], #16 str d2, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet3Double, _TEXT // X0 - routines array @@ -1463,8 +1371,9 @@ NESTED_ENTRY CallJittedMethodRet4Double, _TEXT, NoHandler ldr x2, [sp, #16] stp d0, d1, [x2], #16 stp d2, d3, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet4Double, _TEXT // X0 - routines array @@ -1481,8 +1390,9 @@ NESTED_ENTRY CallJittedMethodRetFloat, _TEXT, NoHandler blr x11 ldr x2, [sp, #16] str s0, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRetFloat, _TEXT // X0 - routines array @@ -1499,8 +1409,9 @@ NESTED_ENTRY CallJittedMethodRet2Float, _TEXT, NoHandler blr x11 ldr x2, [sp, #16] stp s0, s1, [x2] + EPILOG_STACK_RESTORE 
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet2Float, _TEXT // X0 - routines array @@ -1518,8 +1429,9 @@ NESTED_ENTRY CallJittedMethodRet3Float, _TEXT, NoHandler ldr x2, [sp, #16] stp s0, s1, [x2], #8 str s2, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet3Float, _TEXT // X0 - routines array @@ -1537,6 +1449,7 @@ NESTED_ENTRY CallJittedMethodRet4Float, _TEXT, NoHandler ldr x2, [sp, #16] stp s0, s1, [x2], #8 stp s2, s3, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet4Float, _TEXT diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index b15a1017f880a5..743395626bb01c 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -1253,9 +1253,11 @@ JIT_PollGCRarePath EPILOG_BRANCH_REG x12 LEAF_END + ; Copy arguments from the interpreter stack to the processor stack + ; The CPU stack slots are aligned to pointer size. LEAF_ENTRY Load_Stack - ldr w14, [x10, #4]! // SP offset - ldr w12, [x10, #4]! // number of stack slots + ldr w14, [x10], #4 ; SP offset + ldr w12, [x10], #4 ; number of stack slots add x14, sp, x14 CopyLoop ldr x13, [x9], #8 @@ -1266,6 +1268,8 @@ CopyLoop EPILOG_BRANCH_REG x11 LEAF_END Load_Stack + ; Routines for passing value type arguments by reference in general purpose registers X0..X7 + LEAF_ENTRY Load_Ref_X0 mov x0, x9 ldr x12, [x10], #8 @@ -1330,6 +1334,8 @@ CopyLoop EPILOG_BRANCH_REG x11 LEAF_END Load_Ref_X7 + ; Routines for passing arguments by value in general purpose registers X0..X7 + LEAF_ENTRY Load_X0 ldr x0, [x9], #8 ldr x11, [x10], #8 @@ -1344,6 +1350,7 @@ CopyLoop LEAF_ENTRY Load_X0_X1_X2 ldp x0, x1, [x9], #16 + ALTERNATE_ENTRY Load_X2 ldr x2, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1351,6 +1358,7 @@ CopyLoop LEAF_ENTRY Load_X0_X1_X2_X3 ldp x0, x1, [x9], #16 + ALTERNATE_ENTRY Load_X2_X3 ldp x2, x3, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1358,7 +1366,9 @@ CopyLoop LEAF_ENTRY Load_X0_X1_X2_X3_X4 ldp x0, x1, [x9], #16 + ALTERNATE_ENTRY Load_X2_X3_X4 ldp x2, x3, [x9], #16 + ALTERNATE_ENTRY Load_X4 ldr x4, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1366,7 +1376,9 @@ CopyLoop LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5 ldp x0, x1, [x9], #16 + ALTERNATE_ENTRY Load_X2_X3_X4_X5 ldp x2, x3, [x9], #16 + ALTERNATE_ENTRY Load_X4_X5 ldp x4, x5, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1374,8 +1386,11 @@ CopyLoop LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5_X6 ldp x0, x1, [x9], #16 + ALTERNATE_ENTRY Load_X2_X3_X4_X5_X6 ldp x2, x3, [x9], #16 + ALTERNATE_ENTRY Load_X4_X5_X6 ldp x4, x5, [x9], #16 + ALTERNATE_ENTRY Load_X6 ldr x6, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1383,8 +1398,11 @@ CopyLoop LEAF_ENTRY Load_X0_X1_X2_X3_X4_X5_X6_X7 ldp x0, x1, [x9], #16 + ALTERNATE_ENTRY Load_X2_X3_X4_X5_X6_X7 ldp x2, x3, [x9], #16 + ALTERNATE_ENTRY Load_X4_X5_X6_X7 ldp x4, x5, [x9], #16 + ALTERNATE_ENTRY Load_X6_X7 ldp x6, x7, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1404,6 +1422,7 @@ CopyLoop LEAF_ENTRY Load_X1_X2_X3 ldp x1, x2, [x9], #16 + ALTERNATE_ENTRY Load_X3 ldr x3, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1411,6 +1430,7 @@ CopyLoop LEAF_ENTRY Load_X1_X2_X3_X4 ldp x1, x2, [x9], #16 + ALTERNATE_ENTRY Load_X3_X4 ldp x3, x4, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1418,7 +1438,9 @@ CopyLoop LEAF_ENTRY Load_X1_X2_X3_X4_X5 ldp 
x1, x2, [x9], #16 + ALTERNATE_ENTRY Load_X3_X4_X5 ldp x3, x4, [x9], #16 + ALTERNATE_ENTRY Load_X5 ldr x5, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1426,7 +1448,9 @@ CopyLoop LEAF_ENTRY Load_X1_X2_X3_X4_X5_X6 ldp x1, x2, [x9], #16 + ALTERNATE_ENTRY Load_X3_X4_X5_X6 ldp x3, x4, [x9], #16 + ALTERNATE_ENTRY Load_X5_X6 ldp x5, x6, [x9], #16 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 @@ -1434,149 +1458,17 @@ CopyLoop LEAF_ENTRY Load_X1_X2_X3_X4_X5_X6_X7 ldp x1, x2, [x9], #16 + ALTERNATE_ENTRY Load_X3_X4_X5_X6_X7 ldp x3, x4, [x9], #16 + ALTERNATE_ENTRY Load_X5_X6_X7 ldp x5, x6, [x9], #16 + ALTERNATE_ENTRY Load_X7 ldr x7, [x9], #8 ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 LEAF_END Load_X1_X2_X3_X4_X5_X6_X7 - LEAF_ENTRY Load_X2 - ldr x2, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X2 - LEAF_ENTRY Load_X2_X3 - ldp x2, x3, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X2_X3 - - LEAF_ENTRY Load_X2_X3_X4 - ldp x2, x3, [x9], #16 - ldr x4, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X2_X3_X4 - - LEAF_ENTRY Load_X2_X3_X4_X5 - ldp x2, x3, [x9], #16 - ldp x4, x5, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X2_X3_X4_X5 - - LEAF_ENTRY Load_X2_X3_X4_X5_X6 - ldp x2, x3, [x9], #16 - ldp x4, x5, [x9], #16 - ldr x6, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X2_X3_X4_X5_X6 - - LEAF_ENTRY Load_X2_X3_X4_X5_X6_X7 - ldp x2, x3, [x9], #16 - ldp x4, x5, [x9], #16 - ldp x6, x7, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X2_X3_X4_X5_X6_X7 - - LEAF_ENTRY Load_X3 - ldr x3, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X3 - LEAF_ENTRY Load_X3_X4 - ldp x3, x4, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X3_X4 - - LEAF_ENTRY Load_X3_X4_X5 - ldp x3, x4, [x9], #16 - ldr x5, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X3_X4_X5 - - LEAF_ENTRY Load_X3_X4_X5_X6 - ldp x3, x4, [x9], #16 - ldp x5, x6, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X3_X4_X5_X6 - - LEAF_ENTRY Load_X3_X4_X5_X6_X7 - ldp x3, x4, [x9], #16 - ldp x5, x6, [x9], #16 - ldr x7, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X3_X4_X5_X6_X7 - - LEAF_ENTRY Load_X4 - ldr x4, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X4 - - LEAF_ENTRY Load_X4_X5 - ldp x4, x5, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X4_X5 - - LEAF_ENTRY Load_X4_X5_X6 - ldp x4, x5, [x9], #16 - ldr x6, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X4_X5_X6 - - LEAF_ENTRY Load_X4_X5_X6_X7 - ldp x4, x5, [x9], #16 - ldp x6, x7, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X4_X5_X6_X7 - - LEAF_ENTRY Load_X5 - ldr x5, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X5 - - LEAF_ENTRY Load_X5_X6 - ldp x5, x6, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X5_X6 - - LEAF_ENTRY Load_X5_X6_X7 - ldp x5, x6, [x9], #16 - ldr x7, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X5_X6_X7 - - LEAF_ENTRY Load_X6 - ldr x6, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X6 - - LEAF_ENTRY Load_X6_X7 - ldp x6, x7, [x9], #16 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X6_X7 - - LEAF_ENTRY Load_X7 - ldr x7, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_X7 + ; Routines for passing arguments in floating 
point registers D0..D7 LEAF_ENTRY Load_D0 ldr d0, [x9], #8 @@ -1665,27 +1557,7 @@ CopyLoop ldr x11, [x10], #8 EPILOG_BRANCH_REG x11 LEAF_END Load_D0_D1_D2_D3_D4_D5_D6 - /* - LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5_D6_D7 - ldr d0, [x9], #8 - ALTERNATE_ENTRY Load_D1_D2_D3_D4_D5_D6_D7 - ldr d1, [x9], #8 - ALTERNATE_ENTRY Load_D2_D3_D4_D5_D6_D7 - ldr d2, [x9], #8 - ALTERNATE_ENTRY Load_D3_D4_D5_D6_D7 - ldr d3, [x9], #8 - ALTERNATE_ENTRY Load_D4_D5_D6_D7 - ldr d4, [x9], #8 - ALTERNATE_ENTRY Load_D5_D6_D7 - ldr d5, [x9], #8 - ALTERNATE_ENTRY Load_D6_D7 - ldr d6, [x9], #8 - ALTERNATE_ENTRY Load_D7 - ldr d7, [x9], #8 - ldr x11, [x10], #8 - EPILOG_BRANCH_REG x11 - LEAF_END Load_D0_D1_D2_D3_D4_D5_D6_D7 - */ + LEAF_ENTRY Load_D0_D1_D2_D3_D4_D5_D6_D7 ldp d0, d1, [x9], #16 ALTERNATE_ENTRY Load_D2_D3_D4_D5_D6_D7 @@ -1710,9 +1582,9 @@ CopyLoop EPILOG_BRANCH_REG x11 LEAF_END Load_D0_D1_D2_D3_D4_D5_D6_D7 - // X0 - routines array - // X1 - interpreter stack args location - // X2 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRetVoid PROLOG_SAVE_REG_PAIR fp, lr, #-16! sub sp, sp, x3 @@ -1720,14 +1592,15 @@ CopyLoop mov x9, x1 ldr x11, [x10], #8 blr x11 + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #16! ret lr NESTED_END CallJittedMethodRetVoid - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRetBuff PROLOG_SAVE_REG_PAIR fp, lr, #-16! sub sp, sp, x3 @@ -1736,14 +1609,15 @@ CopyLoop mov x8, x2 ldr x11, [x10], #8 blr x11 + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #16! - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRetBuff - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRetI8 PROLOG_SAVE_REG_PAIR fp, lr, #-32! str x2, [sp, #16] @@ -1754,14 +1628,15 @@ CopyLoop blr x11 ldr x2, [sp, #16] str x0, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #32! - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRetI8 - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRet2I8 PROLOG_SAVE_REG_PAIR fp, lr, #-32! str x2, [sp, #16] @@ -1772,14 +1647,15 @@ CopyLoop blr x11 ldr x2, [sp, #16] stp x0, x1, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #32! 
- ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet2I8 - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRetDouble PROLOG_SAVE_REG_PAIR fp, lr, #-32! str x2, [sp, #16] @@ -1790,14 +1666,15 @@ CopyLoop blr x11 ldr x2, [sp, #16] str d0, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #32! - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRetDouble - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRet2Double PROLOG_SAVE_REG_PAIR fp, lr, #-32! str x2, [sp, #16] @@ -1808,14 +1685,15 @@ CopyLoop blr x11 ldr x2, [sp, #16] stp d0, d1, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #32! - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet2Double - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRet3Double PROLOG_SAVE_REG_PAIR fp, lr, #-32! str x2, [sp, #16] @@ -1827,14 +1705,15 @@ CopyLoop ldr x2, [sp, #16] stp d0, d1, [x2], #16 str d2, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #32! - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet3Double - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRet4Double PROLOG_SAVE_REG_PAIR fp, lr, #-32! str x2, [sp, #16] @@ -1846,14 +1725,15 @@ CopyLoop ldr x2, [sp, #16] stp d0, d1, [x2], #16 stp d2, d3, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #32! - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet4Double - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRetFloat PROLOG_SAVE_REG_PAIR fp, lr, #-32! str x2, [sp, #16] @@ -1864,14 +1744,15 @@ CopyLoop blr x11 ldr x2, [sp, #16] str s0, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #32! 
- ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRetFloat - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRet2Float PROLOG_SAVE_REG_PAIR fp, lr, #-32! str x2, [sp, #16] @@ -1882,14 +1763,15 @@ CopyLoop blr x11 ldr x2, [sp, #16] stp s0, s1, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #32! - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet2Float - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRet3Float PROLOG_SAVE_REG_PAIR fp, lr, #-32! str x2, [sp, #16] @@ -1901,14 +1783,15 @@ CopyLoop ldr x2, [sp, #16] stp s0, s1, [x2], #8 str s2, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #32! - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet3Float - // X0 - routines array - // X1 - interpreter stack args location - // X2 - interpreter stack return value location - // X3 - stack arguments size (properly aligned) + ; X0 - routines array + ; X1 - interpreter stack args location + ; X2 - interpreter stack return value location + ; X3 - stack arguments size (properly aligned) NESTED_ENTRY CallJittedMethodRet4Float PROLOG_SAVE_REG_PAIR fp, lr, #-32! str x2, [sp, #16] @@ -1920,8 +1803,9 @@ CopyLoop ldr x2, [sp, #16] stp s0, s1, [x2], #8 stp s2, s3, [x2] + EPILOG_STACK_RESTORE EPILOG_RESTORE_REG_PAIR fp, lr, #32! - ret lr + EPILOG_RETURN NESTED_END CallJittedMethodRet4Float ; Must be at very end of file diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 352c4b51f52448..1b13f4f252b5c7 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -107,13 +107,6 @@ extern "C" void Load_R8(); extern "C" void Load_R8_R9(); extern "C" void Load_R9(); -extern "C" void Load_Ref_RDI(); -extern "C" void Load_Ref_RSI(); -extern "C" void Load_Ref_RDX(); -extern "C" void Load_Ref_RCX(); -extern "C" void Load_Ref_R8(); -extern "C" void Load_Ref_R9(); - PCODE GPRegsRoutines[] = { (PCODE)Load_RDI, // 00 @@ -517,13 +510,6 @@ PCODE GetFPRegRangeLoadRoutine(int x1, int x2) return FPRegsRoutines[index]; } -PCODE GetStackRangeLoadRoutine(int s1, int s2) -{ - // Stack range is not supported yet - assert(!"Stack range is not supported yet"); - return NULL; -} - extern "C" void CallJittedMethodRetVoid(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetDouble(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetI8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); @@ -547,30 +533,66 @@ extern "C" void CallJittedMethodRet3Float(PCODE *routines, int8_t*pArgs, int8_t* extern "C" void CallJittedMethodRet4Float(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); #endif // TARGET_ARM64 +// Generate the call stub for the given method. +// The returned call stub header must be freed by the caller using FreeCallStub. 
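+//
+// A note on how the Routines array is consumed at run time (summarizing the asm helpers above):
+// the CallJittedMethodRet* helper calls the first entry of the array; each Load_* routine then
+// pops its own entry (plus any immediate operand) off the array cursor (x10 on arm64, r11 on
+// x64) and tail-branches to the next entry; the final entry is the target method itself, whose
+// return lands back in the helper, which then stores any return value to the interpreter stack.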
CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD)
 {
+    CONTRACTL
+    {
+        NOTHROW;
+        MODE_ANY;
+        GC_NOTRIGGER;
+        FORBID_FAULT;
+        PRECONDITION(CheckPointer(pMD));
+    }
+    CONTRACTL_END
+
     MetaSig sig(pMD);
     ArgIterator argIt(&sig);
-    int ofs = 0;
-    DWORD arg = 0;
-    m_r1 = argIt.HasThis() ? 0 : NO_RANGE; // The "this" argument register is not enumerated by the arg iterator.
+
+    m_r1 = NoRange; // indicates that there is no active range of general purpose registers
     m_r2 = 0;
-    m_x1 = NO_RANGE; // indicates that there is no active range of FP registers
+    m_x1 = NoRange; // indicates that there is no active range of FP registers
     m_x2 = 0;
-    m_s1 = NO_RANGE; // indicates that there is no active range of stack arguments
+    m_s1 = NoRange; // indicates that there is no active range of stack arguments
     m_s2 = 0;
     m_routineIndex = 0;
     m_totalStackSize = 0;
+
     int numArgs = sig.NumFixedArgs() + (sig.HasThis() ? 1 : 0);
-
+
+    if (argIt.HasThis())
+    {
+        // The "this" argument register is not enumerated by the arg iterator, so
+        // we need to "inject" it here.
+#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64)
+        if (argIt.HasRetBuffArg())
+        {
+            // The return buffer on Windows AMD64 is passed in the first argument register, so the
+            // "this" argument is passed in the second argument register.
+            m_r1 = 1;
+        }
+        else
+#endif // TARGET_WINDOWS && TARGET_AMD64
+        {
+            // The "this" pointer is passed in the first argument register.
+            m_r1 = 0;
+        }
+    }
+
     // Allocate space for the routines. The size of the array is conservatively set to twice the number of arguments
     // plus one slot for the target pointer and reallocated to the real size at the end.
-    // Interpreter-TODO: handle OOM here and at the realloc
     m_pHeader = (CallStubHeader*)malloc(sizeof(CallStubHeader) + (2 * numArgs + 1) * sizeof(PCODE));
+    if (m_pHeader == NULL)
+    {
+        return NULL;
+    }
+
     PCODE *routines = m_pHeader->Routines;
 
-    for (; TransitionBlock::InvalidOffset != (ofs = argIt.GetNextOffset()); arg++)
+    int ofs;
+    while ((ofs = argIt.GetNextOffset()) != TransitionBlock::InvalidOffset)
     {
         ArgLocDesc argLocDesc;
         argIt.GetArgLoc(ofs, &argLocDesc);
@@ -628,15 +650,16 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD)
     }
 
     // All arguments were processed, but there is likely a pending range to store.
-    if (m_r1 != NO_RANGE)
+    // Process such a range if any.
+    if (m_r1 != NoRange)
     {
         routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2);
     }
-    else if (m_x1 != NO_RANGE)
+    else if (m_x1 != NoRange)
     {
         routines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2);
     }
-    else if (m_s1 != NO_RANGE)
+    else if (m_s1 != NoRange)
     {
         m_totalStackSize += m_s2 - m_s1 + 1;
         routines[m_routineIndex++] = (PCODE)Load_Stack;
@@ -821,38 +844,44 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD)
 
     m_pHeader->NumRoutines = m_routineIndex + 1; // Reserve one extra slot for the target method pointer
     m_pHeader->TotalStackSize = m_totalStackSize;
 
-    // resize the structure to its actually used size
-    return (CallStubHeader*)realloc(m_pHeader, sizeof(CallStubHeader) + m_pHeader->NumRoutines * sizeof(PCODE));
+    // resize the structure to the size it actually uses
+    m_pHeader = (CallStubHeader*)realloc(m_pHeader, sizeof(CallStubHeader) + m_pHeader->NumRoutines * sizeof(PCODE));
+    // In case the reallocation fails, this function returns NULL
+    return m_pHeader;
 }
 
+// Process the argument described by argLocDesc. This function is called for each argument in the method signature.
+// It updates the ranges of registers and emits entries into the routines array at discontinuities. void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDesc) { + LIMITED_METHOD_CONTRACT; + PCODE *routines = m_pHeader->Routines; // Check if we have a range of registers or stack arguments that we need to store because the current argument // terminates it. - if ((argLocDesc.m_cGenReg == 0) && (m_r1 != NO_RANGE)) + if ((argLocDesc.m_cGenReg == 0) && (m_r1 != NoRange)) { // No GP register is used to pass the current argument, but we already have a range of GP registers, // store the routine for the range routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2); - m_r1 = NO_RANGE; + m_r1 = NoRange; } - else if (((argLocDesc.m_cFloatReg == 0)) && (m_x1 != NO_RANGE)) + else if (((argLocDesc.m_cFloatReg == 0)) && (m_x1 != NoRange)) { // No floating point register is used to pass the current argument, but we already have a range of FP registers, // store the routine for the range routines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2); - m_x1 = NO_RANGE; + m_x1 = NoRange; } - else if ((argLocDesc.m_byteStackSize == 0) && (m_s1 != NO_RANGE)) + else if ((argLocDesc.m_byteStackSize == 0) && (m_s1 != NoRange)) { // No stack argument is used to pass the current argument, but we already have a range of stack arguments, // store the routine for the range m_totalStackSize += m_s2 - m_s1 + 1; routines[m_routineIndex++] = (PCODE)Load_Stack; routines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; - m_s1 = NO_RANGE; + m_s1 = NoRange; } if (argLocDesc.m_cGenReg != 0) @@ -860,12 +889,12 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe #ifndef UNIX_AMD64_ABI if (argIt.IsArgPassedByRef()) { - if (m_r1 != NO_RANGE) + if (m_r1 != NoRange) { // The args passed by reference use a separate routine, so we need to flush the existing range // of general purpose registers if we have one. routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2); - m_r1 = NO_RANGE; + m_r1 = NoRange; } // Arguments passed by reference are handled separately, because the interpreter stores the value types on its stack by value. // So the argument loading routine needs to load the address of the argument. 
To avoid an explosion of the number of routines,
@@ -876,7 +905,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
     else
 #endif // UNIX_AMD64_ABI
     {
-        if (m_r1 == NO_RANGE) // No active range yet
+        if (m_r1 == NoRange) // No active range yet
         {
             // Start a new range
             m_r1 = argLocDesc.m_idxGenReg;
@@ -899,7 +928,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
 
     if (argLocDesc.m_cFloatReg != 0)
     {
-        if (m_x1 == NO_RANGE) // No active range yet
+        if (m_x1 == NoRange) // No active range yet
         {
             // Start a new range
             m_x1 = argLocDesc.m_idxFloatReg;
@@ -921,15 +950,23 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
 
     if (argLocDesc.m_byteStackSize != 0)
     {
-        if (m_s1 == NO_RANGE) // No active range yet
+        if (m_s1 == NoRange) // No active range yet
         {
             // Start a new range
             m_s1 = argLocDesc.m_byteStackIndex;
             m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1;
         }
-        else if (argLocDesc.m_byteStackIndex == m_s2 + 1)
+        else if (argLocDesc.m_byteStackIndex == m_s2 + 1
+#ifdef TARGET_APPLE
+                 && ((m_s2 - m_s1 + 1) >= sizeof(void*))
+#endif // TARGET_APPLE
+                )
         {
-            // Extend an existing range
+            // Extend an existing range, but only if the previous range was at least pointer size large.
+            // The only case when this is not true is on arm64 Apple OSes where types smaller than 8 bytes
+            // are passed on the stack in a packed manner. We process such arguments one by one to avoid
+            // an explosion of the number of routines. The interpreter stack has pointer-size alignment for
+            // all types of arguments.
             m_s2 += argLocDesc.m_byteStackSize;
         }
         else
@@ -944,8 +981,19 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
     }
 }
 
+// Free the call stub header generated by GenerateCallStub.
 void CallStubGenerator::FreeCallStub(CallStubHeader *pHeader)
 {
+    CONTRACTL
+    {
+        NOTHROW;
+        MODE_ANY;
+        GC_NOTRIGGER;
+        FORBID_FAULT;
+        PRECONDITION(CheckPointer(pHeader));
+    }
+    CONTRACTL_END
+
     free(pHeader);
 }
 
diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h
index 9266c9ec5ebe76..0549cbbba5acbd 100644
--- a/src/coreclr/vm/callstubgenerator.h
+++ b/src/coreclr/vm/callstubgenerator.h
@@ -8,36 +8,61 @@
 
 class MethodDesc;
 
+// This is a header for a call stub that translates arguments from the interpreter stack to the CPU registers and native
+// stack, invokes the target method, and translates the return value back to the interpreter stack.
 struct CallStubHeader
 {
+    // Number of routines in the Routines array. The last one is the target method to call.
     int NumRoutines;
+    // Total stack size used for the arguments.
     int TotalStackSize;
-    void (*Invoke)(PCODE*, int8_t*, int8_t*, int);
+    // This is a pointer to a helper function that invokes the target method. There are several
+    // versions of this function, depending on the return type of the target method.
+    void (*Invoke)(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize);
+    // This is an array of routines that translate the arguments from the interpreter stack to the CPU registers and native stack.
     PCODE Routines[0];
 
+    // Set the address of the target method to call.
     void SetTarget(PCODE target)
     {
+        LIMITED_METHOD_CONTRACT;
+
+        _ASSERTE(target != 0);
         Routines[NumRoutines - 1] = target;
     }
 };
 
+// This class generates the call stub for a given method. It uses the calling convention of the target CPU to determine
+// how to translate the arguments from the interpreter stack to the CPU registers and native stack.
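+//
+// For illustration (hypothetical signature, not part of this change): for
+//     static double Lerp(double a, double b, double t)
+// on arm64, all three arguments land in consecutive FP registers, so the generated stub
+// degenerates to a single FP range-load routine plus the target slot, with Invoke set to the
+// CallJittedMethodRetDouble helper:
+//     Routines = { <load d0-d2 range routine>, <target PCODE> }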
class CallStubGenerator
 {
-    static const int NO_RANGE = -1;
+    // When m_r1, m_x1, or m_s1 is set to NoRange, it means that there is no active range of registers or stack arguments.
+    static const int NoRange = -1;
+    // Current sequential range of general purpose registers used to pass arguments.
     int m_r1;
     int m_r2;
+    // Current sequential range of floating point registers used to pass arguments.
     int m_x1;
     int m_x2;
+    // Current sequential range of offsets of stack arguments used to pass arguments.
     int m_s1;
     int m_s2;
+    // The index of the next routine to store in the Routines array.
     int m_routineIndex;
+    // The total stack size used for the arguments.
     int m_totalStackSize;
+    // The header of the call stub that is being generated.
     CallStubHeader *m_pHeader;
 
+    // Process the argument described by argLocDesc. This function is called for each argument in the method signature.
     void ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDesc);
 public:
+    // Generate the call stub for the given method.
+    // The returned call stub header must be freed by the caller using FreeCallStub.
+    // The return value is NULL in case of an OOM.
     CallStubHeader *GenerateCallStub(MethodDesc *pMD);
+    // Free the call stub header generated by GenerateCallStub.
     static void FreeCallStub(CallStubHeader *pHeader);
 };
 
diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp
index e8888e9e3a1a3d..6fd3a237908b89 100644
--- a/src/coreclr/vm/interpexec.cpp
+++ b/src/coreclr/vm/interpexec.cpp
@@ -10,18 +10,36 @@
 
 void InvokeCompiledMethod(MethodDesc *pMD, int8_t *pArgs, int8_t *pRet)
 {
+    CONTRACTL
+    {
+        THROWS;
+        MODE_COOPERATIVE;
+        GC_NOTRIGGER;
+        FORBID_FAULT;
+        PRECONDITION(CheckPointer(pMD));
+        PRECONDITION(CheckPointer(pArgs));
+        PRECONDITION(CheckPointer(pRet));
+    }
+    CONTRACTL_END
+
     CallStubGenerator callStubGenerator;
-    CallStubHeader *pHeader = pMD->GetCallStubHeader();
+    CallStubHeader *pHeader = pMD->GetCallStub();
     if (pHeader == NULL)
     {
         pHeader = callStubGenerator.GenerateCallStub(pMD);
-        HRESULT hr = pMD->SetCallStubHeader(pHeader);
+        if (pHeader == NULL)
+        {
+            // Allocating the header failed due to OOM
+            COMPlusThrowOM();
+        }
+
+        HRESULT hr = pMD->SetCallStub(pHeader);
         if (hr == S_FALSE)
         {
             // We have lost the race for generating the header, so we need to free the one we generated
             // and use the one that was generated by another thread.
             CallStubGenerator::FreeCallStub(pHeader);
-            pHeader = pMD->GetCallStubHeader();
+            pHeader = pMD->GetCallStub();
         }
     }
 
@@ -1108,13 +1126,12 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr
         }
     }
 CALL_TARGET_IP:
-    // HACK: we need a fast way to check of the targetIp is an interpreter code or not.
+    // Interpreter-TODO: we need a fast way to check whether targetIp is interpreter code or not.
     // Probably use a tagged pointer for interpreter code and a normal pointer for JIT/R2R code.
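    // For example (hypothetical encoding, not implemented here): with the low bit of the pointer
    // used as the interpreter tag, the check below would collapse to something like
    //     if (((uintptr_t)targetIp & 1) != 0) { /* interpreter code, strip the tag and dispatch */ }
    //     else                                { /* JIT/R2R code, go through the call stub */ }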
EECodeInfo codeInfo((PCODE)targetIp);
     if (!codeInfo.IsValid())
     {
-        printf("Attempted to execute native code from interpreter.\n");
-        assert(0);
+        EEPOLICY_HANDLE_FATAL_ERROR_WITH_MESSAGE(COR_E_EXECUTIONENGINE, W("Attempted to execute native code from interpreter"));
     }
     else if (codeInfo.GetCodeManager() != ExecutionManager::GetInterpreterCodeManager())
     {
diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp
index 3e1e9697827165..1383ddc1486837 100644
--- a/src/coreclr/vm/method.cpp
+++ b/src/coreclr/vm/method.cpp
@@ -254,7 +254,7 @@ HRESULT MethodDesc::SetMethodDescVersionState(PTR_MethodDescVersioningState stat
 }
 
 #ifdef FEATURE_INTERPRETER
-HRESULT MethodDesc::SetCallStubHeader(CallStubHeader *pHeader)
+HRESULT MethodDesc::SetCallStub(CallStubHeader *pHeader)
 {
     WRAPPER_NO_CONTRACT;
 
@@ -262,20 +262,20 @@ HRESULT MethodDesc::SetCallStubHeader(CallStubHeader *pHeader)
     IfFailRet(EnsureCodeDataExists(NULL));
 
     _ASSERTE(m_codeData != NULL);
-    if (InterlockedCompareExchangeT(&m_codeData->CallStubHeader, pHeader, NULL) != NULL)
+    if (InterlockedCompareExchangeT(&m_codeData->CallStub, pHeader, NULL) != NULL)
         return S_FALSE;
 
     return S_OK;
 }
 
-CallStubHeader *MethodDesc::GetCallStubHeader()
+CallStubHeader *MethodDesc::GetCallStub()
 {
     WRAPPER_NO_CONTRACT;
 
     PTR_MethodDescCodeData codeData = VolatileLoadWithoutBarrier(&m_codeData);
     if (codeData == NULL)
         return NULL;
 
-    return VolatileLoadWithoutBarrier(&codeData->CallStubHeader);
+    return VolatileLoadWithoutBarrier(&codeData->CallStub);
 }
 
 #endif // FEATURE_INTERPRETER
diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp
index 4b2ddeb3f161f0..8f9477164b9761 100644
--- a/src/coreclr/vm/method.hpp
+++ b/src/coreclr/vm/method.hpp
@@ -234,7 +234,7 @@ struct MethodDescCodeData final
     PTR_MethodDescVersioningState VersioningState;
     PCODE TemporaryEntryPoint;
 #ifdef FEATURE_INTERPRETER
-    CallStubHeader *CallStubHeader;
+    CallStubHeader *CallStub;
 #endif // FEATURE_INTERPRETER
 };
 using PTR_MethodDescCodeData = DPTR(MethodDescCodeData);
@@ -1821,8 +1821,8 @@ class MethodDesc
     HRESULT SetMethodDescVersionState(PTR_MethodDescVersioningState state);
 
 #ifdef FEATURE_INTERPRETER
-    HRESULT SetCallStubHeader(CallStubHeader *pHeader);
-    CallStubHeader *GetCallStubHeader();
+    HRESULT SetCallStub(CallStubHeader *pHeader);
+    CallStubHeader *GetCallStub();
 #endif // FEATURE_INTERPRETER
 
 #endif //!DACCESS_COMPILE
diff --git a/src/tests/JIT/interpreter/Interpreter.cs b/src/tests/JIT/interpreter/Interpreter.cs
index 273735f596f45f..76fc3cf11b8962 100644
--- a/src/tests/JIT/interpreter/Interpreter.cs
+++ b/src/tests/JIT/interpreter/Interpreter.cs
@@ -228,12 +228,16 @@ static TestStruct3d TestCallingConvention11()
         return s;
     }
 
-
-    static int Main(string[] args)
+    static void TestCallingConvention12(byte a, byte b, byte c, byte d, byte e, byte f, byte g, byte h, byte i, char j, int k, int l, long m)
     {
-        jitField1 = 42;
-        jitField2 = 43;
+        Console.WriteLine("TestCallingConvention12: a = {0}, b = {1}, c = {2}, d = {3}, e = {4}, f = {5}, g = {6}, h = {7}, i = {8}, j = {9}, k = {10}, l = {11}, m = {12}", a, b, c, d, e, f, g, h, i, j, k, l, m);
+    }
 
+    // This method is invoked before we start interpreting anything, so the methods invoked in it will be jitted.
+    // This is necessary for the calling convention tests that test calls from the interpreter to the JITted code
+    // to actually test things.
+ static void EnsureCallingConventionTestTargetMethodsAreJitted() + { TestCallingConvention0(1, 2.0f, 3, 4.0, 5, 6.0); TestStruct s = new TestStruct(); @@ -278,6 +282,16 @@ static int Main(string[] args) Console.WriteLine(s11.b); Console.WriteLine(s11.c); + TestCallingConvention12(1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 10, 11, 12); + } + + static int Main(string[] args) + { + jitField1 = 42; + jitField2 = 43; + + EnsureCallingConventionTestTargetMethodsAreJitted(); + RunInterpreterTests(); return 100; } @@ -302,7 +316,7 @@ public static void RunInterpreterTests() Console.WriteLine(s2.b); #if VECTOR_ALIGNMENT_WORKS - // TODO: enable this again after fixing the alignment for the Vector2 struct and similar ones + // Interpreter-TODO: enable this again after fixing the alignment for the Vector2 struct and similar ones Vector2 v = TestCallingConvention3(); Console.WriteLine("TestCallingConvention: v = "); Console.WriteLine(v[0]); @@ -364,6 +378,8 @@ public static void RunInterpreterTests() Console.WriteLine(s11.b); Console.WriteLine(s11.c); + TestCallingConvention12(1, 2, 3, 4, 5, 6, 7, 8, 9, 'a', 10, 11, 12); + // Console.WriteLine("Run interp tests"); if (SumN(50) != 1275) Environment.FailFast(null); From 6ac574e1768cbbe18c2f0abfbdebf4b396b124a6 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Tue, 13 May 2025 15:35:38 +0200 Subject: [PATCH 04/12] Fix test build break and cleanup Apple arm64 stack args handling --- src/coreclr/vm/arm64/asmhelpers.S | 58 +++++++------ src/coreclr/vm/callstubgenerator.cpp | 101 +++++++++++++---------- src/tests/JIT/interpreter/Interpreter.cs | 2 + 3 files changed, 95 insertions(+), 66 deletions(-) diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 80192a5089a29e..2d6736d4d94a62 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -871,43 +871,53 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT // Copy arguments from the interpreter stack to the processor stack -// Except for Apple target, the CPU stack slots are aligned to -// pointer size. +// The CPU stack slots are aligned to pointer size. 
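+// The 8-byte entry that follows Load_Stack in the routines array is a packed descriptor:
+//     low 32 bits  - offset from SP of the first CPU stack slot to fill
+//     high 32 bits - number of bytes to copy (a multiple of 8)
+// matching the ((int64_t)size << 32) | offset value emitted by the call stub generator.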
LEAF_ENTRY Load_Stack
     ldr w14, [x10], #4 // SP offset
     ldr w12, [x10], #4 // number of stack slots
     add x14, sp, x14
-#ifdef TARGET_APPLE
-    cmp x12, #8
-    blt LOCAL_LABEL(LessThan8Bytes)
-#endif // TARGET_APPLE
 LOCAL_LABEL(CopyLoop):
     ldr x13, [x9], #8
     str x13, [x14], #8
     subs x12, x12, #8
     bne LOCAL_LABEL(CopyLoop)
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Stack
+
 #ifdef TARGET_APPLE
-    b LOCAL_LABEL(DoneLoop)
-LOCAL_LABEL(LessThan8Bytes):
-    cmp x12, #4
-    blt LOCAL_LABEL(LessThan4Bytes)
-    ldr w13, [x9], #8
-    str w13, [x14], #4
-    b LOCAL_LABEL(DoneLoop)
-LOCAL_LABEL(LessThan4Bytes):
-    cmp x12, #2
-    blt LOCAL_LABEL(LessThan2Bytes)
-    ldrh w13, [x9], #8
-    strh w13, [x14], #2
-    b LOCAL_LABEL(DoneLoop)
-LOCAL_LABEL(LessThan2Bytes):
+
+// Copy a single-byte argument from the interpreter stack to the processor stack
+LEAF_ENTRY Load_Stack_1B
+    ldr x14, [x10], #8 // SP offset
+    add x14, sp, x14
     ldrb w13, [x9], #8
-    strb w13, [x14], #1
-LOCAL_LABEL(DoneLoop):
-#endif // TARGET_APPLE
+    strb w13, [x14]
     ldr x11, [x10], #8
     EPILOG_BRANCH_REG x11
-LEAF_END Load_Stack
+LEAF_END Load_Stack_1B
+
+// Copy a two-byte argument from the interpreter stack to the processor stack
+LEAF_ENTRY Load_Stack_2B
+    ldr x14, [x10], #8 // SP offset
+    add x14, sp, x14
+    ldrh w13, [x9], #8
+    strh w13, [x14]
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Stack_2B
+
+// Copy a four-byte argument from the interpreter stack to the processor stack
+LEAF_ENTRY Load_Stack_4B
+    ldr x14, [x10], #8 // SP offset
+    add x14, sp, x14
+    ldr w13, [x9], #8
+    str w13, [x14]
+    ldr x11, [x10], #8
+    EPILOG_BRANCH_REG x11
+LEAF_END Load_Stack_4B
+
+#endif // TARGET_APPLE
 
 // Routines for passing value type arguments by reference in general purpose registers X0..X7
 
diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp
index 1b13f4f252b5c7..a4070d4d5ff043 100644
--- a/src/coreclr/vm/callstubgenerator.cpp
+++ b/src/coreclr/vm/callstubgenerator.cpp
@@ -7,6 +7,12 @@
 
 extern "C" void Load_Stack();
 
+#if defined(TARGET_APPLE) && defined(TARGET_ARM64)
+extern "C" void Load_Stack_1B();
+extern "C" void Load_Stack_2B();
+extern "C" void Load_Stack_4B();
+#endif // TARGET_APPLE && TARGET_ARM64
+
 #ifdef TARGET_AMD64
 
 #ifdef TARGET_WINDOWS
@@ -886,43 +892,24 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
 
     if (argLocDesc.m_cGenReg != 0)
     {
-#ifndef UNIX_AMD64_ABI
-        if (argIt.IsArgPassedByRef())
+        if (m_r1 == NoRange) // No active range yet
         {
-            if (m_r1 != NoRange)
-            {
-                // The args passed by reference use a separate routine, so we need to flush the existing range
-                // of general purpose registers if we have one.
-                routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2);
-                m_r1 = NoRange;
-            }
+            // Start a new range
+            m_r1 = argLocDesc.m_idxGenReg;
+            m_r2 = m_r1 + argLocDesc.m_cGenReg - 1;
+        }
+        else if (argLocDesc.m_idxGenReg == m_r2 + 1 && !argIt.IsArgPassedByRef())
+        {
+            // Extend an existing range, but only if the argument is not passed by reference.
             // Arguments passed by reference are handled separately, because the interpreter stores the value types on its stack by value.
-            // So the argument loading routine needs to load the address of the argument. To avoid an explosion of the number of routines,
-            // we always process a single argument passed by reference using a single routine.
-            routines[m_routineIndex++] = GetGPRegRefLoadRoutine(argLocDesc.m_idxGenReg);
-            routines[m_routineIndex++] = argIt.GetArgSize();
+            m_r2 += argLocDesc.m_cGenReg;
         }
         else
-#endif // UNIX_AMD64_ABI
         {
-            if (m_r1 == NoRange) // No active range yet
-            {
-                // Start a new range
-                m_r1 = argLocDesc.m_idxGenReg;
-                m_r2 = m_r1 + argLocDesc.m_cGenReg - 1;
-            }
-            else if (argLocDesc.m_idxGenReg == m_r2 + 1)
-            {
-                // Extend an existing range
-                m_r2 += argLocDesc.m_cGenReg;
-            }
-            else
-            {
-                // Discontinuous range - store a routine for the current and start a new one
-                routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2);
-                m_r1 = argLocDesc.m_idxGenReg;
-                m_r2 = m_r1 + argLocDesc.m_cGenReg - 1;
-            }
+            // Discontinuous range - store a routine for the current and start a new one
+            routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2);
+            m_r1 = argLocDesc.m_idxGenReg;
+            m_r2 = m_r1 + argLocDesc.m_cGenReg - 1;
         }
     }
 
@@ -956,17 +943,12 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
             m_s1 = argLocDesc.m_byteStackIndex;
             m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1;
         }
-        else if (argLocDesc.m_byteStackIndex == m_s2 + 1
-#ifdef TARGET_APPLE
-                 && ((m_s2 - m_s1 + 1) >= sizeof(void*))
-#endif // TARGET_APPLE
-                )
+        else if ((argLocDesc.m_byteStackIndex == m_s2 + 1) && (argLocDesc.m_byteStackSize >= 8))
         {
-            // Extend an existing range, but only if the previous range was at least pointer size large.
-            // The only case when this is not true is on arm64 Apple OSes where types smaller than 8 bytes
-            // are passed on the stack in a packed manner. We process such arguments one by one to avoid
-            // an explosion of the number of routines. The interpreter stack has pointer-size alignment for
-            // all types of arguments.
+            // Extend an existing range, but only if the argument is at least pointer size large.
+            // The only case when this is not true is on Apple ARM64 OSes where primitive types smaller
+            // than 8 bytes are passed on the stack in a packed manner. We process such arguments one by
+            // one to avoid an explosion of the number of routines.
             m_s2 += argLocDesc.m_byteStackSize;
         }
         else
@@ -978,6 +960,41 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
             m_s1 = argLocDesc.m_byteStackIndex;
             m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1;
         }
+
+#if defined(TARGET_APPLE) && defined(TARGET_ARM64)
+        // Process primitive types smaller than 8 bytes separately on Apple ARM64
+        if (argLocDesc.m_byteStackSize < 8)
+        {
+            switch (argLocDesc.m_byteStackSize)
+            {
+                case 1:
+                    routines[m_routineIndex++] = (PCODE)Load_Stack_1B;
+                    break;
+                case 2:
+                    routines[m_routineIndex++] = (PCODE)Load_Stack_2B;
+                    break;
+                case 4:
+                    routines[m_routineIndex++] = (PCODE)Load_Stack_4B;
+                    break;
+                default:
+                    _ASSERTE(!"Unexpected stack argument size");
+                    break;
+            }
+            routines[m_routineIndex++] = m_s1;
+            m_s1 = NO_RANGE;
+        }
+#endif // TARGET_APPLE && TARGET_ARM64
+
+        // Arguments passed by reference are handled separately, because the interpreter stores the value types on its stack by value.
+        // So the argument loading routine needs to load the address of the argument. To avoid an explosion of the number of routines,
+        // we always process a single argument passed by reference using a single routine.
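+        // Illustration (hypothetical 24-byte value type passed by reference in the first GP
+        // register): the emitted pair would be { Load_Ref_X0 on arm64 / Load_Ref_RCX on Windows x64, 24 };
+        // the routine passes the interpreter-stack address of the value in the register and then
+        // advances the interpreter argument pointer by the value size stored in the second slot.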
+        if (argIt.IsArgPassedByRef())
+        {
+            _ASSERTE(argLocDesc.m_cGenReg == 1);
+            routines[m_routineIndex++] = GetGPRegRefLoadRoutine(argLocDesc.m_idxGenReg);
+            routines[m_routineIndex++] = argIt.GetArgSize();
+            m_r1 = NO_RANGE;
+        }
     }
 }
 
diff --git a/src/tests/JIT/interpreter/Interpreter.cs b/src/tests/JIT/interpreter/Interpreter.cs
index 76fc3cf11b8962..dcd5c910dd7a98 100644
--- a/src/tests/JIT/interpreter/Interpreter.cs
+++ b/src/tests/JIT/interpreter/Interpreter.cs
@@ -75,6 +75,8 @@ public StructWithRefs(int val1, int val2)
         o1 = new MyObj(val1);
         o2 = new MyObj(val2);
     }
+}
+
 public struct TestStruct
 {
     public int a;
 
From 8956ddfb93fe2fd1e38ed140a1741df607155de5 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Tue, 13 May 2025 17:00:32 +0200
Subject: [PATCH 05/12] Fix build break

---
 src/coreclr/vm/callstubgenerator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp
index a4070d4d5ff043..a384d8ef786148 100644
--- a/src/coreclr/vm/callstubgenerator.cpp
+++ b/src/coreclr/vm/callstubgenerator.cpp
@@ -981,7 +981,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
                     break;
             }
             routines[m_routineIndex++] = m_s1;
-            m_s1 = NO_RANGE;
+            m_s1 = NoRange;
         }
 #endif // TARGET_APPLE && TARGET_ARM64
 
@@ -993,7 +993,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
         _ASSERTE(argLocDesc.m_cGenReg == 1);
         routines[m_routineIndex++] = GetGPRegRefLoadRoutine(argLocDesc.m_idxGenReg);
         routines[m_routineIndex++] = argIt.GetArgSize();
-        m_r1 = NO_RANGE;
+        m_r1 = NoRange;
     }
 }
 
From d787c4ca25ae2a6908f8cbbf4dc05240aadf4d8f Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Tue, 13 May 2025 17:28:32 +0200
Subject: [PATCH 06/12] Fix Unix x64 build break

---
 src/coreclr/vm/callstubgenerator.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp
index a384d8ef786148..2ae2fc6fc8ebbf 100644
--- a/src/coreclr/vm/callstubgenerator.cpp
+++ b/src/coreclr/vm/callstubgenerator.cpp
@@ -985,6 +985,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
     }
 #endif // TARGET_APPLE && TARGET_ARM64
 
+#ifndef UNIX_AMD64_ABI
     // Arguments passed by reference are handled separately, because the interpreter stores the value types on its stack by value.
     // So the argument loading routine needs to load the address of the argument. To avoid an explosion of the number of routines,
     // we always process a single argument passed by reference using a single routine.
@@ -995,6 +996,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
         _ASSERTE(argLocDesc.m_cGenReg == 1);
         routines[m_routineIndex++] = GetGPRegRefLoadRoutine(argLocDesc.m_idxGenReg);
         routines[m_routineIndex++] = argIt.GetArgSize();
         m_r1 = NoRange;
     }
+#endif // UNIX_AMD64_ABI
 }
 
From 59360a696f62efc571b9fbdf585fed9e4fd9efd4 Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Wed, 14 May 2025 00:00:07 +0200
Subject: [PATCH 07/12] Fix some contracts and a bug in args by ref introduced in a previous commit

---
 src/coreclr/vm/callstubgenerator.cpp | 34 ++++++++++++----------------
 src/coreclr/vm/interpexec.cpp        |  4 +---
 2 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp
index 2ae2fc6fc8ebbf..280d9e3e264f6d 100644
--- a/src/coreclr/vm/callstubgenerator.cpp
+++ b/src/coreclr/vm/callstubgenerator.cpp
@@ -543,15 +543,9 @@ extern "C" void CallJittedMethodRet4Float(PCODE *routines, int8_t*pArgs, int8_t*
 // The returned call stub header must be freed by the caller using FreeCallStub.
 CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD)
 {
-    CONTRACTL
-    {
-        NOTHROW;
-        MODE_ANY;
-        GC_NOTRIGGER;
-        FORBID_FAULT;
-        PRECONDITION(CheckPointer(pMD));
-    }
-    CONTRACTL_END
+    STANDARD_VM_CONTRACT;
+
+    _ASSERTE(pMD != NULL);
 
     MetaSig sig(pMD);
     ArgIterator argIt(&sig);
@@ -984,20 +978,20 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
             m_s1 = NoRange;
         }
 #endif // TARGET_APPLE && TARGET_ARM64
+    }
 
 #ifndef UNIX_AMD64_ABI
-        // Arguments passed by reference are handled separately, because the interpreter stores the value types on its stack by value.
-        // So the argument loading routine needs to load the address of the argument. To avoid an explosion of the number of routines,
-        // we always process a single argument passed by reference using a single routine.
-        if (argIt.IsArgPassedByRef())
-        {
-            _ASSERTE(argLocDesc.m_cGenReg == 1);
-            routines[m_routineIndex++] = GetGPRegRefLoadRoutine(argLocDesc.m_idxGenReg);
-            routines[m_routineIndex++] = argIt.GetArgSize();
-            m_r1 = NoRange;
-        }
-#endif // UNIX_AMD64_ABI
+    // Arguments passed by reference are handled separately, because the interpreter stores the value types on its stack by value.
+    // So the argument loading routine needs to load the address of the argument. To avoid an explosion of the number of routines,
+    // we always process a single argument passed by reference using a single routine.
+    if (argIt.IsArgPassedByRef())
+    {
+        _ASSERTE(argLocDesc.m_cGenReg == 1);
+        routines[m_routineIndex++] = GetGPRegRefLoadRoutine(argLocDesc.m_idxGenReg);
+        routines[m_routineIndex++] = argIt.GetArgSize();
+        m_r1 = NoRange;
     }
diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp
index 6fd3a237908b89..f8ece4471263e3 100644
--- a/src/coreclr/vm/interpexec.cpp
+++ b/src/coreclr/vm/interpexec.cpp
@@ -13,9 +13,7 @@ void InvokeCompiledMethod(MethodDesc *pMD, int8_t *pArgs, int8_t *pRet)
     CONTRACTL
     {
         THROWS;
-        MODE_COOPERATIVE;
-        GC_NOTRIGGER;
-        FORBID_FAULT;
+        MODE_ANY;
         PRECONDITION(CheckPointer(pMD));
         PRECONDITION(CheckPointer(pArgs));
         PRECONDITION(CheckPointer(pRet));
 
From ed393bc938735500c0192520259a55f0652c507f Mon Sep 17 00:00:00 2001
From: Jan Vorlicek
Date: Wed, 14 May 2025 16:46:43 +0200
Subject: [PATCH 08/12] Move to allocations from LoaderHeap

---
 src/coreclr/vm/callstubgenerator.cpp | 122 +++++++++++++--------------
 src/coreclr/vm/callstubgenerator.h   |  19 ++++-
 2 files changed, 75 insertions(+), 66 deletions(-)

diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp
index 280d9e3e264f6d..447cd154d855ca 100644
--- a/src/coreclr/vm/callstubgenerator.cpp
+++ b/src/coreclr/vm/callstubgenerator.cpp
@@ -543,7 +543,12 @@ extern "C" void CallJittedMethodRet4Float(PCODE *routines, int8_t*pArgs, int8_t*
 // The returned call stub header must be freed by the caller using FreeCallStub.
 CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD)
 {
-    STANDARD_VM_CONTRACT;
+    CONTRACTL
+    {
+        THROWS;
+        MODE_ANY;
+    }
+    CONTRACTL_END
 
     _ASSERTE(pMD != NULL);
 
@@ -582,14 +587,7 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD)
 
     // Allocate space for the routines. The size of the array is conservatively set to twice the number of arguments
     // plus one slot for the target pointer and reallocated to the real size at the end.
-
-    m_pHeader = (CallStubHeader*)malloc(sizeof(CallStubHeader) + (2 * numArgs + 1) * sizeof(PCODE));
-    if (m_pHeader == NULL)
-    {
-        return NULL;
-    }
-
-    PCODE *routines = m_pHeader->Routines;
+    PCODE *pRoutines = (PCODE*)alloca(sizeof(CallStubHeader) + (numArgs * 2 + 1) * sizeof(PCODE));
 
     int ofs;
     while ((ofs = argIt.GetNextOffset()) != TransitionBlock::InvalidOffset)
     {
         ArgLocDesc argLocDesc;
         argIt.GetArgLoc(ofs, &argLocDesc);
@@ -639,13 +637,13 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD)
                     argLocDesc.m_byteStackIndex += 8;
                 }
             }
-            ProcessArgument(argIt, argLocDescEightByte);
+            ProcessArgument(argIt, argLocDescEightByte, pRoutines);
         }
     }
     else
 #endif // UNIX_AMD64_ABI
     {
-        ProcessArgument(argIt, argLocDesc);
+        ProcessArgument(argIt, argLocDesc, pRoutines);
     }
 }
 
@@ -653,24 +651,24 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD)
     // All arguments were processed, but there is likely a pending range to store.
     // Process such a range if any.
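    // For illustration (hypothetical signature): for static int Add(int a, int b) on Windows x64,
    // both arguments extend one GP range inside the loop, so no routine is emitted there at all and
    // this final flush produces the single Load_RCX_RDX entry of the stub.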
if (m_r1 != NoRange) { - routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2); + pRoutines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2); } else if (m_x1 != NoRange) { - routines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2); + pRoutines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2); } else if (m_s1 != NoRange) { m_totalStackSize += m_s2 - m_s1 + 1; - routines[m_routineIndex++] = (PCODE)Load_Stack; - routines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; + pRoutines[m_routineIndex++] = (PCODE)Load_Stack; + pRoutines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; } - m_totalStackSize = ALIGN_UP(m_totalStackSize, 16); // Align the stack to 16 bytes + CallStubHeader::InvokeFunctionPtr pInvokeFunction = NULL; if (argIt.HasRetBuffArg()) { - m_pHeader->Invoke = CallJittedMethodRetBuff; + pInvokeFunction = CallJittedMethodRetBuff; } else { @@ -700,14 +698,14 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) case ELEMENT_TYPE_ARRAY: case ELEMENT_TYPE_SZARRAY: case ELEMENT_TYPE_FNPTR: - m_pHeader->Invoke = CallJittedMethodRetI8; + pInvokeFunction = CallJittedMethodRetI8; break; case ELEMENT_TYPE_R4: case ELEMENT_TYPE_R8: - m_pHeader->Invoke = CallJittedMethodRetDouble; + pInvokeFunction = CallJittedMethodRetDouble; break; case ELEMENT_TYPE_VOID: - m_pHeader->Invoke = CallJittedMethodRetVoid; + pInvokeFunction = CallJittedMethodRetVoid; break; case ELEMENT_TYPE_VALUETYPE: #ifdef TARGET_AMD64 @@ -715,12 +713,12 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) if (thReturnValueType.AsMethodTable()->IsIntrinsicType()) { // E.g. Vector2 - m_pHeader->Invoke = CallJittedMethodRetDouble; + pInvokeFunction = CallJittedMethodRetDouble; } else { // POD structs smaller than 64 bits are returned in rax - m_pHeader->Invoke = CallJittedMethodRetI8; + pInvokeFunction = CallJittedMethodRetI8; } #else // TARGET_WINDOWS if (thReturnValueType.AsMethodTable()->IsRegPassedStruct()) @@ -728,11 +726,11 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) UINT fpReturnSize = argIt.GetFPReturnSize(); if (fpReturnSize == 0) { - m_pHeader->Invoke = CallJittedMethodRetI8; + pInvokeFunction = CallJittedMethodRetI8; } else if (fpReturnSize == 8) { - m_pHeader->Invoke = CallJittedMethodRetDouble; + pInvokeFunction = CallJittedMethodRetDouble; } else { @@ -743,16 +741,16 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) switch (fpReturnSize & 0x3) { case 0: - m_pHeader->Invoke = CallJittedMethodRetI8I8; + pInvokeFunction = CallJittedMethodRetI8I8; break; case 1: - m_pHeader->Invoke = CallJittedMethodRetDoubleI8; + pInvokeFunction = CallJittedMethodRetDoubleI8; break; case 2: - m_pHeader->Invoke = CallJittedMethodRetI8Double; + pInvokeFunction = CallJittedMethodRetI8Double; break; case 3: - m_pHeader->Invoke = CallJittedMethodRetDoubleDouble; + pInvokeFunction = CallJittedMethodRetDoubleDouble; break; } } @@ -772,16 +770,16 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) switch (thReturnValueType.GetSize()) { case 4: - m_pHeader->Invoke = CallJittedMethodRetFloat; + pInvokeFunction = CallJittedMethodRetFloat; break; case 8: - m_pHeader->Invoke = CallJittedMethodRet2Float; + pInvokeFunction = CallJittedMethodRet2Float; break; case 12: - m_pHeader->Invoke = CallJittedMethodRet3Float; + pInvokeFunction = CallJittedMethodRet3Float; break; case 16: - m_pHeader->Invoke = CallJittedMethodRet4Float; + pInvokeFunction = CallJittedMethodRet4Float; break; 
default: _ASSERTE(!"Should not get here"); @@ -792,16 +790,16 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) switch (thReturnValueType.GetSize()) { case 8: - m_pHeader->Invoke = CallJittedMethodRetDouble; + pInvokeFunction = CallJittedMethodRetDouble; break; case 16: - m_pHeader->Invoke = CallJittedMethodRet2Double; + pInvokeFunction = CallJittedMethodRet2Double; break; case 24: - m_pHeader->Invoke = CallJittedMethodRet3Double; + pInvokeFunction = CallJittedMethodRet3Double; break; case 32: - m_pHeader->Invoke = CallJittedMethodRet4Double; + pInvokeFunction = CallJittedMethodRet4Double; break; default: _ASSERTE(!"Should not get here"); @@ -821,10 +819,10 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) case 2: case 4: case 8: - m_pHeader->Invoke = CallJittedMethodRetI8; + pInvokeFunction = CallJittedMethodRetI8; break; case 16: - m_pHeader->Invoke = CallJittedMethodRet2I8; + pInvokeFunction = CallJittedMethodRet2I8; break; default: _ASSERTE(!"The return types that are not HFA should be <= 16 bytes in size"); @@ -841,37 +839,37 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) } } - m_pHeader->NumRoutines = m_routineIndex + 1; // Reserve one extra slot for the target method pointer - m_pHeader->TotalStackSize = m_totalStackSize; + m_routineIndex++; // Reserve one extra slot for the target method pointer - // resize the structure to its actually utilized size - m_pHeader = (CallStubHeader*)realloc(m_pHeader, sizeof(CallStubHeader) + m_pHeader->NumRoutines * sizeof(PCODE)); - // In case the reallocation failed, this function return NULL - return m_pHeader; + LoaderAllocator *pLoaderAllocator = pMD->GetLoaderAllocator(); + S_SIZE_T finalStubSize(sizeof(CallStubHeader) + m_routineIndex * sizeof(PCODE)); + void *pHeaderStorage = pLoaderAllocator->GetHighFrequencyHeap()->AllocMem(finalStubSize); + + CallStubHeader *pHeader = new (pHeaderStorage) CallStubHeader(m_routineIndex, pRoutines, ALIGN_UP(m_totalStackSize, STACK_ALIGN_SIZE), pInvokeFunction); + + return pHeader; } // Process the argument described by argLocDesc. This function is called for each argument in the method signature. // It updates the ranges of registers and emits entries into the routines array at discontinuities. -void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDesc) +void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDesc, PCODE *pRoutines) { LIMITED_METHOD_CONTRACT; - PCODE *routines = m_pHeader->Routines; - // Check if we have a range of registers or stack arguments that we need to store because the current argument // terminates it. 
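    // For illustration (hypothetical signature): for static void F(int a, double b, int c) on arm64,
    // 'b' has m_cGenReg == 0, so the pending GP range {x0} is flushed here as Load_X0 before the FP
    // range for d0 is opened; 'c' then flushes the FP range (Load_D0) and opens a new GP range at x1.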
if ((argLocDesc.m_cGenReg == 0) && (m_r1 != NoRange))
     {
         // No GP register is used to pass the current argument, but we already have a range of GP registers,
         // store the routine for the range
-        routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2);
+        pRoutines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2);
         m_r1 = NoRange;
     }
     else if (((argLocDesc.m_cFloatReg == 0)) && (m_x1 != NoRange))
     {
         // No floating point register is used to pass the current argument, but we already have a range of FP registers,
         // store the routine for the range
-        routines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2);
+        pRoutines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2);
         m_x1 = NoRange;
     }
     else if ((argLocDesc.m_byteStackSize == 0) && (m_s1 != NoRange))
@@ -879,8 +877,8 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
         // No stack argument is used to pass the current argument, but we already have a range of stack arguments,
         // store the routine for the range
         m_totalStackSize += m_s2 - m_s1 + 1;
-        routines[m_routineIndex++] = (PCODE)Load_Stack;
-        routines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1;
+        pRoutines[m_routineIndex++] = (PCODE)Load_Stack;
+        pRoutines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1;
         m_s1 = NoRange;
     }
 
@@ -901,7 +899,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
     else
     {
         // Discontinuous range - store a routine for the current and start a new one
-        routines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2);
+        pRoutines[m_routineIndex++] = GetGPRegRangeLoadRoutine(m_r1, m_r2);
         m_r1 = argLocDesc.m_idxGenReg;
         m_r2 = m_r1 + argLocDesc.m_cGenReg - 1;
     }
@@ -923,7 +921,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
     else
     {
         // Discontinuous range - store a routine for the current and start a new one
-        routines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2);
+        pRoutines[m_routineIndex++] = GetFPRegRangeLoadRoutine(m_x1, m_x2);
         m_x1 = argLocDesc.m_idxFloatReg;
         m_x2 = m_x1 + argLocDesc.m_cFloatReg - 1;
     }
@@ -942,15 +940,15 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe
         // Extend an existing range, but only if the argument is at least pointer size large.
         // The only case when this is not true is on Apple ARM64 OSes where primitive types smaller
         // than 8 bytes are passed on the stack in a packed manner. We process such arguments one by
         // one to avoid an explosion of the number of routines.
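        // Illustration (hypothetical signature): on Apple arm64, a method taking eight long
        // arguments (filling x0..x7) followed by two shorts places the shorts at [sp] and [sp + 2];
        // each of them is emitted as its own Load_Stack_2B entry rather than extending a range.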
m_s2 += argLocDesc.m_byteStackSize; } else { // Discontinuous range - store a routine for the current and start a new one m_totalStackSize += m_s2 - m_s1 + 1; - routines[m_routineIndex++] = (PCODE)Load_Stack; - routines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; + pRoutines[m_routineIndex++] = (PCODE)Load_Stack; + pRoutines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; m_s1 = argLocDesc.m_byteStackIndex; m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1; } @@ -962,19 +960,19 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe switch (argLocDesc.m_byteStackSize) { case 1: - routines[m_routineIndex++] = (PCODE)Load_Stack_1B; + pRoutines[m_routineIndex++] = (PCODE)Load_Stack_1B; break; case 2: - routines[m_routineIndex++] = (PCODE)Load_Stack_2B; + pRoutines[m_routineIndex++] = (PCODE)Load_Stack_2B; break; case 4: - routines[m_routineIndex++] = (PCODE)Load_Stack_4B; + pRoutines[m_routineIndex++] = (PCODE)Load_Stack_4B; break; default: _ASSERTE(!"Unexpected stack argument size"); break; } - routines[m_routineIndex++] = m_s1; + pRoutines[m_routineIndex++] = m_s1; m_s1 = NoRange; } #endif // TARGET_APPLE && TARGET_ARM64 @@ -987,8 +985,8 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe if (argIt.IsArgPassedByRef()) { _ASSERTE(argLocDesc.m_cGenReg == 1); - routines[m_routineIndex++] = GetGPRegRefLoadRoutine(argLocDesc.m_idxGenReg); - routines[m_routineIndex++] = argIt.GetArgSize(); + pRoutines[m_routineIndex++] = GetGPRegRefLoadRoutine(argLocDesc.m_idxGenReg); + pRoutines[m_routineIndex++] = argIt.GetArgSize(); m_r1 = NoRange; } #endif // UNIX_AMD64_ABI diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h index 0549cbbba5acbd..e2d8bf8ea0c3a8 100644 --- a/src/coreclr/vm/callstubgenerator.h +++ b/src/coreclr/vm/callstubgenerator.h @@ -12,16 +12,29 @@ class MethodDesc; // stack, invokes the target method, and translates the return value back to the interpreter stack. struct CallStubHeader { + typedef void (*InvokeFunctionPtr)(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); + // Number of routines in the Routines array. The last one is the target method to call. int NumRoutines; // Total stack size used for the arguments. int TotalStackSize; // This is a pointer to a helper function that invokes the target method. There are several // versions of this function, depending on the return type of the target method. - void (*Invoke)(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); + InvokeFunctionPtr Invoke; // This is an array of routines that translate the arguments from the interpreter stack to the CPU registers and native stack. PCODE Routines[0]; + CallStubHeader(int numRoutines, PCODE *pRoutines, int totalStackSize, InvokeFunctionPtr pInvokeFunction) + { + LIMITED_METHOD_CONTRACT; + + NumRoutines = numRoutines; + TotalStackSize = totalStackSize; + Invoke = pInvokeFunction; + + memcpy(Routines, pRoutines, NumRoutines * sizeof(PCODE)); + } + // Set the address of the target method to call. void SetTarget(PCODE target) { @@ -52,11 +65,9 @@ class CallStubGenerator int m_routineIndex; // The total stack size used for the arguments. int m_totalStackSize; - // The header of the call stub that is being generated. - CallStubHeader *m_pHeader; // Process the argument described by argLocDesc. This function is called for each argument in the method signature. 
- void ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDesc); + void ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDesc, PCODE *pRoutines); public: // Generate the call stub for the given method. // The returned call stub header must be freed by the caller using FreeCallStub. From d2321d3d8ba36f82fee48bba49a91835ee7ed703 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Wed, 14 May 2025 22:33:20 +0200 Subject: [PATCH 09/12] PR feedback --- src/coreclr/vm/callstubgenerator.cpp | 27 +++------------------------ src/coreclr/vm/callstubgenerator.h | 7 ++----- src/coreclr/vm/interpexec.cpp | 26 +++++++++++++++++++------- 3 files changed, 24 insertions(+), 36 deletions(-) diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 447cd154d855ca..2394c5bacea3f1 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -541,14 +541,9 @@ extern "C" void CallJittedMethodRet4Float(PCODE *routines, int8_t*pArgs, int8_t* // Generate the call stub for the given method. // The returned call stub header must be freed by the caller using FreeCallStub. -CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) +CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD, AllocMemTracker *pamTracker) { - CONTRACTL - { - THROWS; - MODE_ANY; - } - CONTRACTL_END + STANDARD_VM_CONTRACT; _ASSERTE(pMD != NULL); @@ -843,7 +838,7 @@ CallStubHeader *CallStubGenerator::GenerateCallStub(MethodDesc *pMD) LoaderAllocator *pLoaderAllocator = pMD->GetLoaderAllocator(); S_SIZE_T finalStubSize(sizeof(CallStubHeader) + m_routineIndex * sizeof(PCODE)); - void *pHeaderStorage = pLoaderAllocator->GetHighFrequencyHeap()->AllocMem(finalStubSize); + void *pHeaderStorage = pamTracker->Track(pLoaderAllocator->GetHighFrequencyHeap()->AllocMem(finalStubSize)); CallStubHeader *pHeader = new (pHeaderStorage) CallStubHeader(m_routineIndex, pRoutines, ALIGN_UP(m_totalStackSize, STACK_ALIGN_SIZE), pInvokeFunction); @@ -992,20 +987,4 @@ void CallStubGenerator::ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDe #endif // UNIX_AMD64_ABI } -// Free the call stub header generated by GenerateCallStub. -void CallStubGenerator::FreeCallStub(CallStubHeader *pHeader) -{ - CONTRACTL - { - NOTHROW; - MODE_ANY; - GC_NOTRIGGER; - FORBID_FAULT; - PRECONDITION(CheckPointer(pHeader)); - } - CONTRACTL_END - - free(pHeader); -} - #endif // FEATURE_INTERPRETER \ No newline at end of file diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h index e2d8bf8ea0c3a8..476bb2f4c9da91 100644 --- a/src/coreclr/vm/callstubgenerator.h +++ b/src/coreclr/vm/callstubgenerator.h @@ -7,6 +7,7 @@ #include "callingconvention.h" class MethodDesc; +class AllocMemTracker; // This is a header for a call stub that translates arguments from the interpreter stack to the CPU registers and native // stack, invokes the target method, and translates the return value back to the interpreter stack. @@ -70,11 +71,7 @@ class CallStubGenerator void ProcessArgument(ArgIterator& argIt, ArgLocDesc& argLocDesc, PCODE *pRoutines); public: // Generate the call stub for the given method. - // The returned call stub header must be freed by the caller using FreeCallStub. - // The return value is NULL in case of an OOM. - CallStubHeader *GenerateCallStub(MethodDesc *pMD); - // Free the call stub header generated by GenerateCallStub. 
- static void FreeCallStub(CallStubHeader *pHeader); + CallStubHeader *GenerateCallStub(MethodDesc *pMD, AllocMemTracker *pamTracker); }; #endif // CALLSTUBGENERATOR_H diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index f8ece4471263e3..547977defc96ab 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -24,7 +24,12 @@ void InvokeCompiledMethod(MethodDesc *pMD, int8_t *pArgs, int8_t *pRet) CallStubHeader *pHeader = pMD->GetCallStub(); if (pHeader == NULL) { - pHeader = callStubGenerator.GenerateCallStub(pMD); + AllocMemTracker amTracker; + { + GCX_PREEMP(); + pHeader = callStubGenerator.GenerateCallStub(pMD, &amTracker); + } + if (pHeader == NULL) { // allocating the header has failed due to OOM @@ -32,13 +37,20 @@ void InvokeCompiledMethod(MethodDesc *pMD, int8_t *pArgs, int8_t *pRet) } HRESULT hr = pMD->SetCallStub(pHeader); - if (hr == S_FALSE) + switch (hr) { - // We have lost the race for generating the header, so we need to free the one we generated - // and use the one that was generated by another thread. - CallStubGenerator::FreeCallStub(pHeader); - pHeader = pMD->GetCallStub(); - } + case S_OK: + amTracker.SuppressRelease(); + break; + case S_FALSE: + // We have lost the race for generating the header, use the one that was generated by another thread + // and let the amTracker release the memory of the one we generated. + pHeader = pMD->GetCallStub(); + break; + default: + ThrowHR(hr); + break; + } } pHeader->SetTarget(pMD->GetNativeCode()); // The method to call From 5cb2eba95dfd185a329857957387972ef9253e1c Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Wed, 14 May 2025 23:55:04 +0200 Subject: [PATCH 10/12] PR feedback 2 --- src/coreclr/vm/interpexec.cpp | 25 +++++++------------------ src/coreclr/vm/method.cpp | 16 +++++++--------- src/coreclr/vm/method.hpp | 2 +- 3 files changed, 15 insertions(+), 28 deletions(-) diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 547977defc96ab..26199934a1824f 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -30,27 +30,16 @@ void InvokeCompiledMethod(MethodDesc *pMD, int8_t *pArgs, int8_t *pRet) pHeader = callStubGenerator.GenerateCallStub(pMD, &amTracker); } - if (pHeader == NULL) + if (pMD->SetCallStub(pHeader)) { - // allocating the header has failed due to OOM - COMPlusThrowOM(); + amTracker.SuppressRelease(); } - - HRESULT hr = pMD->SetCallStub(pHeader); - switch (hr) + else { - case S_OK: - amTracker.SuppressRelease(); - break; - case S_FALSE: - // We have lost the race for generating the header, use the one that was generated by another thread - // and let the amTracker release the memory of the one we generated. - pHeader = pMD->GetCallStub(); - break; - default: - ThrowHR(hr); - break; - } + // We have lost the race for generating the header, use the one that was generated by another thread + // and let the amTracker release the memory of the one we generated. 
+ pHeader = pMD->GetCallStub(); + } } pHeader->SetTarget(pMD->GetNativeCode()); // The method to call diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index 1383ddc1486837..9a721f18c7ff6a 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -254,23 +254,21 @@ HRESULT MethodDesc::SetMethodDescVersionState(PTR_MethodDescVersioningState stat } #ifdef FEATURE_INTERPRETER -HRESULT MethodDesc::SetCallStub(CallStubHeader *pHeader) +// Set the call stub for the interpreter to JIT/AOT calls +// Returns true if the current call set the stub, false if it was already set +bool MethodDesc::SetCallStub(CallStubHeader *pHeader) { - WRAPPER_NO_CONTRACT; + LIMITED_METHOD_CONTRACT; - HRESULT hr; - IfFailRet(EnsureCodeDataExists(NULL)); + IfFailThrow(EnsureCodeDataExists(NULL)); _ASSERTE(m_codeData != NULL); - if (InterlockedCompareExchangeT(&m_codeData->CallStub, pHeader, NULL) != NULL) - return S_FALSE; - - return S_OK; + return InterlockedCompareExchangeT(&m_codeData->CallStub, pHeader, NULL) == NULL; } CallStubHeader *MethodDesc::GetCallStub() { - WRAPPER_NO_CONTRACT; + LIMITED_METHOD_CONTRACT; PTR_MethodDescCodeData codeData = VolatileLoadWithoutBarrier(&m_codeData); if (codeData == NULL) diff --git a/src/coreclr/vm/method.hpp b/src/coreclr/vm/method.hpp index 8f9477164b9761..5bf221d2498cdd 100644 --- a/src/coreclr/vm/method.hpp +++ b/src/coreclr/vm/method.hpp @@ -1821,7 +1821,7 @@ class MethodDesc HRESULT SetMethodDescVersionState(PTR_MethodDescVersioningState state); #ifdef FEATURE_INTERPRETER - HRESULT SetCallStub(CallStubHeader *pHeader); + bool SetCallStub(CallStubHeader *pHeader); CallStubHeader *GetCallStub(); #endif // FEATURE_INTERPRETER From d2fe6e7cbe6340e9418fc5d2e071f19f903bf366 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Thu, 15 May 2025 00:11:45 +0200 Subject: [PATCH 11/12] PR feedback 3 --- src/coreclr/vm/interpexec.cpp | 7 +++---- src/coreclr/vm/method.cpp | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 26199934a1824f..79ce13d2ce0cbe 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -24,11 +24,10 @@ void InvokeCompiledMethod(MethodDesc *pMD, int8_t *pArgs, int8_t *pRet) CallStubHeader *pHeader = pMD->GetCallStub(); if (pHeader == NULL) { + GCX_PREEMP(); + AllocMemTracker amTracker; - { - GCX_PREEMP(); - pHeader = callStubGenerator.GenerateCallStub(pMD, &amTracker); - } + pHeader = callStubGenerator.GenerateCallStub(pMD, &amTracker); if (pMD->SetCallStub(pHeader)) { diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp index 9a721f18c7ff6a..07023802238c80 100644 --- a/src/coreclr/vm/method.cpp +++ b/src/coreclr/vm/method.cpp @@ -258,7 +258,7 @@ HRESULT MethodDesc::SetMethodDescVersionState(PTR_MethodDescVersioningState stat // Returns true if the current call set the stub, false if it was already set bool MethodDesc::SetCallStub(CallStubHeader *pHeader) { - LIMITED_METHOD_CONTRACT; + STANDARD_VM_CONTRACT; IfFailThrow(EnsureCodeDataExists(NULL)); From 552ca6ff8c6581ab9b1b9b39d8f40a602f37e3f6 Mon Sep 17 00:00:00 2001 From: Jan Vorlicek Date: Thu, 15 May 2025 00:26:05 +0200 Subject: [PATCH 12/12] Wrap all the asm helpers in #ifdef FEATURE_INTERPRETER --- src/coreclr/vm/amd64/AsmHelpers.asm | 36 ++++++++++++++------------ src/coreclr/vm/amd64/asmhelpers.S | 40 +++++++++++++++-------------- src/coreclr/vm/arm64/asmhelpers.S | 23 +++++++++-------- src/coreclr/vm/arm64/asmhelpers.asm | 24 +++++++++-------- 4 files 
changed, 65 insertions(+), 58 deletions(-) diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 7aecce31694688..d621ce65cb97a7 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -482,23 +482,6 @@ JIT_PollGCRarePath: TAILJMP_RAX LEAF_END JIT_PollGC, _TEXT -ifdef FEATURE_INTERPRETER -NESTED_ENTRY InterpreterStub, _TEXT - - PROLOG_WITH_TRANSITION_BLOCK - - ; - ; call ExecuteInterpretedMethod - ; - lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock* - mov rdx, METHODDESC_REGISTER - call ExecuteInterpretedMethod - - EPILOG_WITH_TRANSITION_BLOCK_RETURN - -NESTED_END InterpreterStub, _TEXT -endif ; FEATURE_INTERPRETER - ; rcx -This pointer ; rdx -ReturnBuffer LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT @@ -629,6 +612,23 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ret NESTED_END CallEHFilterFunclet, _TEXT +ifdef FEATURE_INTERPRETER + +NESTED_ENTRY InterpreterStub, _TEXT + + PROLOG_WITH_TRANSITION_BLOCK + + ; + ; call ExecuteInterpretedMethod + ; + lea rcx, [rsp + __PWTB_TransitionBlock] ; pTransitionBlock* + mov rdx, METHODDESC_REGISTER + call ExecuteInterpretedMethod + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END InterpreterStub, _TEXT + ; Copy arguments from the interpreter stack to the processor stack. ; The CPU stack slots are aligned to pointer size. LEAF_ENTRY Load_Stack, _TEXT @@ -911,4 +911,6 @@ END_PROLOGUE ret NESTED_END CallJittedMethodRetI8, _TEXT +endif ; FEATURE_INTERPRETER + end diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 57071d23a62fd6..24d303da7622da 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -334,25 +334,6 @@ LOCAL_LABEL(JIT_PollGCRarePath): jmp rax LEAF_END JIT_PollGC, _TEXT -#ifdef FEATURE_INTERPRETER -NESTED_ENTRY InterpreterStub, _TEXT, NoHandler - - PROLOG_WITH_TRANSITION_BLOCK 8, 0, 0, 0, 0 - mov [rsp], rax // Return buffer in Swift calling convention - - # - # call ExecuteInterpretedMethod - # - lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock* - mov rsi, METHODDESC_REGISTER - call C_FUNC(ExecuteInterpretedMethod) - - mov rax, [rsp] - EPILOG_WITH_TRANSITION_BLOCK_RETURN - -NESTED_END InterpreterStub, _TEXT -#endif // FEATURE_INTERPRETER - //rdi -This pointer //rsi -ReturnBuffer LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT @@ -453,6 +434,25 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler ret NESTED_END CallEHFilterFunclet, _TEXT +#ifdef FEATURE_INTERPRETER + +NESTED_ENTRY InterpreterStub, _TEXT, NoHandler + + PROLOG_WITH_TRANSITION_BLOCK 8, 0, 0, 0, 0 + mov [rsp], rax // Return buffer in Swift calling convention + + # + # call ExecuteInterpretedMethod + # + lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock* + mov rsi, METHODDESC_REGISTER + call C_FUNC(ExecuteInterpretedMethod) + + mov rax, [rsp] + EPILOG_WITH_TRANSITION_BLOCK_RETURN + +NESTED_END InterpreterStub, _TEXT + // Copy arguments from the interpreter stack to the processor stack. // The CPU stack slots are aligned to pointer size. 
LEAF_ENTRY Load_Stack, _TEXT @@ -1153,3 +1153,5 @@ END_PROLOGUE pop rbp ret NESTED_END CallJittedMethodRetDoubleDouble, _TEXT + +#endif // FEATURE_INTERPRETER diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 2d6736d4d94a62..f5df2e4520487c 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -845,6 +845,16 @@ LOCAL_LABEL(JIT_PollGCRarePath): br x9 LEAF_END JIT_PollGC, _TEXT +//x0 -This pointer +//x1 -ReturnBuffer +LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT + ldr x12, [METHODDESC_REGISTER, #ThisPtrRetBufPrecodeData__Target] + mov x11, x0 // Move first arg pointer to temp register + mov x0, x1 // Move ret buf arg pointer from location in ABI for return buffer for instance method to location in ABI for return buffer for static method + mov x1, x11 // Move temp register to first arg register for static method with return buffer + EPILOG_BRANCH_REG x12 +LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT + #ifdef FEATURE_INTERPRETER NESTED_ENTRY InterpreterStub, _TEXT, NoHandler @@ -858,17 +868,6 @@ NESTED_ENTRY InterpreterStub, _TEXT, NoHandler EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END InterpreterStub, _TEXT -#endif // FEATURE_INTERPRETER - -//x0 -This pointer -//x1 -ReturnBuffer -LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT - ldr x12, [METHODDESC_REGISTER, #ThisPtrRetBufPrecodeData__Target] - mov x11, x0 // Move first arg pointer to temp register - mov x0, x1 // Move ret buf arg pointer from location in ABI for return buffer for instance method to location in ABI for return buffer for static method - mov x1, x11 // Move temp register to first arg register for static method with return buffer - EPILOG_BRANCH_REG x12 -LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT // Copy arguments from the interpreter stack to the processor stack // The CPU stack slots are aligned to pointer size. 
@@ -1463,3 +1462,5 @@ NESTED_ENTRY CallJittedMethodRet4Float, _TEXT, NoHandler EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 EPILOG_RETURN NESTED_END CallJittedMethodRet4Float, _TEXT + +#endif // FEATURE_INTERPRETER diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 743395626bb01c..e6483dd1a8ff02 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -1228,7 +1228,18 @@ JIT_PollGCRarePath br x9 LEAF_END +;x0 -This pointer +;x1 -ReturnBuffer + LEAF_ENTRY ThisPtrRetBufPrecodeWorker + ldr x12, [METHODDESC_REGISTER, #ThisPtrRetBufPrecodeData__Target] + mov x11, x0 ; Move first arg pointer to temp register + mov x0, x1 ; Move ret buf arg pointer from location in ABI for return buffer for instance method to location in ABI for return buffer for static method + mov x1, x11 ; Move temp register to first arg register for static method with return buffer + EPILOG_BRANCH_REG x12 + LEAF_END + #ifdef FEATURE_INTERPRETER + NESTED_ENTRY InterpreterStub PROLOG_WITH_TRANSITION_BLOCK @@ -1241,17 +1252,6 @@ JIT_PollGCRarePath EPILOG_WITH_TRANSITION_BLOCK_RETURN NESTED_END -#endif // FEATURE_INTERPRETER - -;x0 -This pointer -;x1 -ReturnBuffer - LEAF_ENTRY ThisPtrRetBufPrecodeWorker - ldr x12, [METHODDESC_REGISTER, #ThisPtrRetBufPrecodeData__Target] - mov x11, x0 ; Move first arg pointer to temp register - mov x0, x1 ; Move ret buf arg pointer from location in ABI for return buffer for instance method to location in ABI for return buffer for static method - mov x1, x11 ; Move temp register to first arg register for static method with return buffer - EPILOG_BRANCH_REG x12 - LEAF_END ; Copy arguments from the interpreter stack to the processor stack ; The CPU stack slots are aligned to pointer size. @@ -1808,5 +1808,7 @@ CopyLoop EPILOG_RETURN NESTED_END CallJittedMethodRet4Float +#endif // FEATURE_INTERPRETER + ; Must be at very end of file END
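
A note for readers tracing ProcessArgument above: the m_r1/m_r2 (and the analogous m_x1/m_x2 and m_s1/m_s2) pairs implement a simple interval-coalescing scheme. Consecutive register or stack locations extend an open range; a discontinuity flushes the open range as a single load routine and opens a new one. Below is a minimal standalone sketch of that scheme, illustrative only (not CoreCLR code; RangeEmitter and its member names are invented for this example):

    #include <utility>
    #include <vector>

    struct RangeEmitter
    {
        static const int NoRange = -1;

        int m_first = NoRange;  // start of the open range (m_r1 in the patch)
        int m_last  = NoRange;  // end of the open range (m_r2 in the patch)
        std::vector<std::pair<int, int>> m_emitted; // stands in for the emitted load routines

        // Called once per argument occupying 'count' consecutive locations starting at 'idx'.
        void Add(int idx, int count)
        {
            if (m_first == NoRange)
            {
                // Nothing open yet - start a new range.
                m_first = idx;
                m_last = idx + count - 1;
            }
            else if (idx == m_last + 1)
            {
                // Contiguous with the open range - extend it.
                m_last = idx + count - 1;
            }
            else
            {
                // Discontinuity - flush the open range as one routine, start a new one.
                m_emitted.emplace_back(m_first, m_last);
                m_first = idx;
                m_last = idx + count - 1;
            }
        }

        // Called after the last argument to flush whatever range is still open.
        void Flush()
        {
            if (m_first != NoRange)
            {
                m_emitted.emplace_back(m_first, m_last);
                m_first = NoRange;
            }
        }
    };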
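The 64-bit data slot that follows a Load_Stack entry packs a stack range as ((int64_t)length << 32) | offset, as seen in ProcessArgument above. A minimal sketch of the packing and unpacking, illustrative only (assumes a 64-bit pointer-sized PCODE; whether offset/length are counted in bytes or slots follows the target ABI, bytes here):

    #include <cstdint>

    typedef uintptr_t PCODE; // assumption: 64-bit, pointer-sized slot as in the patch

    // Mirrors: pRoutines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1;
    inline PCODE PackStackRange(uint32_t offset, uint32_t length)
    {
        return (PCODE)(((int64_t)length << 32) | offset);
    }

    // The Load_Stack helper reads the two halves back out of the slot.
    inline uint32_t StackRangeOffset(PCODE slot) { return (uint32_t)slot; }         // low 32 bits
    inline uint32_t StackRangeLength(PCODE slot) { return (uint32_t)(slot >> 32); } // high 32 bits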
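CallStubHeader ends in a PCODE Routines[0] trailing array and is created with placement new over a single loader-heap block sized for the header plus the routine list. A minimal sketch of the same idiom, assuming malloc in place of the loader heap and a Routines[1] tail for standard C++ (names invented for this example):

    #include <cstdint>
    #include <cstdlib>
    #include <cstring>
    #include <new>

    typedef uintptr_t PCODE; // assumption: pointer-sized, as in the patch

    struct StubHeader
    {
        int   NumRoutines;
        PCODE Routines[1]; // trailing array; the patch uses the Routines[0] extension

        StubHeader(int numRoutines, const PCODE *pRoutines)
            : NumRoutines(numRoutines)
        {
            // Copy the temporary routine list into the tail, as the
            // CallStubHeader constructor does with memcpy.
            memcpy(Routines, pRoutines, numRoutines * sizeof(PCODE));
        }
    };

    StubHeader *MakeStubHeader(const PCODE *pRoutines, int numRoutines)
    {
        // One allocation covering header plus routine list (numRoutines >= 1);
        // the loader heap's AllocMem plays this role in the patch.
        void *storage = malloc(sizeof(StubHeader) + (numRoutines - 1) * sizeof(PCODE));
        if (storage == nullptr)
            return nullptr;
        return new (storage) StubHeader(numRoutines, pRoutines);
    }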
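The SetCallStub change reduces publication to a single compare-exchange: the first thread to install its header wins and suppresses its AllocMemTracker release, while a losing thread adopts the published header and lets the tracker reclaim the copy it generated. A minimal sketch of that pattern using std::atomic in place of InterlockedCompareExchangeT (illustrative only; names invented for this example):

    #include <atomic>

    struct CallStub;                         // stand-in for CallStubHeader

    std::atomic<CallStub *> g_stub{nullptr}; // stand-in for m_codeData->CallStub

    // Mirrors MethodDesc::SetCallStub: true if this call published pMine,
    // false if another thread got there first.
    bool TryPublish(CallStub *pMine)
    {
        CallStub *expected = nullptr;
        return g_stub.compare_exchange_strong(expected, pMine);
    }

    // Mirrors the InvokeCompiledMethod caller: the winner keeps its allocation
    // (SuppressRelease), the loser adopts the winner's header and lets its
    // tracker free the copy it generated.
    CallStub *PublishOrAdopt(CallStub *pMine)
    {
        return TryPublish(pMine) ? pMine : g_stub.load();
    }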