From e69f16fdaa79e0bf03dd0e304b678d638ce63967 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 14:51:04 +0200 Subject: [PATCH 01/52] Update VM and NativeAOT to use PSPSym-less convention for calling funclets --- .../Runtime/amd64/ExceptionHandling.S | 9 +- .../Runtime/amd64/ExceptionHandling.asm | 9 +- src/coreclr/vm/amd64/AsmHelpers.asm | 118 ++++++++++++++++ src/coreclr/vm/amd64/asmhelpers.S | 89 ++++++++++++ src/coreclr/vm/arm/ehhelpers.S | 4 +- src/coreclr/vm/arm64/asmhelpers.S | 4 +- src/coreclr/vm/arm64/asmhelpers.asm | 4 +- src/coreclr/vm/eetwain.cpp | 127 +++--------------- src/coreclr/vm/exceptionhandling.cpp | 3 +- src/coreclr/vm/exceptionhandling.h | 4 - src/coreclr/vm/loongarch64/asmhelpers.S | 4 +- src/coreclr/vm/riscv64/asmhelpers.S | 4 +- 12 files changed, 244 insertions(+), 135 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S index c5ce852e46fd0e..6d6a3337dcf50f 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S @@ -330,8 +330,7 @@ NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler mov [rax], rcx #endif - mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame - mov rsi, [rsp + locArg0] // rsi <- exception object + mov rdi, [rsp + locArg0] // rdi <- exception object call qword ptr [rsp + locArg1] // call handler funclet ALTERNATE_ENTRY RhpCallCatchFunclet2 @@ -470,7 +469,6 @@ NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler mov [rax], rcx #endif - mov rdi, [rsi + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame call qword ptr [rsp + locArg0] // handler funclet address ALTERNATE_ENTRY RhpCallFinallyFunclet2 @@ -518,10 +516,7 @@ NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] mov rbp, [rax] - mov rax, rsi // rax <- handler funclet address - mov rsi, rdi // rsi <- exception object - mov rdi, [rdx 
+ OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame - call rax + call rsi ALTERNATE_ENTRY RhpCallFilterFunclet2 diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm index 741b916f00b904..4d225ba46054d0 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm @@ -434,8 +434,7 @@ endif movdqa xmm14,[r8 + OFFSETOF__REGDISPLAY__Xmm + 8*10h] movdqa xmm15,[r8 + OFFSETOF__REGDISPLAY__Xmm + 9*10h] - mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame - mov rdx, [rsp + rsp_offsetof_arguments + 0h] ;; rdx <- exception object + mov rcx, [rsp + rsp_offsetof_arguments + 0h] ;; rcx <- exception object call qword ptr [rsp + rsp_offsetof_arguments + 8h] ;; call handler funclet ALTERNATE_ENTRY RhpCallCatchFunclet2 @@ -639,7 +638,6 @@ if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the deb mov [rax], r9 endif - mov rcx, [rdx + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame call qword ptr [rsp + rsp_offsetof_arguments + 0h] ;; handler funclet address ALTERNATE_ENTRY RhpCallFinallyFunclet2 @@ -702,10 +700,7 @@ NESTED_ENTRY RhpCallFilterFunclet, _TEXT mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] mov rbp, [rax] - mov rax, rdx ;; rax <- handler funclet address - mov rdx, rcx ;; rdx <- exception object - mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame - call rax + call rdx ALTERNATE_ENTRY RhpCallFilterFunclet2 diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index b0d1b7b4093312..01dd64221ea0bf 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -507,4 +507,122 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT jmp METHODDESC_REGISTER LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT +;; +;; Prologue of all funclet calling helpers (CallXXXXFunclet) +;; +FUNCLET_CALL_PROLOGUE macro localsCount, 
alignStack + PUSH_CALLEE_SAVED_REGISTERS + + arguments_scratch_area_size = 20h + xmm_save_area_size = 10 * 10h ;; xmm6..xmm15 save area + stack_alloc_size = arguments_scratch_area_size + localsCount * 8 + alignStack * 8 + xmm_save_area_size + rsp_offsetof_arguments = stack_alloc_size + 8*8h + 8h + rsp_offsetof_locals = arguments_scratch_area_size + xmm_save_area_size + + alloc_stack stack_alloc_size + + save_xmm128_postrsp xmm6, (arguments_scratch_area_size + 0 * 10h) + save_xmm128_postrsp xmm7, (arguments_scratch_area_size + 1 * 10h) + save_xmm128_postrsp xmm8, (arguments_scratch_area_size + 2 * 10h) + save_xmm128_postrsp xmm9, (arguments_scratch_area_size + 3 * 10h) + save_xmm128_postrsp xmm10, (arguments_scratch_area_size + 4 * 10h) + save_xmm128_postrsp xmm11, (arguments_scratch_area_size + 5 * 10h) + save_xmm128_postrsp xmm12, (arguments_scratch_area_size + 6 * 10h) + save_xmm128_postrsp xmm13, (arguments_scratch_area_size + 7 * 10h) + save_xmm128_postrsp xmm14, (arguments_scratch_area_size + 8 * 10h) + save_xmm128_postrsp xmm15, (arguments_scratch_area_size + 9 * 10h) + + END_PROLOGUE +endm + +;; +;; Epilogue of all funclet calling helpers (CallXXXXFunclet) +;; +FUNCLET_CALL_EPILOGUE macro + movdqa xmm6, [rsp + arguments_scratch_area_size + 0 * 10h] + movdqa xmm7, [rsp + arguments_scratch_area_size + 1 * 10h] + movdqa xmm8, [rsp + arguments_scratch_area_size + 2 * 10h] + movdqa xmm9, [rsp + arguments_scratch_area_size + 3 * 10h] + movdqa xmm10, [rsp + arguments_scratch_area_size + 4 * 10h] + movdqa xmm11, [rsp + arguments_scratch_area_size + 5 * 10h] + movdqa xmm12, [rsp + arguments_scratch_area_size + 6 * 10h] + movdqa xmm13, [rsp + arguments_scratch_area_size + 7 * 10h] + movdqa xmm14, [rsp + arguments_scratch_area_size + 8 * 10h] + movdqa xmm15, [rsp + arguments_scratch_area_size + 9 * 10h] + + add rsp, stack_alloc_size + + POP_CALLEE_SAVED_REGISTERS +endm + +; This helper enables us to call into a funclet after restoring Fp register +NESTED_ENTRY 
CallEHFunclet, _TEXT + ; On entry: + ; + ; RCX = throwable + ; RDX = PC to invoke + ; R8 = address of RBX register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame + ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + ; + + FUNCLET_CALL_PROLOGUE 0, 1 + + ; Restore RBX, RBP, RSI, RDI, R12, R13, R14, R15 from CONTEXT (offsets are relative to CONTEXT.Rbx) + mov rbx, [r8 + 0] + mov rbp, [r8 + 16] + mov rsi, [r8 + 24] + mov rdi, [r8 + 32] + mov r12, [r8 + 72] + mov r13, [r8 + 80] + mov r14, [r8 + 88] + mov r15, [r8 + 96] + + ; Restore XMM6..XMM15 from CONTEXT (Xmm6 is 368 bytes past Rbx; 272 would be Xmm0) + movdqa xmm6, [r8 + 368 + 0*10h] + movdqa xmm7, [r8 + 368 + 1*10h] + movdqa xmm8, [r8 + 368 + 2*10h] + movdqa xmm9, [r8 + 368 + 3*10h] + movdqa xmm10, [r8 + 368 + 4*10h] + movdqa xmm11, [r8 + 368 + 5*10h] + movdqa xmm12, [r8 + 368 + 6*10h] + movdqa xmm13, [r8 + 368 + 7*10h] + movdqa xmm14, [r8 + 368 + 8*10h] + movdqa xmm15, [r8 + 368 + 9*10h] + + ; Save the SP of this function (full 64-bit RSP, not the 16-bit SP alias). + mov [r9], rsp + + ; Invoke the funclet + call rdx + + FUNCLET_CALL_EPILOGUE + + ret +NESTED_END CallEHFunclet, _TEXT + +; This helper enables us to call into a filter funclet after restoring the main function's +; frame pointer (RBP), which the filter uses for accessing the locals in the parent method. +NESTED_ENTRY CallEHFilterFunclet, _TEXT + ; On entry: + ; + ; RCX = throwable + ; RDX = RBP of main function + ; R8 = PC to invoke + ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
+ ; + + FUNCLET_CALL_PROLOGUE 0, 1 + + ; Save the SP of this function (full 64-bit RSP, not the 16-bit SP alias) + mov [r9], rsp + + ; Invoke the filter funclet + mov rbp, rdx + call r8 + + FUNCLET_CALL_EPILOGUE + + ret +NESTED_END CallEHFilterFunclet, _TEXT + end \ No newline at end of file diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 73646ad2aa2a02..4c8cc7c8e6866b 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -362,3 +362,92 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT mov rdi, r11 jmp METHODDESC_REGISTER LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT + + +// +// Prologue of all funclet calling helpers (CallXXXXFunclet) +// +.macro FUNCLET_CALL_PROLOGUE localsCount, alignStack + push_nonvol_reg r15 // save preserved regs for OS stackwalker + push_nonvol_reg r14 // ... + push_nonvol_reg r13 // ... + push_nonvol_reg r12 // ... + push_nonvol_reg rbx // ... + push_nonvol_reg rbp // ... + + stack_alloc_size = \localsCount * 8 + \alignStack * 8 + + alloc_stack stack_alloc_size + + END_PROLOGUE +.endm + +// +// Epilogue of all funclet calling helpers (CallXXXXFunclet) +// +.macro FUNCLET_CALL_EPILOGUE + free_stack stack_alloc_size + + pop_nonvol_reg rbp + pop_nonvol_reg rbx + pop_nonvol_reg r12 + pop_nonvol_reg r13 + pop_nonvol_reg r14 + pop_nonvol_reg r15 +.endm + +// This helper enables us to call into a funclet after restoring Fp register +NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler + // On entry: + // + // RDI = throwable + // RSI = PC to invoke + // RDX = address of RBX register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame + // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // + + FUNCLET_CALL_PROLOGUE 0, 1 + + // Restore RBX, RBP, R12, R13, R14, R15 from CONTEXT + mov rbx, [rdx + 0] + mov rbp, [rdx + 16] + mov r12, [rdx + 72] + mov r13, [rdx + 80] + mov r14, [rdx + 88] + mov r15, [rdx + 96] + + // Save the SP of this function. 
+ mov [rcx], rsp + + // Invoke the funclet + call rsi + + FUNCLET_CALL_EPILOGUE + + ret +NESTED_END CallEHFunclet, _TEXT + +// This helper enables us to call into a filter funclet after restoring the main function's +// frame pointer (RBP), which the filter uses for accessing the locals in the parent method. +NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler + // On entry: + // + // RDI = throwable + // RSI = RBP of main function + // RDX = PC to invoke + // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // + + FUNCLET_CALL_PROLOGUE 0, 1 + + // Save the SP of this function + mov [rcx], rsp + + // Invoke the filter funclet + mov rbp, rsi + call rdx + + FUNCLET_CALL_EPILOGUE + + ret +NESTED_END CallEHFilterFunclet, _TEXT diff --git a/src/coreclr/vm/arm/ehhelpers.S b/src/coreclr/vm/arm/ehhelpers.S index eeb6e3894a575e..006ade52bbda70 100644 --- a/src/coreclr/vm/arm/ehhelpers.S +++ b/src/coreclr/vm/arm/ehhelpers.S @@ -132,12 +132,14 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 // On entry: // // R0 = throwable - // R1 = SP of the caller of the method/funclet containing the filter + // R1 = FP of main method // R2 = PC to invoke // R3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. // // Save the SP of this function str sp, [r3] + // Restore frame pointer + mov r11, r1 // Invoke the filter funclet blx r2 diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 5fac7fae7aa838..97ef790aa8df25 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -412,12 +412,14 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // On entry: // // X0 = throwable - // X1 = SP of the caller of the method/funclet containing the filter + // X1 = FP of main function // X2 = PC to invoke // X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
// // Save the SP of this function str fp, [x3] + // Restore frame pointer + mov fp, x1 // Invoke the filter funclet blr x2 diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 3dd6edcb34ab2b..80d6e94c74bd4e 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -751,12 +751,14 @@ COMToCLRDispatchHelper_RegSetup ; On entry: ; ; X0 = throwable - ; X1 = SP of the caller of the method/funclet containing the filter + ; X1 = FP of the main function ; X2 = PC to invoke ; X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. ; ; Save the SP of this function str fp, [x3] + ; Restore frame pointer + mov fp, x1 ; Invoke the filter funclet blr x2 diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index c373b984922d3b..243906f65d45c7 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -247,7 +247,6 @@ HRESULT EECodeManager::FixContextForEnC(PCONTEXT pCtx, // ======================================= // <--- RSP == RBP (invariant: localalloc disallowed before remap) // Arguments for next call (if there is one) - // PSPSym (optional) // JIT temporaries (if any) // Security object (if any) // Local variables (if any) @@ -273,7 +272,6 @@ HRESULT EECodeManager::FixContextForEnC(PCONTEXT pCtx, // So we start at RSP, and zero out: // GetFixedStackSize() - GetSizeOfEditAndContinuePreservedArea() bytes. // - // We'll need to restore PSPSym; location gotten from GCInfo. // We'll need to copy security object; location gotten from GCInfo. 
// // On ARM64 the JIT generates a slightly different frame and we do not have @@ -283,7 +281,6 @@ HRESULT EECodeManager::FixContextForEnC(PCONTEXT pCtx, // Arguments for next call (if there is one) <- SP // JIT temporaries // Locals - // PSPSym // --------------------------------------- ^ zeroed area // MonitorAcquired (for synchronized methods) // Saved FP <- FP @@ -300,14 +297,14 @@ HRESULT EECodeManager::FixContextForEnC(PCONTEXT pCtx, // GCInfo for old method GcInfoDecoder oldGcDecoder( pOldCodeInfo->GetGCInfoToken(), - GcInfoDecoderFlags(DECODE_SECURITY_OBJECT | DECODE_PSP_SYM | DECODE_EDIT_AND_CONTINUE), + GcInfoDecoderFlags(DECODE_SECURITY_OBJECT | DECODE_EDIT_AND_CONTINUE), 0 // Instruction offset (not needed) ); // GCInfo for new method GcInfoDecoder newGcDecoder( pNewCodeInfo->GetGCInfoToken(), - GcInfoDecoderFlags(DECODE_SECURITY_OBJECT | DECODE_PSP_SYM | DECODE_EDIT_AND_CONTINUE), + GcInfoDecoderFlags(DECODE_SECURITY_OBJECT | DECODE_EDIT_AND_CONTINUE), 0 // Instruction offset (not needed) ); @@ -374,24 +371,6 @@ HRESULT EECodeManager::FixContextForEnC(PCONTEXT pCtx, return E_FAIL; } - TADDR callerSP = oldStackBase + oldFixedStackSize; - -#ifdef _DEBUG - // If the old method has a PSPSym, then its value should == initial-SP (i.e. 
- // oldStackBase) for x64 and callerSP for arm64 - INT32 nOldPspSymStackSlot = oldGcDecoder.GetPSPSymStackSlot(); - if (nOldPspSymStackSlot != NO_PSP_SYM) - { -#if defined(TARGET_AMD64) - TADDR oldPSP = *PTR_TADDR(oldStackBase + nOldPspSymStackSlot); - _ASSERTE(oldPSP == oldStackBase); -#else - TADDR oldPSP = *PTR_TADDR(callerSP + nOldPspSymStackSlot); - _ASSERTE(oldPSP == callerSP); -#endif - } -#endif // _DEBUG - #else PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform."); #endif @@ -766,23 +745,9 @@ HRESULT EECodeManager::FixContextForEnC(PCONTEXT pCtx, #elif defined(TARGET_AMD64) || defined(TARGET_ARM64) memset((void*)newStackBase, 0, newFixedStackSize - frameHeaderSize); - // Restore PSPSym for the new function. Its value should be set to our new FP. But - // first, we gotta find PSPSym's location on the stack - INT32 nNewPspSymStackSlot = newGcDecoder.GetPSPSymStackSlot(); - if (nNewPspSymStackSlot != NO_PSP_SYM) - { -#if defined(TARGET_AMD64) - *PTR_TADDR(newStackBase + nNewPspSymStackSlot) = newStackBase; -#elif defined(TARGET_ARM64) - *PTR_TADDR(callerSP + nNewPspSymStackSlot) = callerSP; -#else - PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform."); -#endif - } #else // !X86, !X64, !ARM64 PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform."); #endif - // 4) Put the variables from step 3 into their new locations. 
LOG((LF_ENC, LL_INFO100, "EECM::FixContextForEnC: set vars!\n")); @@ -1706,47 +1668,13 @@ PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, GcInfoDecoder gcInfoDecoder( gcInfoToken, - GcInfoDecoderFlags (DECODE_PSP_SYM | DECODE_GENERICS_INST_CONTEXT) + GcInfoDecoderFlags (DECODE_GENERICS_INST_CONTEXT) ); INT32 spOffsetGenericsContext = gcInfoDecoder.GetGenericsInstContextStackSlot(); if (spOffsetGenericsContext != NO_GENERICS_INST_CONTEXT) { - - TADDR taSlot; - if (pCodeInfo->IsFunclet()) - { - INT32 spOffsetPSPSym = gcInfoDecoder.GetPSPSymStackSlot(); - _ASSERTE(spOffsetPSPSym != NO_PSP_SYM); - -#ifdef TARGET_AMD64 - // On AMD64 the spOffsetPSPSym is relative to the "Initial SP": the stack - // pointer at the end of the prolog before and dynamic allocations, so it - // can be the same for funclets and the main function. - // However, we have a caller SP, so we need to convert - baseStackSlot -= pCodeInfo->GetFixedStackSize(); - -#endif // TARGET_AMD64 - - // For funclets we have to do an extra dereference to get the PSPSym first. - TADDR newBaseStackSlot = *PTR_TADDR(baseStackSlot + spOffsetPSPSym); - -#ifdef TARGET_AMD64 - // On AMD64 the PSPSym stores the "Initial SP": the stack pointer at the end of - // prolog, before any dynamic allocations. - // However, the GenericsContext offset is relative to the caller SP for all - // platforms. So here we adjust to convert AMD64's initial sp to a caller SP. - // But we have to be careful to use the main function's EECodeInfo, not the - // funclet's EECodeInfo because they have different stack sizes! 
- newBaseStackSlot += pCodeInfo->GetMainFunctionInfo().GetFixedStackSize(); -#endif // TARGET_AMD64 - - taSlot = (TADDR)( spOffsetGenericsContext + newBaseStackSlot ); - } - else - { - taSlot = (TADDR)( spOffsetGenericsContext + baseStackSlot ); - } + TADDR taSlot = (TADDR)( spOffsetGenericsContext + baseStackSlot ); TADDR taExactGenericsToken = *PTR_TADDR(taSlot); return PTR_VOID(taExactGenericsToken); } @@ -2060,7 +1988,6 @@ void EECodeManager::LeaveCatch(GCInfoToken gcInfoToken, #ifndef TARGET_WASM -#ifdef USE_FUNCLET_CALL_HELPER // This is an assembly helper that enables us to call into EH funclets. EXTERN_C DWORD_PTR STDCALL CallEHFunclet(Object *pThrowable, UINT_PTR pFuncletToInvoke, UINT_PTR *pFirstNonVolReg, UINT_PTR *pFuncletCallerSP); @@ -2080,7 +2007,9 @@ static inline UINT_PTR CastHandlerFn(HandlerFn *pfnHandler) static inline UINT_PTR *GetFirstNonVolatileRegisterAddress(PCONTEXT pContextRecord) { -#if defined(TARGET_ARM) +#if defined(TARGET_AMD64) + return (UINT_PTR*)&(pContextRecord->Rbx); +#elif defined(TARGET_ARM) return (UINT_PTR*)&(pContextRecord->R4); #elif defined(TARGET_ARM64) return (UINT_PTR*)&(pContextRecord->X19); @@ -2098,18 +2027,24 @@ static inline UINT_PTR *GetFirstNonVolatileRegisterAddress(PCONTEXT pContextReco static inline TADDR GetFrameRestoreBase(PCONTEXT pContextRecord) { -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - return GetSP(pContextRecord); +#if defined(TARGET_AMD64) + return pContextRecord->Rbp; #elif defined(TARGET_X86) return pContextRecord->Ebp; +#elif defined(TARGET_ARM64) + return pContextRecord->X29; +#elif defined(TARGET_ARM) + return pContextRecord->R11; +#elif defined(TARGET_LOONGARCH64) + return pContextRecord->Fp; +#elif defined(TARGET_RISCV64) + return pContextRecord->Fp; #else PORTABILITY_ASSERT("GetFrameRestoreBase"); return NULL; #endif } -#endif // USE_FUNCLET_CALL_HELPER - typedef DWORD_PTR (HandlerFn)(UINT_PTR uStackFrame, Object* 
pExceptionObj); static UINT_PTR GetEstablisherFrame(REGDISPLAY* pvRegDisplay, ExInfo* exInfo) { @@ -2144,18 +2079,6 @@ static UINT_PTR GetEstablisherFrame(REGDISPLAY* pvRegDisplay, ExInfo* exInfo) // * Catch funclet: address to resume at after the catch returns // * Finally funclet: unused // * Filter funclet: result of the filter funclet (EXCEPTION_CONTINUE_SEARCH (0) or EXCEPTION_EXECUTE_HANDLER (1)) -#ifndef USE_FUNCLET_CALL_HELPER -// NOTE: This function must be prevented from calling the actual funclet via a tail call to ensure -// that the m_csfEHClause is really set to what is a SP of the caller frame of the funclet. The -// StackFrameIterator relies on this. -#ifdef _MSC_VER -#pragma optimize("", off) -#elif defined(__clang__) -[[clang::disable_tail_calls]] -#else -[[gnu::optimize("O0")]] -#endif -#endif // USE_FUNCLET_CALL_HELPER DWORD_PTR EECodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDISPLAY *pRD, ExInfo *pExInfo, bool isFilterFunclet) { DWORD_PTR dwResult = 0; @@ -2164,7 +2087,6 @@ DWORD_PTR EECodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDIS #else HandlerFn* pfnHandler = (HandlerFn*)pHandler; -#ifdef USE_FUNCLET_CALL_HELPER // Since the actual caller of the funclet is the assembly helper, pass the reference // to the CallerStackFrame instance so that it can be updated. UINT_PTR *pFuncletCallerSP = &(pExInfo->m_csfEHClause.SP); @@ -2175,11 +2097,7 @@ DWORD_PTR EECodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDIS // it will retrieve the framepointer for accessing the locals in the parent // method. 
dwResult = CallEHFilterFunclet(OBJECTREFToObject(throwable), -#ifdef USE_CURRENT_CONTEXT_IN_FILTER GetFrameRestoreBase(pRD->pCurrentContext), -#else - GetFrameRestoreBase(pRD->pCallerContext), -#endif CastHandlerFn(pfnHandler), pFuncletCallerSP); } @@ -2190,21 +2108,10 @@ DWORD_PTR EECodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDIS GetFirstNonVolatileRegisterAddress(pRD->pCurrentContext), pFuncletCallerSP); } -#else - pExInfo->m_csfEHClause = CallerStackFrame((UINT_PTR)GetCurrentSP()); - - UINT_PTR establisherFrame = GetEstablisherFrame(pRD, pExInfo); - dwResult = pfnHandler(establisherFrame, OBJECTREFToObject(throwable)); -#endif #endif // TARGET_WASM return dwResult; } -#ifndef USE_FUNCLET_CALL_HELPER -#ifdef _MSC_VER -#pragma optimize("", on) -#endif -#endif // USE_FUNCLET_CALL_HELPER #ifdef FEATURE_INTERPRETER DWORD_PTR InterpreterCodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDISPLAY *pRD, ExInfo *pExInfo, bool isFilter) diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp index ca362d44d51083..7bfc55436cbaf4 100644 --- a/src/coreclr/vm/exceptionhandling.cpp +++ b/src/coreclr/vm/exceptionhandling.cpp @@ -3099,9 +3099,8 @@ extern "C" void * QCALLTYPE CallCatchFunclet(QCall::ObjectHandleOnStack exceptio dwResumePC = exInfo->m_frameIter.m_crawl.GetCodeManager()->CallFunclet(throwable, pHandlerIP, pvRegDisplay, exInfo, false /* isFilterFunclet */); -#ifdef USE_FUNCLET_CALL_HELPER FixContext(pvRegDisplay->pCurrentContext); -#endif + // Profiler, debugger and ETW events exInfo->MakeCallbacksRelatedToHandler(false, pThread, pMD, &exInfo->m_ClauseForCatch, (DWORD_PTR)pHandlerIP, spForDebugger); SetIP(pvRegDisplay->pCurrentContext, dwResumePC); diff --git a/src/coreclr/vm/exceptionhandling.h b/src/coreclr/vm/exceptionhandling.h index 49bbcfc1dfe72e..35321dcef74da5 100644 --- a/src/coreclr/vm/exceptionhandling.h +++ b/src/coreclr/vm/exceptionhandling.h @@ -70,8 +70,4 @@ enum class 
InlinedCallFrameMarker #define USE_CURRENT_CONTEXT_IN_FILTER #endif // TARGET_X86 -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_X86) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) -#define USE_FUNCLET_CALL_HELPER -#endif // TARGET_ARM || TARGET_ARM64 || TARGET_X86 || TARGET_LOONGARCH64 || TARGET_RISCV64 - #endif // __EXCEPTION_HANDLING_h__ diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index 6f2a89383852c4..78c44dffa84a22 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -879,12 +879,14 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // On entry: // // $a0 = throwable - // $a1 = SP of the caller of the method/funclet containing the filter + // $a1 = FP of main function // $a2 = PC to invoke // $a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. // // Save the SP of this function st.d $fp, $a3, 0 + // Restore frame pointer + move $fp, $a1 // Invoke the filter funclet jirl $ra, $a2, 0 diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index e548ddd32d31c6..2c049b7a44e2b1 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -736,12 +736,14 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // On entry: // // a0 = throwable - // a1 = SP of the caller of the method/funclet containing the filter + // a1 = FP of main function // a2 = PC to invoke // a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
// // Save the SP of this function sd fp, 0(a3) + // Restore frame pointer + mv fp, a1 // Invoke the filter funclet jalr a2 From 4319268f72ab24fb5d31e6f7829cae0ec144c519 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 16:54:51 +0200 Subject: [PATCH 02/52] Do not emit and use PSPSym in JIT --- src/coreclr/jit/codegen.h | 81 --------- src/coreclr/jit/codegenarm.cpp | 150 +--------------- src/coreclr/jit/codegenarm64.cpp | 236 +++---------------------- src/coreclr/jit/codegenarmarch.cpp | 7 +- src/coreclr/jit/codegencommon.cpp | 22 +-- src/coreclr/jit/codegenloongarch64.cpp | 176 +----------------- src/coreclr/jit/codegenriscv64.cpp | 135 +------------- src/coreclr/jit/codegenxarch.cpp | 116 +----------- src/coreclr/jit/compiler.h | 17 -- src/coreclr/jit/compiler.hpp | 2 - src/coreclr/jit/flowgraph.cpp | 12 -- src/coreclr/jit/gcencode.cpp | 10 -- src/coreclr/jit/gentree.cpp | 4 - src/coreclr/jit/lclvars.cpp | 75 +------- src/coreclr/jit/targetamd64.h | 8 +- 15 files changed, 59 insertions(+), 992 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 99cb565b68ea62..a2ac56cb63e795 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -462,8 +462,6 @@ class CodeGen final : public CodeGenInterface regMaskTP fiSaveRegs; // Set of registers saved in the funclet prolog (includes LR) unsigned fiFunctionCallerSPtoFPdelta; // Delta between caller SP and the frame pointer unsigned fiSpDelta; // Stack pointer delta - unsigned fiPSP_slot_SP_offset; // PSP slot offset from SP - int fiPSP_slot_CallerSP_offset; // PSP slot offset from Caller SP }; FuncletFrameInfoDsc genFuncletInfo; @@ -479,9 +477,7 @@ class CodeGen final : public CodeGenInterface int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function // (negative) int fiSP_to_FPLR_save_delta; // FP/LR register save offset from SP (positive) - int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) 
int fiSP_to_CalleeSave_delta; // First callee-saved register slot offset from SP (positive) - int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details. int fiSpDelta1; // Stack pointer delta 1 (negative) int fiSpDelta2; // Stack pointer delta 2 (negative) @@ -498,7 +494,6 @@ class CodeGen final : public CodeGenInterface { unsigned fiFunction_InitialSP_to_FP_delta; // Delta between Initial-SP and the frame pointer unsigned fiSpDelta; // Stack pointer delta - int fiPSP_slot_InitialSP_offset; // PSP slot offset from Initial-SP }; FuncletFrameInfoDsc genFuncletInfo; @@ -514,8 +509,6 @@ class CodeGen final : public CodeGenInterface int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function // (negative) int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) - int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive) - int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative) int fiSpDelta; // Stack pointer delta (negative) }; @@ -612,80 +605,6 @@ class CodeGen final : public CodeGenInterface void genFuncletEpilog(); void genCaptureFuncletPrologEpilogInfo(); - /*----------------------------------------------------------------------------- - * - * Set the main function PSPSym value in the frame. - * Funclets use different code to load the PSP sym and save it in their frame. - * See the document "CLR ABI.md" for a full description of the PSPSym. - * The PSPSym section of that document is copied here. - * - *********************************** - * The name PSPSym stands for Previous Stack Pointer Symbol. It is how a funclet - * accesses locals from the main function body. - * - * First, two definitions. - * - * Caller-SP is the value of the stack pointer in a function's caller before the call - * instruction is executed. 
That is, when function A calls function B, Caller-SP for B - * is the value of the stack pointer immediately before the call instruction in A - * (calling B) was executed. Note that this definition holds for both AMD64, which - * pushes the return value when a call instruction is executed, and for ARM, which - * doesn't. For AMD64, Caller-SP is the address above the call return address. - * - * Initial-SP is the initial value of the stack pointer after the fixed-size portion of - * the frame has been allocated. That is, before any "alloca"-type allocations. - * - * The PSPSym is a pointer-sized local variable in the frame of the main function and - * of each funclet. The value stored in PSPSym is the value of Initial-SP/Caller-SP - * for the main function. The stack offset of the PSPSym is reported to the VM in the - * GC information header. The value reported in the GC information is the offset of the - * PSPSym from Initial-SP/Caller-SP. (Note that both the value stored, and the way the - * value is reported to the VM, differs between architectures. In particular, note that - * most things in the GC information header are reported as offsets relative to Caller-SP, - * but PSPSym on AMD64 is one (maybe the only) exception.) - * - * The VM uses the PSPSym to find other locals it cares about (such as the generics context - * in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that - * the frame pointer is the same value in a funclet as it is in the main function body. - * - * When a funclet is called, it is passed the Establisher Frame Pointer. For AMD64 this is - * true for all funclets and it is passed as the first argument in RCX, but for ARM this is - * only true for first pass funclets (currently just filters) and it is passed as the second - * argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent" - * frame in the exception processing system. 
For the CLR, it points either to the main function - * frame or a dynamically enclosing funclet frame from the same function, for the funclet being - * invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on ARM. - * - * Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we - * don't know if the Establisher Frame is from the main function or a funclet, we design the - * main function and funclet frame layouts to place the PSPSym at an identical, small, constant - * offset from the Establisher Frame in each case. (This is also required because we only report - * a single offset to the PSPSym in the GC information, and that offset must be valid for the main - * function and all of its funclets). Then, the funclet uses this known offset to compute the - * PSPSym address and read its value. From this, it can compute the value of the frame pointer - * (which is a constant offset from the PSPSym value) and set the frame register to be the same - * as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's - * PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular, - * for every nested funclet invocation. - * - * On ARM, for all second pass funclets (finally, fault, catch, and filter-handler) the VM - * restores all non-volatile registers to their values within the parent frame. This includes - * the frame register (R11). Thus, the PSPSym is not used to recompute the frame pointer register - * in this case, though the PSPSym is copied to the funclet's frame, as for all funclets. - * - * Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument - * (REG_EXCEPTION_OBJECT). On AMD64 it is the second argument and thus passed in RDX. On - * ARM this is the first argument and passed in R0. 
- * - * (Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always - * pass the correct establisher frame to the funclet. Funclet may receive establisher frame of - * funclet when expecting that of original routine." It indicates this is the reason that a PSPSym - * is required in all funclets as well as the main function, whereas if the establisher frame was - * correctly reported, the PSPSym could be omitted in some cases.) - *********************************** - */ - void genSetPSPSym(regNumber initReg, bool* pInitRegZeroed); - void genUpdateCurrentFunclet(BasicBlock* block); void genGeneratePrologsAndEpilogs(); diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index c91f401cc4c697..4491b260e005e8 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -2223,7 +2223,7 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) * Funclets have the following incoming arguments: * * catch: r0 = the exception object that was caught (see GT_CATCH_ARG) - * filter: r0 = the exception object to filter (see GT_CATCH_ARG), r1 = CallerSP of the containing function + * filter: r0 = the exception object to filter (see GT_CATCH_ARG) * finally/fault: none * * Funclets set the following registers on exit: @@ -2239,50 +2239,9 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) * ; actually use in the funclet. Currently, we save the same set of callee-saved regs * ; calculated for the entire function. * sub sp, XXX ; Establish the rest of the frame. - * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned - * ; up to preserve stack alignment. If we push an odd number of registers, we also - * ; generate this, to keep the stack aligned. + * ; XXX is determined by lvaOutgoingArgSpaceSize, aligned up to preserve stack alignment. + * ; If we push an odd number of registers, we also generate this, to keep the stack aligned. 
* - * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested - * ; filters. - * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet - * ; epilog. - * - * if (this is a filter funclet) - * { - * // r1 on entry to a filter funclet is CallerSP of the containing function: - * // either the main function, or the funclet for a handler that this filter is dynamically nested within. - * // Note that a filter can be dynamically nested within a funclet even if it is not statically within - * // a funclet. Consider: - * // - * // try { - * // try { - * // throw new Exception(); - * // } catch(Exception) { - * // throw new Exception(); // The exception thrown here ... - * // } - * // } filter { // ... will be processed here, while the "catch" funclet frame is - * // // still on the stack - * // } filter-handler { - * // } - * // - * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the - * // enclosing frame will be a funclet or main function. We won't know any time there is a filter protecting - * // nested EH. To simplify, we just always create a main function PSP for any function with a filter. - * - * ldr r1, [r1 - PSP_slot_CallerSP_offset] ; Load the CallerSP of the main function (stored in the PSP of - * ; the dynamically containing funclet or function) - * str r1, [sp + PSP_slot_SP_offset] ; store the PSP - * sub r11, r1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer - * } - * else - * { - * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. - * // TODO-ARM-CQ: if VM set r1 to CallerSP on entry, like for filters, we could save an instruction. - * - * add r3, r11, Function_CallerSP_to_FP_delta ; compute the CallerSP, given the frame pointer. r3 is scratch. 
- * str r3, [sp + PSP_slot_SP_offset] ; store the PSP - * } * * The epilog sequence is then: * @@ -2301,11 +2260,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) * +=======================+ <---- Caller's SP * |Callee saved registers | * |-----------------------| - * |Pre-spill regs space | // This is only necessary to keep the PSP slot at the same offset - * | | // in function and funclet - * |-----------------------| - * | PSP slot | // Omitted in NativeAOT ABI - * |-----------------------| * ~ possible 4 byte pad ~ * ~ for alignment ~ * |-----------------------| @@ -2375,31 +2329,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - - // If there is no PSPSym (NativeAOT ABI), we are done. - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - if (isFilter) - { - // This is the first block of a filter - - GetEmitter()->emitIns_R_R_I(INS_ldr, EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiPSP_slot_CallerSP_offset); - regSet.verifyRegUsed(REG_R1); - GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiPSP_slot_SP_offset); - GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_FPBASE, REG_R1, - genFuncletInfo.fiFunctionCallerSPtoFPdelta); - } - else - { - // This is a non-filter funclet - GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, - genFuncletInfo.fiFunctionCallerSPtoFPdelta); - regSet.verifyRegUsed(REG_R3); - GetEmitter()->emitIns_R_R_I(INS_str, EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiPSP_slot_SP_offset); - } } /***************************************************************************** @@ -2504,16 +2433,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize; unsigned spDelta = funcletFrameSizeAligned - saveRegsSize; - unsigned PSP_slot_SP_offset = compiler->lvaOutgoingArgSpaceSize + 
funcletFrameAlignmentPad; - int PSP_slot_CallerSP_offset = - -(int)(funcletFrameSize - compiler->lvaOutgoingArgSpaceSize); // NOTE: it's negative! - /* Now save it for future use */ genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; genFuncletInfo.fiSpDelta = spDelta; - genFuncletInfo.fiPSP_slot_SP_offset = PSP_slot_SP_offset; - genFuncletInfo.fiPSP_slot_CallerSP_offset = PSP_slot_CallerSP_offset; #ifdef DEBUG if (verbose) @@ -2525,78 +2448,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() dspRegMask(rsMaskSaveRegs); printf("\n"); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); - printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset); - printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset); - - if (PSP_slot_CallerSP_offset != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) - { - printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); - } } #endif // DEBUG - - assert(PSP_slot_CallerSP_offset < 0); - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - assert(PSP_slot_CallerSP_offset == - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main - // function and funclet! 
- } } } -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - - // We either generate: - // add r1, r11, 8 - // str r1, [reg + PSPSymOffset] - // or: - // add r1, sp, 76 - // str r1, [reg + PSPSymOffset] - // depending on the smallest encoding - - int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); - - int callerSPOffs; - regNumber regBase; - - if (arm_Valid_Imm_For_Add_SP(SPtoCallerSPdelta)) - { - // use the "add , sp, imm" form - - callerSPOffs = SPtoCallerSPdelta; - regBase = REG_SPBASE; - } - else - { - // use the "add , r11, imm" form - - int FPtoCallerSPdelta = -genCallerSPtoFPdelta(); - noway_assert(arm_Valid_Imm_For_Add(FPtoCallerSPdelta, INS_FLAGS_DONT_CARE)); - - callerSPOffs = FPtoCallerSPdelta; - regBase = REG_FPBASE; - } - - // We will just use the initReg since it is an available register - // and we are probably done using it anyway... - regNumber regTmp = initReg; - *pInitRegZeroed = false; - - GetEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, regTmp, regBase, callerSPOffs); - GetEmitter()->emitIns_S_R(INS_str, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); -} - //----------------------------------------------------------------------------- // genZeroInitFrameUsingBlockInit: architecture-specific helper for genZeroInitFrame in the case // `genUseBlockInit` is set. 
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 3627913ac99c01..d9afebc629aa05 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1101,7 +1101,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * Funclets have the following incoming arguments: * * catch: x0 = the exception object that was caught (see GT_CATCH_ARG) - * filter: x0 = the exception object to filter (see GT_CATCH_ARG), x1 = CallerSP of the containing function + * filter: x0 = the exception object to filter (see GT_CATCH_ARG) * finally/fault: none * * Funclets set the following registers on exit: @@ -1132,8 +1132,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. * |-----------------------| * | Saved FP, LR | // 16 bytes @@ -1163,8 +1161,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. 
* |-----------------------| * | Saved FP, LR | // 16 bytes @@ -1197,8 +1193,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned * |-----------------------| * | Saved FP, LR | // 16 bytes <-- SP after first adjustment (points at saved FP) @@ -1214,27 +1208,27 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * * Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3, * it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack - * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 240 bytes: + * pointer needs to be 16 byte aligned at all times. The size of the callee-saved registers space is a maximum of 240 bytes: * * FP,LR registers * 10 int callee-saved register x19-x28 * 8 float callee-saved registers v8-v15 * 8 saved integer argument registers x0-x7, if varargs function - * 1 PSP slot - * 1 alignment slot or monitor acquired slot + * 1 monitor acquired slot + * 1 alignment slot * == 30 slots * 8 bytes = 240 bytes. * * The outgoing argument size, however, can be very large, if we call a function that takes a large number of * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of * outgoing arguments for any call). 
In that case, we need to 16-byte align the initial change to SP, before - * saving off the callee-saved registers and establishing the PSPsym, so we can use the limited immediate offset - * encodings we have available, before doing another 16-byte aligned SP adjustment to create the outgoing argument - * space. Both changes to SP might need to add alignment padding. + * saving off the callee-saved registers, so we can use the limited immediate offset encodings we have available, + * before doing another 16-byte aligned SP adjustment to create the outgoing argument space. Both changes to + * SP might need to add alignment padding. * * In addition to the above "standard" frames, we also need to support a frame where the saved FP/LR are at the - * highest addresses. This is to match the frame layout (specifically, callee-saved registers including FP/LR - * and the PSPSym) that is used in the main function when a GS cookie is required due to the use of localloc. + * highest addresses. This is to match the frame layout (specifically, callee-saved registers including FP/LR) + * that is used in the main function when a GS cookie is required due to the use of localloc. * (Note that localloc cannot be used in a funclet.) In these variants, not only has the position of FP/LR * changed, but where the alignment padding is placed has also changed. * @@ -1243,13 +1237,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * sub sp,sp,#framesz ; establish the frame * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary * stp fp,lr,[sp,#yyy] ; save FP/LR. - * ; write PSPSym * * The "#framesz <= 512" condition ensures that after we've established the frame, we can use "stp" with its * maximum allowed offset (504) to save the callee-saved register at the highest address. 
* - * We use "sub" instead of folding it into the next instruction as a predecrement, as we need to write PSPSym - * at the bottom of the stack, and there might also be an alignment padding slot. + * We use "sub" instead of folding it into the next instruction as a predecrement as there might also be an + * alignment padding slot. * * The funclet frame is thus: * @@ -1269,8 +1262,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned. * |-----------------------| * | Outgoing arg space | // multiple of 8 bytes (optional; if #outsz > 0) @@ -1286,7 +1277,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary * stp fp,lr,[sp,#yyy] ; save FP/LR. * sub sp,sp,#outsz ; create space for outgoing argument space - * ; write PSPSym * * For large frames with "#framesz > 512", we must do one SP adjustment first, after which we can save callee-saved * registers with up to the maximum "stp" offset of 504. Then, we can establish the rest of the frame (namely, the @@ -1310,10 +1300,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| - * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned <-- SP after first adjustment (points at alignment padding or PSP slot) - * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space). 
* |-----------------------| * | Outgoing arg space | // multiple of 8 bytes @@ -1326,48 +1312,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * Note that in this case we might have 16 bytes of alignment that is adjacent. This is because we are doing 2 SP * subtractions, and each one must be aligned up to 16 bytes. * - * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP - * as in the main function. - * - * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we - * must add buffer space for the saved varargs argument registers here, if the main function did the same. - * - * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. - * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. - * - * if (this is a filter funclet) - * { - * // x1 on entry to a filter funclet is CallerSP of the containing function: - * // either the main function, or the funclet for a handler that this filter is dynamically nested within. - * // Note that a filter can be dynamically nested within a funclet even if it is not statically within - * // a funclet. Consider: - * // - * // try { - * // try { - * // throw new Exception(); - * // } catch(Exception) { - * // throw new Exception(); // The exception thrown here ... - * // } - * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack - * // } filter-handler { - * // } - * // - * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will - * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. 
To simplify, we just always - * // create a main function PSP for any function with a filter. - * - * ldr x1, [x1, #CallerSP_to_PSP_slot_delta] ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) - * str x1, [sp, #SP_to_PSP_slot_delta] ; store the PSP - * add fp, x1, #Function_CallerSP_to_FP_delta ; re-establish the frame pointer - * } - * else - * { - * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. - * // TODO-ARM64-CQ: if VM set x1 to CallerSP on entry, like for filters, we could save an instruction. - * - * add x3, fp, #Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. x3 is scratch. - * str x3, [sp, #SP_to_PSP_slot_delta] ; store the PSP - * } + * Funclets do not have varargs arguments. * * An example epilog sequence is then: * @@ -1537,44 +1482,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - - // If there is no PSPSym (NativeAOT ABI), we are done. Otherwise, we need to set up the PSPSym in the funclet frame. 
- if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - if (isFilter) - { - // This is the first block of a filter - // Note that register x1 = CallerSP of the containing function - // X1 is overwritten by the first Load (new callerSP) - // X2 is scratch when we have a large constant offset - - // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or - // function) - genInstrWithConstant(INS_ldr, EA_PTRSIZE, REG_R1, REG_R1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, - REG_R2, false); - regSet.verifyRegUsed(REG_R1); - - // Store the PSP value (aka CallerSP) - genInstrWithConstant(INS_str, EA_PTRSIZE, REG_R1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, - false); - - // re-establish the frame pointer - genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, - genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false); - } - else // This is a non-filter funclet - { - // X3 is scratch, X2 can also become scratch - - // compute the CallerSP, given the frame pointer. x3 is scratch. - genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, - -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false); - regSet.verifyRegUsed(REG_R3); - - genInstrWithConstant(INS_str, EA_PTRSIZE, REG_R3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, - false); - } - } } /***************************************************************************** @@ -1747,33 +1654,19 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // The frame size and offsets must be finalized assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); - unsigned const PSPSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? REGSIZE_BYTES : 0; - - // Because a method and funclets must have the same caller-relative PSPSym offset, - // if there is a PSPSym, we have to pad the funclet frame size for OSR. 
- // - unsigned osrPad = 0; - if (compiler->opts.IsOSR() && (PSPSize > 0)) - { - osrPad = compiler->info.compPatchpointInfo->TotalFrameSize(); - - // OSR pad must be already aligned to stack size. - assert((osrPad % STACK_ALIGN) == 0); - } - - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() - osrPad; + genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; assert((rsMaskSaveRegs & RBM_LR) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); - unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); - unsigned saveRegsPlusPSPSize = saveRegsCount * REGSIZE_BYTES + PSPSize; + unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); + unsigned saveRegsSize = saveRegsCount * REGSIZE_BYTES; if (compiler->info.compIsVarArgs) { // For varargs we always save all of the integer register arguments // so that they are contiguous with the incoming stack arguments. - saveRegsPlusPSPSize += MAX_REG_ARG * REGSIZE_BYTES; + saveRegsSize += MAX_REG_ARG * REGSIZE_BYTES; } if (compiler->lvaMonAcquired != BAD_VAR_NUM && !compiler->opts.IsOSR()) @@ -1781,10 +1674,10 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // We furthermore allocate the "monitor acquired" bool between PSP and // the saved registers because this is part of the EnC header. // Note that OSR methods reuse the monitor bool created by tier 0. 
- saveRegsPlusPSPSize += compiler->lvaLclStackHomeSize(compiler->lvaMonAcquired); + saveRegsSize += compiler->lvaLclStackHomeSize(compiler->lvaMonAcquired); } - unsigned const saveRegsPlusPSPSizeAligned = roundUp(saveRegsPlusPSPSize, STACK_ALIGN); + unsigned const saveRegsSizeAligned = roundUp(saveRegsSize, STACK_ALIGN); assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); unsigned const outgoingArgSpaceAligned = roundUp(compiler->lvaOutgoingArgSpaceSize, STACK_ALIGN); @@ -1792,12 +1685,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // If do two SP adjustments, each one must be aligned. This represents the largest possible stack size, if two // separate alignment slots are required. unsigned const twoSpAdjustmentFuncletFrameSizeAligned = - osrPad + saveRegsPlusPSPSizeAligned + outgoingArgSpaceAligned; + saveRegsSizeAligned + outgoingArgSpaceAligned; assert((twoSpAdjustmentFuncletFrameSizeAligned % STACK_ALIGN) == 0); int SP_to_FPLR_save_delta; - int SP_to_PSP_slot_delta; - int CallerSP_to_PSP_slot_delta; + int SP_to_CalleeSave_delta; // Are we stressing frame type 5? Don't do it unless we have non-zero outgoing arg space. 
const bool useFrameType5 = @@ -1806,7 +1698,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() if ((twoSpAdjustmentFuncletFrameSizeAligned <= 512) && !useFrameType5) { unsigned const oneSpAdjustmentFuncletFrameSize = - osrPad + saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize; + saveRegsSize + compiler->lvaOutgoingArgSpaceSize; unsigned const oneSpAdjustmentFuncletFrameSizeAligned = roundUp(oneSpAdjustmentFuncletFrameSize, STACK_ALIGN); assert(oneSpAdjustmentFuncletFrameSizeAligned <= twoSpAdjustmentFuncletFrameSizeAligned); @@ -1823,17 +1715,15 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() SP_to_FPLR_save_delta -= MAX_REG_ARG * REGSIZE_BYTES; } - SP_to_PSP_slot_delta = compiler->lvaOutgoingArgSpaceSize + oneSpAdjustmentFuncletFrameSizeAlignmentPad; - CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize); + SP_to_CalleeSave_delta = compiler->lvaOutgoingArgSpaceSize + oneSpAdjustmentFuncletFrameSizeAlignmentPad; genFuncletInfo.fiFrameType = 4; } else { SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize; - SP_to_PSP_slot_delta = + SP_to_CalleeSave_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + oneSpAdjustmentFuncletFrameSizeAlignmentPad; - CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES); if (compiler->lvaOutgoingArgSpaceSize == 0) { @@ -1852,8 +1742,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() } else { - unsigned const saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize; - assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES)); + unsigned const saveRegsAlignmentPad = saveRegsSizeAligned - saveRegsSize; + assert((saveRegsAlignmentPad == 0) || (saveRegsAlignmentPad == REGSIZE_BYTES)); if (genSaveFpLrWithAllCalleeSavedRegisters) { @@ -1863,22 +1753,19 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() SP_to_FPLR_save_delta -= MAX_REG_ARG * REGSIZE_BYTES; } - SP_to_PSP_slot_delta = 
outgoingArgSpaceAligned + saveRegsPlusPSPAlignmentPad; - CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSize); + SP_to_CalleeSave_delta = outgoingArgSpaceAligned + saveRegsAlignmentPad; genFuncletInfo.fiFrameType = 5; } else { SP_to_FPLR_save_delta = outgoingArgSpaceAligned; - SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad; - CallerSP_to_PSP_slot_delta = -(int)(osrPad + saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - - saveRegsPlusPSPAlignmentPad); + SP_to_CalleeSave_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsAlignmentPad; genFuncletInfo.fiFrameType = 3; } - genFuncletInfo.fiSpDelta1 = -(int)(osrPad + saveRegsPlusPSPSizeAligned); + genFuncletInfo.fiSpDelta1 = -(int)saveRegsSizeAligned; genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned; assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)twoSpAdjustmentFuncletFrameSizeAligned); @@ -1888,9 +1775,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta; - genFuncletInfo.fiSP_to_PSP_slot_delta = SP_to_PSP_slot_delta; - genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_PSP_slot_delta + PSPSize; - genFuncletInfo.fiCallerSP_to_PSP_slot_delta = CallerSP_to_PSP_slot_delta; + genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_CalleeSave_delta; #ifdef DEBUG if (verbose) @@ -1900,70 +1785,18 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); - if (compiler->opts.IsOSR()) - { - printf(" OSR Pad: %d\n", osrPad); - } printf(" SP to FP/LR save location delta: %d\n", genFuncletInfo.fiSP_to_FPLR_save_delta); - printf(" SP to PSP slot delta: %d\n", genFuncletInfo.fiSP_to_PSP_slot_delta); printf(" SP to callee-saved area delta: %d\n", genFuncletInfo.fiSP_to_CalleeSave_delta); - printf(" Caller SP to PSP slot delta: 
%d\n", genFuncletInfo.fiCallerSP_to_PSP_slot_delta); printf(" Frame type: %d\n", genFuncletInfo.fiFrameType); printf(" SP delta 1: %d\n", genFuncletInfo.fiSpDelta1); printf(" SP delta 2: %d\n", genFuncletInfo.fiSpDelta2); - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for - // debugging - { - printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n", - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); - } - } } assert(genFuncletInfo.fiSP_to_FPLR_save_delta >= 0); - assert(genFuncletInfo.fiSP_to_PSP_slot_delta >= 0); assert(genFuncletInfo.fiSP_to_CalleeSave_delta >= 0); - assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta <= 0); - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and - // funclet! - } #endif // DEBUG } -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - - int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); - - if (compiler->opts.IsOSR()) - { - SPtoCallerSPdelta += compiler->info.compPatchpointInfo->TotalFrameSize(); - } - - // We will just use the initReg since it is an available register - // and we are probably done using it anyway... - regNumber regTmp = initReg; - *pInitRegZeroed = false; - - GetEmitter()->emitIns_R_R_Imm(INS_add, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta); - GetEmitter()->emitIns_S_R(INS_str, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); -} - //----------------------------------------------------------------------------- // genZeroInitFrameUsingBlockInit: architecture-specific helper for genZeroInitFrame in the case // `genUseBlockInit` is set. 
@@ -2179,20 +2012,10 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) BasicBlock* const nextBlock = block->Next(); // Generate a call to the finally, like this: - // mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used // bl finally-funclet // b finally-return // Only for non-retless finally calls // The 'b' can be a NOP if we're going to the next block. - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - GetEmitter()->emitIns_R_S(INS_ldr, EA_PTRSIZE, REG_R0, compiler->lvaPSPSym, 0); - } - else - { - GetEmitter()->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_R0, REG_SPBASE, /* canSkip */ false); - } - if (block->HasFlag(BBF_RETLESS_CALL)) { GetEmitter()->emitIns_J(INS_bl_local, block->GetTarget()); @@ -3152,7 +2975,6 @@ void CodeGen::genLclHeap(GenTree* tree) regNumber targetReg = tree->GetRegNum(); regNumber regCnt = REG_NA; - regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index c4cbf359d87229..96e73439067a67 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -4675,8 +4675,7 @@ void CodeGen::genPushCalleeSavedRegisters() // 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). // // For functions with GS and localloc, we change the frame so the frame pointer and LR are saved at the top - // of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same - // rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + // of the frame, just under the varargs registers (if any). // Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. 
// // The frames look like the following (simplified to only include components that matter for establishing the @@ -4694,8 +4693,6 @@ void CodeGen::genPushCalleeSavedRegisters() // |-----------------------| // | MonitorAcquired | // 8 bytes; for synchronized methods // |-----------------------| - // | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - // |-----------------------| // | locals, temps, etc. | // |-----------------------| // | possible GS cookie | @@ -4727,8 +4724,6 @@ void CodeGen::genPushCalleeSavedRegisters() // |-----------------------| // | MonitorAcquired | // 8 bytes; for synchronized methods // |-----------------------| - // | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - // |-----------------------| // | locals, temps, etc. | // |-----------------------| // | possible GS cookie | diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 60a44c3356c628..7e5421f96faecc 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -5172,18 +5172,6 @@ void CodeGen::genFnProlog() } #endif // DEBUG -#if defined(DEBUG) - - // We cannot force 0-initialization of the PSPSym - // as it will overwrite the real value - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - const LclVarDsc* varDsc = compiler->lvaGetDesc(compiler->lvaPSPSym); - assert(!varDsc->lvMustInit); - } - -#endif // DEBUG - /*------------------------------------------------------------------------- * * Record the stack frame ranges that will cover all of the tracked @@ -5673,13 +5661,9 @@ void CodeGen::genFnProlog() genZeroInitFrame(untrLclHi, untrLclLo, initReg, &initRegZeroed); - if (compiler->UsesFunclets()) - { - genSetPSPSym(initReg, &initRegZeroed); - } - else - { #if defined(FEATURE_EH_WINDOWS_X86) + if (!compiler->UsesFunclets()) + { // when compInitMem is true the genZeroInitFrame will zero out the shadow SP slots if (compiler->ehNeedsShadowSPslots() && !compiler->info.compInitMem) { @@ -5699,8 +5683,8 @@ void 
CodeGen::genFnProlog() GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, initReg, compiler->lvaShadowSPslotsVar, firstSlotOffs); } -#endif // FEATURE_EH_WINDOWS_X86 } +#endif // FEATURE_EH_WINDOWS_X86 genReportGenericContextArg(initReg, &initRegZeroed); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 32826a7f56e50f..3db4d9e8a5ebc2 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -327,7 +327,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * Funclets have the following incoming arguments: * * catch: a0 = the exception object that was caught (see GT_CATCH_ARG) - * filter: a0 = the exception object to filter (see GT_CATCH_ARG), a1 = CallerSP of the containing function + * filter: a0 = the exception object to filter (see GT_CATCH_ARG) * finally/fault: none * * Funclets set the following registers on exit: @@ -356,8 +356,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * |Callee saved registers | // multiple of 8 bytes, not including FP/RA * |-----------------------| * | Saved FP, RA | // 16 bytes @@ -377,54 +375,12 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of * outgoing arguments for any call). * - * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, - * and that location is the same relative to Caller-SP as in the main function where higher than - * the callee-saved registers. - * That is to say, the PSPSym's relative offset to Caller-SP is not depended on the callee-saved registers. 
* TODO-LoongArch64: the funclet's callee-saved registers should not shared with main function. * - * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we - * must add buffer space for the saved varargs/argument registers here, if the main function did the same. + * Funclets do not have varargs arguments. * * Note that localloc cannot be used in a funclet. * - * ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters. - * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog. - * - * if (this is a filter funclet) - * { - * // a1 on entry to a filter funclet is CallerSP of the containing function: - * // either the main function, or the funclet for a handler that this filter is dynamically nested within. - * // Note that a filter can be dynamically nested within a funclet even if it is not statically within - * // a funclet. Consider: - * // - * // try { - * // try { - * // throw new Exception(); - * // } catch(Exception) { - * // throw new Exception(); // The exception thrown here ... - * // } - * // } filter { // ... will be processed here, while the "catch" funclet frame is still on the stack - * // } filter-handler { - * // } - * // - * // Because of this, we need a PSP in the main function anytime a filter funclet doesn't know whether the enclosing frame will - * // be a funclet or main function. We won't know any time there is a filter protecting nested EH. To simplify, we just always - * // create a main function PSP for any function with a filter. 
- * - * ld.d a1,a1, CallerSP_to_PSP_slot_delta ; Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function) - * st.d a1,sp, SP_to_PSP_slot_delta ; store the PSP - * addi.d fp, a1, Function_CallerSP_to_FP_delta ; re-establish the frame pointer - * } - * else - * { - * // This is NOT a filter funclet. The VM re-establishes the frame pointer on entry. - * // TODO-LOONGARCH64-CQ: if VM set a1 to CallerSP on entry, like for filters, we could save an instruction. - * - * addi.d a3,fp,Function_FP_to_CallerSP_delta ; compute the CallerSP, given the frame pointer. a3 is scratch? - * st.d a3,sp,SP_to_PSP_slot_delta ; store the PSP - * } - * * An example epilog sequence is then: * * addi.d sp,sp,#outsz ; if any outgoing argument space @@ -512,45 +468,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - - // If there is no PSPSym (NativeAOT ABI), we are done. Otherwise, we need to set up the PSPSym in the functlet - // frame. 
- if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - if (isFilter) - { - // This is the first block of a filter - // Note that register a1 = CallerSP of the containing function - // A1 is overwritten by the first Load (new callerSP) - // A2 is scratch when we have a large constant offset - - // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or - // function) - genInstrWithConstant(INS_ld_d, EA_PTRSIZE, REG_A1, REG_A1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, - REG_A2, false); - regSet.verifyRegUsed(REG_A1); - - // Store the PSP value (aka CallerSP) - genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, - REG_A2, false); - - // re-establish the frame pointer - genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_FPBASE, REG_A1, - genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); - } - else // This is a non-filter funclet - { - // A3 is scratch, A2 can also become scratch. - - // compute the CallerSP, given the frame pointer. a3 is scratch? - genInstrWithConstant(INS_addi_d, EA_PTRSIZE, REG_A3, REG_FPBASE, - -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); - regSet.verifyRegUsed(REG_A3); - - genInstrWithConstant(INS_st_d, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, - REG_A2, false); - } - } } /***************************************************************************** @@ -628,20 +545,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert((rsMaskSaveRegs & RBM_RA) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); - // Because a method and funclets must have the same caller-relative PSPSym offset, - // if there is a PSPSym, we have to pad the funclet frame size for OSR. - // - int osrPad = 0; - if (compiler->opts.IsOSR()) - { - osrPad -= compiler->info.compPatchpointInfo->TotalFrameSize(); - - // OSR pad must be already aligned to stack size. 
- assert((osrPad % STACK_ALIGN) == 0); - } - /* Now save it for future use */ - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() + osrPad; + genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); int funcletFrameSize = compiler->lvaOutgoingArgSpaceSize; @@ -655,13 +560,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() delta_PSP -= TARGET_POINTER_SIZE; } - funcletFrameSize = funcletFrameSize - delta_PSP - osrPad; + funcletFrameSize = funcletFrameSize - delta_PSP; funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN); genFuncletInfo.fiSpDelta = -funcletFrameSize; genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSP_to_PSP_slot_delta = funcletFrameSize + delta_PSP + osrPad; - genFuncletInfo.fiCallerSP_to_PSP_slot_delta = osrPad + delta_PSP; #ifdef DEBUG if (verbose) @@ -671,22 +574,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); - if (compiler->opts.IsOSR()) - { - printf(" OSR Pad: %d\n", osrPad); - } printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); printf(" SP to CalleeSaved location delta: %d\n", genFuncletInfo.fiSP_to_CalleeSaved_delta); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); } assert(genFuncletInfo.fiSP_to_CalleeSaved_delta >= 0); - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and - // funclet! 
- } #endif // DEBUG } @@ -857,33 +749,6 @@ void CodeGen::genFnEpilog(BasicBlock* block) compiler->unwindEndEpilog(); } -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - - int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); - - if (compiler->opts.IsOSR()) - { - SPtoCallerSPdelta += compiler->info.compPatchpointInfo->TotalFrameSize(); - } - - // We will just use the initReg since it is an available register - // and we are probably done using it anyway... - regNumber regTmp = initReg; - *pInitRegZeroed = false; - - genInstrWithConstant(INS_addi_d, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta, regTmp, false); - GetEmitter()->emitIns_S_R(INS_st_d, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); -} - //----------------------------------------------------------------------------- // genZeroInitFrameUsingBlockInit: architecture-specific helper for genZeroInitFrame in the case // `genUseBlockInit` is set. @@ -1037,21 +902,6 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) BasicBlock* const nextBlock = block->Next(); - // Generate a call to the finally, like this: - // mov a0,qword ptr [fp + 10H] / sp // Load a0 with PSPSym, or sp if PSPSym is not used - // bl finally-funclet - // b finally-return // Only for non-retless finally calls - // The 'b' can be a NOP if we're going to the next block. 
- - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - GetEmitter()->emitIns_R_S(INS_ld_d, EA_PTRSIZE, REG_A0, compiler->lvaPSPSym, 0); - } - else - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_A0, REG_SPBASE, 0); - } - if (block->HasFlag(BBF_RETLESS_CALL)) { GetEmitter()->emitIns_J(INS_bl, block->GetTarget()); @@ -1588,7 +1438,6 @@ void CodeGen::genLclHeap(GenTree* tree) regNumber targetReg = tree->GetRegNum(); regNumber regCnt = REG_NA; - regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; // can optimize for loongarch. @@ -3863,10 +3712,6 @@ int CodeGenInterface::genSPtoFPdelta() const assert(compiler->compCalleeRegsPushed >= 2); // always FP/RA. int delta = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - delta -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { delta -= TARGET_POINTER_SIZE; @@ -7033,8 +6878,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). * * For functions with GS and localloc, we had saved the frame pointer and RA at the top - * of the frame. Note that the funclet frames must follow the same rule, - * and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + * of the frame. * Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. 
* * The frames look like the following (simplified to only include components that matter for establishing the @@ -7054,8 +6898,6 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSPSym | // 8 bytes, Only for frames with EH, (omitted in NativeAOT ABI) - * |-----------------------| * |Callee saved registers | // not including FP/RA; multiple of 8 bytes * |-----------------------| * | Saved RA | // 8 bytes @@ -7143,10 +6985,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe int totalFrameSize = genTotalFrameSize(); int leftFrameSize = 0; int localFrameSize = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - localFrameSize -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { localFrameSize -= TARGET_POINTER_SIZE; @@ -7213,10 +7051,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) int totalFrameSize = genTotalFrameSize(); int localFrameSize = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - localFrameSize -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { localFrameSize -= TARGET_POINTER_SIZE; diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 94d3e57aaace04..3d6cb567b43fa8 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -318,7 +318,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * Funclets have the following incoming arguments: * * catch: a0 = the exception object that was caught (see GT_CATCH_ARG) - * filter: a0 = the exception object to filter (see GT_CATCH_ARG), a1 = CallerSP of the containing function + * filter: a0 = the exception object to filter (see GT_CATCH_ARG) * finally/fault: none * * Funclets set the 
following registers on exit: @@ -347,8 +347,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * ~ alignment padding ~ // To make the whole frame 16 byte aligned * |-----------------------| * |Callee saved registers | // multiple of 8 bytes, not including FP/RA @@ -368,13 +366,7 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of * outgoing arguments for any call). * - * Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, - * and that location is the same relative to Caller-SP as in the main function where higher than - * the callee-saved registers. - * That is to say, the PSPSym's relative offset to Caller-SP is not depended on the callee-saved registers. - * - * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we - * must add buffer space for the saved varargs/argument registers here, if the main function did the same. + * Funclets do not have varargs arguments. * * Note that localloc cannot be used in a funclet. * @@ -464,45 +456,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - - // If there is no PSPSym (NativeAOT ABI), we are done. Otherwise, we need to set up the PSPSym in the functlet - // frame. 
- if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - if (isFilter) - { - // This is the first block of a filter - // Note that register a1 = CallerSP of the containing function - // A1 is overwritten by the first Load (new callerSP) - // A2 is scratch when we have a large constant offset - - // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or - // function) - genInstrWithConstant(INS_ld, EA_PTRSIZE, REG_A1, REG_A1, genFuncletInfo.fiCallerSP_to_PSP_slot_delta, - REG_A2, false); - regSet.verifyRegUsed(REG_A1); - - // Store the PSP value (aka CallerSP) - genInstrWithConstant(INS_sd, EA_PTRSIZE, REG_A1, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2, - false); - - // re-establish the frame pointer - genInstrWithConstant(INS_addi, EA_PTRSIZE, REG_FPBASE, REG_A1, - genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); - } - else // This is a non-filter funclet - { - // A3 is scratch, A2 can also become scratch. - - // compute the CallerSP, given the frame pointer. a3 is scratch? - genInstrWithConstant(INS_addi, EA_PTRSIZE, REG_A3, REG_FPBASE, - -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_A2, false); - regSet.verifyRegUsed(REG_A3); - - genInstrWithConstant(INS_sd, EA_PTRSIZE, REG_A3, REG_SPBASE, genFuncletInfo.fiSP_to_PSP_slot_delta, REG_A2, - false); - } - } } /***************************************************************************** @@ -580,20 +533,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert((rsMaskSaveRegs & RBM_RA) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); - // Because a method and funclets must have the same caller-relative PSPSym offset, - // if there is a PSPSym, we have to pad the funclet frame size for OSR. - // - int osrPad = 0; - if (compiler->opts.IsOSR()) - { - osrPad -= compiler->info.compPatchpointInfo->TotalFrameSize(); - - // OSR pad must be already aligned to stack size. 
- assert((osrPad % STACK_ALIGN) == 0); - } - /* Now save it for future use */ - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta() + osrPad; + genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); int funcletFrameSize = compiler->lvaOutgoingArgSpaceSize; @@ -607,13 +548,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() delta_PSP -= TARGET_POINTER_SIZE; } - funcletFrameSize = funcletFrameSize - delta_PSP - osrPad; + funcletFrameSize = funcletFrameSize - delta_PSP; funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN); genFuncletInfo.fiSpDelta = -funcletFrameSize; genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSP_to_PSP_slot_delta = funcletFrameSize + delta_PSP + osrPad; - genFuncletInfo.fiCallerSP_to_PSP_slot_delta = osrPad + delta_PSP; #ifdef DEBUG if (verbose) @@ -623,22 +562,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); - if (compiler->opts.IsOSR()) - { - printf(" OSR Pad: %d\n", osrPad); - } printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); printf(" SP to CalleeSaved location delta: %d\n", genFuncletInfo.fiSP_to_CalleeSaved_delta); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); } assert(genFuncletInfo.fiSP_to_CalleeSaved_delta >= 0); - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - assert(genFuncletInfo.fiCallerSP_to_PSP_slot_delta == - compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and - // funclet! 
- } #endif // DEBUG } @@ -809,33 +737,6 @@ void CodeGen::genFnEpilog(BasicBlock* block) compiler->unwindEndEpilog(); } -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - - int SPtoCallerSPdelta = -genCallerSPtoInitialSPdelta(); - - if (compiler->opts.IsOSR()) - { - SPtoCallerSPdelta += compiler->info.compPatchpointInfo->TotalFrameSize(); - } - - // We will just use the initReg since it is an available register - // and we are probably done using it anyway... - regNumber regTmp = initReg; - *pInitRegZeroed = false; - - genInstrWithConstant(INS_addi, EA_PTRSIZE, regTmp, REG_SPBASE, SPtoCallerSPdelta, regTmp, false); - GetEmitter()->emitIns_S_R(INS_sd, EA_PTRSIZE, regTmp, compiler->lvaPSPSym, 0); -} - void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNumber initReg, bool* pInitRegZeroed) { regNumber rAddr; @@ -964,20 +865,10 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) BasicBlock* const nextBlock = block->Next(); // Generate a call to the finally, like this: - // mov a0,qword ptr [fp + 10H] / sp // Load a0 with PSPSym, or sp if PSPSym is not used // jal finally-funclet // j finally-return // Only for non-retless finally calls // The 'b' can be a NOP if we're going to the next block. 
- if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - GetEmitter()->emitIns_R_S(INS_ld, EA_PTRSIZE, REG_A0, compiler->lvaPSPSym, 0); - } - else - { - GetEmitter()->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_A0, REG_SPBASE, 0); - } - if (block->HasFlag(BBF_RETLESS_CALL)) { GetEmitter()->emitIns_J(INS_jal, block->GetTarget()); @@ -1502,7 +1393,6 @@ void CodeGen::genLclHeap(GenTree* tree) regNumber targetReg = tree->GetRegNum(); regNumber regCnt = REG_NA; regNumber tempReg = REG_NA; - regNumber pspSymReg = REG_NA; var_types type = genActualType(size->gtType); emitAttr easz = emitTypeSize(type); BasicBlock* endLabel = nullptr; // can optimize for riscv64. @@ -3710,10 +3600,6 @@ int CodeGenInterface::genSPtoFPdelta() const assert(compiler->compCalleeRegsPushed >= 2); // always FP/RA. int delta = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - delta -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { delta -= TARGET_POINTER_SIZE; @@ -6884,8 +6770,7 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * 4. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc). * * For functions with GS and localloc, we had saved the frame pointer and RA at the top - * of the frame. Note that the funclet frames must follow the same rule, - * and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP. + * of the frame. * Since this frame type is relatively rare, we force using it via stress modes, for additional coverage. 
* * The frames look like the following (simplified to only include components that matter for establishing the @@ -6906,8 +6791,6 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) * |-----------------------| * | MonitorAcquired | // 8 bytes; for synchronized methods * |-----------------------| - * | PSP slot | // 8 bytes (omitted in NativeAOT ABI) - * |-----------------------| * |Callee saved registers | // not including FP/RA; multiple of 8 bytes * |-----------------------| * | Saved FP | // 8 bytes @@ -6995,10 +6878,6 @@ void CodeGen::genPushCalleeSavedRegisters(regNumber initReg, bool* pInitRegZeroe int totalFrameSize = genTotalFrameSize(); int leftFrameSize = 0; int localFrameSize = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - localFrameSize -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { localFrameSize -= TARGET_POINTER_SIZE; @@ -7069,10 +6948,6 @@ void CodeGen::genPopCalleeSavedRegisters(bool jmpEpilog) int totalFrameSize = genTotalFrameSize(); int localFrameSize = compiler->compLclFrameSize; - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - localFrameSize -= TARGET_POINTER_SIZE; - } if ((compiler->lvaMonAcquired != BAD_VAR_NUM) && !compiler->opts.IsOSR()) { localFrameSize -= TARGET_POINTER_SIZE; diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 2bac5cd88093f7..9d8a46890f7c2e 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -182,27 +182,9 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) if (compiler->UsesFunclets()) { // Generate a call to the finally, like this: - // mov rcx,qword ptr [rbp + 20H] // Load rcx with PSPSym // call finally-funclet // jmp finally-return // Only for non-retless finally calls // The jmp can be a NOP if we're going to the next block. 
- // If we're generating code for the main function (not a funclet), and there is no localloc, - // then RSP at this point is the same value as that stored in the PSPSym. So just copy RSP - // instead of loading the PSPSym in this case, or if PSPSym is not used (NativeAOT ABI). - - // x86 funclet ABI doesn't store the stack pointer in ECX. It's recovered from the context - // instead after executing the funclet. -#ifndef TARGET_X86 - if ((compiler->lvaPSPSym == BAD_VAR_NUM) || - (!compiler->compLocallocUsed && (compiler->funCurrentFunc()->funKind == FUNC_ROOT))) - { - inst_Mov(TYP_I_IMPL, REG_ARG_0, REG_SPBASE, /* canSkip */ false); - } - else - { - GetEmitter()->emitIns_R_S(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_ARG_0, compiler->lvaPSPSym, 0); - } -#endif // !TARGET_X86 if (block->HasFlag(BBF_RETLESS_CALL)) { @@ -10954,9 +10936,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) * * Funclets have the following incoming arguments: * - * catch/filter-handler: rcx = InitialSP, rdx = the exception object that was caught (see GT_CATCH_ARG) - * filter: rcx = InitialSP, rdx = the exception object to filter (see GT_CATCH_ARG) - * finally/fault: rcx = InitialSP + * catch/filter-handler: rcx = the exception object that was caught (see GT_CATCH_ARG) + * filter: rcx = the exception object to filter (see GT_CATCH_ARG) + * finally/fault: none * * Funclets set the following registers on exit: * @@ -10972,22 +10954,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) * ; in the funclet. Currently, we save the same set of callee-saved regs calculated for * ; the entire function. * sub sp, XXX ; Establish the rest of the frame. - * ; XXX is determined by lvaOutgoingArgSpaceSize plus space for the PSP slot, aligned - * ; up to preserve stack alignment. If we push an odd number of registers, we also - * ; generate this, to keep the stack aligned. - * - * ; Fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested - * ; filters. 
- * ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet - * ; epilog. - * ; Also, re-establish the frame pointer from the PSP. - * - * mov rbp, [rcx + PSP_slot_InitialSP_offset] ; Load the PSP (InitialSP of the main function stored in the - * ; PSP of the dynamically containing funclet or function) - * mov [rsp + PSP_slot_InitialSP_offset], rbp ; store the PSP in our frame - * lea ebp, [rbp + Function_InitialSP_to_FP_delta] ; re-establish the frame pointer of the parent frame. If - * ; Function_InitialSP_to_FP_delta==0, we don't need this - * ; instruction. + * ; XXX is determined by lvaOutgoingArgSpaceSize, aligned up to preserve stack alignment. + * ; If we push an odd number of registers, we also generate this, to keep the stack aligned. * * The epilog sequence is then: * @@ -11012,8 +10980,6 @@ void CodeGen::genFnEpilog(BasicBlock* block) * ~ possible 8 byte pad ~ * ~ for alignment ~ * |-----------------------| - * | PSP slot | // Omitted in NativeAOT ABI - * |-----------------------| * | Outgoing arg space | // this only exists if the function makes a call * |-----------------------| <---- Initial SP * | | | @@ -11050,10 +11016,9 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // We need to push ebp, since it's callee-saved. // We need to push the callee-saved registers. We only need to push the ones that we need, but we don't // keep track of that on a per-funclet basis, so we push the same set as in the main function. - // The only fixed-size frame we need to allocate is whatever is big enough for the PSPSym, since nothing else + // We do not need to allocate a fixed-size frame, since nothing else // is stored here (all temps are allocated in the parent frame). - // We do need to allocate the outgoing argument space, in case there are calls here. This must be the same - // size as the parent frame's outgoing argument space, to keep the PSPSym offset the same.
+ // We do need to allocate the outgoing argument space, in case there are calls here. inst_RV(INS_push, REG_FPBASE, TYP_REF); compiler->unwindPush(REG_FPBASE); @@ -11082,27 +11047,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - - // If there is no PSPSym (NativeAOT ABI), we are done. - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - GetEmitter()->emitIns_R_AR(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_ARG_0, genFuncletInfo.fiPSP_slot_InitialSP_offset); - - regSet.verifyRegUsed(REG_FPBASE); - - GetEmitter()->emitIns_AR_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, genFuncletInfo.fiPSP_slot_InitialSP_offset); - - if (genFuncletInfo.fiFunction_InitialSP_to_FP_delta != 0) - { - GetEmitter()->emitIns_R_AR(INS_lea, EA_PTRSIZE, REG_FPBASE, REG_FPBASE, - genFuncletInfo.fiFunction_InitialSP_to_FP_delta); - } - - // We've modified EBP, but not really. Say that we haven't... - regSet.rsRemoveRegsModified(RBM_FPBASE); } /***************************************************************************** @@ -11167,8 +11111,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() #endif // UNIX_AMD64_ABI unsigned offset = compiler->lvaOutgoingArgSpaceSize; - genFuncletInfo.fiPSP_slot_InitialSP_offset = offset; - // How much stack do we allocate in the funclet? // We need to 16-byte align the stack. @@ -11182,12 +11124,9 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() unsigned calleeFPRegsSavedSize = genCountBits(compiler->compCalleeFPRegsSavedMask) * XMM_REGSIZE_BYTES; unsigned FPRegsPad = (calleeFPRegsSavedSize > 0) ? AlignmentPad(totalFrameSize, XMM_REGSIZE_BYTES) : 0; - unsigned PSPSymSize = (compiler->lvaPSPSym != BAD_VAR_NUM) ? 
REGSIZE_BYTES : 0; - totalFrameSize += FPRegsPad // Padding before pushing entire xmm regs + calleeFPRegsSavedSize // pushed callee-saved float regs // below calculated 'pad' will go here - + PSPSymSize // PSPSym + compiler->lvaOutgoingArgSpaceSize // outgoing arg space ; @@ -11195,7 +11134,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() genFuncletInfo.fiSpDelta = FPRegsPad // Padding to align SP on XMM_REGSIZE_BYTES boundary + calleeFPRegsSavedSize // Callee saved xmm regs - + pad + PSPSymSize // PSPSym + + pad // padding + compiler->lvaOutgoingArgSpaceSize // outgoing arg space ; @@ -11206,14 +11145,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf("Funclet prolog / epilog info\n"); printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); - printf(" PSP slot Initial SP offset: %d\n", genFuncletInfo.fiPSP_slot_InitialSP_offset); - } - - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { - assert(genFuncletInfo.fiPSP_slot_InitialSP_offset == - compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); // same offset used in main function and - // funclet! } #endif // DEBUG } @@ -11259,8 +11190,6 @@ void CodeGen::genFuncletProlog(BasicBlock* block) // This is the end of the OS-reported prolog for purposes of unwinding compiler->unwindEndProlog(); - // TODO We may need EBP restore sequence here if we introduce PSPSym - #ifdef UNIX_X86_ABI // Add a padding for 16-byte alignment inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE); @@ -11306,35 +11235,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() #endif // TARGET_X86 -void CodeGen::genSetPSPSym(regNumber initReg, bool* pInitRegZeroed) -{ - assert(compiler->compGeneratingProlog); - - if (compiler->lvaPSPSym == BAD_VAR_NUM) - { - return; - } - - noway_assert(isFramePointerUsed()); // We need an explicit frame pointer - -#if defined(TARGET_AMD64) - - // The PSP sym value is Initial-SP, not Caller-SP! 
- // We assume that RSP is Initial-SP when this function is called. That is, the stack frame - // has been established. - // - // We generate: - // mov [rbp-20h], rsp // store the Initial-SP (our current rsp) in the PSPsym - - GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SPBASE, compiler->lvaPSPSym, 0); - -#else // TARGET* - - NYI("Set function PSP sym"); - -#endif // TARGET* -} - //----------------------------------------------------------------------------- // genZeroInitFrameUsingBlockInit: architecture-specific helper for genZeroInitFrame in the case // `genUseBlockInit` is set. diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ea7f52fb609ced..023787ef68f705 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2813,21 +2813,6 @@ class Compiler bool ehCallFinallyInCorrectRegion(BasicBlock* blockCallFinally, unsigned finallyIndex); #endif // DEBUG - // Do we need a PSPSym in the main function? For codegen purposes, we only need one - // if there is a filter that protects a region with a nested EH clause (such as a - // try/catch nested in the 'try' body of a try/filter/filter-handler). See - // genFuncletProlog() for more details. However, the VM seems to use it for more - // purposes, maybe including debugging. Until we are sure otherwise, always create - // a PSPSym for functions with any EH. - bool ehNeedsPSPSym() const - { -#ifdef TARGET_X86 - return false; -#else // TARGET_X86 - return compHndBBtabCount > 0; -#endif // TARGET_X86 - } - bool ehAnyFunclets(); // Are there any funclets in this function? 
unsigned ehFuncletCount(); // Return the count of funclets in the function @@ -4344,8 +4329,6 @@ class Compiler unsigned lvaStubArgumentVar = BAD_VAR_NUM; // variable representing the secret stub argument - unsigned lvaPSPSym = BAD_VAR_NUM; // variable representing the PSPSym - InlineInfo* impInlineInfo; // Only present for inlinees InlineStrategy* m_inlineStrategy; diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index a1c563f0ce0ea6..fd341fde235c0a 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4272,8 +4272,6 @@ bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum) result = result || (varNum == lvaOutgoingArgSpaceVar); #endif - result = result || (varNum == lvaPSPSym); - return result; } diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 539a8ca7c7c4b6..701921649350c5 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -3093,18 +3093,6 @@ PhaseStatus Compiler::fgCreateFunclets() assert(UsesFunclets()); assert(!fgFuncletsCreated); - // Allocate the PSPSym, if needed. 
PSPSym is not used by the NativeAOT ABI - if (!IsTargetAbi(CORINFO_NATIVEAOT_ABI)) - { - if (ehNeedsPSPSym()) - { - lvaPSPSym = lvaGrabTempWithImplicitUse(false DEBUGARG("PSPSym")); - LclVarDsc* lclPSPSym = lvaGetDesc(lvaPSPSym); - lclPSPSym->lvType = TYP_I_IMPL; - lvaSetVarDoNotEnregister(lvaPSPSym DEBUGARG(DoNotEnregisterReason::VMNeedsStackAddr)); - } - } - fgCreateFuncletPrologBlocks(); unsigned XTnum; diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index 688f61f60923ed..f8153ef42b15ca 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -3957,16 +3957,6 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz gcInfoEncoderWithLog->SetPrologSize(prologSize); } - if (compiler->lvaPSPSym != BAD_VAR_NUM) - { -#ifdef TARGET_AMD64 - // The PSPSym is relative to InitialSP on X64 and CallerSP on other platforms. - gcInfoEncoderWithLog->SetPSPSymStackSlot(compiler->lvaGetInitialSPRelativeOffset(compiler->lvaPSPSym)); -#else // !TARGET_AMD64 - gcInfoEncoderWithLog->SetPSPSymStackSlot(compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)); -#endif // !TARGET_AMD64 - } - #ifdef TARGET_AMD64 if (compiler->ehAnyFunclets()) { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 5bf5fb170af2f1..2f94fdbe23ddbd 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -11749,10 +11749,6 @@ void Compiler::gtGetLclVarNameInfo(unsigned lclNum, const char** ilKindOut, cons ilName = "LocAllocSP"; } #endif // JIT32_GCENCODER - else if (lclNum == lvaPSPSym) - { - ilName = "PSPSym"; - } else { ilKind = "tmp"; diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 29d5e07e7d3d47..889147dc17c816 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -3069,13 +3069,6 @@ void Compiler::lvaSortByRefCount() lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::NoRegVars)); } - // No benefit in tracking the 
PSPSym (if any) - // - if (lclNum == lvaPSPSym) - { - varDsc->lvTracked = 0; - } - // Are we not optimizing and we have exception handlers? // if so mark all args and locals "do not enregister". // @@ -4149,9 +4142,6 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * | | * ~ localloc ~ // not in frames with EH * | | - * |-----------------------| - * | PSPSym | // only in frames with EH (thus no localloc) - * | | * |-----------------------| <---- RBP in localloc frames (max 240 bytes from Initial-SP) * | Arguments for the | * ~ next function ~ @@ -4218,8 +4208,6 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * |-----------------------| <---- Virtual '0' * |Callee saved registers | * |-----------------------| - * | PSPSym | // Only for frames with EH, which means FP-based frames - * |-----------------------| * ~ possible double align ~ * |-----------------------| * | security object | @@ -4309,8 +4297,6 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * |Callee saved registers | * | except fp/lr | * |-----------------------| - * | PSPSym | // Only for frames with EH, which requires FP-based frames - * |-----------------------| * | security object | * |-----------------------| * | ParamTypeArg | @@ -4357,8 +4343,6 @@ unsigned Compiler::lvaGetMaxSpillTempSize() * |-----------------------| * |Callee saved registers | * |-----------------------| - * | PSPSym | // Only for frames with EH, which requires FP-based frames - * |-----------------------| * | security object | * |-----------------------| * | ParamTypeArg | @@ -4502,27 +4486,6 @@ void Compiler::lvaFixVirtualFrameOffsets() { LclVarDsc* varDsc; -#if defined(TARGET_AMD64) - if (lvaPSPSym != BAD_VAR_NUM) - { - // We need to fix the offset of the PSPSym so there is no padding between it and the outgoing argument space. - // Without this code, lvaAlignFrame might have put the padding lower than the PSPSym, which would be between - // the PSPSym and the outgoing argument space. 
- varDsc = lvaGetDesc(lvaPSPSym); - assert(varDsc->lvFramePointerBased); // We always access it RBP-relative. - assert(!varDsc->lvMustInit); // It is never "must init". - varDsc->SetStackOffset(codeGen->genCallerSPtoInitialSPdelta() + lvaLclStackHomeSize(lvaOutgoingArgSpaceVar)); - - if (opts.IsOSR()) - { - // With OSR RBP points at the base of the OSR frame, but the virtual offsets - // are from the base of the Tier0 frame. Adjust. - // - varDsc->SetStackOffset(varDsc->GetStackOffset() - info.compPatchpointInfo->TotalFrameSize()); - } - } -#endif - // The delta to be added to virtual offset to adjust it relative to frame pointer or SP int delta = 0; int frameLocalsDelta = 0; @@ -4586,22 +4549,8 @@ void Compiler::lvaFixVirtualFrameOffsets() { int offset = lvaTable[lvaMonAcquired].GetStackOffset() + delta; lvaTable[lvaMonAcquired].SetStackOffset(offset); - - if (lvaPSPSym != BAD_VAR_NUM) - { - int offset = lvaTable[lvaPSPSym].GetStackOffset() + delta; - lvaTable[lvaPSPSym].SetStackOffset(offset); - delta += TARGET_POINTER_SIZE; - } - delta += lvaLclStackHomeSize(lvaMonAcquired); } - else if (lvaPSPSym != BAD_VAR_NUM) - { - int offset = lvaTable[lvaPSPSym].GetStackOffset() + delta; - lvaTable[lvaPSPSym].SetStackOffset(offset); - delta += TARGET_POINTER_SIZE; - } JITDUMP("--- delta bump %d for FP frame\n", delta); } @@ -5156,17 +5105,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } } -#if defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if (lvaPSPSym != BAD_VAR_NUM) - { - // On ARM/ARM64, if we need a PSPSym we allocate it early since funclets - // will need to have it at the same caller-SP relative offset so anything - // allocated before this will also leak into the funclet's frame. 
- noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); - } -#endif // TARGET_ARMARCH || TARGET_LOONGARCH64 || TARGET_RISCV64 - if (mustDoubleAlign) { if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT) @@ -5482,7 +5420,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() // These need to be located as the very first variables (highest memory address) // and so they have already been assigned an offset - if (lclNum == lvaPSPSym || + if ( #if defined(FEATURE_EH_WINDOWS_X86) lclNum == lvaShadowSPslotsVar || #endif // FEATURE_EH_WINDOWS_X86 @@ -5707,17 +5645,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals() } } -#if defined(TARGET_AMD64) - if (lvaPSPSym != BAD_VAR_NUM) - { - // On AMD64, if we need a PSPSym, allocate it last, immediately above the outgoing argument - // space. Any padding will be higher on the stack than this - // (including the padding added by lvaAlignFrame()). - noway_assert(codeGen->isFramePointerUsed()); // We need an explicit frame pointer - stkOffs = lvaAllocLocalAndSetVirtualOffset(lvaPSPSym, TARGET_POINTER_SIZE, stkOffs); - } -#endif // TARGET_AMD64 - #if FEATURE_FIXED_OUT_ARGS if (lvaOutgoingArgSpaceSize > 0) { diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index eaaf6fc4ccbc9d..9e180de420fabc 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -347,11 +347,11 @@ // Where is the exception object on entry to the handler block? 
#ifdef UNIX_AMD64_ABI - #define REG_EXCEPTION_OBJECT REG_ESI - #define RBM_EXCEPTION_OBJECT RBM_ESI + #define REG_EXCEPTION_OBJECT REG_EDI + #define RBM_EXCEPTION_OBJECT RBM_EDI #else // !UNIX_AMD64_ABI - #define REG_EXCEPTION_OBJECT REG_EDX - #define RBM_EXCEPTION_OBJECT RBM_EDX + #define REG_EXCEPTION_OBJECT REG_ECX + #define RBM_EXCEPTION_OBJECT RBM_ECX #endif // !UNIX_AMD64_ABI #define REG_JUMP_THUNK_PARAM REG_EAX From 01986aaedcab34a1d4131ee4dd4a101d1d4429ba Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 16:55:00 +0200 Subject: [PATCH 03/52] Update documentation --- docs/design/coreclr/botr/clr-abi.md | 34 ++++------------------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 62e75ab0f7a125..0245fa93973bbd 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -322,33 +322,11 @@ Finally1: Note that JIT64 does not implement this properly. The C# compiler used to always insert all necessary "step" blocks. The Roslyn C# compiler at one point did not, but then was changed to once again insert them. -## The PSPSym and funclet parameters +## Funclet parameters -The *PSPSym* (which stands for Previous Stack Pointer Symbol) is a pointer-sized local variable used to access locals from the main function body. +For filter funclets the VM sets the frame register to be the same as the parent function. For second pass funclets the VM restores all non-volatile registers. The same convention is used across all platforms. -NativeAOT does not use PSPSym. For filter funclets the VM sets the frame register to be the same as the parent function. For second pass funclets the VM restores all non-volatile registers. The same convention is used across all platforms. - -CoreCLR uses PSPSym for all platforms except x86: the frame pointer on x86 is always preserved when the handlers are invoked. - -First, two definitions. 
- -*Caller-SP* is the value of the stack pointer in a function's caller before the call instruction is executed. That is, when function A calls function B, Caller-SP for B is the value of the stack pointer immediately before the call instruction in A (calling B) was executed. Note that this definition holds for both AMD64, which pushes the return value when a call instruction is executed, and for ARM, which doesn't. For AMD64, Caller-SP is the address above the call return address. - -*Initial-SP* is the initial value of the stack pointer after the fixed-size portion of the frame has been allocated. That is, before any "alloca"-type allocations. - -The value stored in PSPSym is the value of Initial-SP for AMD64 or Caller-SP for other platforms, for the main function. The stack offset of the PSPSym is reported to the VM in the GC information header. The value reported in the GC information is the offset of the PSPSym from Initial-SP for AMD64 or Caller-SP for other platforms. (Note that both the value stored, and the way the value is reported to the VM, differs between architectures. In particular, note that most things in the GC information header are reported as offsets relative to Caller-SP, but PSPSym on AMD64 is one exception, and maybe the only exception.) - -The VM uses the PSPSym to find other locals it cares about (such as the generics context in a funclet frame). The JIT uses it to re-establish the frame pointer register, so that the frame pointer is the same value in a funclet as it is in the main function body. - -When a funclet is called, it is passed the *Establisher Frame Pointer*. For AMD64 this is true for all funclets and it is passed as the first argument in RCX, but for ARM and ARM64 this is only true for first pass funclets (currently just filters) and it is passed as the second argument in R1. The Establisher Frame Pointer is a stack pointer of an interesting "parent" frame in the exception processing system. 
For the CLR, it points either to the main function frame or a dynamically enclosing funclet frame from the same function, for the funclet being invoked. The value of the Establisher Frame Pointer is Initial-SP on AMD64, Caller-SP on x86, ARM, and ARM64. - -Using the establisher frame, the funclet wants to load the value of the PSPSym. Since we don't know if the Establisher Frame is from the main function or a funclet, we design the main function and funclet frame layouts to place the PSPSym at an identical, small, constant offset from the Establisher Frame in each case. (This is also required because we only report a single offset to the PSPSym in the GC information, and that offset must be valid for the main function and all of its funclets). Then, the funclet uses this known offset to compute the PSPSym address and read its value. From this, it can compute the value of the frame pointer (which is a constant offset from the PSPSym value) and set the frame register to be the same as the parent function. Also, the funclet writes the value of the PSPSym to its own frame's PSPSym. This "copying" of the PSPSym happens for every funclet invocation, in particular, for every nested funclet invocation. - -On ARM and ARM64, for all second pass funclets (finally, fault, catch, and filter-handler) the VM restores all non-volatile registers to their values within the parent frame. This includes the frame register (`R11`). Thus, the PSPSym is not used to recompute the frame pointer register in this case, though the PSPSym is copied to the funclet's frame, as for all funclets. - -Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument (`REG_EXCEPTION_OBJECT`). On AMD64 it is the second argument and thus passed in RDX. On ARM and ARM64 this is the first argument and passed in R0. - -(Note that the JIT64 source code contains a comment that says, "The current CLR doesn't always pass the correct establisher frame to the funclet. 
Funclet may receive establisher frame of funclet when expecting that of original routine." It indicates this is the reason that a PSPSym is required in all funclets as well as the main function, whereas if the establisher frame was correctly reported, the PSPSym could be omitted in some cases.) +Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument (`REG_EXCEPTION_OBJECT`). On AMD64 it is passed in RCX (Windows ABI) or RDI (Unix ABI). On ARM and ARM64 this is the first argument and passed in R0. ## Funclet Return Values @@ -374,11 +352,7 @@ Some definitions: When an exception occurs, the VM is invoked to do some processing. If the exception is within a "try" region, it eventually calls a corresponding handler (which also includes calling filters). The exception location within a function might be where a "throw" instruction executes, the point of a processor exception like null pointer dereference or divide by zero, or the point of a call where the callee threw an exception but did not catch it. -On AMD64, all register values that existed at the exception point in the corresponding "try" region are trashed on entry to the funclet. That is, the only registers that have known values are those of the funclet parameters. -On ARM and ARM64, all registers are restored to their values at the exception point. -On x86: TBD. +All non-volatile registers are restored to their values at the exception point. 
### Registers on return from a funclet From b1987e22b2f87951be41b1723d4c490c61eff1a3 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 17:41:22 +0200 Subject: [PATCH 04/52] Fix build --- src/coreclr/vm/eetwain.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 243906f65d45c7..bebcfe5aab80fe 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -371,6 +371,8 @@ HRESULT EECodeManager::FixContextForEnC(PCONTEXT pCtx, return E_FAIL; } + TADDR callerSP = oldStackBase + oldFixedStackSize; + #else PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform."); #endif @@ -744,6 +746,9 @@ HRESULT EECodeManager::FixContextForEnC(PCONTEXT pCtx, memset((void*)(size_t)(pCtx->Esp), 0, newInfo->stackSize - frameHeaderSize ); #elif defined(TARGET_AMD64) || defined(TARGET_ARM64) memset((void*)newStackBase, 0, newFixedStackSize - frameHeaderSize); +#else // !X86, !X64, !ARM64 + PORTABILITY_ASSERT("Edit-and-continue not enabled on this platform."); +#endif // 4) Put the variables from step 3 into their new locations. 
From d86572f37c8d281eb224dd6f3b2c3b23ea938a06 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 18:11:14 +0200 Subject: [PATCH 05/52] Fix ARM64 build --- src/coreclr/vm/eetwain.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index bebcfe5aab80fe..6b9fd566a55826 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -2036,10 +2036,10 @@ static inline TADDR GetFrameRestoreBase(PCONTEXT pContextRecord) return pContextRecord->Rbp; #elif defined(TARGET_X86) return pContextRecord->Ebp; -#elif defined(TARGET_ARM64) - return pContextRecord->X29; #elif defined(TARGET_ARM) return pContextRecord->R11; +#elif defined(TARGET_ARM64) + return pContextRecord->Fp; #elif defined(TARGET_LOONGARCH64) return pContextRecord->Fp; #elif defined(TARGET_RISCV64) From d248211578c276687b3a0ff520627eb631fcc325 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 19:03:16 +0200 Subject: [PATCH 06/52] Fix RISC-V and LA64 build --- src/coreclr/vm/loongarch64/asmhelpers.S | 2 +- src/coreclr/vm/riscv64/asmhelpers.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index 78c44dffa84a22..cefd0c705c509a 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -886,7 +886,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Save the SP of this function st.d $fp, $a3, 0 // Restore frame pointer - mov $fp, $a1 + move $fp, $a1 // Invoke the filter funclet jirl $ra, $a2, 0 diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index 2c049b7a44e2b1..b434f271d704d9 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -743,7 +743,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Save the SP of this function sd fp, 0(a3) // Restore frame pointer - 
mov fp, a1 + mv fp, a1 // Invoke the filter funclet jalr a2 From ed12de52b74068a085eb38b69d18dfa8965eb91e Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 19:22:26 +0200 Subject: [PATCH 07/52] Remove unused fiFunction_CallerSP_to_FP_delta --- src/coreclr/jit/codegen.h | 4 ---- src/coreclr/jit/codegenarm64.cpp | 2 -- src/coreclr/jit/codegenloongarch64.cpp | 3 --- src/coreclr/jit/codegenriscv64.cpp | 3 --- 4 files changed, 12 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index a2ac56cb63e795..a958964b0b6614 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -474,8 +474,6 @@ class CodeGen final : public CodeGenInterface struct FuncletFrameInfoDsc { regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes LR) - int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function - // (negative) int fiSP_to_FPLR_save_delta; // FP/LR register save offset from SP (positive) int fiSP_to_CalleeSave_delta; // First callee-saved register slot offset from SP (positive) int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details. 
@@ -506,8 +504,6 @@ class CodeGen final : public CodeGenInterface struct FuncletFrameInfoDsc { regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) - int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function - // (negative) int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) int fiSpDelta; // Stack pointer delta (negative) }; diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index d9afebc629aa05..088bac0ed997e5 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1654,8 +1654,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // The frame size and offsets must be finalized assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); - regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; assert((rsMaskSaveRegs & RBM_LR) != 0); assert((rsMaskSaveRegs & RBM_FP) != 0); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 3db4d9e8a5ebc2..041e7149f2fee9 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -546,8 +546,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert((rsMaskSaveRegs & RBM_FP) != 0); /* Now save it for future use */ - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); - int funcletFrameSize = compiler->lvaOutgoingArgSpaceSize; genFuncletInfo.fiSP_to_CalleeSaved_delta = funcletFrameSize; @@ -574,7 +572,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); - printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); printf(" SP to CalleeSaved location delta: %d\n", genFuncletInfo.fiSP_to_CalleeSaved_delta); printf(" SP delta: %d\n", 
genFuncletInfo.fiSpDelta); } diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 3d6cb567b43fa8..85968daaca756f 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -534,8 +534,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() assert((rsMaskSaveRegs & RBM_FP) != 0); /* Now save it for future use */ - genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta(); - int funcletFrameSize = compiler->lvaOutgoingArgSpaceSize; genFuncletInfo.fiSP_to_CalleeSaved_delta = funcletFrameSize; @@ -562,7 +560,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() printf(" Save regs: "); dspRegMask(genFuncletInfo.fiSaveRegs); printf("\n"); - printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_CallerSP_to_FP_delta); printf(" SP to CalleeSaved location delta: %d\n", genFuncletInfo.fiSP_to_CalleeSaved_delta); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); } From 24f60d73318548fc9d7ae53c47eef0f88fc5dd71 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 19:23:44 +0200 Subject: [PATCH 08/52] Remove unused fiFunction_InitialSP_to_FP_delta --- src/coreclr/jit/codegen.h | 1 - src/coreclr/jit/codegenxarch.cpp | 7 ------- 2 files changed, 8 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index a958964b0b6614..b3b64a741ceda6 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -490,7 +490,6 @@ class CodeGen final : public CodeGenInterface // same. 
struct FuncletFrameInfoDsc { - unsigned fiFunction_InitialSP_to_FP_delta; // Delta between Initial-SP and the frame pointer unsigned fiSpDelta; // Stack pointer delta }; diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 9d8a46890f7c2e..63130adcd68f5f 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -11096,12 +11096,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // finalized assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized - // Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize, - // that's ok, because we're figuring out an offset in the parent frame. - genFuncletInfo.fiFunction_InitialSP_to_FP_delta = - compiler->lvaToInitialSPRelativeOffset(0, true); // trick to find the Initial-SP-relative offset of the frame - // pointer. - assert(compiler->lvaOutgoingArgSpaceSize % REGSIZE_BYTES == 0); #ifndef UNIX_AMD64_ABI // No 4 slots for outgoing params on the stack for System V systems. 
@@ -11143,7 +11137,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() { printf("\n"); printf("Funclet prolog / epilog info\n"); - printf(" Function InitialSP-to-FP delta: %d\n", genFuncletInfo.fiFunction_InitialSP_to_FP_delta); printf(" SP delta: %d\n", genFuncletInfo.fiSpDelta); } #endif // DEBUG From a06a6a6b1e196a1f83fa612adfcf491bd30f0d87 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 19:43:23 +0200 Subject: [PATCH 09/52] Apply JIT diff --- src/coreclr/jit/codegen.h | 18 +++++++++--------- src/coreclr/jit/codegenarm.cpp | 4 ++-- src/coreclr/jit/codegenarm64.cpp | 16 +++++++--------- src/coreclr/jit/codegenloongarch64.cpp | 4 ++-- src/coreclr/jit/codegenriscv64.cpp | 4 ++-- 5 files changed, 22 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index b3b64a741ceda6..0732838865298f 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -473,12 +473,12 @@ class CodeGen final : public CodeGenInterface // same. struct FuncletFrameInfoDsc { - regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes LR) - int fiSP_to_FPLR_save_delta; // FP/LR register save offset from SP (positive) - int fiSP_to_CalleeSave_delta; // First callee-saved register slot offset from SP (positive) - int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details. - int fiSpDelta1; // Stack pointer delta 1 (negative) - int fiSpDelta2; // Stack pointer delta 2 (negative) + regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes LR) + int fiSP_to_FPLR_save_delta; // FP/LR register save offset from SP (positive) + int fiSP_to_CalleeSave_delta; // First callee-saved register slot offset from SP (positive) + int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details. 
+ int fiSpDelta1; // Stack pointer delta 1 (negative) + int fiSpDelta2; // Stack pointer delta 2 (negative) }; FuncletFrameInfoDsc genFuncletInfo; @@ -490,7 +490,7 @@ class CodeGen final : public CodeGenInterface // same. struct FuncletFrameInfoDsc { - unsigned fiSpDelta; // Stack pointer delta + unsigned fiSpDelta; // Stack pointer delta }; FuncletFrameInfoDsc genFuncletInfo; @@ -503,8 +503,8 @@ class CodeGen final : public CodeGenInterface struct FuncletFrameInfoDsc { regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA) - int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) - int fiSpDelta; // Stack pointer delta (negative) + int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive) + int fiSpDelta; // Stack pointer delta (negative) }; FuncletFrameInfoDsc genFuncletInfo; diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 4491b260e005e8..e5bdabf9ab0df2 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -2435,8 +2435,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() /* Now save it for future use */ - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSpDelta = spDelta; + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSpDelta = spDelta; #ifdef DEBUG if (verbose) diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 088bac0ed997e5..feaae354587f09 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -1682,8 +1682,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // If do two SP adjustments, each one must be aligned. This represents the largest possible stack size, if two // separate alignment slots are required. 
- unsigned const twoSpAdjustmentFuncletFrameSizeAligned = - saveRegsSizeAligned + outgoingArgSpaceAligned; + unsigned const twoSpAdjustmentFuncletFrameSizeAligned = saveRegsSizeAligned + outgoingArgSpaceAligned; assert((twoSpAdjustmentFuncletFrameSizeAligned % STACK_ALIGN) == 0); int SP_to_FPLR_save_delta; @@ -1695,8 +1694,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() if ((twoSpAdjustmentFuncletFrameSizeAligned <= 512) && !useFrameType5) { - unsigned const oneSpAdjustmentFuncletFrameSize = - saveRegsSize + compiler->lvaOutgoingArgSpaceSize; + unsigned const oneSpAdjustmentFuncletFrameSize = saveRegsSize + compiler->lvaOutgoingArgSpaceSize; unsigned const oneSpAdjustmentFuncletFrameSizeAligned = roundUp(oneSpAdjustmentFuncletFrameSize, STACK_ALIGN); assert(oneSpAdjustmentFuncletFrameSizeAligned <= twoSpAdjustmentFuncletFrameSizeAligned); @@ -1751,13 +1749,13 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() SP_to_FPLR_save_delta -= MAX_REG_ARG * REGSIZE_BYTES; } - SP_to_CalleeSave_delta = outgoingArgSpaceAligned + saveRegsAlignmentPad; + SP_to_CalleeSave_delta = outgoingArgSpaceAligned + saveRegsAlignmentPad; genFuncletInfo.fiFrameType = 5; } else { - SP_to_FPLR_save_delta = outgoingArgSpaceAligned; + SP_to_FPLR_save_delta = outgoingArgSpaceAligned; SP_to_CalleeSave_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsAlignmentPad; genFuncletInfo.fiFrameType = 3; @@ -1771,9 +1769,9 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() /* Now save it for future use */ - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; - genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta; - genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_CalleeSave_delta; + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSP_to_FPLR_save_delta = SP_to_FPLR_save_delta; + genFuncletInfo.fiSP_to_CalleeSave_delta = SP_to_CalleeSave_delta; #ifdef DEBUG if (verbose) diff --git a/src/coreclr/jit/codegenloongarch64.cpp 
b/src/coreclr/jit/codegenloongarch64.cpp index 041e7149f2fee9..44444a3d58d52e 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -561,8 +561,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() funcletFrameSize = funcletFrameSize - delta_PSP; funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN); - genFuncletInfo.fiSpDelta = -funcletFrameSize; - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSpDelta = -funcletFrameSize; + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; #ifdef DEBUG if (verbose) diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 85968daaca756f..9c4dd558f0a434 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -549,8 +549,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() funcletFrameSize = funcletFrameSize - delta_PSP; funcletFrameSize = roundUp((unsigned)funcletFrameSize, STACK_ALIGN); - genFuncletInfo.fiSpDelta = -funcletFrameSize; - genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; + genFuncletInfo.fiSpDelta = -funcletFrameSize; + genFuncletInfo.fiSaveRegs = rsMaskSaveRegs; #ifdef DEBUG if (verbose) From f6ff6e155ff72f81416be30c1eba2ee8118c2b85 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 19:43:42 +0200 Subject: [PATCH 10/52] Fix typo --- docs/design/coreclr/botr/clr-abi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 0245fa93973bbd..2d6beef703db41 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -352,7 +352,7 @@ Some definitions: When an exception occurs, the VM is invoked to do some processing. If the exception is within a "try" region, it eventually calls a corresponding handler (which also includes calling filters). 
The exception location within a function might be where a "throw" instruction executes, the point of a processor exception like null pointer dereference or divide by zero, or the point of a call where the callee threw an exception but did not catch it. -All non-volative registers are restored to their values at the exception point. +All non-volatile registers are restored to their values at the exception point. ### Registers on return from a funclet From 951b7eb38578b5e94cff930133fbbbe0a241830a Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 20:56:28 +0200 Subject: [PATCH 11/52] Remove unused fiFunctionCallerSPtoFPdelta --- src/coreclr/jit/codegen.h | 1 - src/coreclr/jit/codegenarm.cpp | 2 -- 2 files changed, 3 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 0732838865298f..d3b2f09a8ecd3e 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -460,7 +460,6 @@ class CodeGen final : public CodeGenInterface struct FuncletFrameInfoDsc { regMaskTP fiSaveRegs; // Set of registers saved in the funclet prolog (includes LR) - unsigned fiFunctionCallerSPtoFPdelta; // Delta between caller SP and the frame pointer unsigned fiSpDelta; // Stack pointer delta }; diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index e5bdabf9ab0df2..fe5897422005e3 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -2416,7 +2416,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // (also assumed in genFnProlog()). 
assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0); unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; - genFuncletInfo.fiFunctionCallerSPtoFPdelta = preSpillRegArgSize + 2 * REGSIZE_BYTES; regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); @@ -2443,7 +2442,6 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() { printf("\n"); printf("Funclet prolog / epilog info\n"); - printf(" Function CallerSP-to-FP delta: %d\n", genFuncletInfo.fiFunctionCallerSPtoFPdelta); printf(" Save regs: "); dspRegMask(rsMaskSaveRegs); printf("\n"); From 9c697df08d0d1da49d659c7f5706ef9dea35fc0d Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 14 Apr 2025 22:14:58 +0200 Subject: [PATCH 12/52] sp -> rsp --- src/coreclr/vm/amd64/AsmHelpers.asm | 4 ++-- src/coreclr/vm/amd64/asmhelpers.S | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 01dd64221ea0bf..46fa949a7d3c98 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -590,7 +590,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT movdqa xmm15, [r8 + 272 + 9*10h] ; Save the SP of this function. - mov [r9], sp + mov [r9], rsp ; Invoke the funclet call rdx @@ -614,7 +614,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT FUNCLET_CALL_PROLOGUE 0, 1 ; Save the SP of this function - mov [r9], sp + mov [r9], rsp ; Invoke the filter funclet mov rbp, rdx diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 4c8cc7c8e6866b..30ed0520488c6e 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -417,7 +417,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler mov r15, [rdx + 96] // Save the SP of this function. 
- mov [r9], sp + mov [r9], rsp // Invoke the funclet call rsi From fdee150d78ccd348c507a562ec926124d7bbd515 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 11:07:27 +0200 Subject: [PATCH 13/52] Save establisher frame in funclet frame, use it in GetExactGenericsToken --- src/coreclr/vm/amd64/AsmHelpers.asm | 6 +++ src/coreclr/vm/amd64/asmhelpers.S | 18 ++++++-- src/coreclr/vm/arm64/asmhelpers.S | 40 +++++++++++------- src/coreclr/vm/arm64/asmhelpers.asm | 38 ++++++++++------- src/coreclr/vm/eetwain.cpp | 56 ++++++++++++++----------- src/coreclr/vm/i386/ehhelpers.S | 5 +++ src/coreclr/vm/i386/ehhelpers.asm | 5 +++ src/coreclr/vm/loongarch64/asmhelpers.S | 39 ++++++++++------- src/coreclr/vm/riscv64/asmhelpers.S | 47 +++++++++++++-------- 9 files changed, 165 insertions(+), 89 deletions(-) diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 46fa949a7d3c98..ec58ba95b4a43b 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -563,6 +563,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT ; RDX = PC to invoke ; R8 = address of RBX register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + ; [RSP+40] = establisher frame address (Initial SP) ; FUNCLET_CALL_PROLOGUE 0, 1 @@ -598,6 +599,10 @@ NESTED_ENTRY CallEHFunclet, _TEXT FUNCLET_CALL_EPILOGUE ret + +PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP + dq arguments_scratch_area_size + 8 + 8 * 8h + stack_alloc_size + NESTED_END CallEHFunclet, _TEXT ; This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the @@ -609,6 +614,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ; RDX = RBP of main function ; R8 = PC to invoke ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
+ ; [RSP+40] = establisher frame address (Initial SP) ; FUNCLET_CALL_PROLOGUE 0, 1 diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 30ed0520488c6e..15d53d31d1686f 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -404,9 +404,10 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // RSI = PC to invoke // RDX = address of RBX register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // R8 = establisher frame (InitialSP) // - FUNCLET_CALL_PROLOGUE 0, 1 + FUNCLET_CALL_PROLOGUE 1, 0 // Restore RBX, RBP, R12, R13, R14, R15 from CONTEXT mov rbx, [rdx + 0] @@ -416,8 +417,11 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler mov r14, [rdx + 88] mov r15, [rdx + 96] + // Save establisher frame pointer into our stack frame + mov [rsp], r8 + // Save the SP of this function. - mov [r9], rsp + mov [rcx], rsp // Invoke the funclet call rsi @@ -425,6 +429,10 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler FUNCLET_CALL_EPILOGUE ret + +PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP + .quad 0 + NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the @@ -436,9 +444,13 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // RSI = RBP of main function // RDX = PC to invoke // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
+ // R8 = establisher frame (InitialSP) // - FUNCLET_CALL_PROLOGUE 0, 1 + FUNCLET_CALL_PROLOGUE 1, 0 + + // Save establisher frame pointer into our stack frame + mov [rsp], r8 // Save the SP of this function mov [rcx], rsp diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 97ef790aa8df25..d8e891abbec382 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -362,21 +362,25 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // X1 = PC to invoke // X2 = address of X19 register in CONTEXT record// used to restore the non-volatile registers of CrawlFrame // X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // X4 = establisher frame (CallerSP) // - // Using below prolog instead of PROLOG_SAVE_REG_PAIR_INDEXED fp,lr, -96 + // Using below prolog instead of PROLOG_SAVE_REG_PAIR_INDEXED fp,lr, -112 // is intentional. Above statement would also emit instruction to save // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body // of method. However, this method needs to be able to change fp before calling funclet. // This is required to access locals in funclet. 
- PROLOG_SAVE_REG_PAIR_INDEXED x29, lr, -96 + PROLOG_SAVE_REG_PAIR_INDEXED x29, lr, -112 // Spill callee saved registers - PROLOG_SAVE_REG_PAIR x19, x20, 16 - PROLOG_SAVE_REG_PAIR x21, x22, 32 - PROLOG_SAVE_REG_PAIR x23, x24, 48 - PROLOG_SAVE_REG_PAIR x25, x26, 64 - PROLOG_SAVE_REG_PAIR x27, x28, 80 + PROLOG_SAVE_REG_PAIR x19, x20, 32 + PROLOG_SAVE_REG_PAIR x21, x22, 48 + PROLOG_SAVE_REG_PAIR x23, x24, 64 + PROLOG_SAVE_REG_PAIR x25, x26, 80 + PROLOG_SAVE_REG_PAIR x27, x28, 96 + + // Save establisher frame pointer into our stack frame + str x4, [sp, 16] // Save the SP of this function mov x4, sp @@ -393,21 +397,24 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler blr x1 nop - EPILOG_RESTORE_REG_PAIR x19, x20, 16 - EPILOG_RESTORE_REG_PAIR x21, x22, 32 - EPILOG_RESTORE_REG_PAIR x23, x24, 48 - EPILOG_RESTORE_REG_PAIR x25, x26, 64 - EPILOG_RESTORE_REG_PAIR x27, x28, 80 - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 96 + EPILOG_RESTORE_REG_PAIR x19, x20, 32 + EPILOG_RESTORE_REG_PAIR x21, x22, 48 + EPILOG_RESTORE_REG_PAIR x23, x24, 64 + EPILOG_RESTORE_REG_PAIR x25, x26, 80 + EPILOG_RESTORE_REG_PAIR x27, x28, 96 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 112 EPILOG_RETURN + PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP + .quad 16 // Offset of establisher frame inside our frame + NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the // frame pointer for accessing the locals in the parent method. NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -16 + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 // On entry: // @@ -415,7 +422,10 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // X1 = FP of main function // X2 = PC to invoke // X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
+ // X4 = establisher frame (CallerSP) // + // Save establisher frame pointer into our stack frame + str x4, [sp, 16] // Save the SP of this function str fp, [x3] // Restore frame pointer @@ -423,7 +433,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Invoke the filter funclet blr x2 - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 EPILOG_RETURN NESTED_END CallEHFilterFunclet, _TEXT diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 80d6e94c74bd4e..3761a0ef1e3cdb 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -701,6 +701,7 @@ COMToCLRDispatchHelper_RegSetup ; X1 = PC to invoke ; X2 = address of X19 register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame ; X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + ; X4 = establisher frame (CallerSP) ; ; Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,lr, #-16! @@ -708,14 +709,17 @@ COMToCLRDispatchHelper_RegSetup ; sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body ; of method. However, this method needs to be able to change fp before calling funclet. ; This is required to access locals in funclet. - PROLOG_SAVE_REG_PAIR_NO_FP fp,lr, #-96! + PROLOG_SAVE_REG_PAIR_NO_FP fp,lr, #-112! ; Spill callee saved registers - PROLOG_SAVE_REG_PAIR x19, x20, 16 - PROLOG_SAVE_REG_PAIR x21, x22, 32 - PROLOG_SAVE_REG_PAIR x23, x24, 48 - PROLOG_SAVE_REG_PAIR x25, x26, 64 - PROLOG_SAVE_REG_PAIR x27, x28, 80 + PROLOG_SAVE_REG_PAIR x19, x20, 32 + PROLOG_SAVE_REG_PAIR x21, x22, 48 + PROLOG_SAVE_REG_PAIR x23, x24, 64 + PROLOG_SAVE_REG_PAIR x25, x26, 80 + PROLOG_SAVE_REG_PAIR x27, x28, 96 + + ; Save establisher frame pointer into our stack frame + str x4, [sp, 16] ; Save the SP of this function. We cannot store SP directly. 
mov fp, sp @@ -732,21 +736,24 @@ COMToCLRDispatchHelper_RegSetup blr x1 nop - EPILOG_RESTORE_REG_PAIR x19, x20, 16 - EPILOG_RESTORE_REG_PAIR x21, x22, 32 - EPILOG_RESTORE_REG_PAIR x23, x24, 48 - EPILOG_RESTORE_REG_PAIR x25, x26, 64 - EPILOG_RESTORE_REG_PAIR x27, x28, 80 - EPILOG_RESTORE_REG_PAIR fp, lr, #96! + EPILOG_RESTORE_REG_PAIR x19, x20, 32 + EPILOG_RESTORE_REG_PAIR x21, x22, 48 + EPILOG_RESTORE_REG_PAIR x23, x24, 64 + EPILOG_RESTORE_REG_PAIR x25, x26, 80 + EPILOG_RESTORE_REG_PAIR x27, x28, 96 + EPILOG_RESTORE_REG_PAIR fp, lr, #112! EPILOG_RETURN + PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP + DCQ 16 ; Offset of establisher frame inside our frame + NESTED_END CallEHFunclet ; This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the ; frame pointer for accessing the locals in the parent method. NESTED_ENTRY CallEHFilterFunclet - PROLOG_SAVE_REG_PAIR fp, lr, #-16! + PROLOG_SAVE_REG_PAIR fp, lr, #-32! ; On entry: ; @@ -754,7 +761,10 @@ COMToCLRDispatchHelper_RegSetup ; X1 = FP of the main function ; X2 = PC to invoke ; X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + ; X4 = establisher frame (CallerSP) ; + ; Save establisher frame pointer into our stack frame + str x4, [sp, 16] ; Save the SP of this function str fp, [x3] ; Restore frame pointer @@ -762,7 +772,7 @@ COMToCLRDispatchHelper_RegSetup ; Invoke the filter funclet blr x2 - EPILOG_RESTORE_REG_PAIR fp, lr, #16! + EPILOG_RESTORE_REG_PAIR fp, lr, #32! 
EPILOG_RETURN NESTED_END CallEHFilterFunclet diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 6b9fd566a55826..9e5d0058653a89 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1663,6 +1663,8 @@ PTR_VOID EECodeManager::GetExactGenericsToken(PREGDISPLAY pContext, return EECodeManager::GetExactGenericsToken(GetCallerSp(pContext), pCodeInfo); } +EXTERN_C SIZE_T g_OffsetOfEstablisherFrameInFuncletSP; + //static PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, EECodeInfo * pCodeInfo) @@ -1679,6 +1681,27 @@ PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, INT32 spOffsetGenericsContext = gcInfoDecoder.GetGenericsInstContextStackSlot(); if (spOffsetGenericsContext != NO_GENERICS_INST_CONTEXT) { + if (pCodeInfo->IsFunclet()) + { +#if DACCESS_COMPILE + // TODO: ? + return NULL; +#else + // Recover the establisher frame (InitialSP/CallerSP) from the funclet + // caller. + baseStackSlot += g_OffsetOfEstablisherFrameInFuncletSP; + baseStackSlot = *(SIZE_T*)baseStackSlot; +#ifdef TARGET_AMD64 + // On AMD64 the PSPSym stores the "Initial SP": the stack pointer at the end of + // prolog, before any dynamic allocations. + // However, the GenericsContext offset is relative to the caller SP for all + // platforms. So here we adjust to convert AMD64's initial sp to a caller SP. + // But we have to be careful to use the main function's EECodeInfo, not the + // funclet's EECodeInfo because they have different stack sizes! + baseStackSlot += pCodeInfo->GetMainFunctionInfo().GetFixedStackSize(); +#endif // TARGET_AMD64 +#endif // DACCESS_COMPILE + } TADDR taSlot = (TADDR)( spOffsetGenericsContext + baseStackSlot ); TADDR taExactGenericsToken = *PTR_TADDR(taSlot); return PTR_VOID(taExactGenericsToken); @@ -1994,10 +2017,10 @@ void EECodeManager::LeaveCatch(GCInfoToken gcInfoToken, #ifndef TARGET_WASM // This is an assembly helper that enables us to call into EH funclets. 
-EXTERN_C DWORD_PTR STDCALL CallEHFunclet(Object *pThrowable, UINT_PTR pFuncletToInvoke, UINT_PTR *pFirstNonVolReg, UINT_PTR *pFuncletCallerSP); +EXTERN_C DWORD_PTR STDCALL CallEHFunclet(Object *pThrowable, UINT_PTR pFuncletToInvoke, UINT_PTR *pFirstNonVolReg, UINT_PTR *pFuncletCallerSP, UINT_PTR establisherFrame); // This is an assembly helper that enables us to call into EH filter funclets. -EXTERN_C DWORD_PTR STDCALL CallEHFilterFunclet(Object *pThrowable, TADDR CallerSP, UINT_PTR pFuncletToInvoke, UINT_PTR *pFuncletCallerSP); +EXTERN_C DWORD_PTR STDCALL CallEHFilterFunclet(Object *pThrowable, TADDR FP, UINT_PTR pFuncletToInvoke, UINT_PTR *pFuncletCallerSP, UINT_PTR establisherFrame); typedef DWORD_PTR (HandlerFn)(UINT_PTR uStackFrame, Object* pExceptionObj); @@ -2030,26 +2053,6 @@ static inline UINT_PTR *GetFirstNonVolatileRegisterAddress(PCONTEXT pContextReco #endif } -static inline TADDR GetFrameRestoreBase(PCONTEXT pContextRecord) -{ -#if defined(TARGET_AMD64) - return pContextRecord->Rbp; -#elif defined(TARGET_X86) - return pContextRecord->Ebp; -#elif defined(TARGET_ARM) - return pContextRecord->R11; -#elif defined(TARGET_ARM64) - return pContextRecord->Fp; -#elif defined(TARGET_LOONGARCH64) - return pContextRecord->Fp; -#elif defined(TARGET_RISCV64) - return pContextRecord->Fp; -#else - PORTABILITY_ASSERT("GetFrameRestoreBase"); - return NULL; -#endif -} - typedef DWORD_PTR (HandlerFn)(UINT_PTR uStackFrame, Object* pExceptionObj); static UINT_PTR GetEstablisherFrame(REGDISPLAY* pvRegDisplay, ExInfo* exInfo) { @@ -2095,6 +2098,7 @@ DWORD_PTR EECodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDIS // Since the actual caller of the funclet is the assembly helper, pass the reference // to the CallerStackFrame instance so that it can be updated. 
UINT_PTR *pFuncletCallerSP = &(pExInfo->m_csfEHClause.SP); + UINT_PTR establisherFrame = GetEstablisherFrame(pRD, pExInfo); if (isFilterFunclet) { @@ -2102,16 +2106,18 @@ DWORD_PTR EECodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDIS // it will retrieve the framepointer for accessing the locals in the parent // method. dwResult = CallEHFilterFunclet(OBJECTREFToObject(throwable), - GetFrameRestoreBase(pRD->pCurrentContext), + GetFP(pRD->pCurrentContext), CastHandlerFn(pfnHandler), - pFuncletCallerSP); + pFuncletCallerSP, + establisherFrame); } else { dwResult = CallEHFunclet(OBJECTREFToObject(throwable), CastHandlerFn(pfnHandler), GetFirstNonVolatileRegisterAddress(pRD->pCurrentContext), - pFuncletCallerSP); + pFuncletCallerSP, + establisherFrame); } #endif // TARGET_WASM diff --git a/src/coreclr/vm/i386/ehhelpers.S b/src/coreclr/vm/i386/ehhelpers.S index 3b65e1b384d916..da5ad73a62747d 100644 --- a/src/coreclr/vm/i386/ehhelpers.S +++ b/src/coreclr/vm/i386/ehhelpers.S @@ -25,6 +25,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // [ebp+12] = PC to invoke // [ebp+16] = address of EDI register in CONTEXT record // used to restore the non-volatile registers of CrawlFrame // [ebp+20] = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // [ebp+24] = establisher frame (CallerSP) // // Save the SP of this function @@ -51,6 +52,9 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler ESP_EPILOG_POP ebp ESP_EPILOG_END +PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP + .word 32 + 16 // our frame size + 5th argument + ret 16 NESTED_END CallEHFunclet, _TEXT @@ -75,6 +79,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // [ebp+12] = FP to restore // [ebp+16] = PC to invoke // [ebp+20] = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
+ // [ebp+24] = establisher frame (CallerSP) // // Save the SP of this function diff --git a/src/coreclr/vm/i386/ehhelpers.asm b/src/coreclr/vm/i386/ehhelpers.asm index ddae1ff34a7577..234958a03edba5 100644 --- a/src/coreclr/vm/i386/ehhelpers.asm +++ b/src/coreclr/vm/i386/ehhelpers.asm @@ -29,6 +29,7 @@ _CallEHFunclet@16 proc public ; [ebp+12] = PC to invoke ; [ebp+16] = address of EDI register in CONTEXT record ; used to restore the non-volatile registers of CrawlFrame ; [ebp+20] = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + ; [ebp+24] = establisher frame (CallerSP) ; ; Save the SP of this function @@ -54,6 +55,9 @@ _CallEHFunclet@16 proc public ret 16 +PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP + dd 36 ; 4 saved regs + return address + 5th parameter + _CallEHFunclet@16 endp ; DWORD_PTR STDCALL CallEHFilterFunclet(Object *pThrowable, TADDR CallerSP, UINT_PTR pFuncletToInvoke, UINT_PTR *pFuncletCallerSP); @@ -73,6 +77,7 @@ _CallEHFilterFunclet@16 proc public ; [ebp+12] = FP to restore ; [ebp+16] = PC to invoke ; [ebp+20] = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + ; [ebp+24] = establisher frame (CallerSP) ; ; Save the SP of this function diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index cefd0c705c509a..38b95f9be562b7 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -830,18 +830,22 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // $a1 = PC to invoke // $a2 = address of s0 register in CONTEXT record// used to restore the non-volatile registers of CrawlFrame // $a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
+ // $a4 = establisher frame (CallerSP) // // $fp,$ra - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 96, 0 + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 112, 0 // Spill callee saved registers // $s0,$s1 - PROLOG_SAVE_REG_PAIR 23, 24, 16 - PROLOG_SAVE_REG_PAIR 25, 26, 32 - PROLOG_SAVE_REG_PAIR 27, 28, 48 - PROLOG_SAVE_REG_PAIR 29, 30, 64 - PROLOG_SAVE_REG 31, 80 + PROLOG_SAVE_REG_PAIR 23, 24, 32 + PROLOG_SAVE_REG_PAIR 25, 26, 48 + PROLOG_SAVE_REG_PAIR 27, 28, 64 + PROLOG_SAVE_REG_PAIR 29, 30, 80 + PROLOG_SAVE_REG 31, 96 + + // Save establisher frame pointer into our stack frame + st.d $a4, $sp, 16 // Save the SP of this function st.d $sp, $a3, 0 @@ -860,21 +864,25 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // Invoke the funclet jirl $ra, $a1, 0 - EPILOG_RESTORE_REG_PAIR 23, 24, 16 - EPILOG_RESTORE_REG_PAIR 25, 26, 32 - EPILOG_RESTORE_REG_PAIR 27, 28, 48 - EPILOG_RESTORE_REG_PAIR 29, 30, 64 - EPILOG_RESTORE_REG 31, 80 + EPILOG_RESTORE_REG_PAIR 23, 24, 32 + EPILOG_RESTORE_REG_PAIR 25, 26, 48 + EPILOG_RESTORE_REG_PAIR 27, 28, 64 + EPILOG_RESTORE_REG_PAIR 29, 30, 80 + EPILOG_RESTORE_REG 31, 96 // $fp,$ra - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 96 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 112 EPILOG_RETURN + +PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP + .quad 16 // Offset of establisher frame inside our frame + NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the // frame pointer for accessing the locals in the parent method. NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // $fp,$ra - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 16 + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 32 // On entry: // @@ -882,7 +890,10 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // $a1 = FP of main function // $a2 = PC to invoke // $a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
+ // $a4 = establisher frame (CallerSP) // + // Save establisher frame pointer into our stack frame + st.d $a4, $sp, 16 // Save the SP of this function st.d $fp, $a3, 0 // Restore frame pointer @@ -890,7 +901,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Invoke the filter funclet jirl $ra, $a2, 0 - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32 EPILOG_RETURN NESTED_END CallEHFilterFunclet, _TEXT diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index b434f271d704d9..0a37a458fd4f28 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -682,18 +682,22 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // a1 = PC to invoke // a2 = address of s0 register in CONTEXT record// used to restore the non-volatile registers of CrawlFrame // a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // a4 = establisher frame (CallerSP) // - PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 128, 0 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 144, 0 // Spill callee saved registers - PROLOG_SAVE_REG_PAIR s1, s2, 16 - PROLOG_SAVE_REG_PAIR s3, s4, 32 - PROLOG_SAVE_REG_PAIR s5, s6, 48 - PROLOG_SAVE_REG_PAIR s7, s8, 64 - PROLOG_SAVE_REG_PAIR s9, s10, 80 - PROLOG_SAVE_REG_PAIR s11, gp, 96 - PROLOG_SAVE_REG tp, 112 + PROLOG_SAVE_REG_PAIR s1, s2, 32 + PROLOG_SAVE_REG_PAIR s3, s4, 48 + PROLOG_SAVE_REG_PAIR s5, s6, 64 + PROLOG_SAVE_REG_PAIR s7, s8, 80 + PROLOG_SAVE_REG_PAIR s9, s10, 96 + PROLOG_SAVE_REG_PAIR s11, gp, 112 + PROLOG_SAVE_REG tp, 128 + + // Save establisher frame pointer into our stack frame + sd a4, 16(sp) // Save the SP of this function sd sp, 0(a3) @@ -716,22 +720,26 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // Invoke the funclet jalr a1 - EPILOG_RESTORE_REG_PAIR s1, s2, 16 - EPILOG_RESTORE_REG_PAIR s3, s4, 32 - EPILOG_RESTORE_REG_PAIR s5, s6, 48 - EPILOG_RESTORE_REG_PAIR s7, s8, 64 - EPILOG_RESTORE_REG_PAIR s9, s10, 80 - 
EPILOG_RESTORE_REG_PAIR s11, gp, 96 - EPILOG_RESTORE_REG tp, 112 + EPILOG_RESTORE_REG_PAIR s1, s2, 32 + EPILOG_RESTORE_REG_PAIR s3, s4, 48 + EPILOG_RESTORE_REG_PAIR s5, s6, 64 + EPILOG_RESTORE_REG_PAIR s7, s8, 80 + EPILOG_RESTORE_REG_PAIR s9, s10, 96 + EPILOG_RESTORE_REG_PAIR s11, gp, 112 + EPILOG_RESTORE_REG tp, 128 - EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 128 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 144 EPILOG_RETURN + +PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP + .quad 16 ; Offset of establisher frame inside our frame + NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the // frame pointer for accessing the locals in the parent method. NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler - PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 16 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 32 // On entry: // @@ -739,7 +747,10 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // a1 = FP of main function // a2 = PC to invoke // a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
+ // a4 = establisher frame (CallerSP) // + // Save establisher frame pointer into our stack frame + sd a4, 16(sp) // Save the SP of this function sd fp, 0(a3) // Restore frame pointer @@ -747,7 +758,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Invoke the filter funclet jalr a2 - EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 EPILOG_RETURN NESTED_END CallEHFilterFunclet, _TEXT From 7085e5f7d839b8af873616c2e11b907e647e12e0 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 12:06:23 +0200 Subject: [PATCH 14/52] Try to restore the former ABI --- src/coreclr/jit/codegenxarch.cpp | 8 +++--- src/coreclr/jit/targetamd64.h | 8 +++--- .../Runtime/amd64/ExceptionHandling.S | 9 +++++-- .../Runtime/amd64/ExceptionHandling.asm | 9 +++++-- src/coreclr/vm/amd64/AsmHelpers.asm | 27 ++++++++++++++----- src/coreclr/vm/amd64/asmhelpers.S | 8 +++--- src/coreclr/vm/arm/ehhelpers.S | 10 +++++++ src/coreclr/vm/arm64/asmhelpers.S | 4 +-- src/coreclr/vm/arm64/asmhelpers.asm | 4 +-- src/coreclr/vm/eetwain.cpp | 23 +++++++++++++--- src/coreclr/vm/i386/ehhelpers.S | 3 --- src/coreclr/vm/i386/ehhelpers.asm | 3 --- src/coreclr/vm/loongarch64/asmhelpers.S | 4 +-- src/coreclr/vm/riscv64/asmhelpers.S | 4 +-- 14 files changed, 80 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 63130adcd68f5f..0fb8a3da923ac2 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -10936,9 +10936,9 @@ void CodeGen::genFnEpilog(BasicBlock* block) * * Funclets have the following incoming arguments: * - * catch/filter-handler: rcx = the exception object that was caught (see GT_CATCH_ARG) - * filter: rcx = the exception object to filter (see GT_CATCH_ARG) - * finally/fault: none + * catch/filter-handler: rcx/rdi = unused, rdx/rsi = the exception object that was caught (see GT_CATCH_ARG) + * filter: rcx/rdi = unused, rdx/rsi = the exception object to 
filter (see GT_CATCH_ARG) + * finally/fault: rcx/rdi = unused * * Funclets set the following registers on exit: * @@ -10946,6 +10946,8 @@ void CodeGen::genFnEpilog(BasicBlock* block) * filter: rax = non-zero if the handler should handle the exception, zero otherwise (see GT_RETFILT) * finally/fault: none * + * First parameter (rcx/rdi) is a placeholder for establisher frame which is no longer used. + * * The AMD64 funclet prolog sequence is: * * push ebp diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index 9e180de420fabc..eaaf6fc4ccbc9d 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -347,11 +347,11 @@ // Where is the exception object on entry to the handler block? #ifdef UNIX_AMD64_ABI - #define REG_EXCEPTION_OBJECT REG_EDI - #define RBM_EXCEPTION_OBJECT RBM_EDI + #define REG_EXCEPTION_OBJECT REG_ESI + #define RBM_EXCEPTION_OBJECT RBM_ESI #else // !UNIX_AMD64_ABI - #define REG_EXCEPTION_OBJECT REG_ECX - #define RBM_EXCEPTION_OBJECT RBM_ECX + #define REG_EXCEPTION_OBJECT REG_EDX + #define RBM_EXCEPTION_OBJECT RBM_EDX #endif // !UNIX_AMD64_ABI #define REG_JUMP_THUNK_PARAM REG_EAX diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S index 6d6a3337dcf50f..c5ce852e46fd0e 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S @@ -330,7 +330,8 @@ NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler mov [rax], rcx #endif - mov rdi, [rsp + locArg0] // rdi <- exception object + mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame + mov rsi, [rsp + locArg0] // rsi <- exception object call qword ptr [rsp + locArg1] // call handler funclet ALTERNATE_ENTRY RhpCallCatchFunclet2 @@ -469,6 +470,7 @@ NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler mov [rax], rcx #endif + mov rdi, [rsi + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame call qword ptr 
[rsp + locArg0] // handler funclet address ALTERNATE_ENTRY RhpCallFinallyFunclet2 @@ -516,7 +518,10 @@ NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] mov rbp, [rax] - call rsi + mov rax, rsi // rax <- handler funclet address + mov rsi, rdi // rsi <- exception object + mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame + call rax ALTERNATE_ENTRY RhpCallFilterFunclet2 diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm index 4d225ba46054d0..741b916f00b904 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm @@ -434,7 +434,8 @@ endif movdqa xmm14,[r8 + OFFSETOF__REGDISPLAY__Xmm + 8*10h] movdqa xmm15,[r8 + OFFSETOF__REGDISPLAY__Xmm + 9*10h] - mov rcx, [rsp + rsp_offsetof_arguments + 0h] ;; rcx <- exception object + mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame + mov rdx, [rsp + rsp_offsetof_arguments + 0h] ;; rdx <- exception object call qword ptr [rsp + rsp_offsetof_arguments + 8h] ;; call handler funclet ALTERNATE_ENTRY RhpCallCatchFunclet2 @@ -638,6 +639,7 @@ if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the deb mov [rax], r9 endif + mov rcx, [rdx + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame call qword ptr [rsp + rsp_offsetof_arguments + 0h] ;; handler funclet address ALTERNATE_ENTRY RhpCallFinallyFunclet2 @@ -700,7 +702,10 @@ NESTED_ENTRY RhpCallFilterFunclet, _TEXT mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] mov rbp, [rax] - call rdx + mov rax, rdx ;; rax <- handler funclet address + mov rdx, rcx ;; rdx <- exception object + mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame + call rax ALTERNATE_ENTRY RhpCallFilterFunclet2 diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index ec58ba95b4a43b..73e54b0a76bc4b 100644 --- 
a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -563,7 +563,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT ; RDX = PC to invoke ; R8 = address of RBX register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - ; [RSP+40] = establisher frame address (Initial SP) + ; [RSP+40] = establisher frame address (InitialSP) ; FUNCLET_CALL_PROLOGUE 0, 1 @@ -590,19 +590,24 @@ NESTED_ENTRY CallEHFunclet, _TEXT movdqa xmm14, [r8 + 272 + 8*10h] movdqa xmm15, [r8 + 272 + 9*10h] + ; Swap input parameters to avoid trashing them + mov rax, rdx + mov rdx, rcx + + ; Save establisher frame pointer into the argument scratch area of the funclet + ; and put it in rcx parameter (older R2R ABI) + mov rcx, [rsp + arguments_scratch_area_size + 8 + 8 * 8h + stack_alloc_size] + mov [rsp], rcx + ; Save the SP of this function. mov [r9], rsp ; Invoke the funclet - call rdx + call rax FUNCLET_CALL_EPILOGUE ret - -PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP - dq arguments_scratch_area_size + 8 + 8 * 8h + stack_alloc_size - NESTED_END CallEHFunclet, _TEXT ; This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the @@ -614,7 +619,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ; RDX = RBP of main function ; R8 = PC to invoke ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
- ; [RSP+40] = establisher frame address (Initial SP) + ; [RSP+40] = establisher frame address (InitialSP) ; FUNCLET_CALL_PROLOGUE 0, 1 @@ -622,6 +627,14 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ; Save the SP of this function mov [r9], rsp + ; Move throwable into the second parameter + mov rdx, rcx + + ; Save establisher frame pointer into the argument scratch area of the funclet + ; and put it in rcx parameter (older R2R ABI) + mov rcx, [rsp + arguments_scratch_area_size + 8 + 8 * 8h + stack_alloc_size] + mov [rsp], rcx + ; Invoke the filter funclet mov rbp, rdx call r8 diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 15d53d31d1686f..67fbd97385d33d 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -424,15 +424,13 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler mov [rcx], rsp // Invoke the funclet + mov rsi, rdi + mov rdi, r8 // rdi = InitialSP (older R2R ABI) call rsi FUNCLET_CALL_EPILOGUE ret - -PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP - .quad 0 - NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the @@ -457,6 +455,8 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Invoke the filter funclet mov rbp, rsi + mov rsi, rdi + mov rdi, r8 // rdi = InitialSP (older R2R ABI) call rdx FUNCLET_CALL_EPILOGUE diff --git a/src/coreclr/vm/arm/ehhelpers.S b/src/coreclr/vm/arm/ehhelpers.S index 006ade52bbda70..17d889865e2b5f 100644 --- a/src/coreclr/vm/arm/ehhelpers.S +++ b/src/coreclr/vm/arm/ehhelpers.S @@ -107,9 +107,13 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 // R1 = PC to invoke // R2 = address of R4 register in CONTEXT record// used to restore the non-volatile registers of CrawlFrame // R3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
+ // [SP] = establisher frame (CallerSP) // // Save the SP of this function str sp, [r3] + // Save the establisher frame into our frame + ldr r3, [sp, 40] + str r3, [sp] // apply the non-volatiles corresponding to the CrawlFrame ldm r2!, {r4-r6} add r2, r2, #4 @@ -128,6 +132,7 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 PROLOG_PUSH "{r7, lr}" PROLOG_STACK_SAVE r7 + alloc_stack 8 // On entry: // @@ -135,14 +140,19 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 // R1 = FP of main method // R2 = PC to invoke // R3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. + // [SP] = establisher frame (CallerSP) // // Save the SP of this function str sp, [r3] // Restore frame pointer mov r11, r1 + // Set r1 to CallerSP (older R2R ABI) and save it into our frame + ldr r1, [sp, 16] + sdr r1, [sp] // Invoke the filter funclet blx r2 + free_stack 8 EPILOG_POP "{r7, pc}" NESTED_END CallEHFilterFunclet, _TEXT diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index d8e891abbec382..110fe3c6570343 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -405,9 +405,6 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 112 EPILOG_RETURN - PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP - .quad 16 ; Offset of establisher frame inside our frame - NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the @@ -431,6 +428,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Restore frame pointer mov fp, x1 // Invoke the filter funclet + mov x1, x4 // x1 = CallerSP (older R2R ABI) blr x2 EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 3761a0ef1e3cdb..812e9f48c8c482 100644 --- 
a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -744,9 +744,6 @@ COMToCLRDispatchHelper_RegSetup EPILOG_RESTORE_REG_PAIR fp, lr, #112! EPILOG_RETURN - PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP - DCQ 16 ; Offset of establisher frame inside our frame - NESTED_END CallEHFunclet ; This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the @@ -770,6 +767,7 @@ COMToCLRDispatchHelper_RegSetup ; Restore frame pointer mov fp, x1 ; Invoke the filter funclet + mov x1, x4 ; x1 = CallerSP (older R2R ABI) blr x2 EPILOG_RESTORE_REG_PAIR fp, lr, #32! diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 9e5d0058653a89..246aa252b8b4e9 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1683,13 +1683,28 @@ PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, { if (pCodeInfo->IsFunclet()) { -#if DACCESS_COMPILE - // TODO: ? - return NULL; + SIZE_T offsetOfEstablisherFrameInFuncletSP; +#if defined(TARGET_AMD64) + offsetOfEstablisherFrameInFuncletSP = 0; +#elif defined(TARGET_X86) + // CallEHFunclet frame size + return address + 16 (5th parameter) +#ifdef UNIX_X86_ABI + offsetOfEstablisherFrameInFuncletSP = 48; #else + offsetOfEstablisherFrameInFuncletSP = 36; +#endif +#elif defined(TARGET_ARM64) || defined(TARGET_RISC64) || defined(TARGET_LOONGARCH64) + // Stored past FP/LR pair + offsetOfEstablisherFrameInFuncletSP = 16; +#elif defined(TARGET_ARM) + offsetOfEstablisherFrameInFuncletSP = 0; +#else + PORTABILITY_ASSERT("offsetOfEstablisherFrameInFuncletSP"); +#endif + // Recover the establisher frame (InitialSP/CallerSP) from the funclet // caller. 
- baseStackSlot += g_OffsetOfEstablisherFrameInFuncletSP; + baseStackSlot += offsetOfEstablisherFrameInFuncletSP; baseStackSlot = *(SIZE_T*)baseStackSlot; #ifdef TARGET_AMD64 // On AMD64 the PSPSym stores the "Initial SP": the stack pointer at the end of diff --git a/src/coreclr/vm/i386/ehhelpers.S b/src/coreclr/vm/i386/ehhelpers.S index da5ad73a62747d..63ba07d3ebf111 100644 --- a/src/coreclr/vm/i386/ehhelpers.S +++ b/src/coreclr/vm/i386/ehhelpers.S @@ -52,9 +52,6 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler ESP_EPILOG_POP ebp ESP_EPILOG_END -PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP - .word 32 + 16 // our frame size + 5th argument - ret 16 NESTED_END CallEHFunclet, _TEXT diff --git a/src/coreclr/vm/i386/ehhelpers.asm b/src/coreclr/vm/i386/ehhelpers.asm index 234958a03edba5..87fbdb40638ce2 100644 --- a/src/coreclr/vm/i386/ehhelpers.asm +++ b/src/coreclr/vm/i386/ehhelpers.asm @@ -55,9 +55,6 @@ _CallEHFunclet@16 proc public ret 16 -PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP - dd 36 ; 4 saved regs + return address + 5th parameter - _CallEHFunclet@16 endp ; DWORD_PTR STDCALL CallEHFilterFunclet(Object *pThrowable, TADDR CallerSP, UINT_PTR pFuncletToInvoke, UINT_PTR *pFuncletCallerSP); diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index 38b95f9be562b7..a3a859c66d0ad7 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -873,9 +873,6 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 112 EPILOG_RETURN -PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP - .quad 16 ; Offset of establisher frame inside our frame - NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the @@ -899,6 +896,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Restore frame pointer move $fp, $a1 // Invoke the filter funclet + move $a1, $a4 // a1 = CallerSP (older R2R ABI) 
jirl $ra, $a2, 0 EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32 diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index 0a37a458fd4f28..a7a26d1e5caedf 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -731,9 +731,6 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 144 EPILOG_RETURN -PATCH_LABEL g_OffsetOfEstablisherFrameInFuncletSP - .quad 16 ; Offset of establisher frame inside our frame - NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the @@ -756,6 +753,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Restore frame pointer mv fp, a1 // Invoke the filter funclet + mv a1, a4 // a1 = CallerSP (older R2R ABI) jalr a2 EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 From 8365e15b7a8ac06b408d854a71f7a555957f6075 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 12:20:50 +0200 Subject: [PATCH 15/52] Fix build --- src/coreclr/vm/eetwain.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 246aa252b8b4e9..deba419b20359b 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1715,7 +1715,6 @@ PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, // funclet's EECodeInfo because they have different stack sizes! 
baseStackSlot += pCodeInfo->GetMainFunctionInfo().GetFixedStackSize(); #endif // TARGET_AMD64 -#endif // DACCESS_COMPILE } TADDR taSlot = (TADDR)( spOffsetGenericsContext + baseStackSlot ); TADDR taExactGenericsToken = *PTR_TADDR(taSlot); From d05eeca9c9dba2d6eafc815dede2529e930c2bb8 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 12:55:33 +0200 Subject: [PATCH 16/52] WIP --- src/coreclr/vm/amd64/AsmHelpers.asm | 4 +++- src/coreclr/vm/amd64/asmhelpers.S | 4 +++- src/coreclr/vm/eetwain.cpp | 3 +++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 73e54b0a76bc4b..dae4376b4ab0ac 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -627,6 +627,9 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ; Save the SP of this function mov [r9], rsp + ; Restore RBP to match main funtion RBP + mov rbp, rdx + ; Move throwable into the second parameter mov rdx, rcx @@ -636,7 +639,6 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT mov [rsp], rcx ; Invoke the filter funclet - mov rbp, rdx call r8 FUNCLET_CALL_EPILOGUE diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 67fbd97385d33d..03f4237fe60d7b 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -447,6 +447,9 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler FUNCLET_CALL_PROLOGUE 1, 0 + // Restore RBP to match main funtion RBP + mov rbp, rsi + // Save establisher frame pointer into our stack frame mov [rsp], r8 @@ -454,7 +457,6 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler mov [rcx], rsp // Invoke the filter funclet - mov rbp, rsi mov rsi, rdi mov rdi, r8 // rdi = InitialSP (older R2R ABI) call rdx diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index deba419b20359b..e21773987718a0 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1683,6 +1683,9 @@ PTR_VOID 
EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, { if (pCodeInfo->IsFunclet()) { + // TODO: Should we check the return address to see if it's CallEHFunclet, + // CallEHFilterFunclet + SIZE_T offsetOfEstablisherFrameInFuncletSP; #if defined(TARGET_AMD64) offsetOfEstablisherFrameInFuncletSP = 0; From bb43879a1b005b5568e79ed515e013cc4b05aa4e Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 13:01:00 +0200 Subject: [PATCH 17/52] Remove mentions of USE_FUNCLET_CALL_HELPER from documentation --- docs/design/coreclr/botr/guide-for-porting.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/design/coreclr/botr/guide-for-porting.md b/docs/design/coreclr/botr/guide-for-porting.md index 106dc157061f21..15b4e4c65d32c0 100644 --- a/docs/design/coreclr/botr/guide-for-porting.md +++ b/docs/design/coreclr/botr/guide-for-porting.md @@ -386,12 +386,10 @@ Here is an annotated list of the stubs implemented for Unix on Arm64. application 11. `CallEHFunclet` – Used to call catch, finally and fault funclets. Behavior - is specific to exactly how funclets are implemented. Only used if - USE_FUNCLET_CALL_HELPER is set + is specific to exactly how funclets are implemented. 12. `CallEHFilterFunclet` – Used to call filter funclets. Behavior is specific - to exactly how funclets are implemented. Only used if - USE_FUNCLET_CALL_HELPER is set + to exactly how funclets are implemented. 13. 
`ResolveWorkerChainLookupAsmStub`/ `ResolveWorkerAsmStub` Used for virtual stub dispatch (virtual call support for interface, and some virtual From 43c8dc89af4a57ae68827da6532cd356213a5d41 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 13:18:56 +0200 Subject: [PATCH 18/52] Build fixes --- src/coreclr/vm/arm/ehhelpers.S | 2 +- src/coreclr/vm/eetwain.cpp | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/arm/ehhelpers.S b/src/coreclr/vm/arm/ehhelpers.S index 17d889865e2b5f..a7527416594f83 100644 --- a/src/coreclr/vm/arm/ehhelpers.S +++ b/src/coreclr/vm/arm/ehhelpers.S @@ -148,7 +148,7 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 mov r11, r1 // Set r1 to CallerSP (older R2R ABI) and save it into our frame ldr r1, [sp, 16] - sdr r1, [sp] + str r1, [sp] // Invoke the filter funclet blx r2 diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index e21773987718a0..9f994ce70ea845 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1663,8 +1663,6 @@ PTR_VOID EECodeManager::GetExactGenericsToken(PREGDISPLAY pContext, return EECodeManager::GetExactGenericsToken(GetCallerSp(pContext), pCodeInfo); } -EXTERN_C SIZE_T g_OffsetOfEstablisherFrameInFuncletSP; - //static PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, EECodeInfo * pCodeInfo) @@ -1696,7 +1694,7 @@ PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, #else offsetOfEstablisherFrameInFuncletSP = 36; #endif -#elif defined(TARGET_ARM64) || defined(TARGET_RISC64) || defined(TARGET_LOONGARCH64) +#elif defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) // Stored past FP/LR pair offsetOfEstablisherFrameInFuncletSP = 16; #elif defined(TARGET_ARM) From 42815c4a07d6543b03569d1cbc9cc28cbfa3fa72 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 13:33:34 +0200 Subject: [PATCH 19/52] Apply JIT format --- 
src/coreclr/jit/codegen.h | 4 ++-- src/coreclr/jit/codegenarm.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index d3b2f09a8ecd3e..e83697c65cfe6e 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -459,8 +459,8 @@ class CodeGen final : public CodeGenInterface // same. struct FuncletFrameInfoDsc { - regMaskTP fiSaveRegs; // Set of registers saved in the funclet prolog (includes LR) - unsigned fiSpDelta; // Stack pointer delta + regMaskTP fiSaveRegs; // Set of registers saved in the funclet prolog (includes LR) + unsigned fiSpDelta; // Stack pointer delta }; FuncletFrameInfoDsc genFuncletInfo; diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index fe5897422005e3..c1cca5bcf0c6e7 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -2415,7 +2415,7 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo() // (plus the "pre spill regs"). Note that we assume r12 and r13 aren't saved // (also assumed in genFnProlog()). 
assert((regSet.rsMaskCalleeSaved & (RBM_R12 | RBM_R13)) == 0); - unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; + unsigned preSpillRegArgSize = genCountBits(regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; regMaskTP rsMaskSaveRegs = regSet.rsMaskCalleeSaved; unsigned saveRegsCount = genCountBits(rsMaskSaveRegs); From 4ceea64b5211396f6bacd32935e676568bf011fd Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 13:49:22 +0200 Subject: [PATCH 20/52] WASM build fix --- src/coreclr/vm/eetwain.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 9f994ce70ea845..25adb01bd2be6a 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1699,6 +1699,8 @@ PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, offsetOfEstablisherFrameInFuncletSP = 16; #elif defined(TARGET_ARM) offsetOfEstablisherFrameInFuncletSP = 0; +#elif defined(TARGET_WASM) + _ASSERTE(!"CallFunclet for WASM not implemented yet"); #else PORTABILITY_ASSERT("offsetOfEstablisherFrameInFuncletSP"); #endif From 21b9077e1c66deced9d68b72814da5c0cd0ec9db Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 14:40:03 +0200 Subject: [PATCH 21/52] Fix linux-x64 CallEHFunclet --- src/coreclr/vm/amd64/asmhelpers.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 03f4237fe60d7b..7503b3e626d4a3 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -424,9 +424,10 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler mov [rcx], rsp // Invoke the funclet + mov rax, rsi mov rsi, rdi mov rdi, r8 // rdi = InitialSP (older R2R ABI) - call rsi + call rax FUNCLET_CALL_EPILOGUE From 41e98a41ba92f1615377bacdf627edaa08c0a5a4 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 15:37:30 +0200 Subject: [PATCH 22/52] WASM build fix 
--- src/coreclr/vm/eetwain.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 25adb01bd2be6a..7d3faa59153e7c 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1701,6 +1701,7 @@ PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, offsetOfEstablisherFrameInFuncletSP = 0; #elif defined(TARGET_WASM) _ASSERTE(!"CallFunclet for WASM not implemented yet"); + offsetOfEstablisherFrameInFuncletSP = 0; #else PORTABILITY_ASSERT("offsetOfEstablisherFrameInFuncletSP"); #endif From 78bc3861518b2fa78c4167ec36338a9f1ae33d8f Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 16:53:55 +0200 Subject: [PATCH 23/52] WIP --- src/coreclr/inc/eetwain.h | 3 ++- src/coreclr/vm/eetwain.cpp | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/coreclr/inc/eetwain.h b/src/coreclr/inc/eetwain.h index 8dfd26356440cd..6cea5c130a7b81 100644 --- a/src/coreclr/inc/eetwain.h +++ b/src/coreclr/inc/eetwain.h @@ -484,7 +484,8 @@ PTR_VOID GetExactGenericsToken(PREGDISPLAY pContext, static PTR_VOID GetExactGenericsToken(SIZE_T baseStackSlot, - EECodeInfo * pCodeInfo); + EECodeInfo * pCodeInfo, + UINT_PTR returnAddress = 0); #endif // FEATURE_EH_FUNCLETS && USE_GC_INFO_DECODER diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 7d3faa59153e7c..005ef6bd25cbb9 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1660,12 +1660,17 @@ PTR_VOID EECodeManager::GetExactGenericsToken(PREGDISPLAY pContext, { LIMITED_METHOD_DAC_CONTRACT; - return EECodeManager::GetExactGenericsToken(GetCallerSp(pContext), pCodeInfo); + pCodeInfo->GetCodeManager()->EnsureCallerContextIsValid(pContext, NULL); + + return EECodeManager::GetExactGenericsToken(GetSP(pContext->pCallerContext), + pCodeInfo, + GetIP(pContext->pCallerContext)); } //static PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, - EECodeInfo * 
pCodeInfo) + EECodeInfo * pCodeInfo, + UINT_PTR returnAddress /* = 0 */) { LIMITED_METHOD_DAC_CONTRACT; @@ -1679,11 +1684,12 @@ PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, INT32 spOffsetGenericsContext = gcInfoDecoder.GetGenericsInstContextStackSlot(); if (spOffsetGenericsContext != NO_GENERICS_INST_CONTEXT) { - if (pCodeInfo->IsFunclet()) + // Presumably profiler callbacks are not generated for funclets + // so we can use returnAddress == 0 and skip this. + if (returnAddress != 0 && + pCodeInfo->IsFunclet() && + !ExecutionManager::IsManagedCode(returnAddress)) { - // TODO: Should we check the return address to see if it's CallEHFunclet, - // CallEHFilterFunclet - SIZE_T offsetOfEstablisherFrameInFuncletSP; #if defined(TARGET_AMD64) offsetOfEstablisherFrameInFuncletSP = 0; From 4c843d812a3bcc158bc01063a2994a331fe26c19 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 16:57:32 +0200 Subject: [PATCH 24/52] Add comment --- src/coreclr/vm/eetwain.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 005ef6bd25cbb9..60dbbb823b64df 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1684,6 +1684,17 @@ PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, INT32 spOffsetGenericsContext = gcInfoDecoder.GetGenericsInstContextStackSlot(); if (spOffsetGenericsContext != NO_GENERICS_INST_CONTEXT) { + // For funclets we need to recover the establisher frame from + // the main function. + // + // Finally funclets can be called directly from the managed code + // in non-exceptional path. In that case the we don't need to do + // any adjustment. + // + // For calls made through CallEHFunclet and CallEHFilterFunclet + // we recover the establisher frame address from the frame of the + // helper call. + // // Presumably profiler callbacks are not generated for funclets // so we can use returnAddress == 0 and skip this. 
if (returnAddress != 0 && From 6db462565659568fdce6ff98a2c5513c648ec790 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Tue, 15 Apr 2025 17:33:21 +0200 Subject: [PATCH 25/52] Cleanup --- src/coreclr/vm/amd64/AsmHelpers.asm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index dae4376b4ab0ac..246b9c13faca29 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -596,7 +596,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT ; Save establisher frame pointer into the argument scratch area of the funclet ; and put it in rcx parameter (older R2R ABI) - mov rcx, [rsp + arguments_scratch_area_size + 8 + 8 * 8h + stack_alloc_size] + mov rcx, [rsp + rsp_offsetof_arguments + 20h] mov [rsp], rcx ; Save the SP of this function. @@ -635,7 +635,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ; Save establisher frame pointer into the argument scratch area of the funclet ; and put it in rcx parameter (older R2R ABI) - mov rcx, [rsp + arguments_scratch_area_size + 8 + 8 * 8h + stack_alloc_size] + mov rcx, [rsp + rsp_offsetof_arguments + 20h] mov [rsp], rcx ; Invoke the filter funclet From f429aa6a1f6eff7673fd3dd08fd816482184e167 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 09:47:41 +0200 Subject: [PATCH 26/52] Reencode generics instance context stack slot as SP/FP based --- src/coreclr/inc/eetwain.h | 7 ++- src/coreclr/inc/gcinfodecoder.h | 1 + src/coreclr/jit/gcencode.cpp | 10 ++-- src/coreclr/vm/amd64/profiler.cpp | 2 +- src/coreclr/vm/arm/profiler.cpp | 2 +- src/coreclr/vm/arm64/profiler.cpp | 2 +- src/coreclr/vm/eetwain.cpp | 68 +++---------------------- src/coreclr/vm/gcinfodecoder.cpp | 5 ++ src/coreclr/vm/loongarch64/profiler.cpp | 2 +- src/coreclr/vm/riscv64/profiler.cpp | 2 +- 10 files changed, 27 insertions(+), 74 deletions(-) diff --git a/src/coreclr/inc/eetwain.h b/src/coreclr/inc/eetwain.h index 6cea5c130a7b81..0635fdf3c997fe 
100644 --- a/src/coreclr/inc/eetwain.h +++ b/src/coreclr/inc/eetwain.h @@ -483,10 +483,9 @@ PTR_VOID GetExactGenericsToken(PREGDISPLAY pContext, EECodeInfo * pCodeInfo); static -PTR_VOID GetExactGenericsToken(SIZE_T baseStackSlot, - EECodeInfo * pCodeInfo, - UINT_PTR returnAddress = 0); - +PTR_VOID GetExactGenericsToken(TADDR sp, + TADDR fp, + EECodeInfo * pCodeInfo); #endif // FEATURE_EH_FUNCLETS && USE_GC_INFO_DECODER diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index 450e1fbf2f9ca8..ff129026098723 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -583,6 +583,7 @@ class TGcInfoDecoder INT32 GetReversePInvokeFrameStackSlot(); bool HasMethodDescGenericsInstContext(); bool HasMethodTableGenericsInstContext(); + bool HasStackBaseRegister(); bool GetIsVarArg(); bool WantsReportOnlyLeaf(); #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index f8153ef42b15ca..91a7ea6de42d06 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -3870,7 +3870,8 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz assert(false); } - const int offset = compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(), + const int genericContextArgOffset = compiler->lvaCachedGenericContextArgOffset(); + const int offset = compiler->lvaToCallerSPRelativeOffset(genericContextArgOffset, compiler->isFramePointerUsed()); #ifdef DEBUG @@ -3894,7 +3895,7 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz } #endif - gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(offset, ctxtParamType); + gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(genericContextArgOffset, ctxtParamType); } // As discussed above, handle the case where the generics context is obtained via // the method table of 
"this". @@ -3913,7 +3914,8 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz useRootFrameSlot = ppInfo->HasKeptAliveThis(); } - const int offset = compiler->lvaToCallerSPRelativeOffset(compiler->lvaCachedGenericContextArgOffset(), + const int genericContextArgOffset = compiler->lvaCachedGenericContextArgOffset(); + const int offset = compiler->lvaToCallerSPRelativeOffset(genericContextArgOffset, compiler->isFramePointerUsed(), useRootFrameSlot); #ifdef DEBUG @@ -3937,7 +3939,7 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz } #endif - gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(offset, GENERIC_CONTEXTPARAM_THIS); + gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(genericContextArgOffset, GENERIC_CONTEXTPARAM_THIS); } if (compiler->getNeedsGSSecurityCookie()) diff --git a/src/coreclr/vm/amd64/profiler.cpp b/src/coreclr/vm/amd64/profiler.cpp index f49cd3ddc07c04..62c431ab4a04ee 100644 --- a/src/coreclr/vm/amd64/profiler.cpp +++ b/src/coreclr/vm/amd64/profiler.cpp @@ -156,7 +156,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig * pSig, void * platformSpecificHa EECodeInfo codeInfo((PCODE)pData->ip); // We want to pass the caller SP here. - pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledRsp), &codeInfo); + pData->hiddenArg = EECodeManager::GetExactGenericsToken((TADDR)(pData->probeRsp), (TADDR)(pData->rbp), &codeInfo); } } } diff --git a/src/coreclr/vm/arm/profiler.cpp b/src/coreclr/vm/arm/profiler.cpp index a57d1cc3eb81b6..64cea780442760 100644 --- a/src/coreclr/vm/arm/profiler.cpp +++ b/src/coreclr/vm/arm/profiler.cpp @@ -163,7 +163,7 @@ Stack for the above call will look as follows (stack growing downwards): EECodeInfo codeInfo((PCODE)pData->Pc); // We want to pass the caller SP here. 
- pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledSp), &codeInfo); + pData->hiddenArg = EECodeManager::GetExactGenericsToken((TADDR)(pData->probeSp), (TADDR)(pData->R11), &codeInfo); } } } diff --git a/src/coreclr/vm/arm64/profiler.cpp b/src/coreclr/vm/arm64/profiler.cpp index 471677347ea578..8adaeb283c1eae 100644 --- a/src/coreclr/vm/arm64/profiler.cpp +++ b/src/coreclr/vm/arm64/profiler.cpp @@ -86,7 +86,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHan EECodeInfo codeInfo((PCODE)pData->Pc); // We want to pass the caller SP here. - pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledSp), &codeInfo); + pData->hiddenArg = EECodeManager::GetExactGenericsToken((TADDR)(pData->probeSp), (TADDR)(pData->Fp), &codeInfo); } } } diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 60dbbb823b64df..2106478fcfa2d3 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1660,17 +1660,15 @@ PTR_VOID EECodeManager::GetExactGenericsToken(PREGDISPLAY pContext, { LIMITED_METHOD_DAC_CONTRACT; - pCodeInfo->GetCodeManager()->EnsureCallerContextIsValid(pContext, NULL); - - return EECodeManager::GetExactGenericsToken(GetSP(pContext->pCallerContext), - pCodeInfo, - GetIP(pContext->pCallerContext)); + return EECodeManager::GetExactGenericsToken(GetSP(pContext->pCurrentContext), + GetFP(pContext->pCurrentContext), + pCodeInfo); } //static -PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, - EECodeInfo * pCodeInfo, - UINT_PTR returnAddress /* = 0 */) +PTR_VOID EECodeManager::GetExactGenericsToken(TADDR sp, + TADDR fp, + EECodeInfo * pCodeInfo) { LIMITED_METHOD_DAC_CONTRACT; @@ -1684,59 +1682,7 @@ PTR_VOID EECodeManager::GetExactGenericsToken(SIZE_T baseStackSlot, INT32 spOffsetGenericsContext = gcInfoDecoder.GetGenericsInstContextStackSlot(); if (spOffsetGenericsContext != NO_GENERICS_INST_CONTEXT) { - // For funclets we need to recover 
the establisher frame from - // the main function. - // - // Finally funclets can be called directly from the managed code - // in non-exceptional path. In that case the we don't need to do - // any adjustment. - // - // For calls made through CallEHFunclet and CallEHFilterFunclet - // we recover the establisher frame address from the frame of the - // helper call. - // - // Presumably profiler callbacks are not generated for funclets - // so we can use returnAddress == 0 and skip this. - if (returnAddress != 0 && - pCodeInfo->IsFunclet() && - !ExecutionManager::IsManagedCode(returnAddress)) - { - SIZE_T offsetOfEstablisherFrameInFuncletSP; -#if defined(TARGET_AMD64) - offsetOfEstablisherFrameInFuncletSP = 0; -#elif defined(TARGET_X86) - // CallEHFunclet frame size + return address + 16 (5th parameter) -#ifdef UNIX_X86_ABI - offsetOfEstablisherFrameInFuncletSP = 48; -#else - offsetOfEstablisherFrameInFuncletSP = 36; -#endif -#elif defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) - // Stored past FP/LR pair - offsetOfEstablisherFrameInFuncletSP = 16; -#elif defined(TARGET_ARM) - offsetOfEstablisherFrameInFuncletSP = 0; -#elif defined(TARGET_WASM) - _ASSERTE(!"CallFunclet for WASM not implemented yet"); - offsetOfEstablisherFrameInFuncletSP = 0; -#else - PORTABILITY_ASSERT("offsetOfEstablisherFrameInFuncletSP"); -#endif - - // Recover the establisher frame (InitialSP/CallerSP) from the funclet - // caller. - baseStackSlot += offsetOfEstablisherFrameInFuncletSP; - baseStackSlot = *(SIZE_T*)baseStackSlot; -#ifdef TARGET_AMD64 - // On AMD64 the PSPSym stores the "Initial SP": the stack pointer at the end of - // prolog, before any dynamic allocations. - // However, the GenericsContext offset is relative to the caller SP for all - // platforms. So here we adjust to convert AMD64's initial sp to a caller SP. 
- // But we have to be careful to use the main function's EECodeInfo, not the - // funclet's EECodeInfo because they have different stack sizes! - baseStackSlot += pCodeInfo->GetMainFunctionInfo().GetFixedStackSize(); -#endif // TARGET_AMD64 - } + TADDR baseStackSlot = gcInfoDecoder.HasStackBaseRegister() ? fp : sp; TADDR taSlot = (TADDR)( spOffsetGenericsContext + baseStackSlot ); TADDR taExactGenericsToken = *PTR_TADDR(taSlot); return PTR_VOID(taExactGenericsToken); diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index a038ddf2a921c2..044969b393363d 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -458,6 +458,11 @@ template bool TGcInfoDecoder::HasMetho return (m_headerFlags & GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK) == GC_INFO_HAS_GENERICS_INST_CONTEXT_MT; } +template bool TGcInfoDecoder::HasStackBaseRegister() +{ + return (m_headerFlags & GC_INFO_HAS_STACK_BASE_REGISTER) == GC_INFO_HAS_STACK_BASE_REGISTER; +} + #ifdef PARTIALLY_INTERRUPTIBLE_GC_SUPPORTED // This is used for gcinfodumper: is the given offset diff --git a/src/coreclr/vm/loongarch64/profiler.cpp b/src/coreclr/vm/loongarch64/profiler.cpp index 0d926a88f96605..55f558018ad609 100644 --- a/src/coreclr/vm/loongarch64/profiler.cpp +++ b/src/coreclr/vm/loongarch64/profiler.cpp @@ -85,7 +85,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHan EECodeInfo codeInfo((PCODE)pData->Pc); // We want to pass the caller SP here. 
- pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledSp), &codeInfo); + pData->hiddenArg = EECodeManager::GetExactGenericsToken((TADDR)(pData->probeSp), (TADDR)(pData->Fp), &codeInfo); } } } diff --git a/src/coreclr/vm/riscv64/profiler.cpp b/src/coreclr/vm/riscv64/profiler.cpp index b4dd3e7152b786..952f79b4b3ab11 100644 --- a/src/coreclr/vm/riscv64/profiler.cpp +++ b/src/coreclr/vm/riscv64/profiler.cpp @@ -83,7 +83,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHan EECodeInfo codeInfo((PCODE)pData->Pc); // We want to pass the caller SP here. - pData->hiddenArg = EECodeManager::GetExactGenericsToken((SIZE_T)(pData->profiledSp), &codeInfo); + pData->hiddenArg = EECodeManager::GetExactGenericsToken((TADDR)(pData->probeSp), (TADDR)(pData->Fp), &codeInfo); } } } From 1679464dd7e8d884cfb8c18a70950e3a3f3a9b72 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 11:00:03 +0200 Subject: [PATCH 27/52] Remove establisher frame pointer again --- src/coreclr/jit/codegenxarch.cpp | 6 +-- src/coreclr/jit/targetamd64.h | 8 ++-- src/coreclr/vm/amd64/AsmHelpers.asm | 56 +++++++++---------------- src/coreclr/vm/amd64/asmhelpers.S | 21 ++-------- src/coreclr/vm/arm/ehhelpers.S | 10 ----- src/coreclr/vm/arm64/asmhelpers.S | 41 +++++++----------- src/coreclr/vm/arm64/asmhelpers.asm | 41 +++++++----------- src/coreclr/vm/codeman.h | 1 - src/coreclr/vm/eetwain.cpp | 38 ++--------------- src/coreclr/vm/i386/ehhelpers.S | 2 - src/coreclr/vm/i386/ehhelpers.asm | 2 - src/coreclr/vm/jitinterface.cpp | 37 ---------------- src/coreclr/vm/loongarch64/asmhelpers.S | 36 +++++++--------- src/coreclr/vm/riscv64/asmhelpers.S | 45 ++++++++------------ 14 files changed, 99 insertions(+), 245 deletions(-) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 0fb8a3da923ac2..522bc258be7667 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -10936,9 +10936,9 @@ 
void CodeGen::genFnEpilog(BasicBlock* block) * * Funclets have the following incoming arguments: * - * catch/filter-handler: rcx/rdi = unused, rdx/rsi = the exception object that was caught (see GT_CATCH_ARG) - * filter: rcx/rdi = unused, rdx/rsi = the exception object to filter (see GT_CATCH_ARG) - * finally/fault: rcx/rdi = unused + * catch/filter-handler: rcx/rdi = the exception object that was caught (see GT_CATCH_ARG) + * filter: rcx/rdi = the exception object to filter (see GT_CATCH_ARG) + * finally/fault: none * * Funclets set the following registers on exit: * diff --git a/src/coreclr/jit/targetamd64.h b/src/coreclr/jit/targetamd64.h index eaaf6fc4ccbc9d..9e180de420fabc 100644 --- a/src/coreclr/jit/targetamd64.h +++ b/src/coreclr/jit/targetamd64.h @@ -347,11 +347,11 @@ // Where is the exception object on entry to the handler block? #ifdef UNIX_AMD64_ABI - #define REG_EXCEPTION_OBJECT REG_ESI - #define RBM_EXCEPTION_OBJECT RBM_ESI + #define REG_EXCEPTION_OBJECT REG_EDI + #define RBM_EXCEPTION_OBJECT RBM_EDI #else // !UNIX_AMD64_ABI - #define REG_EXCEPTION_OBJECT REG_EDX - #define RBM_EXCEPTION_OBJECT RBM_EDX + #define REG_EXCEPTION_OBJECT REG_ECX + #define RBM_EXCEPTION_OBJECT RBM_ECX #endif // !UNIX_AMD64_ABI #define REG_JUMP_THUNK_PARAM REG_EAX diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 246b9c13faca29..8e5449c1092db7 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -563,47 +563,37 @@ NESTED_ENTRY CallEHFunclet, _TEXT ; RDX = PC to invoke ; R8 = address of RBX register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
- ; [RSP+40] = establisher frame address (InitialSP) ; FUNCLET_CALL_PROLOGUE 0, 1 ; Restore RBX, RBP, RSI, RDI, R12, R13, R14, R15 from CONTEXT - mov rbx, [r8 + 0] - mov rbp, [r8 + 16] - mov rsi, [r8 + 24] - mov rdi, [r8 + 32] - mov r12, [r8 + 72] - mov r13, [r8 + 80] - mov r14, [r8 + 88] - mov r15, [r8 + 96] + mov rbx, [r8 + OFFSETOF__CONTEXT__Rbx - OFFSETOF__CONTEXT__Rbx] + mov rbp, [r8 + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] + mov rsi, [r8 + OFFSETOF__CONTEXT__Rsi - OFFSETOF__CONTEXT__Rbx] + mov rdi, [r8 + OFFSETOF__CONTEXT__Rdi - OFFSETOF__CONTEXT__Rbx] + mov r12, [r8 + OFFSETOF__CONTEXT__R12 - OFFSETOF__CONTEXT__Rbx] + mov r13, [r8 + OFFSETOF__CONTEXT__R13 - OFFSETOF__CONTEXT__Rbx] + mov r14, [r8 + OFFSETOF__CONTEXT__R14 - OFFSETOF__CONTEXT__Rbx] + mov r15, [r8 + OFFSETOF__CONTEXT__R15 - OFFSETOF__CONTEXT__Rbx] ; Restore XMM registers from CONTEXT - movdqa xmm6, [r8 + 272 + 0*10h] - movdqa xmm7, [r8 + 272 + 1*10h] - movdqa xmm8, [r8 + 272 + 2*10h] - movdqa xmm9, [r8 + 272 + 3*10h] - movdqa xmm10, [r8 + 272 + 4*10h] - movdqa xmm11, [r8 + 272 + 5*10h] - movdqa xmm12, [r8 + 272 + 6*10h] - movdqa xmm13, [r8 + 272 + 7*10h] - movdqa xmm14, [r8 + 272 + 8*10h] - movdqa xmm15, [r8 + 272 + 9*10h] - - ; Swap input parameters to avoid trashing them - mov rax, rdx - mov rdx, rcx - - ; Save establisher frame pointer into the argument scratch area of the funclet - ; and put it in rcx parameter (older R2R ABI) - mov rcx, [rsp + rsp_offsetof_arguments + 20h] - mov [rsp], rcx + movdqa xmm6, [r8 + OFFSETOF__CONTEXT__Xmm6 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm7, [r8 + OFFSETOF__CONTEXT__Xmm7 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm8, [r8 + OFFSETOF__CONTEXT__Xmm8 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm9, [r8 + OFFSETOF__CONTEXT__Xmm9 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm10, [r8 + OFFSETOF__CONTEXT__Xmm10 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm11, [r8 + OFFSETOF__CONTEXT__Xmm11 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm12, [r8 + OFFSETOF__CONTEXT__Xmm12 - 
OFFSETOF__CONTEXT__Rbx] + movdqa xmm13, [r8 + OFFSETOF__CONTEXT__Xmm13 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm14, [r8 + OFFSETOF__CONTEXT__Xmm14 - OFFSETOF__CONTEXT__Rbx] + movdqa xmm15, [r8 + OFFSETOF__CONTEXT__Xmm15 - OFFSETOF__CONTEXT__Rbx] ; Save the SP of this function. mov [r9], rsp ; Invoke the funclet - call rax + call rdx FUNCLET_CALL_EPILOGUE @@ -619,7 +609,6 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ; RDX = RBP of main function ; R8 = PC to invoke ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - ; [RSP+40] = establisher frame address (InitialSP) ; FUNCLET_CALL_PROLOGUE 0, 1 @@ -627,17 +616,12 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ; Save the SP of this function mov [r9], rsp - ; Restore RBP to match main funtion RBP + ; Restore RBP to match main function RBP mov rbp, rdx ; Move throwable into the second parameter mov rdx, rcx - ; Save establisher frame pointer into the argument scratch area of the funclet - ; and put it in rcx parameter (older R2R ABI) - mov rcx, [rsp + rsp_offsetof_arguments + 20h] - mov [rsp], rcx - ; Invoke the filter funclet call r8 diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 7503b3e626d4a3..947971004f2e73 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -404,10 +404,9 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // RSI = PC to invoke // RDX = address of RBX register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
- // R8 = establisher frame (InitialSP) // - FUNCLET_CALL_PROLOGUE 1, 0 + FUNCLET_CALL_PROLOGUE 0, 1 // Restore RBX, RBP, R12, R13, R14, R15 from CONTEXT mov rbx, [rdx + 0] @@ -417,17 +416,11 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler mov r14, [rdx + 88] mov r15, [rdx + 96] - // Save establisher frame pointer into our stack frame - mov [rsp], r8 - // Save the SP of this function. mov [rcx], rsp // Invoke the funclet - mov rax, rsi - mov rsi, rdi - mov rdi, r8 // rdi = InitialSP (older R2R ABI) - call rax + call rsi FUNCLET_CALL_EPILOGUE @@ -443,24 +436,18 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // RSI = RBP of main function // RDX = PC to invoke // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - // R8 = establisher frame (InitialSP) // - FUNCLET_CALL_PROLOGUE 1, 0 + FUNCLET_CALL_PROLOGUE 0, 1 // Restore RBP to match main funtion RBP mov rbp, rsi - // Save establisher frame pointer into our stack frame - mov [rsp], r8 - // Save the SP of this function mov [rcx], rsp // Invoke the filter funclet - mov rsi, rdi - mov rdi, r8 // rdi = InitialSP (older R2R ABI) call rdx FUNCLET_CALL_EPILOGUE diff --git a/src/coreclr/vm/arm/ehhelpers.S b/src/coreclr/vm/arm/ehhelpers.S index a7527416594f83..006ade52bbda70 100644 --- a/src/coreclr/vm/arm/ehhelpers.S +++ b/src/coreclr/vm/arm/ehhelpers.S @@ -107,13 +107,9 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 // R1 = PC to invoke // R2 = address of R4 register in CONTEXT record// used to restore the non-volatile registers of CrawlFrame // R3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved.
- // [SP] = establisher frame (CallerSP) // // Save the SP of this function str sp, [r3] - // Save the establisher frame into our frame - ldr r3, [sp, 40] - str r3, [sp] // apply the non-volatiles corresponding to the CrawlFrame ldm r2!, {r4-r6} add r2, r2, #4 @@ -132,7 +128,6 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 PROLOG_PUSH "{r7, lr}" PROLOG_STACK_SAVE r7 - alloc_stack 8 // On entry: // @@ -140,19 +135,14 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 // R1 = FP of main method // R2 = PC to invoke // R3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - // [SP] = establisher frame (CallerSP) // // Save the SP of this function str sp, [r3] // Restore frame pointer mov r11, r1 - // Set r1 to CallerSP (older R2R ABI) and save it into our frame - ldr r1, [sp, 16] - str r1, [sp] // Invoke the filter funclet blx r2 - free_stack 8 EPILOG_POP "{r7, pc}" NESTED_END CallEHFilterFunclet, _TEXT diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 110fe3c6570343..5d43c46aa1cb9f 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -362,25 +362,21 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // X1 = PC to invoke // X2 = address of X19 register in CONTEXT record// used to restore the non-volatile registers of CrawlFrame // X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - // X4 = establisher frame (CallerSP) // - // Using below prolog instead of PROLOG_SAVE_REG_PAIR_INDEXED fp,lr, -112 + // Using below prolog instead of PROLOG_SAVE_REG_PAIR_INDEXED fp,lr, -96 // is intentional. Above statement would also emit instruction to save // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body // of method. However, this method needs to be able to change fp before calling funclet. 
// This is required to access locals in funclet. - PROLOG_SAVE_REG_PAIR_INDEXED x29, lr, -112 + PROLOG_SAVE_REG_PAIR_INDEXED x29, lr, -96 // Spill callee saved registers - PROLOG_SAVE_REG_PAIR x19, x20, 32 - PROLOG_SAVE_REG_PAIR x21, x22, 48 - PROLOG_SAVE_REG_PAIR x23, x24, 64 - PROLOG_SAVE_REG_PAIR x25, x26, 80 - PROLOG_SAVE_REG_PAIR x27, x28, 96 - - // Save establisher frame pointer into our stack frame - str x4, [sp, 16] + PROLOG_SAVE_REG_PAIR x19, x20, 16 + PROLOG_SAVE_REG_PAIR x21, x22, 32 + PROLOG_SAVE_REG_PAIR x23, x24, 48 + PROLOG_SAVE_REG_PAIR x25, x26, 64 + PROLOG_SAVE_REG_PAIR x27, x28, 80 // Save the SP of this function mov x4, sp @@ -397,21 +393,20 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler blr x1 nop - EPILOG_RESTORE_REG_PAIR x19, x20, 32 - EPILOG_RESTORE_REG_PAIR x21, x22, 48 - EPILOG_RESTORE_REG_PAIR x23, x24, 64 - EPILOG_RESTORE_REG_PAIR x25, x26, 80 - EPILOG_RESTORE_REG_PAIR x27, x28, 96 - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 112 + EPILOG_RESTORE_REG_PAIR x19, x20, 16 + EPILOG_RESTORE_REG_PAIR x21, x22, 32 + EPILOG_RESTORE_REG_PAIR x23, x24, 48 + EPILOG_RESTORE_REG_PAIR x25, x26, 64 + EPILOG_RESTORE_REG_PAIR x27, x28, 80 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 96 EPILOG_RETURN NESTED_END CallEHFunclet, _TEXT -// This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the -// frame pointer for accessing the locals in the parent method. +// This helper enables us to call into a filter funclet after restoring Fp register NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 + PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -16 // On entry: // @@ -419,19 +414,15 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // X1 = FP of main function // X2 = PC to invoke // X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
- // X4 = establisher frame (CallerSP) // - // Save establisher frame pointer into our stack frame - str x4, [sp, 16] // Save the SP of this function str fp, [x3] // Restore frame pointer mov fp, x1 // Invoke the filter funclet - mov x1, x4 // x1 = CallerSP (older R2R ABI) blr x2 - EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 16 EPILOG_RETURN NESTED_END CallEHFilterFunclet, _TEXT diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 812e9f48c8c482..8360b4d774ffdd 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -701,25 +701,21 @@ COMToCLRDispatchHelper_RegSetup ; X1 = PC to invoke ; X2 = address of X19 register in CONTEXT record; used to restore the non-volatile registers of CrawlFrame ; X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - ; X4 = establisher frame (CallerSP) ; - ; Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,lr, #-16! + ; Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,lr, #-96! ; is intentional. Above statement would also emit instruction to save ; sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body ; of method. However, this method needs to be able to change fp before calling funclet. ; This is required to access locals in funclet. - PROLOG_SAVE_REG_PAIR_NO_FP fp,lr, #-112! + PROLOG_SAVE_REG_PAIR_NO_FP fp,lr, #-96! ; Spill callee saved registers - PROLOG_SAVE_REG_PAIR x19, x20, 32 - PROLOG_SAVE_REG_PAIR x21, x22, 48 - PROLOG_SAVE_REG_PAIR x23, x24, 64 - PROLOG_SAVE_REG_PAIR x25, x26, 80 - PROLOG_SAVE_REG_PAIR x27, x28, 96 - - ; Save establisher frame pointer into our stack frame - str x4, [sp, 16] + PROLOG_SAVE_REG_PAIR x19, x20, 16 + PROLOG_SAVE_REG_PAIR x21, x22, 32 + PROLOG_SAVE_REG_PAIR x23, x24, 48 + PROLOG_SAVE_REG_PAIR x25, x26, 64 + PROLOG_SAVE_REG_PAIR x27, x28, 80 ; Save the SP of this function. 
We cannot store SP directly. mov fp, sp @@ -736,21 +732,20 @@ COMToCLRDispatchHelper_RegSetup blr x1 nop - EPILOG_RESTORE_REG_PAIR x19, x20, 32 - EPILOG_RESTORE_REG_PAIR x21, x22, 48 - EPILOG_RESTORE_REG_PAIR x23, x24, 64 - EPILOG_RESTORE_REG_PAIR x25, x26, 80 - EPILOG_RESTORE_REG_PAIR x27, x28, 96 - EPILOG_RESTORE_REG_PAIR fp, lr, #112! + EPILOG_RESTORE_REG_PAIR x19, x20, 16 + EPILOG_RESTORE_REG_PAIR x21, x22, 32 + EPILOG_RESTORE_REG_PAIR x23, x24, 48 + EPILOG_RESTORE_REG_PAIR x25, x26, 64 + EPILOG_RESTORE_REG_PAIR x27, x28, 80 + EPILOG_RESTORE_REG_PAIR fp, lr, #96! EPILOG_RETURN NESTED_END CallEHFunclet - ; This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the - ; frame pointer for accessing the locals in the parent method. + ; This helper enables us to call into a filter funclet after restoring Fp register NESTED_ENTRY CallEHFilterFunclet - PROLOG_SAVE_REG_PAIR fp, lr, #-32! + PROLOG_SAVE_REG_PAIR fp, lr, #-16! ; On entry: ; @@ -758,19 +753,15 @@ COMToCLRDispatchHelper_RegSetup ; X1 = FP of the main function ; X2 = PC to invoke ; X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - ; X4 = establisher frame (CallerSP) ; - ; Save establisher frame pointer into our stack frame - str x4, [sp, 16] ; Save the SP of this function str fp, [x3] ; Restore frame pointer mov fp, x1 ; Invoke the filter funclet - mov x1, x4 ; x1 = CallerSP (older R2R ABI) blr x2 - EPILOG_RESTORE_REG_PAIR fp, lr, #32! + EPILOG_RESTORE_REG_PAIR fp, lr, #16! 
EPILOG_RETURN NESTED_END CallEHFilterFunclet diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index f15202bc321eff..a3491554d8a75c 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -2918,7 +2918,6 @@ ULONG GetFixedStackSize(); ULONG GetFixedStackSize(); void GetOffsetsFromUnwindInfo(ULONG* pRSPOffset, ULONG* pRBPOffset); - ULONG GetFrameOffsetFromUnwindInfo(); #endif // TARGET_AMD64 private: diff --git a/src/coreclr/vm/eetwain.cpp b/src/coreclr/vm/eetwain.cpp index 2106478fcfa2d3..dfb495e4e50114 100644 --- a/src/coreclr/vm/eetwain.cpp +++ b/src/coreclr/vm/eetwain.cpp @@ -1998,10 +1998,10 @@ void EECodeManager::LeaveCatch(GCInfoToken gcInfoToken, #ifndef TARGET_WASM // This is an assembly helper that enables us to call into EH funclets. -EXTERN_C DWORD_PTR STDCALL CallEHFunclet(Object *pThrowable, UINT_PTR pFuncletToInvoke, UINT_PTR *pFirstNonVolReg, UINT_PTR *pFuncletCallerSP, UINT_PTR establisherFrame); +EXTERN_C DWORD_PTR STDCALL CallEHFunclet(Object *pThrowable, UINT_PTR pFuncletToInvoke, UINT_PTR *pFirstNonVolReg, UINT_PTR *pFuncletCallerSP); // This is an assembly helper that enables us to call into EH filter funclets. 
-EXTERN_C DWORD_PTR STDCALL CallEHFilterFunclet(Object *pThrowable, TADDR FP, UINT_PTR pFuncletToInvoke, UINT_PTR *pFuncletCallerSP, UINT_PTR establisherFrame); +EXTERN_C DWORD_PTR STDCALL CallEHFilterFunclet(Object *pThrowable, TADDR FP, UINT_PTR pFuncletToInvoke, UINT_PTR *pFuncletCallerSP); typedef DWORD_PTR (HandlerFn)(UINT_PTR uStackFrame, Object* pExceptionObj); @@ -2034,33 +2034,6 @@ static inline UINT_PTR *GetFirstNonVolatileRegisterAddress(PCONTEXT pContextReco #endif } -typedef DWORD_PTR (HandlerFn)(UINT_PTR uStackFrame, Object* pExceptionObj); -static UINT_PTR GetEstablisherFrame(REGDISPLAY* pvRegDisplay, ExInfo* exInfo) -{ -#ifdef HOST_AMD64 - _ASSERTE(exInfo->m_frameIter.m_crawl.GetRegisterSet() == pvRegDisplay); - if (exInfo->m_frameIter.m_crawl.GetCodeInfo()->HasFrameRegister()) - { - ULONG frameOffset = exInfo->m_frameIter.m_crawl.GetCodeInfo()->GetFrameOffsetFromUnwindInfo(); - return pvRegDisplay->pCurrentContext->Rbp - 16 * frameOffset; - } - else - { - return pvRegDisplay->SP; - } -#elif defined(HOST_ARM64) - return pvRegDisplay->SP; -#elif defined(HOST_ARM) - return pvRegDisplay->SP; -#elif defined(HOST_X86) - return pvRegDisplay->SP; -#elif defined(HOST_RISCV64) - return pvRegDisplay->SP; -#elif defined(HOST_LOONGARCH64) - return pvRegDisplay->SP; -#endif -} - #endif // TARGET_WASM // Call catch, finally or filter funclet. @@ -2079,7 +2052,6 @@ DWORD_PTR EECodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDIS // Since the actual caller of the funclet is the assembly helper, pass the reference // to the CallerStackFrame instance so that it can be updated. 
UINT_PTR *pFuncletCallerSP = &(pExInfo->m_csfEHClause.SP); - UINT_PTR establisherFrame = GetEstablisherFrame(pRD, pExInfo); if (isFilterFunclet) { @@ -2089,16 +2061,14 @@ DWORD_PTR EECodeManager::CallFunclet(OBJECTREF throwable, void* pHandler, REGDIS dwResult = CallEHFilterFunclet(OBJECTREFToObject(throwable), GetFP(pRD->pCurrentContext), CastHandlerFn(pfnHandler), - pFuncletCallerSP, - establisherFrame); + pFuncletCallerSP); } else { dwResult = CallEHFunclet(OBJECTREFToObject(throwable), CastHandlerFn(pfnHandler), GetFirstNonVolatileRegisterAddress(pRD->pCurrentContext), - pFuncletCallerSP, - establisherFrame); + pFuncletCallerSP); } #endif // TARGET_WASM diff --git a/src/coreclr/vm/i386/ehhelpers.S b/src/coreclr/vm/i386/ehhelpers.S index 63ba07d3ebf111..3b65e1b384d916 100644 --- a/src/coreclr/vm/i386/ehhelpers.S +++ b/src/coreclr/vm/i386/ehhelpers.S @@ -25,7 +25,6 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // [ebp+12] = PC to invoke // [ebp+16] = address of EDI register in CONTEXT record // used to restore the non-volatile registers of CrawlFrame // [ebp+20] = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - // [ebp+24] = establisher frame (CallerSP) // // Save the SP of this function @@ -76,7 +75,6 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // [ebp+12] = FP to restore // [ebp+16] = PC to invoke // [ebp+20] = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
- // [ebp+24] = establisher frame (CallerSP) // // Save the SP of this function diff --git a/src/coreclr/vm/i386/ehhelpers.asm b/src/coreclr/vm/i386/ehhelpers.asm index 87fbdb40638ce2..ddae1ff34a7577 100644 --- a/src/coreclr/vm/i386/ehhelpers.asm +++ b/src/coreclr/vm/i386/ehhelpers.asm @@ -29,7 +29,6 @@ _CallEHFunclet@16 proc public ; [ebp+12] = PC to invoke ; [ebp+16] = address of EDI register in CONTEXT record ; used to restore the non-volatile registers of CrawlFrame ; [ebp+20] = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - ; [ebp+24] = establisher frame (CallerSP) ; ; Save the SP of this function @@ -74,7 +73,6 @@ _CallEHFilterFunclet@16 proc public ; [ebp+12] = FP to restore ; [ebp+16] = PC to invoke ; [ebp+20] = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - ; [ebp+24] = establisher frame (CallerSP) ; ; Save the SP of this function diff --git a/src/coreclr/vm/jitinterface.cpp b/src/coreclr/vm/jitinterface.cpp index 484f27f8a4b9e4..f6a3498943cc63 100644 --- a/src/coreclr/vm/jitinterface.cpp +++ b/src/coreclr/vm/jitinterface.cpp @@ -15105,41 +15105,4 @@ void EECodeInfo::GetOffsetsFromUnwindInfo(ULONG* pRSPOffset, ULONG* pRBPOffset) *pRBPOffset = StackOffset; } #undef kRBP - -ULONG EECodeInfo::GetFrameOffsetFromUnwindInfo() -{ - WRAPPER_NO_CONTRACT; - - SUPPORTS_DAC; - - // moduleBase is a target address. - TADDR moduleBase = GetModuleBase(); - - DWORD unwindInfo = RUNTIME_FUNCTION__GetUnwindInfoAddress(GetFunctionEntry()); - - if ((unwindInfo & RUNTIME_FUNCTION_INDIRECT) != 0) - { - unwindInfo = RUNTIME_FUNCTION__GetUnwindInfoAddress(PTR_RUNTIME_FUNCTION(moduleBase + (unwindInfo & ~RUNTIME_FUNCTION_INDIRECT))); - } - - UNWIND_INFO * pInfo = GetUnwindInfoHelper(unwindInfo); - _ASSERTE((pInfo->Flags & UNW_FLAG_CHAININFO) == 0); - - // Either we are not using a frame pointer, or we are using rbp as the frame pointer. 
- if ( (pInfo->FrameRegister != 0) && (pInfo->FrameRegister != kRBP) ) - { - _ASSERTE(!"GetRbpOffset() - non-RBP frame pointer used, violating assumptions of the security stackwalk cache"); - DebugBreak(); - } - - ULONG frameOffset = pInfo->FrameOffset; -#ifdef UNIX_AMD64_ABI - if ((frameOffset == 15) && (pInfo->UnwindCode[0].UnwindOp == UWOP_SET_FPREG_LARGE)) - { - frameOffset = *(ULONG*)&pInfo->UnwindCode[1]; - } -#endif - - return frameOffset; -} #endif // defined(TARGET_AMD64) diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index a3a859c66d0ad7..aec42b36d7d174 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -830,22 +830,18 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // $a1 = PC to invoke // $a2 = address of s0 register in CONTEXT record// used to restore the non-volatile registers of CrawlFrame // $a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
- // $a4 = establisher frame (CallerSP) // // $fp,$ra - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 112, 0 + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 96, 0 // Spill callee saved registers // $s0,$s1 - PROLOG_SAVE_REG_PAIR 23, 24, 32 - PROLOG_SAVE_REG_PAIR 25, 26, 48 - PROLOG_SAVE_REG_PAIR 27, 28, 64 - PROLOG_SAVE_REG_PAIR 29, 30, 80 - PROLOG_SAVE_REG 31, 96 - - // Save establisher frame pointer into our stack frame - st.d $a4, $sp, 16 + PROLOG_SAVE_REG_PAIR 23, 24, 16 + PROLOG_SAVE_REG_PAIR 25, 26, 32 + PROLOG_SAVE_REG_PAIR 27, 28, 48 + PROLOG_SAVE_REG_PAIR 29, 30, 64 + PROLOG_SAVE_REG 31, 80 // Save the SP of this function st.d $sp, $a3, 0 @@ -864,13 +860,13 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // Invoke the funclet jirl $ra, $a1, 0 - EPILOG_RESTORE_REG_PAIR 23, 24, 32 - EPILOG_RESTORE_REG_PAIR 25, 26, 48 - EPILOG_RESTORE_REG_PAIR 27, 28, 64 - EPILOG_RESTORE_REG_PAIR 29, 30, 80 - EPILOG_RESTORE_REG 31, 96 + EPILOG_RESTORE_REG_PAIR 23, 24, 16 + EPILOG_RESTORE_REG_PAIR 25, 26, 32 + EPILOG_RESTORE_REG_PAIR 27, 28, 48 + EPILOG_RESTORE_REG_PAIR 29, 30, 64 + EPILOG_RESTORE_REG 31, 80 // $fp,$ra - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 112 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 96 EPILOG_RETURN NESTED_END CallEHFunclet, _TEXT @@ -879,7 +875,7 @@ NESTED_END CallEHFunclet, _TEXT // frame pointer for accessing the locals in the parent method. NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // $fp,$ra - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 32 + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 16 // On entry: // @@ -887,19 +883,15 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // $a1 = FP of main function // $a2 = PC to invoke // $a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
- // $a4 = establisher frame (CallerSP) // - // Save establisher frame pointer into our stack frame - st.d $a4, $sp, 16 // Save the SP of this function st.d $fp, $a3, 0 // Restore frame pointer move $fp, $a1 // Invoke the filter funclet - move $a1, $a4 // a1 = CallerSP (older R2R ABI) jirl $ra, $a2, 0 - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 16 EPILOG_RETURN NESTED_END CallEHFilterFunclet, _TEXT diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index a7a26d1e5caedf..0f08700b44c86e 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -682,22 +682,18 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // a1 = PC to invoke // a2 = address of s0 register in CONTEXT record// used to restore the non-volatile registers of CrawlFrame // a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - // a4 = establisher frame (CallerSP) // - PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 144, 0 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 128, 0 // Spill callee saved registers - PROLOG_SAVE_REG_PAIR s1, s2, 32 - PROLOG_SAVE_REG_PAIR s3, s4, 48 - PROLOG_SAVE_REG_PAIR s5, s6, 64 - PROLOG_SAVE_REG_PAIR s7, s8, 80 - PROLOG_SAVE_REG_PAIR s9, s10, 96 - PROLOG_SAVE_REG_PAIR s11, gp, 112 - PROLOG_SAVE_REG tp, 128 - - // Save establisher frame pointer into our stack frame - sd a4, 16(sp) + PROLOG_SAVE_REG_PAIR s1, s2, 16 + PROLOG_SAVE_REG_PAIR s3, s4, 32 + PROLOG_SAVE_REG_PAIR s5, s6, 48 + PROLOG_SAVE_REG_PAIR s7, s8, 64 + PROLOG_SAVE_REG_PAIR s9, s10, 80 + PROLOG_SAVE_REG_PAIR s11, gp, 96 + PROLOG_SAVE_REG tp, 112 // Save the SP of this function sd sp, 0(a3) @@ -720,23 +716,22 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // Invoke the funclet jalr a1 - EPILOG_RESTORE_REG_PAIR s1, s2, 32 - EPILOG_RESTORE_REG_PAIR s3, s4, 48 - EPILOG_RESTORE_REG_PAIR s5, s6, 64 - EPILOG_RESTORE_REG_PAIR s7, s8, 80 - EPILOG_RESTORE_REG_PAIR s9, s10, 96 - 
EPILOG_RESTORE_REG_PAIR s11, gp, 112 - EPILOG_RESTORE_REG tp, 128 + EPILOG_RESTORE_REG_PAIR s1, s2, 16 + EPILOG_RESTORE_REG_PAIR s3, s4, 32 + EPILOG_RESTORE_REG_PAIR s5, s6, 48 + EPILOG_RESTORE_REG_PAIR s7, s8, 64 + EPILOG_RESTORE_REG_PAIR s9, s10, 80 + EPILOG_RESTORE_REG_PAIR s11, gp, 96 + EPILOG_RESTORE_REG tp, 112 - EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 144 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 128 EPILOG_RETURN - NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the // frame pointer for accessing the locals in the parent method. NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler - PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 32 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 16 // On entry: // @@ -744,19 +739,15 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // a1 = FP of main function // a2 = PC to invoke // a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. - // a4 = establisher frame (CallerSP) // - // Save establisher frame pointer into our stack frame - sd a4, 16(sp) // Save the SP of this function sd fp, 0(a3) // Restore frame pointer mv fp, a1 // Invoke the filter funclet - mv a1, a4 // a1 = CallerSP (older R2R ABI) jalr a2 - EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 32 + EPILOG_RESTORE_REG_PAIR_INDEXED fp, ra, 16 EPILOG_RETURN NESTED_END CallEHFilterFunclet, _TEXT From e02b5c1433f4cc2231c1e49449d87604125bd920 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 11:21:57 +0200 Subject: [PATCH 28/52] Use symbolic constants --- src/coreclr/vm/amd64/asmhelpers.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 947971004f2e73..0212123e719900 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -409,12 +409,12 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler FUNCLET_CALL_PROLOGUE 0, 1 // 
Restore RBX, RBP, R12, R13, R14, R15 from CONTEXT - mov rbx, [rdx + 0] - mov rbp, [rdx + 16] - mov r12, [rdx + 72] - mov r13, [rdx + 80] - mov r14, [rdx + 88] - mov r15, [rdx + 96] + mov rbx, [rdx + OFFSETOF__CONTEXT__Rbx - OFFSETOF__CONTEXT__Rbx] + mov rbp, [rdx + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] + mov r12, [rdx + OFFSETOF__CONTEXT__R12 - OFFSETOF__CONTEXT__Rbx] + mov r13, [rdx + OFFSETOF__CONTEXT__R13 - OFFSETOF__CONTEXT__Rbx] + mov r14, [rdx + OFFSETOF__CONTEXT__R14 - OFFSETOF__CONTEXT__Rbx] + mov r15, [rdx + OFFSETOF__CONTEXT__R15 - OFFSETOF__CONTEXT__Rbx] // Save the SP of this function. mov [rcx], rsp From 0e93d0d2b841d2baa2d24a97ce6aa38cdcf7347c Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 11:57:25 +0200 Subject: [PATCH 29/52] Cleanup the code in gcencode.cpp --- src/coreclr/jit/gcencode.cpp | 37 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index 91a7ea6de42d06..edd2994dc11ea7 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -3870,13 +3870,14 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz assert(false); } - const int genericContextArgOffset = compiler->lvaCachedGenericContextArgOffset(); - const int offset = compiler->lvaToCallerSPRelativeOffset(genericContextArgOffset, - compiler->isFramePointerUsed()); + const int offset = compiler->lvaCachedGenericContextArgOffset(); #ifdef DEBUG if (compiler->opts.IsOSR()) { + const int callerSpOffset = + compiler->lvaToCallerSPRelativeOffset(offset, compiler->isFramePointerUsed()); + // Sanity check the offset vs saved patchpoint info. // const PatchpointInfo* const ppInfo = compiler->info.compPatchpointInfo; @@ -3885,12 +3886,12 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz // subtract off 2 register slots (saved FP, saved RA). 
// const int osrOffset = ppInfo->GenericContextArgOffset() - 2 * REGSIZE_BYTES; - assert(offset == osrOffset); + assert(callerSpOffset == osrOffset); #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // PP info has virtual offset. This is also the caller SP offset. // const int osrOffset = ppInfo->GenericContextArgOffset(); - assert(offset == osrOffset); + assert(callerSpOffset == osrOffset); #endif } #endif @@ -3903,24 +3904,16 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz { assert(compiler->info.compThisArg != BAD_VAR_NUM); + const int offset = compiler->lvaCachedGenericContextArgOffset(); + +#ifdef DEBUG // OSR can report the root method's frame slot, if that method reported context. // If not, the OSR frame will have saved the needed context. - // - bool useRootFrameSlot = true; - if (compiler->opts.IsOSR()) + if (compiler->opts.IsOSR() && compiler->info.compPatchpointInfo->HasKeptAliveThis()) { - const PatchpointInfo* const ppInfo = compiler->info.compPatchpointInfo; + const int callerSpOffset = + compiler->lvaToCallerSPRelativeOffset(offset, compiler->isFramePointerUsed(), true); - useRootFrameSlot = ppInfo->HasKeptAliveThis(); - } - - const int genericContextArgOffset = compiler->lvaCachedGenericContextArgOffset(); - const int offset = compiler->lvaToCallerSPRelativeOffset(genericContextArgOffset, - compiler->isFramePointerUsed(), useRootFrameSlot); - -#ifdef DEBUG - if (compiler->opts.IsOSR() && useRootFrameSlot) - { // Sanity check the offset vs saved patchpoint info. // const PatchpointInfo* const ppInfo = compiler->info.compPatchpointInfo; @@ -3929,17 +3922,17 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz // subtract off 2 register slots (saved FP, saved RA). 
// const int osrOffset = ppInfo->KeptAliveThisOffset() - 2 * REGSIZE_BYTES; - assert(offset == osrOffset); + assert(callerSpOffset == osrOffset); #elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) // PP info has virtual offset. This is also the caller SP offset. // const int osrOffset = ppInfo->KeptAliveThisOffset(); - assert(offset == osrOffset); + assert(callerSpOffset == osrOffset); #endif } #endif - gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(genericContextArgOffset, GENERIC_CONTEXTPARAM_THIS); + gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(offset, GENERIC_CONTEXTPARAM_THIS); } if (compiler->getNeedsGSSecurityCookie()) From e3b98b60251ad4afc5b8a19d1692f7ebbb2e9af9 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 11:57:39 +0200 Subject: [PATCH 30/52] Fix NativeAOT x64 --- src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S | 9 ++------- .../nativeaot/Runtime/amd64/ExceptionHandling.asm | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S index c5ce852e46fd0e..cffacee7b358c0 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.S @@ -330,8 +330,7 @@ NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler mov [rax], rcx #endif - mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame - mov rsi, [rsp + locArg0] // rsi <- exception object + mov rdi, [rsp + locArg0] // rdi <- exception object call qword ptr [rsp + locArg1] // call handler funclet ALTERNATE_ENTRY RhpCallCatchFunclet2 @@ -470,7 +469,6 @@ NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler mov [rax], rcx #endif - mov rdi, [rsi + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame call qword ptr [rsp + locArg0] // handler funclet address ALTERNATE_ENTRY RhpCallFinallyFunclet2 @@ -518,10 +516,7 @@ NESTED_ENTRY
RhpCallFilterFunclet, _TEXT, NoHandler mov rax, [rdx + OFFSETOF__REGDISPLAY__pRbp] mov rbp, [rax] - mov rax, rsi // rax <- handler funclet address - mov rsi, rdi // rsi <- exception object - mov rdi, [rdx + OFFSETOF__REGDISPLAY__SP] // rdi <- establisher frame - call rax + call rsi ALTERNATE_ENTRY RhpCallFilterFunclet2 diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm index 741b916f00b904..4d225ba46054d0 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm @@ -434,8 +434,7 @@ endif movdqa xmm14,[r8 + OFFSETOF__REGDISPLAY__Xmm + 8*10h] movdqa xmm15,[r8 + OFFSETOF__REGDISPLAY__Xmm + 9*10h] - mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame - mov rdx, [rsp + rsp_offsetof_arguments + 0h] ;; rdx <- exception object + mov rcx, [rsp + rsp_offsetof_arguments + 0h] ;; rcx <- exception object call qword ptr [rsp + rsp_offsetof_arguments + 8h] ;; call handler funclet ALTERNATE_ENTRY RhpCallCatchFunclet2 @@ -639,7 +638,6 @@ if 0 ;; _DEBUG ;; @TODO: temporarily removed because trashing RBP breaks the deb mov [rax], r9 endif - mov rcx, [rdx + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame call qword ptr [rsp + rsp_offsetof_arguments + 0h] ;; handler funclet address ALTERNATE_ENTRY RhpCallFinallyFunclet2 @@ -702,10 +700,7 @@ NESTED_ENTRY RhpCallFilterFunclet, _TEXT mov rax, [r8 + OFFSETOF__REGDISPLAY__pRbp] mov rbp, [rax] - mov rax, rdx ;; rax <- handler funclet address - mov rdx, rcx ;; rdx <- exception object - mov rcx, [r8 + OFFSETOF__REGDISPLAY__SP] ;; rcx <- establisher frame - call rax + call rdx ALTERNATE_ENTRY RhpCallFilterFunclet2 From c315047fa119818c942942ed95d870a503f0a670 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 12:13:03 +0200 Subject: [PATCH 31/52] Apply JIT format --- src/coreclr/jit/gcencode.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) 
diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index edd2994dc11ea7..7c7a3ccac1a424 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -3875,8 +3875,7 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz #ifdef DEBUG if (compiler->opts.IsOSR()) { - const int callerSpOffset = - compiler->lvaToCallerSPRelativeOffset(offset, compiler->isFramePointerUsed()); + const int callerSpOffset = compiler->lvaToCallerSPRelativeOffset(offset, compiler->isFramePointerUsed()); // Sanity check the offset vs saved patchpoint info. // From 792d01da02f4600147e5f3572f924ae7fca8ed68 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 12:44:14 +0200 Subject: [PATCH 32/52] Fix CallEHFilterFunclet on linux-x64 --- src/coreclr/vm/amd64/asmhelpers.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 0212123e719900..6f0543af85b2e1 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -447,7 +447,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler mov [rcx], rsp // Invoke the filter funclet - call rsi + call rdx FUNCLET_CALL_EPILOGUE From 4317b070ebba5f50a00dee3c88029951684d29c2 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 13:31:48 +0200 Subject: [PATCH 33/52] Build fix --- src/coreclr/jit/gcencode.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index 7c7a3ccac1a424..cd96c8d0a8063b 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -3895,7 +3895,7 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz } #endif - gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(genericContextArgOffset, ctxtParamType); + gcInfoEncoderWithLog->SetGenericsInstContextStackSlot(offset, ctxtParamType); } // As discussed above, 
handle the case where the generics context is obtained via // the method table of "this". From b7b6b0849a5dd583dcc916391cc265aa1b0530a6 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 13:46:59 +0200 Subject: [PATCH 34/52] Attempt to fix unwinding info for ARM64 --- src/coreclr/vm/arm64/asmhelpers.S | 5 +++-- src/coreclr/vm/arm64/asmhelpers.asm | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 5d43c46aa1cb9f..93849bafe4090e 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -369,7 +369,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body // of method. However, this method needs to be able to change fp before calling funclet. // This is required to access locals in funclet. - PROLOG_SAVE_REG_PAIR_INDEXED x29, lr, -96 + PROLOG_SAVE_REG_PAIR_NO_FP x29, lr, -96 // Spill callee saved registers PROLOG_SAVE_REG_PAIR x19, x20, 16 @@ -406,7 +406,7 @@ NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet after restoring Fp register NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler - PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -16 + PROLOG_SAVE_REG_PAIR_NO_FP fp, lr, -16 // On entry: // @@ -416,6 +416,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
// // Save the SP of this function + mov fp, sp str fp, [x3] // Restore frame pointer mov fp, x1 diff --git a/src/coreclr/vm/arm64/asmhelpers.asm b/src/coreclr/vm/arm64/asmhelpers.asm index 8360b4d774ffdd..bd4f60ed08ea83 100644 --- a/src/coreclr/vm/arm64/asmhelpers.asm +++ b/src/coreclr/vm/arm64/asmhelpers.asm @@ -745,7 +745,7 @@ COMToCLRDispatchHelper_RegSetup ; This helper enables us to call into a filter funclet after restoring Fp register NESTED_ENTRY CallEHFilterFunclet - PROLOG_SAVE_REG_PAIR fp, lr, #-16! + PROLOG_SAVE_REG_PAIR_NO_FP fp, lr, #-16! ; On entry: ; @@ -755,6 +755,7 @@ COMToCLRDispatchHelper_RegSetup ; X3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. ; ; Save the SP of this function + mov fp, sp str fp, [x3] ; Restore frame pointer mov fp, x1 From f630b13e6c1976483cb77023cd5bdcbd5bcaa816 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 14:10:43 +0200 Subject: [PATCH 35/52] Reuse the PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED macro from NativeAOT --- src/coreclr/pal/inc/unixasmmacrosarm64.inc | 7 +++++++ src/coreclr/vm/arm64/asmhelpers.S | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/coreclr/pal/inc/unixasmmacrosarm64.inc b/src/coreclr/pal/inc/unixasmmacrosarm64.inc index 1e9a8a1e2bba7e..640716f8058d5d 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm64.inc @@ -114,6 +114,13 @@ C_FUNC(\Name\()_End): .endif .endm +.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ofs + stp \reg1, \reg2, [sp, \ofs]! 
+ .cfi_adjust_cfa_offset -\ofs + .cfi_rel_offset \reg1, 0 + .cfi_rel_offset \reg2, 8 +.endm + .macro EPILOG_RESTORE_REG reg, ofs ldr \reg, [sp, \ofs] .cfi_restore \reg diff --git a/src/coreclr/vm/arm64/asmhelpers.S b/src/coreclr/vm/arm64/asmhelpers.S index 93849bafe4090e..13a18cf7855390 100644 --- a/src/coreclr/vm/arm64/asmhelpers.S +++ b/src/coreclr/vm/arm64/asmhelpers.S @@ -369,7 +369,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // sp in fp. If sp is saved in fp in prolog then it is not expected that fp can change in the body // of method. However, this method needs to be able to change fp before calling funclet. // This is required to access locals in funclet. - PROLOG_SAVE_REG_PAIR_NO_FP x29, lr, -96 + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED x29, lr, -96 // Spill callee saved registers PROLOG_SAVE_REG_PAIR x19, x20, 16 @@ -406,7 +406,7 @@ NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet after restoring Fp register NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler - PROLOG_SAVE_REG_PAIR_NO_FP fp, lr, -16 + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED fp, lr, -16 // On entry: // From 7be6fff625f00111b428bba03cd573445d8701c4 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 16:31:50 +0200 Subject: [PATCH 36/52] Fix RV64 and LA64 CallEHFilterFunclet unwind info --- src/coreclr/vm/loongarch64/asmhelpers.S | 4 ++-- src/coreclr/vm/riscv64/asmhelpers.S | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/loongarch64/asmhelpers.S b/src/coreclr/vm/loongarch64/asmhelpers.S index aec42b36d7d174..4d136993665af1 100644 --- a/src/coreclr/vm/loongarch64/asmhelpers.S +++ b/src/coreclr/vm/loongarch64/asmhelpers.S @@ -875,7 +875,7 @@ NESTED_END CallEHFunclet, _TEXT // frame pointer for accessing the locals in the parent method. 
NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // $fp,$ra - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 16 + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, 16, 0 // On entry: // @@ -885,7 +885,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // $a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. // // Save the SP of this function - st.d $fp, $a3, 0 + st.d $sp, $a3, 0 // Restore frame pointer move $fp, $a1 // Invoke the filter funclet diff --git a/src/coreclr/vm/riscv64/asmhelpers.S b/src/coreclr/vm/riscv64/asmhelpers.S index 0f08700b44c86e..37ab3732fac0b9 100644 --- a/src/coreclr/vm/riscv64/asmhelpers.S +++ b/src/coreclr/vm/riscv64/asmhelpers.S @@ -731,7 +731,7 @@ NESTED_END CallEHFunclet, _TEXT // This helper enables us to call into a filter funclet by passing it the CallerSP to lookup the // frame pointer for accessing the locals in the parent method. NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler - PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 16 + PROLOG_SAVE_REG_PAIR_INDEXED fp, ra, 16, 0 // On entry: // @@ -741,7 +741,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // a3 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
// // Save the SP of this function - sd fp, 0(a3) + sd sp, 0(a3) // Restore frame pointer mv fp, a1 // Invoke the filter funclet From c97c7cf6f0aafdc0e8cf454a948c3ff5623778a7 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 21:28:07 +0200 Subject: [PATCH 37/52] Light version of CallEH[Filter]Funclet for x64 without restoring CONTEXT (matches previous behavior) --- docs/design/coreclr/botr/clr-abi.md | 2 +- src/coreclr/vm/amd64/AsmHelpers.asm | 92 ++++------------------------- src/coreclr/vm/amd64/asmhelpers.S | 54 ++--------------- 3 files changed, 17 insertions(+), 131 deletions(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 2d6beef703db41..148af13f280bcc 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -324,7 +324,7 @@ Note that JIT64 does not implement this properly. The C# compiler used to always ## Funclet parameters -For filter funclets the VM sets the frame register to be the same as the parent function. For second pass funclets the VM restores all non-volatile registers. The same convention is used across all platforms. +For filter funclets the VM sets the frame register to be the same as the parent function. For second pass funclets the VM restores only the frame register on AMD64 and all non-volatile registers on all other platforms. Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument (`REG_EXCEPTION_OBJECT`). On AMD64 it is passed in RCX (Windows ABI) or RSI (Unix ABI). On ARM and ARM64 this is the first argument and passed in R0. 
diff --git a/src/coreclr/vm/amd64/AsmHelpers.asm b/src/coreclr/vm/amd64/AsmHelpers.asm index 8e5449c1092db7..c1315be15ad7fa 100644 --- a/src/coreclr/vm/amd64/AsmHelpers.asm +++ b/src/coreclr/vm/amd64/AsmHelpers.asm @@ -507,54 +507,6 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT jmp METHODDESC_REGISTER LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT -;; -;; Prologue of all funclet calling helpers (CallXXXXFunclet) -;; -FUNCLET_CALL_PROLOGUE macro localsCount, alignStack - PUSH_CALLEE_SAVED_REGISTERS - - arguments_scratch_area_size = 20h - xmm_save_area_size = 10 * 10h ;; xmm6..xmm15 save area - stack_alloc_size = arguments_scratch_area_size + localsCount * 8 + alignStack * 8 + xmm_save_area_size - rsp_offsetof_arguments = stack_alloc_size + 8*8h + 8h - rsp_offsetof_locals = arguments_scratch_area_size + xmm_save_area_size - - alloc_stack stack_alloc_size - - save_xmm128_postrsp xmm6, (arguments_scratch_area_size + 0 * 10h) - save_xmm128_postrsp xmm7, (arguments_scratch_area_size + 1 * 10h) - save_xmm128_postrsp xmm8, (arguments_scratch_area_size + 2 * 10h) - save_xmm128_postrsp xmm9, (arguments_scratch_area_size + 3 * 10h) - save_xmm128_postrsp xmm10, (arguments_scratch_area_size + 4 * 10h) - save_xmm128_postrsp xmm11, (arguments_scratch_area_size + 5 * 10h) - save_xmm128_postrsp xmm12, (arguments_scratch_area_size + 6 * 10h) - save_xmm128_postrsp xmm13, (arguments_scratch_area_size + 7 * 10h) - save_xmm128_postrsp xmm14, (arguments_scratch_area_size + 8 * 10h) - save_xmm128_postrsp xmm15, (arguments_scratch_area_size + 9 * 10h) - - END_PROLOGUE -endm - -;; -;; Epilogue of all funclet calling helpers (CallXXXXFunclet) -;; -FUNCLET_CALL_EPILOGUE macro - movdqa xmm6, [rsp + arguments_scratch_area_size + 0 * 10h] - movdqa xmm7, [rsp + arguments_scratch_area_size + 1 * 10h] - movdqa xmm8, [rsp + arguments_scratch_area_size + 2 * 10h] - movdqa xmm9, [rsp + arguments_scratch_area_size + 3 * 10h] - movdqa xmm10, [rsp + arguments_scratch_area_size + 4 * 10h] - movdqa xmm11, 
[rsp + arguments_scratch_area_size + 5 * 10h] - movdqa xmm12, [rsp + arguments_scratch_area_size + 6 * 10h] - movdqa xmm13, [rsp + arguments_scratch_area_size + 7 * 10h] - movdqa xmm14, [rsp + arguments_scratch_area_size + 8 * 10h] - movdqa xmm15, [rsp + arguments_scratch_area_size + 9 * 10h] - - add rsp, stack_alloc_size - - POP_CALLEE_SAVED_REGISTERS -endm - ; This helper enables us to call into a funclet after restoring Fp register NESTED_ENTRY CallEHFunclet, _TEXT ; On entry: @@ -565,38 +517,19 @@ NESTED_ENTRY CallEHFunclet, _TEXT ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. ; - FUNCLET_CALL_PROLOGUE 0, 1 + push_nonvol_reg rbp + alloc_stack 20h ; argument scratch space for the call + END_PROLOGUE - ; Restore RBX, RBP, RSI, RDI, R12, R13, R14, R15 from CONTEXT - mov rbx, [r8 + OFFSETOF__CONTEXT__Rbx - OFFSETOF__CONTEXT__Rbx] + ; Restore RBP mov rbp, [r8 + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] - mov rsi, [r8 + OFFSETOF__CONTEXT__Rsi - OFFSETOF__CONTEXT__Rbx] - mov rdi, [r8 + OFFSETOF__CONTEXT__Rdi - OFFSETOF__CONTEXT__Rbx] - mov r12, [r8 + OFFSETOF__CONTEXT__R12 - OFFSETOF__CONTEXT__Rbx] - mov r13, [r8 + OFFSETOF__CONTEXT__R13 - OFFSETOF__CONTEXT__Rbx] - mov r14, [r8 + OFFSETOF__CONTEXT__R14 - OFFSETOF__CONTEXT__Rbx] - mov r15, [r8 + OFFSETOF__CONTEXT__R15 - OFFSETOF__CONTEXT__Rbx] - - ; Restore XMM registers from CONTEXT - movdqa xmm6, [r8 + OFFSETOF__CONTEXT__Xmm6 - OFFSETOF__CONTEXT__Rbx] - movdqa xmm7, [r8 + OFFSETOF__CONTEXT__Xmm7 - OFFSETOF__CONTEXT__Rbx] - movdqa xmm8, [r8 + OFFSETOF__CONTEXT__Xmm8 - OFFSETOF__CONTEXT__Rbx] - movdqa xmm9, [r8 + OFFSETOF__CONTEXT__Xmm9 - OFFSETOF__CONTEXT__Rbx] - movdqa xmm10, [r8 + OFFSETOF__CONTEXT__Xmm10 - OFFSETOF__CONTEXT__Rbx] - movdqa xmm11, [r8 + OFFSETOF__CONTEXT__Xmm11 - OFFSETOF__CONTEXT__Rbx] - movdqa xmm12, [r8 + OFFSETOF__CONTEXT__Xmm12 - OFFSETOF__CONTEXT__Rbx] - movdqa xmm13, [r8 + OFFSETOF__CONTEXT__Xmm13 - OFFSETOF__CONTEXT__Rbx] - movdqa 
xmm14, [r8 + OFFSETOF__CONTEXT__Xmm14 - OFFSETOF__CONTEXT__Rbx] - movdqa xmm15, [r8 + OFFSETOF__CONTEXT__Xmm15 - OFFSETOF__CONTEXT__Rbx] - ; Save the SP of this function. mov [r9], rsp - ; Invoke the funclet call rdx - FUNCLET_CALL_EPILOGUE - + add rsp, 20h + pop rbp ret NESTED_END CallEHFunclet, _TEXT @@ -611,22 +544,19 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT ; R9 = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. ; - FUNCLET_CALL_PROLOGUE 0, 1 + push_nonvol_reg rbp + alloc_stack 20h ; argument scratch space for the call + END_PROLOGUE ; Save the SP of this function mov [r9], rsp - ; Restore RBP to match main function RBP mov rbp, rdx - - ; Move throwable into the second parameter - mov rdx, rcx - ; Invoke the filter funclet call r8 - FUNCLET_CALL_EPILOGUE - + add rsp, 20h + pop rbp ret NESTED_END CallEHFilterFunclet, _TEXT diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 6f0543af85b2e1..162543c3885637 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -363,39 +363,6 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT jmp METHODDESC_REGISTER LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT - -// -// Prologue of all funclet calling helpers (CallXXXXFunclet) -// -.macro FUNCLET_CALL_PROLOGUE localsCount, alignStack - push_nonvol_reg r15 // save preserved regs for OS stackwalker - push_nonvol_reg r14 // ... - push_nonvol_reg r13 // ... - push_nonvol_reg r12 // ... - push_nonvol_reg rbx // ... - push_nonvol_reg rbp // ... 
- - stack_alloc_size = \localsCount * 8 + \alignStack * 8 - - alloc_stack stack_alloc_size - - END_PROLOGUE -.endm - -// -// Epilogue of all funclet calling helpers (CallXXXXFunclet) -// -.macro FUNCLET_CALL_EPILOGUE - free_stack stack_alloc_size - - pop_nonvol_reg rbp - pop_nonvol_reg rbx - pop_nonvol_reg r12 - pop_nonvol_reg r13 - pop_nonvol_reg r14 - pop_nonvol_reg r15 -.endm - // This helper enables us to call into a funclet after restoring Fp register NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // On entry: @@ -406,24 +373,16 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. // - FUNCLET_CALL_PROLOGUE 0, 1 + push_nonvol_reg rbp - // Restore RBX, RBP, R12, R13, R14, R15 from CONTEXT - mov rbx, [rdx + OFFSETOF__CONTEXT__Rbx - OFFSETOF__CONTEXT__Rbx] + // Restore RBP mov rbp, [rdx + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] - mov r12, [rdx + OFFSETOF__CONTEXT__R12 - OFFSETOF__CONTEXT__Rbx] - mov r13, [rdx + OFFSETOF__CONTEXT__R13 - OFFSETOF__CONTEXT__Rbx] - mov r14, [rdx + OFFSETOF__CONTEXT__R14 - OFFSETOF__CONTEXT__Rbx] - mov r15, [rdx + OFFSETOF__CONTEXT__R15 - OFFSETOF__CONTEXT__Rbx] - // Save the SP of this function. mov [rcx], rsp - // Invoke the funclet call rsi - FUNCLET_CALL_EPILOGUE - + pop_nonvol_reg rbp ret NESTED_END CallEHFunclet, _TEXT @@ -438,18 +397,15 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // RCX = address of the location where the SP of funclet's caller (i.e. this helper) should be saved. 
// - FUNCLET_CALL_PROLOGUE 0, 1 + push_nonvol_reg rbp // Restore RBP to match main funtion RBP mov rbp, rsi - // Save the SP of this function mov [rcx], rsp - // Invoke the filter funclet call rdx - FUNCLET_CALL_EPILOGUE - + pop_nonvol_reg rbp ret NESTED_END CallEHFilterFunclet, _TEXT From c78ea880d45950296d5974751403f86e0f7d6c05 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 21:50:06 +0200 Subject: [PATCH 38/52] Remove obsolete docs and comments --- docs/design/coreclr/botr/clr-abi.md | 6 ------ docs/design/features/OsrDetailsAndDebugging.md | 2 -- src/coreclr/vm/gc_unwind_x86.inl | 3 --- 3 files changed, 11 deletions(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 148af13f280bcc..742119d2b797cd 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -670,12 +670,6 @@ x64 currently saves RBP, RSI and RDI while ARM64 saves just FP and LR. However, EnC remap is not supported inside funclets. The stack layout of funclets does not matter for EnC. -## Considerations with regards to PSPSym - -As explained previously in this document, on x64 we have Initial RSP == PSPSym. For EnC methods, as we disallow remappings after localloc (see below), we furthermore have RBP == PSPSym. -For ARM64 we have Caller SP == PSPSym and the FP points to the previously saved FP/LR pair. For EnC the JIT always sets up the stack frame so that the FP/LR pair is at Caller SP - 16 and does not save any additional callee saves. -These invariants allow the VM to compute new value of the frame pointer and PSPSym after the edit without any additional information. Note that the frame pointer and PSPSym do not change values or location on ARM64. However, EH may be added to a function in which case a new PSPSym needs to be materialized, even on ARM64. Location of PSPSym is found via GC info. 
- ## Localloc Localloc is allowed in EnC code, but remap is disallowed after the method has executed a localloc instruction. VM uses the invariants above (`RSP == RBP` on x64, `FP + 16 == SP + stack size` on ARM64) to detect whether localloc was executed by the method. diff --git a/docs/design/features/OsrDetailsAndDebugging.md b/docs/design/features/OsrDetailsAndDebugging.md index 3ca35c2b9198eb..e1080fbc8bd792 100644 --- a/docs/design/features/OsrDetailsAndDebugging.md +++ b/docs/design/features/OsrDetailsAndDebugging.md @@ -307,8 +307,6 @@ On Arm64 we have epilog unwind codes and the second SP adjust does not appear to OSR funclets are more or less normal funclets. -On Arm64, to satisfy PSPSym reporting constraints, the funclet frame must be padded to include the Tier0 frame size. This is conceptually similar to the way the funclet frames also pad for homed varargs arguments -- in both cases the padded space is never used, it is just there to ensure the PSPSym ends up at the same caller-SP relative offset for the main function and any funclet. - #### OSR Unwind Info On x64 the prolog unwind includes a phantom SP adjustment at offset 0 for the Tier0 frame. diff --git a/src/coreclr/vm/gc_unwind_x86.inl b/src/coreclr/vm/gc_unwind_x86.inl index 10ce8b99a951eb..21898a4383ba4f 100644 --- a/src/coreclr/vm/gc_unwind_x86.inl +++ b/src/coreclr/vm/gc_unwind_x86.inl @@ -2944,9 +2944,6 @@ bool UnwindEbpDoubleAlignFrame( #ifdef FEATURE_EH_FUNCLETS // Funclets' frame pointers(EBP) are always restored so they can access to main function's local variables. // Therefore the value of EBP is invalid for unwinder so we should use ESP instead. - // TODO If funclet frame layout is changed from CodeGen::genFuncletProlog() and genFuncletEpilog(), - // we need to change here accordingly. It is likely to have changes when introducing PSPSym. - // TODO Currently we assume that ESP of funclet frames is always fixed but actually it could change. 
if (isFunclet) { baseSP = curESP; From 67ef94a529b0c000ab77046775d37eb090a45ff8 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 23:19:41 +0200 Subject: [PATCH 39/52] Quick test for linux-x64 test failure --- src/coreclr/vm/amd64/asmhelpers.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 162543c3885637..3e4fff413c7896 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -374,6 +374,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // push_nonvol_reg rbp + alloc_stack 20h // Restore RBP mov rbp, [rdx + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] @@ -382,6 +383,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // Invoke the funclet call rsi + free_stack 20h pop_nonvol_reg rbp ret NESTED_END CallEHFunclet, _TEXT @@ -398,6 +400,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // push_nonvol_reg rbp + alloc_stack 20h // Restore RBP to match main funtion RBP mov rbp, rsi @@ -406,6 +409,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Invoke the filter funclet call rdx + free_stack 20h pop_nonvol_reg rbp ret NESTED_END CallEHFilterFunclet, _TEXT From 2c87f2395f5e080039b9131f05d708c57bd99f3b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Wed, 16 Apr 2025 23:39:21 +0200 Subject: [PATCH 40/52] Fix hex notation --- src/coreclr/vm/amd64/asmhelpers.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 3e4fff413c7896..9edb215b1a9c86 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -374,7 +374,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // push_nonvol_reg rbp - alloc_stack 20h + alloc_stack 0x20 // Restore RBP mov rbp, [rdx + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] @@ -383,7 +383,7 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // Invoke the funclet call rsi - 
free_stack 20h + free_stack 0x20 pop_nonvol_reg rbp ret NESTED_END CallEHFunclet, _TEXT @@ -400,7 +400,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // push_nonvol_reg rbp - alloc_stack 20h + alloc_stack 0x20 // Restore RBP to match main funtion RBP mov rbp, rsi @@ -409,7 +409,7 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Invoke the filter funclet call rdx - free_stack 20h + free_stack 0x20 pop_nonvol_reg rbp ret NESTED_END CallEHFilterFunclet, _TEXT From 298b8503401420b04eec953ecc94db1e36b9cd3c Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 17 Apr 2025 08:44:31 +0200 Subject: [PATCH 41/52] Reduce the scope of throw-in-filter workaround on linux-x64 --- src/coreclr/vm/amd64/asmhelpers.S | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index 9edb215b1a9c86..bef155354e49bc 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -374,7 +374,6 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // push_nonvol_reg rbp - alloc_stack 0x20 // Restore RBP mov rbp, [rdx + OFFSETOF__CONTEXT__Rbp - OFFSETOF__CONTEXT__Rbx] @@ -383,7 +382,6 @@ NESTED_ENTRY CallEHFunclet, _TEXT, NoHandler // Invoke the funclet call rsi - free_stack 0x20 pop_nonvol_reg rbp ret NESTED_END CallEHFunclet, _TEXT @@ -400,16 +398,16 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // push_nonvol_reg rbp - alloc_stack 0x20 + alloc_stack 0x10 // TODO: Why do we need this to make throw-in-filter work - // Restore RBP to match main funtion RBP + // Restore RBP mov rbp, rsi // Save the SP of this function mov [rcx], rsp // Invoke the filter funclet call rdx - free_stack 0x20 + free_stack 0x10 pop_nonvol_reg rbp ret NESTED_END CallEHFilterFunclet, _TEXT From 10ffb86ada57e6a1f64bc54e05d8131f7f7c7b43 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 17 Apr 2025 08:57:37 +0200 Subject: [PATCH 42/52] Fix stack trashing on linux-x64 --- 
src/coreclr/vm/amd64/asmhelpers.S | 2 -- src/coreclr/vm/exceptionhandling.cpp | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/coreclr/vm/amd64/asmhelpers.S b/src/coreclr/vm/amd64/asmhelpers.S index bef155354e49bc..08a2db5cae34ac 100644 --- a/src/coreclr/vm/amd64/asmhelpers.S +++ b/src/coreclr/vm/amd64/asmhelpers.S @@ -398,7 +398,6 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // push_nonvol_reg rbp - alloc_stack 0x10 // TODO: Why do we need this to make throw-in-filter work // Restore RBP mov rbp, rsi @@ -407,7 +406,6 @@ NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler // Invoke the filter funclet call rdx - free_stack 0x10 pop_nonvol_reg rbp ret NESTED_END CallEHFilterFunclet, _TEXT diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp index 7bfc55436cbaf4..ea49d416e98141 100644 --- a/src/coreclr/vm/exceptionhandling.cpp +++ b/src/coreclr/vm/exceptionhandling.cpp @@ -3245,7 +3245,7 @@ extern "C" void * QCALLTYPE CallCatchFunclet(QCall::ObjectHandleOnStack exceptio #endif // HOST_WINDOWS #if defined(HOST_AMD64) - ULONG64* returnAddress = (ULONG64*)targetSp; + ULONG64* returnAddress = (ULONG64*)(targetSp - 8); *returnAddress = pvRegDisplay->pCurrentContext->Rip; #ifdef HOST_WINDOWS if (targetSSP != 0) From 414e31179881f079a4ecea29e0f63032cc35371d Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 17 Apr 2025 09:17:55 +0200 Subject: [PATCH 43/52] Update documentation --- docs/design/coreclr/botr/clr-abi.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 742119d2b797cd..78ce14c656da78 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -324,9 +324,7 @@ Note that JIT64 does not implement this properly. The C# compiler used to always ## Funclet parameters -For filter funclets the VM sets the frame register to be the same as the parent function. 
For second pass funclets the VM restores only the frame register on AMD64 and all non-volatile registers on all other platforms. - -Catch, Filter, and Filter-handlers also get an Exception object (GC ref) as an argument (`REG_EXCEPTION_OBJECT`). On AMD64 it is passed in RCX (Windows ABI) or RSI (Unix ABI). On ARM and ARM64 this is the first argument and passed in R0. +Catch, Filter, and Filter-handlers get an Exception object (GC ref) as an argument (`REG_EXCEPTION_OBJECT`). On AMD64 it is passed in RCX (Windows ABI) or RDI (Unix ABI). On ARM and ARM64 this is the first argument and passed in R0. ## Funclet Return Values @@ -352,7 +350,11 @@ Some definitions: When an exception occurs, the VM is invoked to do some processing. If the exception is within a "try" region, it eventually calls a corresponding handler (which also includes calling filters). The exception location within a function might be where a "throw" instruction executes, the point of a processor exception like null pointer dereference or divide by zero, or the point of a call where the callee threw an exception but did not catch it. -All non-volatile registers are restored to their values at the exception point. +The VM sets the frame register to be the same as the parent function. + +On CoreCLR/AMD64, all other register values that existed at the exception point in the corresponding "try" region are trashed on entry to the funclet. That is, the only registers that have known values are those of the funclet parameters and the frame register. + +On all other platforms and NativeAOT/AMD64, all non-volatile registers are restored to their values at the exception point. 
### Registers on return from a funclet From 3bee83b4fef7f73407607f3c3208a8c3a5976483 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 20 Apr 2025 12:11:40 +0200 Subject: [PATCH 44/52] Prevent peephole optimization in funclet prolog/epilog; it results in unwinding information not matching the code and error when unwinding inside the funclet epilog --- src/coreclr/jit/emit.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index e9f71218c5b960..275dc3080398b3 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2890,6 +2890,7 @@ class emitter else { return (curInsIG->igFlags & IGF_EXTEND) && + ((curInsIG->igFlags & (IGF_FUNCLET_PROLOG | IGF_FUNCLET_EPILOG)) == 0) && ((prevInsIG->igFlags & IGF_NOGCINTERRUPT) == (curInsIG->igFlags & IGF_NOGCINTERRUPT)); } } From 3340a709aa82290db7868a62eb020e69bea883e8 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 20 Apr 2025 12:12:20 +0200 Subject: [PATCH 45/52] Remove support for encoding the PSPSym in GC info --- src/coreclr/gcinfo/gcinfoencoder.cpp | 26 ++------------------------ src/coreclr/inc/gcinfoencoder.h | 9 +++------ src/coreclr/jit/gcencode.cpp | 9 --------- 3 files changed, 5 insertions(+), 39 deletions(-) diff --git a/src/coreclr/gcinfo/gcinfoencoder.cpp b/src/coreclr/gcinfo/gcinfoencoder.cpp index ffac0f713ab77e..959d42d91bc387 100644 --- a/src/coreclr/gcinfo/gcinfoencoder.cpp +++ b/src/coreclr/gcinfo/gcinfoencoder.cpp @@ -357,7 +357,6 @@ GcInfoSize& GcInfoSize::operator+=(const GcInfoSize& other) SecObjSize += other.SecObjSize; GsCookieSize += other.GsCookieSize; GenericsCtxSize += other.GenericsCtxSize; - PspSymSize += other.PspSymSize; StackBaseSize += other.StackBaseSize; ReversePInvokeFrameSize += other.ReversePInvokeFrameSize; FixedAreaSize += other.FixedAreaSize; @@ -406,7 +405,6 @@ void GcInfoSize::Log(DWORD level, const char * header) LogSpew(LF_GCINFO, level, "Prolog/Epilog: %zu\n", ProEpilogSize); LogSpew(LF_GCINFO, level, 
"SecObj: %zu\n", SecObjSize); LogSpew(LF_GCINFO, level, "GsCookie: %zu\n", GsCookieSize); - LogSpew(LF_GCINFO, level, "PspSym: %zu\n", PspSymSize); LogSpew(LF_GCINFO, level, "GenericsCtx: %zu\n", GenericsCtxSize); LogSpew(LF_GCINFO, level, "StackBase: %zu\n", StackBaseSize); LogSpew(LF_GCINFO, level, "FixedArea: %zu\n", FixedAreaSize); @@ -471,7 +469,6 @@ template TGcInfoEncoder::TGcInfoEncode m_GSCookieValidRangeStart = 0; _ASSERTE(sizeof(m_GSCookieValidRangeEnd) == sizeof(UINT32)); m_GSCookieValidRangeEnd = (UINT32) (-1); // == UINT32.MaxValue - m_PSPSymStackSlot = NO_PSP_SYM; m_GenericsInstContextStackSlot = NO_GENERICS_INST_CONTEXT; m_contextParamType = GENERIC_CONTEXTPARAM_NONE; @@ -702,14 +699,6 @@ template void TGcInfoEncoder::SetGSCoo m_GSCookieValidRangeEnd = validRangeEnd; } -template void TGcInfoEncoder::SetPSPSymStackSlot( INT32 spOffsetPSPSym ) -{ - _ASSERTE( spOffsetPSPSym != NO_PSP_SYM ); - _ASSERTE( m_PSPSymStackSlot == NO_PSP_SYM || m_PSPSymStackSlot == spOffsetPSPSym ); - - m_PSPSymStackSlot = spOffsetPSPSym; -} - template void TGcInfoEncoder::SetGenericsInstContextStackSlot( INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type) { _ASSERTE( spOffsetGenericsContext != NO_GENERICS_INST_CONTEXT); @@ -941,7 +930,7 @@ template void TGcInfoEncoder::Build() UINT32 hasContextParamType = (m_GenericsInstContextStackSlot != NO_GENERICS_INST_CONTEXT); UINT32 hasReversePInvokeFrame = (m_ReversePInvokeFrameSlot != NO_REVERSE_PINVOKE_FRAME); - BOOL slimHeader = (!m_IsVarArg && !hasGSCookie && (m_PSPSymStackSlot == NO_PSP_SYM) && + BOOL slimHeader = (!m_IsVarArg && !hasGSCookie && !hasContextParamType && (m_InterruptibleRanges.Count() == 0) && !hasReversePInvokeFrame && ((m_StackBaseRegister == NO_STACK_BASE_REGISTER) || (GcInfoEncoding::NORMALIZE_STACK_BASE_REGISTER(m_StackBaseRegister) == 0))) && #ifdef TARGET_AMD64 @@ -970,7 +959,7 @@ template void TGcInfoEncoder::Build() GCINFO_WRITE(m_Info1, (m_IsVarArg ? 
1 : 0), 1, FlagsSize); GCINFO_WRITE(m_Info1, 0 /* unused - was hasSecurityObject */, 1, FlagsSize); GCINFO_WRITE(m_Info1, (hasGSCookie ? 1 : 0), 1, FlagsSize); - GCINFO_WRITE(m_Info1, ((m_PSPSymStackSlot != NO_PSP_SYM) ? 1 : 0), 1, FlagsSize); + GCINFO_WRITE(m_Info1, 0 /* unused - was hasPSPSymStackSlot */, 1, FlagsSize); GCINFO_WRITE(m_Info1, m_contextParamType, 2, FlagsSize); #if defined(TARGET_LOONGARCH64) assert(m_StackBaseRegister == 22 || 3 == m_StackBaseRegister); @@ -1037,17 +1026,6 @@ template void TGcInfoEncoder::Build() } - // Encode the offset to the PSPSym. - // The PSPSym is relative to the caller SP on IA64 and the initial stack pointer before stack allocations on X64. - if(m_PSPSymStackSlot != NO_PSP_SYM) - { - _ASSERTE(!slimHeader); -#ifdef _DEBUG - LOG((LF_GCINFO, LL_INFO1000, "Parent PSP at " FMT_STK "\n", DBG_STK(m_PSPSymStackSlot))); -#endif - GCINFO_WRITE_VARL_S(m_Info1, GcInfoEncoding::NORMALIZE_STACK_SLOT(m_PSPSymStackSlot), GcInfoEncoding::PSP_SYM_STACK_SLOT_ENCBASE, PspSymSize); - } - // Encode the offset to the generics type context. 
if(m_GenericsInstContextStackSlot != NO_GENERICS_INST_CONTEXT) { diff --git a/src/coreclr/inc/gcinfoencoder.h b/src/coreclr/inc/gcinfoencoder.h index f147d9566e9e87..ce07197a8447bc 100644 --- a/src/coreclr/inc/gcinfoencoder.h +++ b/src/coreclr/inc/gcinfoencoder.h @@ -23,7 +23,7 @@ - Flag: isVarArg, unused (was hasSecurityObject), hasGSCookie, - hasPSPSymStackSlot, + unused (was hasPSPSymStackSlot), hasGenericsInstContextStackSlot, hasStackBaseregister, wantsReportOnlyLeaf (AMD64 use only), @@ -34,9 +34,9 @@ - CodeLength - Prolog (if hasGenericsInstContextStackSlot || hasGSCookie) - Epilog (if hasGSCookie) - - SecurityObjectStackSlot (if any) + - SecurityObjectStackSlot (if any; no longer used) - GSCookieStackSlot (if any) - - PSPSymStackSlot (if any) + - PSPSymStackSlot (if any; no longer used) - GenericsInstContextStackSlot (if any) - StackBaseRegister (if any) - SizeOfEditAndContinuePreservedArea (if any) @@ -128,7 +128,6 @@ struct GcInfoSize size_t ProEpilogSize; size_t SecObjSize; size_t GsCookieSize; - size_t PspSymSize; size_t GenericsCtxSize; size_t StackBaseSize; size_t ReversePInvokeFrameSize; @@ -408,7 +407,6 @@ class TGcInfoEncoder void SetPrologSize( UINT32 prologSize ); void SetGSCookieStackSlot( INT32 spOffsetGSCookie, UINT32 validRangeStart, UINT32 validRangeEnd ); - void SetPSPSymStackSlot( INT32 spOffsetPSPSym ); void SetGenericsInstContextStackSlot( INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type); void SetReversePInvokeFrameSlot(INT32 spOffset); void SetIsVarArg(); @@ -492,7 +490,6 @@ class TGcInfoEncoder INT32 m_GSCookieStackSlot; UINT32 m_GSCookieValidRangeStart; UINT32 m_GSCookieValidRangeEnd; - INT32 m_PSPSymStackSlot; INT32 m_GenericsInstContextStackSlot; GENERIC_CONTEXTPARAM_TYPE m_contextParamType; UINT32 m_CodeLength; diff --git a/src/coreclr/jit/gcencode.cpp b/src/coreclr/jit/gcencode.cpp index cd96c8d0a8063b..15b61746e453f6 100644 --- a/src/coreclr/jit/gcencode.cpp +++ b/src/coreclr/jit/gcencode.cpp @@ -3749,15 +3749,6 @@ 
class GcInfoEncoderWithLogging } } - void SetPSPSymStackSlot(INT32 spOffsetPSPSym) - { - m_gcInfoEncoder->SetPSPSymStackSlot(spOffsetPSPSym); - if (m_doLogging) - { - printf("Set PSPSym stack slot to %d.\n", spOffsetPSPSym); - } - } - void SetGenericsInstContextStackSlot(INT32 spOffsetGenericsContext, GENERIC_CONTEXTPARAM_TYPE type) { m_gcInfoEncoder->SetGenericsInstContextStackSlot(spOffsetGenericsContext, type); From 5d7a4c077fa5124c492f9aada59daf4e5f8c56b1 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 20 Apr 2025 12:12:40 +0200 Subject: [PATCH 46/52] Minor doc update --- docs/design/coreclr/botr/clr-abi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index 78ce14c656da78..c48c0ed6a3bb66 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -350,7 +350,7 @@ Some definitions: When an exception occurs, the VM is invoked to do some processing. If the exception is within a "try" region, it eventually calls a corresponding handler (which also includes calling filters). The exception location within a function might be where a "throw" instruction executes, the point of a processor exception like null pointer dereference or divide by zero, or the point of a call where the callee threw an exception but did not catch it. -The VM sets the frame register to be the same as the parent function. +The VM sets the frame register to be the same as the parent function. This allows the funclets to access local variables using frame-relative addresses. On CoreCLR/AMD64, all other register values that existed at the exception point in the corresponding "try" region are trashed on entry to the funclet. That is, the only registers that have known values are those of the funclet parameters and the frame register. 
From f41cb02c1dd0999b9f854c3922fb1a4444afac1d Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 20 Apr 2025 17:05:52 +0200 Subject: [PATCH 47/52] Revert "Prevent peephole optimization in funclet prolog/epilog; it results in unwinding information not matching the code and error when unwinding inside the funclet epilog" This reverts commit 3bee83b4fef7f73407607f3c3208a8c3a5976483. --- src/coreclr/jit/emit.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 275dc3080398b3..e9f71218c5b960 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -2890,7 +2890,6 @@ class emitter else { return (curInsIG->igFlags & IGF_EXTEND) && - ((curInsIG->igFlags & (IGF_FUNCLET_PROLOG | IGF_FUNCLET_EPILOG)) == 0) && ((prevInsIG->igFlags & IGF_NOGCINTERRUPT) == (curInsIG->igFlags & IGF_NOGCINTERRUPT)); } } From 8d17a6a5db9a244daff4d20ffc62d81bad75d80a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 20 Apr 2025 15:58:04 +0200 Subject: [PATCH 48/52] don't optimize prologs/epilogues in OptimizePostIndexed --- src/coreclr/jit/emitarm64.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index ea9a9b53b2797e..b64e7e3444e10b 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17406,6 +17406,12 @@ bool emitter::OptimizePostIndexed(instruction ins, regNumber reg, ssize_t imm, e return false; } + if (emitComp->compGeneratingUnwindProlog || emitComp->compGeneratingUnwindEpilog) + { + // Don't remove instructions while generating "unwind" part of prologs or epilogs + return false; + } + // Cannot allow post indexing if the load itself is already modifying the // register. 
regNumber loadStoreDataReg = emitLastIns->idReg1(); From fba3040c345a8c31ed7d3792ff621cbb51ea260b Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 20 Apr 2025 21:45:23 +0200 Subject: [PATCH 49/52] R2R / GC info versioning --- src/coreclr/inc/gcinfodecoder.h | 4 ++-- src/coreclr/inc/readytorun.h | 7 +++++-- src/coreclr/vm/gcinfodecoder.cpp | 4 +++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/coreclr/inc/gcinfodecoder.h b/src/coreclr/inc/gcinfodecoder.h index ff129026098723..49ca335b69e2b3 100644 --- a/src/coreclr/inc/gcinfodecoder.h +++ b/src/coreclr/inc/gcinfodecoder.h @@ -218,7 +218,7 @@ enum GcInfoDecoderFlags DECODE_INTERRUPTIBILITY = 0x08, DECODE_GC_LIFETIMES = 0x10, DECODE_NO_VALIDATION = 0x20, - DECODE_PSP_SYM = 0x40, + DECODE_PSP_SYM = 0x40, // Unused starting with v4 format DECODE_GENERICS_INST_CONTEXT = 0x80, // stack location of instantiation context for generics // (this may be either the 'this' ptr or the instantiation secret param) DECODE_GS_COOKIE = 0x100, // stack location of the GS cookie @@ -237,7 +237,7 @@ enum GcInfoHeaderFlags GC_INFO_IS_VARARG = 0x1, // unused = 0x2, // was GC_INFO_HAS_SECURITY_OBJECT GC_INFO_HAS_GS_COOKIE = 0x4, - GC_INFO_HAS_PSP_SYM = 0x8, + GC_INFO_HAS_PSP_SYM = 0x8, // Unused starting with v4 format GC_INFO_HAS_GENERICS_INST_CONTEXT_MASK = 0x30, GC_INFO_HAS_GENERICS_INST_CONTEXT_NONE = 0x00, GC_INFO_HAS_GENERICS_INST_CONTEXT_MT = 0x10, diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index b097179f9bf67c..61e9f50504c3eb 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -19,10 +19,10 @@ // src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h // If you update this, ensure you run `git grep MINIMUM_READYTORUN_MAJOR_VERSION` // and handle pending work. 
-#define READYTORUN_MAJOR_VERSION 12 +#define READYTORUN_MAJOR_VERSION 13 #define READYTORUN_MINOR_VERSION 0x0000 -#define MINIMUM_READYTORUN_MAJOR_VERSION 12 +#define MINIMUM_READYTORUN_MAJOR_VERSION 13 // R2R Version 2.1 adds the InliningInfo section // R2R Version 2.2 adds the ProfileDataInfo section @@ -40,6 +40,9 @@ // R2R Version 10.1 adds Unbox_TypeTest helper // R2R Version 11 uses GCInfo v4, which encodes safe points without -1 offset and does not track return kinds in GCInfo // R2R Version 12 requires all return buffers to be always on the stack +// R2R Version 13 removes usage of PSPSym, changes ABI for funclets to match NativeAOT, changes register for +// exception parameter on AMD64, and redefines generics instance context stack slot in GCInfo v4 +// to be SP/FP relative struct READYTORUN_CORE_HEADER { diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 044969b393363d..39a06fa3e233fe 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -169,13 +169,15 @@ template bool TGcInfoDecoder::Predecod return true; } +#ifdef DECODE_OLD_FORMATS // Decode the offset to the PSPSym. // The PSPSym is relative to the caller SP on IA64 and the initial stack pointer before any stack allocation on X64 (InitialSP). 
- if (m_headerFlags & GC_INFO_HAS_PSP_SYM) + if (Version() < 4 && (m_headerFlags & GC_INFO_HAS_PSP_SYM)) { m_PSPSymStackSlot = GcInfoEncoding::DENORMALIZE_STACK_SLOT((INT32)m_Reader.DecodeVarLengthSigned(GcInfoEncoding::PSP_SYM_STACK_SLOT_ENCBASE)); } else +#endif { m_PSPSymStackSlot = NO_PSP_SYM; } From 52b3fcfcbaebf050134c520f932b3046d4ab42d8 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 20 Apr 2025 22:33:48 +0200 Subject: [PATCH 50/52] Bump R2R version on two more places --- src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h | 2 +- src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h index 305901d7626a59..033331ece0656f 100644 --- a/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h +++ b/src/coreclr/nativeaot/Runtime/inc/ModuleHeaders.h @@ -11,7 +11,7 @@ struct ReadyToRunHeaderConstants { static const uint32_t Signature = 0x00525452; // 'RTR' - static const uint32_t CurrentMajorVersion = 12; + static const uint32_t CurrentMajorVersion = 13; static const uint32_t CurrentMinorVersion = 0; }; diff --git a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs index 10ed724001d54d..bf24777abc20c9 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs @@ -15,7 +15,7 @@ internal struct ReadyToRunHeaderConstants { public const uint Signature = 0x00525452; // 'RTR' - public const ushort CurrentMajorVersion = 12; + public const ushort CurrentMajorVersion = 13; public const ushort CurrentMinorVersion = 0; } #if READYTORUN From d78b7e54ce09a0dbf06bf753642fc28d9a50f2a1 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 21 Apr 2025 07:58:09 +0200 Subject: [PATCH 51/52] Save R11 in CallEHFilterFunclet on ARM --- src/coreclr/vm/arm/ehhelpers.S | 5 
++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/arm/ehhelpers.S b/src/coreclr/vm/arm/ehhelpers.S index 006ade52bbda70..ab75b1cb6c1ef9 100644 --- a/src/coreclr/vm/arm/ehhelpers.S +++ b/src/coreclr/vm/arm/ehhelpers.S @@ -126,8 +126,7 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 // frame pointer for accessing the locals in the parent method. NESTED_ENTRY CallEHFilterFunclet, _TEXT, NoHandler - PROLOG_PUSH "{r7, lr}" - PROLOG_STACK_SAVE r7 + PROLOG_PUSH "{r11, lr}" // On entry: // @@ -143,6 +142,6 @@ GenerateRedirectedStubWithFrame RedirectForThreadAbort, RedirectForThreadAbort2 // Invoke the filter funclet blx r2 - EPILOG_POP "{r7, pc}" + EPILOG_POP "{r11, pc}" NESTED_END CallEHFilterFunclet, _TEXT From 83efb78a98f10b14a6f078381582966e0a18ede4 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Mon, 21 Apr 2025 08:00:42 +0200 Subject: [PATCH 52/52] Apply doc suggestions --- docs/design/coreclr/botr/clr-abi.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/design/coreclr/botr/clr-abi.md b/docs/design/coreclr/botr/clr-abi.md index c48c0ed6a3bb66..0875c152259e14 100644 --- a/docs/design/coreclr/botr/clr-abi.md +++ b/docs/design/coreclr/botr/clr-abi.md @@ -352,9 +352,9 @@ When an exception occurs, the VM is invoked to do some processing. If the except The VM sets the frame register to be the same as the parent function. This allows the funclets to access local variables using frame-relative addresses. -On CoreCLR/AMD64, all other register values that existed at the exception point in the corresponding "try" region are trashed on entry to the funclet. That is, the only registers that have known values are those of the funclet parameters and the frame register. +For filter funclets and on CoreCLR/AMD64 for all funclets, all other register values that existed at the exception point in the corresponding "try" region are trashed on entry to the funclet. 
That is, the only registers that have known values are those of the funclet parameters and the frame register. -On all other platforms and NativeAOT/AMD64, all non-volatile registers are restored to their values at the exception point. +For other funclets on all platforms except CoreCLR/AMD64, all non-volatile registers are restored to their values at the exception point. The JIT codegen [does not take advantage of it currently](https://github.com/dotnet/runtime/pull/114630#issuecomment-2810210759). ### Registers on return from a funclet