From aba8f69a920c64549d8d6b76abe54b890df2b72b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 20 Apr 2024 20:29:28 +0200 Subject: [PATCH 01/10] JIT: Rewrite initial parameter frame layout in terms of new ABI info Rewrite `lvaAssignVirtualFrameOffsetsToArgs` to make use of the ABI information that was already computed as part of ABI classification in the frontend. --- src/coreclr/jit/abi.cpp | 17 +- src/coreclr/jit/abi.h | 87 +++- src/coreclr/jit/compiler.h | 7 +- src/coreclr/jit/lclvars.cpp | 760 +++++++--------------------------- src/coreclr/jit/targetx86.cpp | 25 +- 5 files changed, 275 insertions(+), 621 deletions(-) diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index fd899b899546b1..9c9cc81c3a8b1f 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -71,14 +71,15 @@ regMaskTP ABIPassingSegment::GetRegisterMask() const // Offset relative to the first stack argument. // // Remarks: -// On x86, where arguments are pushed in order and thus come in reverse order -// in the callee, this is the offset to subtract from the top of the stack to -// get the argument's address. By top of the stack is meant esp on entry + 4 -// for the return address + total size of stack arguments. In varargs methods -// the varargs cookie contains the information required to allow the -// computation of the total size of stack arguments. -// -// Outside x86 this is the offset to add to the first argument's address. +// On x86, for the managed ABI where arguments are pushed in order and thus +// come in reverse order in the callee, this is the offset to subtract from +// the top of the stack to get the argument's address. By top of the stack is +// meant esp on entry + 4 for the return address + total size of stack +// arguments. In varargs methods the varargs cookie contains the information +// required to allow the computation of the total size of stack arguments. +// +// Outside the managed x86 ABI this is the offset to add to the first +// argument's address. 
// unsigned ABIPassingSegment::GetStackOffset() const { diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index ac0ad5090dcf2b..9e42dd8c4ad373 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -106,12 +106,23 @@ struct ClassifierInfo class X86Classifier { - RegisterQueue m_regs; - unsigned m_stackArgSize = 0; + const ClassifierInfo& m_info; + RegisterQueue m_regs; + unsigned m_stackArgSize = 0; public: X86Classifier(const ClassifierInfo& info); + unsigned StackSize() + { + return m_stackArgSize; + } + + unsigned StackAlignment() + { + return 4; + } + ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -122,11 +133,21 @@ class WinX64Classifier { RegisterQueue m_intRegs; RegisterQueue m_floatRegs; - unsigned m_stackArgSize = 0; + unsigned m_stackArgSize = 32; public: WinX64Classifier(const ClassifierInfo& info); + unsigned StackSize() + { + return m_stackArgSize; + } + + unsigned StackAlignment() + { + return 16; + } + ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -142,6 +163,16 @@ class SysVX64Classifier public: SysVX64Classifier(const ClassifierInfo& info); + unsigned StackSize() + { + return m_stackArgSize; + } + + unsigned StackAlignment() + { + return 16; + } + ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -158,6 +189,16 @@ class Arm64Classifier public: Arm64Classifier(const ClassifierInfo& info); + unsigned StackSize() + { + return m_stackArgSize; + } + + unsigned StackAlignment() + { + return 16; + } + ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -182,6 +223,16 @@ class Arm32Classifier public: Arm32Classifier(const ClassifierInfo& info); + unsigned StackSize() + { + return m_stackArgSize; + } + + unsigned StackAlignment() + { + return 8; + } + ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -198,6 +249,16 @@ class RiscV64Classifier public: RiscV64Classifier(const ClassifierInfo& info); + unsigned StackSize() + { + return m_stackArgSize; + } + + unsigned StackAlignment() + { + return 16; + } + ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -214,6 +275,16 @@ class LoongArch64Classifier public: LoongArch64Classifier(const ClassifierInfo& info); + unsigned StackSize() + { + return m_stackArgSize; + } + + unsigned StackAlignment() + { + return 16; + } + ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -247,6 +318,16 @@ class SwiftABIClassifier { } + unsigned StackSize() + { + return m_classifier.StackSize(); + } + + unsigned StackAlignment() + { + return m_classifier.StackAlignment(); + } + ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 819c20629de134..2b590b25e14289 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3784,6 +3784,7 @@ class Compiler unsigned lvaTableCnt; // lvaTable size (>= lvaCount) ABIPassingInformation* lvaParameterPassingInfo; + unsigned lvaParameterStackSize; unsigned lvaTrackedCount; // actual # of locals being tracked unsigned lvaTrackedCountInSizeTUnits; // min # of size_t's sufficient to hold a bit for all the locals being tracked @@ -3931,11 +3932,7 @@ class Compiler void lvaUpdateArgWithInitialReg(LclVarDsc* varDsc); void lvaUpdateArgsWithInitialReg(); void 
lvaAssignVirtualFrameOffsetsToArgs(); -#ifdef UNIX_AMD64_ABI - int lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs, int* callerArgOffset); -#else // !UNIX_AMD64_ABI - int lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, unsigned argSize, int argOffs); -#endif // !UNIX_AMD64_ABI + bool lvaGetRelativeOffsetToCallerAllocatedSpaceForParameter(unsigned lclNum, int* offset); void lvaAssignVirtualFrameOffsetsToLocals(); bool lvaParamHasLocalStackSpace(unsigned lclNum); int lvaAllocLocalAndSetVirtualOffset(unsigned lclNum, unsigned size, int stkOffs); diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index c101955fdea7ed..eee94a74a3a1d5 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -36,6 +36,7 @@ unsigned Compiler::s_lvaDoubleAlignedProcsCount = 0; void Compiler::lvaInit() { lvaParameterPassingInfo = nullptr; + lvaParameterStackSize = 0; /* We haven't allocated stack variables yet */ lvaRefCountState = RCS_INVALID; @@ -473,9 +474,6 @@ void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo) codeGen->intRegState.rsCalleeRegArgCount = varDscInfo->intRegArgNum; codeGen->floatRegState.rsCalleeRegArgCount = varDscInfo->floatRegArgNum; - // Now we have parameters created in the right order. Figure out how they're passed. - lvaClassifyParameterABI(); - #if FEATURE_FASTTAILCALL // Save the stack usage information // We can get register usage information using codeGen->intRegState and @@ -483,6 +481,9 @@ void Compiler::lvaInitArgs(InitVarDscInfo* varDscInfo) info.compArgStackSize = varDscInfo->stackArgSize; #endif // FEATURE_FASTTAILCALL + // Now we have parameters created in the right order. Figure out how they're passed. + lvaClassifyParameterABI(); + // The total argument size must be aligned. noway_assert((compArgSize % TARGET_POINTER_SIZE) == 0); @@ -1724,6 +1725,19 @@ void Compiler::lvaClassifyParameterABI(Classifier& classifier) } #endif } + + lvaParameterStackSize = roundUp(classifier.StackSize(), classifier.StackAlignment()); + +#if FEATURE_FASTTAILCALL + unsigned oldStackSize = info.compArgStackSize; + +#ifdef WINDOWS_AMD64_ABI + // Old info does not take 4 shadow slots on win-x64 into account. + oldStackSize += 32; +#endif + + assert(lvaParameterStackSize == roundUp(oldStackSize, classifier.StackAlignment())); +#endif } //----------------------------------------------------------------------------- @@ -1870,12 +1884,18 @@ void Compiler::lvaClassifyParameterABI() { assert(reg == REG_STK); + unsigned dscStackOffset = (unsigned)dsc->GetStackOffset(); +#ifdef WINDOWS_AMD64_ABI + // The LclVarDsc value does not account for the 4 shadow slots allocated by the caller. + dscStackOffset += 32; +#endif + // On x86, varargs methods access stack args off of a base pointer, and the // first stack arg is not considered to be at offset 0. // TODO-Cleanup: Unify things so that x86 is consistent with other platforms // here and change fgMorphExpandStackArgForVarArgs to account for that. #ifndef TARGET_X86 - assert((unsigned)dsc->GetStackOffset() == expected.GetStackOffset()); + assert(dscStackOffset == expected.GetStackOffset()); #endif } } @@ -5853,642 +5873,166 @@ void Compiler::lvaUpdateArgsWithInitialReg() } } -/***************************************************************************** - * lvaAssignVirtualFrameOffsetsToArgs() : Assign virtual stack offsets to the - * arguments, and implicit arguments (this ptr, return buffer, generics, - * and varargs). 
- */ +//----------------------------------------------------------------------------- +// lvaAssignVirtualFrameOffsetsToArgs: +// Assign virtual frame offsets to the incoming parameters. +// void Compiler::lvaAssignVirtualFrameOffsetsToArgs() { - unsigned lclNum = 0; - int argOffs = 0; -#ifdef UNIX_AMD64_ABI - int callerArgOffset = 0; -#endif // UNIX_AMD64_ABI - - /* - Assign stack offsets to arguments (in reverse order of passing). - - This means that if we pass arguments left->right, we start at - the end of the list and work backwards, for right->left we start - with the first argument and move forward. - - This is all relative to our Virtual '0' - */ - - if (info.compArgOrder == Target::ARG_ORDER_L2R) - { - argOffs = compArgSize; - } - - /* Update the argOffs to reflect arguments that are passed in registers */ - - noway_assert(codeGen->intRegState.rsCalleeRegArgCount <= MAX_REG_ARG); - noway_assert(compAppleArm64Abi() || compArgSize >= codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES); - - if (info.compArgOrder == Target::ARG_ORDER_L2R) - { - argOffs -= codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES; - } - - // Update the arg initial register locations. - lvaUpdateArgsWithInitialReg(); - -#ifdef SWIFT_SUPPORT - if (info.compCallConv == CorInfoCallConvExtension::Swift) - { - // We already assigned argument offsets in lvaClassifyParameterABI. - // Just get them from there. - // TODO-Cleanup: We can use similar logic for all backends once we have - // the new ABI info for all targets. - for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) - { - LclVarDsc* dsc = lvaGetDesc(lclNum); - const ABIPassingInformation& abiInfo = lvaGetParameterABIInfo(lclNum); - - if (abiInfo.HasExactlyOneStackSegment()) - { - dsc->SetStackOffset(abiInfo.Segments[0].GetStackOffset()); - } - } - return; - } -#endif - - /* Is there a "this" argument? */ - - if (!info.compIsStatic) - { - noway_assert(lclNum == info.compThisArg); -#ifndef TARGET_X86 - argOffs = - lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); -#endif // TARGET_X86 - lclNum++; - } - - unsigned userArgsToSkip = 0; -#if !defined(TARGET_ARM) - // In the native instance method calling convention on Windows, - // the this parameter comes before the hidden return buffer parameter. - // So, we want to process the native "this" parameter before we process - // the native return buffer parameter. - if (TargetOS::IsWindows && callConvIsInstanceMethodCallConv(info.compCallConv)) - { -#ifdef TARGET_X86 - if (!lvaTable[lclNum].lvIsRegArg) - { - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs); - } -#elif !defined(UNIX_AMD64_ABI) - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs); -#endif // TARGET_X86 - lclNum++; - userArgsToSkip++; - } + int relativeZero = 0; +#ifdef TARGET_ARM + // arm32 is special and has the concept of "prespill" where we generate + // code in the callee to spill the argument registers as the very first + // thing, and consider those to be actually passed by the caller. The + // virtual 0 is actually below these prespills. + // TODO-Cleanup: Unify arm32 with everyone else. arm64/RISCV64/LA64 also + // needs a similar mechanism for split parameters, but they do not consider + // the "virtual 0" to be below the prespills, which simplifies things + // considerably. 
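+ // For example (illustrative numbers, not from a real signature): if the
+ // prolog prespills r0-r3 with "push {r0-r3}", relativeZero is 16, and a
+ // parameter homed in r0 (for which
+ // lvaGetRelativeOffsetToCallerAllocatedSpaceForParameter below returns
+ // -16) lands at virtual offset -16 + 16 = 0, the bottom of the prespill
+ // area.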
+ regMaskTP prespilled = codeGen->regSet.rsMaskPreSpillRegs(true); + JITDUMP("Prespill regs is "); + DBEXEC(VERBOSE, dspRegMask(prespilled)); + JITDUMP("\n"); + relativeZero = genCountBits(prespilled) * TARGET_POINTER_SIZE; #endif - /* if we have a hidden buffer parameter, that comes here */ - - if (info.compRetBuffArg != BAD_VAR_NUM) - { - noway_assert(lclNum == info.compRetBuffArg); - argOffs = - lvaAssignVirtualFrameOffsetToArg(lclNum, REGSIZE_BYTES, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); - lclNum++; - } - -#if USER_ARGS_COME_LAST - - //@GENERICS: extra argument for instantiation info - if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) - { - noway_assert(lclNum == info.compTypeCtxtArg); - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, REGSIZE_BYTES, - argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); - } - - if (info.compIsVarArgs) - { - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, REGSIZE_BYTES, - argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); - } - -#endif // USER_ARGS_COME_LAST - - CORINFO_ARG_LIST_HANDLE argLst = info.compMethodInfo->args.args; - unsigned argSigLen = info.compMethodInfo->args.numArgs; - // Skip any user args that we've already processed. - assert(userArgsToSkip <= argSigLen); - argSigLen -= userArgsToSkip; - for (unsigned i = 0; i < userArgsToSkip; i++, argLst = info.compCompHnd->getArgNext(argLst)) + for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++) { - ; - } + LclVarDsc* dsc = lvaGetDesc(lclNum); -#ifdef TARGET_ARM - // - // struct_n { int; int; ... n times }; - // - // Consider signature: - // - // Foo (float a,double b,float c,double d,float e,double f,float g,double h, - // float i,double j,float k,double l,struct_3 m) { } - // - // Basically the signature is: (all float regs full, 1 double, struct_3); - // - // The double argument occurs before pre spill in the argument iteration and - // computes an argOffset of 0. struct_3 offset becomes 8. This is wrong. - // Because struct_3 is prespilled and double occurs after prespill. - // The correct offsets are double = 16 (aligned stk), struct_3 = 0..12, - // Offset 12 will be skipped for double alignment of double. - // - // Another example is (struct_2, all float regs full, double, struct_2); - // Here, notice the order is similarly messed up because of 2 pre-spilled - // struct_2. - // - // Succinctly, - // ARG_INDEX(i) > ARG_INDEX(j) DOES NOT IMPLY |ARG_OFFSET(i)| > |ARG_OFFSET(j)| - // - // Therefore, we'll do a two pass offset calculation, one that considers pre-spill - // and the next, stack args. - // - - unsigned argLcls = 0; - - // Take care of pre spill registers first. - regMaskTP preSpillMask = codeGen->regSet.rsMaskPreSpillRegs(false); - regMaskTP tempMask = RBM_NONE; - for (unsigned i = 0, preSpillLclNum = lclNum; i < argSigLen; ++i, ++preSpillLclNum) - { - if (lvaIsPreSpilled(preSpillLclNum, preSpillMask)) + int startOffset; + if (lvaGetRelativeOffsetToCallerAllocatedSpaceForParameter(lclNum, &startOffset)) { - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &argClass)); - unsigned argSize = eeGetArgSize(argTypeJit, argClass); - argOffs = lvaAssignVirtualFrameOffsetToArg(preSpillLclNum, argSize, argOffs); - argLcls++; + dsc->SetStackOffset(startOffset + relativeZero); + JITDUMP("Set V%02u to offset %d\n", lclNum, startOffset); - // Early out if we can. 
If size is 8 and base reg is 2, then the mask is 0x1100 - tempMask |= ((((1 << (roundUp(argSize, TARGET_POINTER_SIZE) / REGSIZE_BYTES))) - 1) - << lvaTable[preSpillLclNum].GetArgReg()); - if (tempMask == preSpillMask) + if (dsc->lvPromoted) { - // We won't encounter more pre-spilled registers, - // so don't bother iterating further. - break; + for (unsigned fld = 0; fld < dsc->lvFieldCnt; fld++) + { + unsigned fieldLclNum = dsc->lvFieldLclStart + fld; + LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum); + fieldVarDsc->SetStackOffset(dsc->GetStackOffset() + fieldVarDsc->lvFldOffset); + JITDUMP(" Set field V%02u to offset %d\n", fieldLclNum, fieldVarDsc->GetStackOffset()); + } } - } - argLst = info.compCompHnd->getArgNext(argLst); - } - // Take care of non pre-spilled stack arguments. - argLst = info.compMethodInfo->args.args; - for (unsigned i = 0, stkLclNum = lclNum; i < argSigLen; ++i, ++stkLclNum) - { - if (!lvaIsPreSpilled(stkLclNum, preSpillMask)) - { - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &argClass)); - const unsigned argSize = eeGetArgSize(argTypeJit, argClass); - argOffs = lvaAssignVirtualFrameOffsetToArg(stkLclNum, argSize, argOffs); - argLcls++; + continue; } - argLst = info.compCompHnd->getArgNext(argLst); - } - - lclNum += argLcls; -#else // !TARGET_ARM - for (unsigned i = 0; i < argSigLen; i++) - { - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - CorInfoType argTypeJit = strip(info.compCompHnd->getArgType(&info.compMethodInfo->args, argLst, &argClass)); - unsigned argumentSize = eeGetArgSize(argTypeJit, argClass); - - assert(compAppleArm64Abi() || argumentSize % TARGET_POINTER_SIZE == 0); - - argOffs = - lvaAssignVirtualFrameOffsetToArg(lclNum++, argumentSize, argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); - argLst = info.compCompHnd->getArgNext(argLst); - } -#endif // !TARGET_ARM - -#if !USER_ARGS_COME_LAST - - //@GENERICS: extra argument for instantiation info - if (info.compMethodInfo->args.callConv & CORINFO_CALLCONV_PARAMTYPE) - { - noway_assert(lclNum == info.compTypeCtxtArg); - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, REGSIZE_BYTES, - argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); - } - - if (info.compIsVarArgs) - { - argOffs = lvaAssignVirtualFrameOffsetToArg(lclNum++, REGSIZE_BYTES, - argOffs UNIX_AMD64_ABI_ONLY_ARG(&callerArgOffset)); } - -#endif // USER_ARGS_COME_LAST } -#ifdef UNIX_AMD64_ABI +//----------------------------------------------------------------------------- +// lvaGetRelativeOffsetToCallerAllocatedSpaceForParameter: +// Return offset to use for a parameter local when the caller allocated space +// for (parts of) it. The offset returned is relative to the bottom of the +// space allocated by the caller (our "virtual 0", see lvaAssignFrameOffsets +// documentation). // -// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an -// individual argument, and return the offset for the next argument. -// Note: This method only calculates the initial offset of the stack passed/spilled arguments -// (if any - the RA might decide to spill(home on the stack) register passed arguments, if rarely used.) -// The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existence, -// ret address slot, stack frame padding, alloca instructions, etc. -// Note: This is the implementation for UNIX_AMD64 System V platforms. 
-// -int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, - unsigned argSize, - int argOffs UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset)) -{ - noway_assert(lclNum < info.compArgsCount); - noway_assert(argSize); - - if (info.compArgOrder == Target::ARG_ORDER_L2R) - { - argOffs -= argSize; - } - - unsigned fieldVarNum = BAD_VAR_NUM; - - LclVarDsc* varDsc = lvaGetDesc(lclNum); - - noway_assert(varDsc->lvIsParam); - - if (varDsc->lvIsRegArg) - { - // Argument is passed in a register, don't count it - // when updating the current offset on the stack. - - if (varDsc->lvOnFrame) - { - // The offset for args needs to be set only for the stack homed arguments for System V. - varDsc->SetStackOffset(argOffs); - } - else - { - varDsc->SetStackOffset(0); - } - } - else - { - // For Windows AMD64 there are 4 slots for the register passed arguments on the top of the caller's stack. - // This is where they are always homed. So, they can be accessed with positive offset. - // On System V platforms, if the RA decides to home a register passed arg on the stack, it creates a stack - // location on the callee stack (like any other local var.) In such a case, the register passed, stack homed - // arguments are accessed using negative offsets and the stack passed arguments are accessed using positive - // offset (from the caller's stack.) - // For System V platforms if there is no frame pointer the caller stack parameter offset should include the - // callee allocated space. If frame register is used, the callee allocated space should not be included for - // accessing the caller stack parameters. The last two requirements are met in lvaFixVirtualFrameOffsets - // method, which fixes the offsets, based on frame pointer existence, existence of alloca instructions, ret - // address pushed, ets. - - varDsc->SetStackOffset(*callerArgOffset); - // Structs passed on stack could be of size less than TARGET_POINTER_SIZE. - // Make sure they get at least TARGET_POINTER_SIZE on the stack - this is required for alignment. - if (argSize > TARGET_POINTER_SIZE) - { - *callerArgOffset += (int)roundUp(argSize, TARGET_POINTER_SIZE); - } - else - { - *callerArgOffset += TARGET_POINTER_SIZE; - } - } - - // For struct promoted parameters we need to set the offsets for the field lclVars. - // - // For a promoted struct we also assign the struct fields stack offset - if (varDsc->lvPromoted) - { - unsigned firstFieldNum = varDsc->lvFieldLclStart; - int offset = varDsc->GetStackOffset(); - for (unsigned i = 0; i < varDsc->lvFieldCnt; i++) - { - LclVarDsc* fieldVarDsc = lvaGetDesc(firstFieldNum + i); - fieldVarDsc->SetStackOffset(offset + fieldVarDsc->lvFldOffset); - } - } - - if (info.compArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg) - { - argOffs += argSize; - } - - return argOffs; -} - -#else // !UNIX_AMD64_ABI - +// Parameters: +// lclNum - Parameter local +// offset - [out] Offset to use for the parameter local. Only valid when the +// function returns true. // -// lvaAssignVirtualFrameOffsetToArg() : Assign virtual stack offsets to an -// individual argument, and return the offset for the next argument. -// Note: This method only calculates the initial offset of the stack passed/spilled arguments -// (if any - the RA might decide to spill(home on the stack) register passed arguments, if rarely used.) -// The final offset is calculated in lvaFixVirtualFrameOffsets method. It accounts for FP existence, -// ret address slot, stack frame padding, alloca instructions, etc. 
-// Note: This implementation for all the platforms but UNIX_AMD64 OSs (System V 64 bit.) -int Compiler::lvaAssignVirtualFrameOffsetToArg(unsigned lclNum, - unsigned argSize, - int argOffs UNIX_AMD64_ABI_ONLY_ARG(int* callerArgOffset)) +// Returns: +// true if the caller allocated space that the JIT should reuse for the +// parameter's home. +// +// Remarks: +// The most common situation is for stack parameters, but there are other +// cases where we have usable space allocated by the caller: +// - On win-x64 the caller allocates stack space even for args passed in +// registers +// - On multiple ABIs (see below) structs can be passed split across stack +// and registers, where this function may then return an offset that only +// partially reaches into caller allocated space (i.e. negative) +// - On arm32 we sometimes prespill argument registers and consider it to be +// caller allocated, making this function also return a negative offset for +// some registers in that case. +// +bool Compiler::lvaGetRelativeOffsetToCallerAllocatedSpaceForParameter(unsigned lclNum, int* offset) { - noway_assert(lclNum < info.compArgsCount); - noway_assert(argSize); - - if (info.compArgOrder == Target::ARG_ORDER_L2R) - { - argOffs -= argSize; - } - - unsigned fieldVarNum = BAD_VAR_NUM; - - LclVarDsc* varDsc = lvaGetDesc(lclNum); - - noway_assert(varDsc->lvIsParam); + const ABIPassingInformation& abiInfo = lvaGetParameterABIInfo(lclNum); - if (varDsc->lvIsRegArg) + for (unsigned i = 0; i < abiInfo.NumSegments; i++) { - /* Argument is passed in a register, don't count it - * when updating the current offset on the stack */ - -#if !defined(TARGET_ARMARCH) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64) -#if DEBUG - // TODO: Remove this noway_assert and replace occurrences of TARGET_POINTER_SIZE with argSize - // Also investigate why we are incrementing argOffs for X86 as this seems incorrect - // - noway_assert(argSize == TARGET_POINTER_SIZE); -#endif // DEBUG -#endif - -#if defined(TARGET_X86) - argOffs += TARGET_POINTER_SIZE; -#elif defined(TARGET_AMD64) - // Register arguments on AMD64 also takes stack space. (in the backing store) - varDsc->SetStackOffset(argOffs); - argOffs += TARGET_POINTER_SIZE; -#elif defined(TARGET_ARM64) - // Register arguments on ARM64 only take stack space when they have a frame home. - // Unless on windows and in a vararg method. - if (compFeatureArgSplit() && this->info.compIsVarArgs) + const ABIPassingSegment& segment = abiInfo.Segments[i]; + if (!segment.IsPassedOnStack()) { - if (varDsc->lvType == TYP_STRUCT && varDsc->GetOtherArgReg() >= MAX_REG_ARG && - varDsc->GetOtherArgReg() != REG_NA) - { - // This is a split struct. It will account for an extra (8 bytes) - // of alignment. 
- varDsc->SetStackOffset(varDsc->GetStackOffset() + TARGET_POINTER_SIZE); - argOffs += TARGET_POINTER_SIZE; +#if defined(WINDOWS_AMD64_ABI) + switch (segment.GetRegister()) + { + case REG_ECX: + case REG_XMM0: + *offset = 0; + return true; + case REG_EDX: + case REG_XMM1: + *offset = 8; + return true; + case REG_R8: + case REG_XMM2: + *offset = 16; + return true; + case REG_R9: + case REG_XMM3: + *offset = 24; + return true; } - } - #elif defined(TARGET_ARM) - // On ARM we spill the registers in codeGen->regSet.rsMaskPreSpillRegArg - // in the prolog, so we have to do SetStackOffset() here - // - regMaskTP regMask = genRegMask(varDsc->GetArgReg()); - if (codeGen->regSet.rsMaskPreSpillRegArg & regMask) - { - // Signature: void foo(struct_8, int, struct_4) - // ------- CALLER SP ------- - // r3 struct_4 - // r2 int - not prespilled, but added for alignment. argOffs should skip this. - // r1 struct_8 - // r0 struct_8 - // ------------------------- - // If we added alignment we need to fix argOffs for all registers above alignment. - if (codeGen->regSet.rsMaskPreSpillAlign != RBM_NONE) - { - assert(genCountBits(codeGen->regSet.rsMaskPreSpillAlign) == 1); - // Is register beyond the alignment pos? - if (regMask > codeGen->regSet.rsMaskPreSpillAlign) - { - // Increment argOffs just once for the _first_ register after alignment pos - // in the prespill mask. - if (!BitsBetween(codeGen->regSet.rsMaskPreSpillRegArg, regMask, - codeGen->regSet.rsMaskPreSpillAlign)) - { - argOffs += TARGET_POINTER_SIZE; - } - } + regMaskTP prespills = codeGen->regSet.rsMaskPreSpillRegs(true); + if ((prespills & genRegMask(segment.GetRegister())) != RBM_NONE) + { + // Construct a mask with all prespills that includes the + // segment's register and all registers after it. For example: + // prespills: 1101 (i.e. prolog starts with push {r0, r2, r3} + // reg: 0100 (i.e. r2 which is at offset -8) + // higherPrespills: 1100 (=> r2, r3) + regMaskTP higherPrespills = prespills & (regMaskTP)(~((1ULL << (int)segment.GetRegister()) - 1)); + *offset = -(int)genCountBits(higherPrespills) * TARGET_POINTER_SIZE; + + // This might be a split parameter, so there may be other prespills as well. + *offset -= segment.Offset; + return true; } +#endif - switch (varDsc->lvType) - { - case TYP_STRUCT: - if (!varDsc->lvStructDoubleAlign) - { - break; - } - FALLTHROUGH; - - case TYP_DOUBLE: - case TYP_LONG: - { - // - // Let's assign offsets to arg1, a double in r2. argOffs has to be 4 not 8. - // - // ------- CALLER SP ------- - // r3 - // r2 double -- argOffs = 4, but it doesn't need to be skipped, because there is no skipping. - // r1 VACookie -- argOffs = 0 - // ------------------------- - // - // Consider argOffs as if it accounts for number of prespilled registers before the current - // register. In the above example, for r2, it is r1 that is prespilled, but since r1 is - // accounted for by argOffs being 4, there should have been no skipping. Instead, if we didn't - // assign r1 to any variable, then argOffs would still be 0 which implies it is not accounting - // for r1, equivalently r1 is skipped. - // - // If prevRegsSize is unaccounted for by a corresponding argOffs, we must have skipped a register. - int prevRegsSize = - genCountBits(codeGen->regSet.rsMaskPreSpillRegArg & (regMask - 1)) * TARGET_POINTER_SIZE; - if (argOffs < prevRegsSize) - { - // We must align up the argOffset to a multiple of 8 to account for skipped registers. 
- argOffs = roundUp((unsigned)argOffs, 2 * TARGET_POINTER_SIZE); - } - // We should've skipped only a single register. - assert(argOffs == prevRegsSize); - } - break; - - default: - // No alignment of argOffs required - break; - } - varDsc->SetStackOffset(argOffs); - argOffs += argSize; + continue; } -#elif defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - - if (varDsc->lvIsSplit) + if (info.compArgOrder == Target::ARG_ORDER_L2R) { - assert((varDsc->lvType == TYP_STRUCT) && (varDsc->GetOtherArgReg() == REG_STK)); - // This is a split struct. It will account for an extra (8 bytes) for the whole struct. - varDsc->SetStackOffset(varDsc->GetStackOffset() + TARGET_POINTER_SIZE); - argOffs += TARGET_POINTER_SIZE; + // This is the managed x86 ABI. Stack offsets saved in ABI + // information is relative to the top of the stack frame here. + assert(segment.Offset == 0); + *offset = (int)(lvaParameterStackSize - segment.GetStackOffset()); } - -#else // TARGET* -#error Unsupported or unset target architecture -#endif // TARGET* - } - else - { -#if defined(TARGET_ARM) - // Dev11 Bug 42817: incorrect codegen for DrawFlatCheckBox causes A/V in WinForms - // - // Here we have method with a signature (int a1, struct a2, struct a3, int a4, int a5). - // Struct parameter 'a2' is 16-bytes with no alignment requirements; - // it uses r1,r2,r3 and [OutArg+0] when passed. - // Struct parameter 'a3' is 16-bytes that is required to be double aligned; - // the caller skips [OutArg+4] and starts the argument at [OutArg+8]. - // Thus the caller generates the correct code to pass the arguments. - // When generating code to receive the arguments we set codeGen->regSet.rsMaskPreSpillRegArg to [r1,r2,r3] - // and spill these three registers as the first instruction in the prolog. - // Then when we layout the arguments' stack offsets we have an argOffs 0 which - // points at the location that we spilled r1 into the stack. For this first - // struct we take the lvIsRegArg path above with "codeGen->regSet.rsMaskPreSpillRegArg &" matching. - // Next when we calculate the argOffs for the second 16-byte struct we have an argOffs - // of 16, which appears to be aligned properly so we don't skip a stack slot. - // - // To fix this we must recover the actual OutArg offset by subtracting off the - // sizeof of the PreSpill register args. - // Then we align this offset to a multiple of 8 and add back the sizeof - // of the PreSpill register args. - // - // Dev11 Bug 71767: failure of assert(sizeofPreSpillRegArgs <= argOffs) - // - // We have a method with 'this' passed in r0, RetBuf arg in r1, VarArgs cookie - // in r2. The first user arg is a 144 byte struct with double alignment required, - // r3 is skipped, and the struct is passed on the stack. However, 'r3' is added - // to the codeGen->regSet.rsMaskPreSpillRegArg mask by the VarArgs cookie code, since we need to - // home all the potential varargs arguments in registers, even if we don't have - // signature type information for the variadic arguments. However, due to alignment, - // we have skipped a register that doesn't have a corresponding symbol. Make up - // for that by increasing argOffs here. - // - - int sizeofPreSpillRegArgs = genCountBits(codeGen->regSet.rsMaskPreSpillRegs(true)) * REGSIZE_BYTES; - - if (argOffs < sizeofPreSpillRegArgs) + else { - // This can only happen if we skipped the last register spot because current stk arg - // is a struct requiring alignment or a pre-spill alignment was required because the - // first reg arg needed alignment. 
- // - // Example 1: First Stk Argument requiring alignment in vararg case (same as above comment.) - // Signature (int a0, int a1, int a2, struct {long} a3, ...) + // Some ABIs may split parameters across registers and stack: // - // stk arg a3 --> argOffs here will be 12 (r0-r2) but pre-spill will be 16. - // ---- Caller SP ---- - // r3 --> Stack slot is skipped in this case. - // r2 int a2 - // r1 int a1 - // r0 int a0 + // - On Windows, the Arm64 varargs ABI can split a 16 byte struct across x7 and stack + // - Arm32 generally allows structs to be split + // - LA64/RISCV64 both allow splitting of 16-byte structs across 1 register and stack + // - The Swift ABI can split parameters across multiple register and multiple stack segments // - // Example 2: First Reg Argument requiring alignment in no-vararg case. - // Signature (struct {long} a0, struct {int} a1, int a2, int a3) - // - // stk arg --> argOffs here will be 12 {r0-r2} but pre-spill will be 16. - // ---- Caller SP ---- - // r3 int a2 --> pushed (not pre-spilled) for alignment of a0 by lvaInitUserArgs. - // r2 struct { int } a1 - // r0-r1 struct { long } a0 - -#ifdef PROFILING_SUPPORTED - // On Arm under profiler, r0-r3 are always prespilled on stack. - // It is possible to have methods that accept only HFAs as parameters e.g. Signature(struct hfa1, struct - // hfa2), in which case hfa1 and hfa2 will be en-registered in co-processor registers and will have an - // argument offset less than size of preSpill. - // - // For this reason the following conditions are asserted when not under profiler. - if (!compIsProfilerHookNeeded()) -#endif - { - bool cond = ((info.compIsVarArgs || opts.compUseSoftFP) && - // Does cur stk arg require double alignment? - ((varDsc->lvType == TYP_STRUCT && varDsc->lvStructDoubleAlign) || - (varDsc->lvType == TYP_DOUBLE) || (varDsc->lvType == TYP_LONG))) || - // Did first reg arg require alignment? - (codeGen->regSet.rsMaskPreSpillAlign & genRegMask(REG_ARG_LAST)); - - noway_assert(cond); - noway_assert(sizeofPreSpillRegArgs <= argOffs + TARGET_POINTER_SIZE); // at most one register of - // alignment - } - argOffs = sizeofPreSpillRegArgs; - } - - noway_assert(argOffs >= sizeofPreSpillRegArgs); - int argOffsWithoutPreSpillRegArgs = argOffs - sizeofPreSpillRegArgs; + // Of these we handle Swift separately (by reassembling structs + // entirely on the local stack frame). However, all other ABIs + // admit a simple strategy to reassemble the struct on the stack + // frame: we consider the local itself to start right before the + // "virtual 0", such that spilling the register parts will end up + // with the local fully reassembled and contiguous, without having + // to move any of the stack segments. The subtraction of the + // segment offset accomplishes that here. 
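+ // For example (illustrative, Windows arm64 varargs): a 16-byte struct
+ // split as { x7, 8 stack bytes } has a stack segment with Offset == 8
+ // and GetStackOffset() == 0, so the local is placed at 0 - 8 = -8;
+ // spilling x7 to [-8, 0) reassembles the struct contiguously against
+ // the stack half at [0, 8).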
- switch (varDsc->lvType) - { - case TYP_STRUCT: - if (!varDsc->lvStructDoubleAlign) - break; - - FALLTHROUGH; - - case TYP_DOUBLE: - case TYP_LONG: - // We must align up the argOffset to a multiple of 8 - argOffs = - roundUp((unsigned)argOffsWithoutPreSpillRegArgs, 2 * TARGET_POINTER_SIZE) + sizeofPreSpillRegArgs; - break; - - default: - // No alignment of argOffs required - break; - } -#endif // TARGET_ARM - const bool isFloatHfa = (varDsc->lvIsHfa() && (varDsc->GetHfaType() == TYP_FLOAT)); - const unsigned argAlignment = eeGetArgSizeAlignment(varDsc->lvType, isFloatHfa); - if (compAppleArm64Abi()) - { - argOffs = roundUp(argOffs, argAlignment); + *offset = (int)segment.GetStackOffset() - (int)segment.Offset; } - assert((argSize % argAlignment) == 0); - assert((argOffs % argAlignment) == 0); - varDsc->SetStackOffset(argOffs); - } - - // For struct promoted parameters we need to set the offsets for both LclVars. - // - // For a dependent promoted struct we also assign the struct fields stack offset - - if (varDsc->lvPromoted) - { - unsigned firstFieldNum = varDsc->lvFieldLclStart; - for (unsigned i = 0; i < varDsc->lvFieldCnt; i++) - { - LclVarDsc* fieldVarDsc = lvaGetDesc(firstFieldNum + i); - - JITDUMP("Adjusting offset of dependent V%02u of arg V%02u: parent %u field %u net %u\n", lclNum, - firstFieldNum + i, varDsc->GetStackOffset(), fieldVarDsc->lvFldOffset, - varDsc->GetStackOffset() + fieldVarDsc->lvFldOffset); - - fieldVarDsc->SetStackOffset(varDsc->GetStackOffset() + fieldVarDsc->lvFldOffset); - } - } - - if (info.compArgOrder == Target::ARG_ORDER_R2L && !varDsc->lvIsRegArg) - { - argOffs += argSize; + return true; } - return argOffs; + return false; } -#endif // !UNIX_AMD64_ABI //----------------------------------------------------------------------------- // lvaAssignVirtualFrameOffsetsToLocals: compute the virtual stack offsets for @@ -7382,6 +6926,18 @@ bool Compiler::lvaParamHasLocalStackSpace(unsigned lclNum) } #endif + const ABIPassingInformation& abiInfo = + lvaGetParameterABIInfo(varDsc->lvIsStructField ? varDsc->lvParentLcl : lclNum); + if (abiInfo.IsSplitAcrossRegistersAndStack()) + { + // Outside Swift (handled above) we can handle split parameters by + // spilling the registers at the top of the stack frame, below the + // stack segments. This results in the correct contiguous local value. + // Thus we don't need to allocate full space for these. + // + return false; + } + #if defined(WINDOWS_AMD64_ABI) // On Windows AMD64 we can use the caller-reserved stack area that is already setup return false; diff --git a/src/coreclr/jit/targetx86.cpp b/src/coreclr/jit/targetx86.cpp index 2a7c906962b0cb..eb6ced79ce1eba 100644 --- a/src/coreclr/jit/targetx86.cpp +++ b/src/coreclr/jit/targetx86.cpp @@ -29,7 +29,8 @@ const regMaskTP intArgMasks[] = {RBM_ECX, RBM_EDX}; // info - Info about the method being classified. // X86Classifier::X86Classifier(const ClassifierInfo& info) - : m_regs(nullptr, 0) + : m_info(info) + , m_regs(nullptr, 0) { switch (info.CallConv) { @@ -113,8 +114,26 @@ ABIPassingInformation X86Classifier::Classify(Compiler* comp, else { assert((m_stackArgSize % TARGET_POINTER_SIZE) == 0); - m_stackArgSize += roundUp(size, TARGET_POINTER_SIZE); - segment = ABIPassingSegment::OnStack(m_stackArgSize, 0, size); + + unsigned offset; + unsigned roundedArgSize = roundUp(size, TARGET_POINTER_SIZE); + if (m_info.CallConv == CorInfoCallConvExtension::Managed) + { + // The managed ABI pushes parameters in left-to-right order. 
This + // means that on the stack the first parameter is at the higher + // offset (farthest away from ESP on entry). We model the stack + // offset as the value to subtract from the top of the stack for + // this ABI, see ABIPassingSegment::GetStackOffset. + m_stackArgSize += roundedArgSize; + offset = m_stackArgSize; + } + else + { + offset = m_stackArgSize; + m_stackArgSize += roundedArgSize; + } + + segment = ABIPassingSegment::OnStack(offset, 0, size); } return ABIPassingInformation::FromSegment(comp, segment); From f93287ad23c33b56d566b6e928f215d3353c1aed Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 20 Apr 2024 21:10:01 +0200 Subject: [PATCH 02/10] Fix build --- src/coreclr/jit/lclvars.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index eee94a74a3a1d5..3ce10982db9086 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5978,6 +5978,8 @@ bool Compiler::lvaGetRelativeOffsetToCallerAllocatedSpaceForParameter(unsigned l case REG_XMM3: *offset = 24; return true; + default: + break; } #elif defined(TARGET_ARM) regMaskTP prespills = codeGen->regSet.rsMaskPreSpillRegs(true); From 05009187c0466487203956faa156f48dd7c06d20 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 21 Apr 2024 00:07:43 +0200 Subject: [PATCH 03/10] Fix validation for Swift, small clean ups --- src/coreclr/jit/lclvars.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 3ce10982db9086..01e0f261426407 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1729,14 +1729,19 @@ void Compiler::lvaClassifyParameterABI(Classifier& classifier) lvaParameterStackSize = roundUp(classifier.StackSize(), classifier.StackAlignment()); #if FEATURE_FASTTAILCALL - unsigned oldStackSize = info.compArgStackSize; + // Swift doesn't have correct ABI info computed by the old classification, + // so skip this validation there. + if (info.compCallConv != CorInfoCallConvExtension::Swift) + { + unsigned oldStackSize = info.compArgStackSize; #ifdef WINDOWS_AMD64_ABI - // Old info does not take 4 shadow slots on win-x64 into account. - oldStackSize += 32; + // Old info does not take 4 shadow slots on win-x64 into account. + oldStackSize += 32; #endif - assert(lvaParameterStackSize == roundUp(oldStackSize, classifier.StackAlignment())); + assert(lvaParameterStackSize == roundUp(oldStackSize, classifier.StackAlignment())); + } #endif } @@ -5916,8 +5921,6 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() JITDUMP(" Set field V%02u to offset %d\n", fieldLclNum, fieldVarDsc->GetStackOffset()); } } - - continue; } } } From 89e00cdc3f2d0b07ff6151f9866d8515d5a10570 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 21 Apr 2024 23:30:04 +0200 Subject: [PATCH 04/10] Remove stack alignment Considering the stack alignment to be part of the passed stack size does not really make sense since the callee does not own the extra bytes added to ensure alignment. 
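For illustration, consider a hypothetical apple-arm64 signature whose stack arguments occupy 3 bytes (small stack arguments are packed on that ABI):

    before: roundUp(classifier.StackSize() /* 3 */, classifier.StackAlignment() /* 16 */) == 16
    after:  classifier.StackSize() == roundUp(3, TARGET_POINTER_SIZE) == 8

The bytes between 8 and the caller's 16-byte stack alignment are padding the callee never owned.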
--- src/coreclr/jit/abi.h | 37 +------------------------------------ src/coreclr/jit/lclvars.cpp | 34 +++++++++++++++++----------------- 2 files changed, 18 insertions(+), 53 deletions(-) diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index 9e42dd8c4ad373..eb223f941932f5 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -118,11 +118,6 @@ class X86Classifier return m_stackArgSize; } - unsigned StackAlignment() - { - return 4; - } - ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -143,11 +138,6 @@ class WinX64Classifier return m_stackArgSize; } - unsigned StackAlignment() - { - return 16; - } - ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -168,11 +158,6 @@ class SysVX64Classifier return m_stackArgSize; } - unsigned StackAlignment() - { - return 16; - } - ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -191,12 +176,7 @@ class Arm64Classifier unsigned StackSize() { - return m_stackArgSize; - } - - unsigned StackAlignment() - { - return 16; + return roundUp(m_stackArgSize, TARGET_POINTER_SIZE); } ABIPassingInformation Classify(Compiler* comp, @@ -228,11 +208,6 @@ class Arm32Classifier return m_stackArgSize; } - unsigned StackAlignment() - { - return 8; - } - ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -254,11 +229,6 @@ class RiscV64Classifier return m_stackArgSize; } - unsigned StackAlignment() - { - return 16; - } - ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, @@ -280,11 +250,6 @@ class LoongArch64Classifier return m_stackArgSize; } - unsigned StackAlignment() - { - return 16; - } - ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 01e0f261426407..01cefbbdf46a9f 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1726,23 +1726,7 @@ void Compiler::lvaClassifyParameterABI(Classifier& classifier) #endif } - lvaParameterStackSize = roundUp(classifier.StackSize(), classifier.StackAlignment()); - -#if FEATURE_FASTTAILCALL - // Swift doesn't have correct ABI info computed by the old classification, - // so skip this validation there. - if (info.compCallConv != CorInfoCallConvExtension::Swift) - { - unsigned oldStackSize = info.compArgStackSize; - -#ifdef WINDOWS_AMD64_ABI - // Old info does not take 4 shadow slots on win-x64 into account. - oldStackSize += 32; -#endif - - assert(lvaParameterStackSize == roundUp(oldStackSize, classifier.StackAlignment())); - } -#endif + lvaParameterStackSize = classifier.StackSize(); } //----------------------------------------------------------------------------- @@ -1910,6 +1894,22 @@ void Compiler::lvaClassifyParameterABI() } } } + +#ifdef FEATURE_FASTTAILCALL + // Swift doesn't have correct ABI info computed by the old classification, + // so skip this validation there. + if (info.compCallConv != CorInfoCallConvExtension::Swift) + { + unsigned oldStackSize = info.compArgStackSize; + +#ifdef WINDOWS_AMD64_ABI + // Old info does not take 4 shadow slots on win-x64 into account. 
+ oldStackSize += 32; +#endif + + assert(lvaParameterStackSize == roundUp(oldStackSize, TARGET_POINTER_SIZE)); + } +#endif #endif // DEBUG } From 06173e4cfad386dc305c620824e492f36b398400 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sun, 21 Apr 2024 23:50:03 +0200 Subject: [PATCH 05/10] Fix build --- src/coreclr/jit/abi.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index eb223f941932f5..5e87c12c597553 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -288,11 +288,6 @@ class SwiftABIClassifier return m_classifier.StackSize(); } - unsigned StackAlignment() - { - return m_classifier.StackAlignment(); - } - ABIPassingInformation Classify(Compiler* comp, var_types type, ClassLayout* structLayout, From b89dc82919f8e367f9404c5158955f3eeff3d4d6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 22 Apr 2024 00:07:39 +0200 Subject: [PATCH 06/10] Fix ifdef --- src/coreclr/jit/lclvars.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 01cefbbdf46a9f..6c57f25fdb1a9d 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1895,7 +1895,7 @@ void Compiler::lvaClassifyParameterABI() } } -#ifdef FEATURE_FASTTAILCALL +#if FEATURE_FASTTAILCALL // Swift doesn't have correct ABI info computed by the old classification, // so skip this validation there. if (info.compCallConv != CorInfoCallConvExtension::Swift) From 451855f672a72f557be9bc9e6f784252fda9a33e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Mon, 22 Apr 2024 13:40:37 +0200 Subject: [PATCH 07/10] Remove unnecessary change --- src/coreclr/jit/lclvars.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 6c57f25fdb1a9d..41a84554fe87de 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -6931,18 +6931,6 @@ bool Compiler::lvaParamHasLocalStackSpace(unsigned lclNum) } #endif - const ABIPassingInformation& abiInfo = - lvaGetParameterABIInfo(varDsc->lvIsStructField ? varDsc->lvParentLcl : lclNum); - if (abiInfo.IsSplitAcrossRegistersAndStack()) - { - // Outside Swift (handled above) we can handle split parameters by - // spilling the registers at the top of the stack frame, below the - // stack segments. This results in the correct contiguous local value. - // Thus we don't need to allocate full space for these. - // - return false; - } - #if defined(WINDOWS_AMD64_ABI) // On Windows AMD64 we can use the caller-reserved stack area that is already setup return false; From 7922f61240d2390129461f9b320c9724836d33ae Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 27 Apr 2024 12:11:12 +0200 Subject: [PATCH 08/10] Adjust some comments for RISCV64/LA64 --- src/coreclr/jit/lclvars.cpp | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 016f9412faa210..79e9df957fddec 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -6011,7 +6011,7 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() // partially reaches into caller allocated space (i.e. negative) // - On arm32 we sometimes prespill argument registers and consider it to be // caller allocated, making this function also return a negative offset for -// some registers in that case. +// some register parameters in that case. 
// bool Compiler::lvaGetRelativeOffsetToCallerAllocatedSpaceForParameter(unsigned lclNum, int* offset) { @@ -6056,7 +6056,9 @@ bool Compiler::lvaGetRelativeOffsetToCallerAllocatedSpaceForParameter(unsigned l regMaskTP higherPrespills = prespills & (regMaskTP)(~((1ULL << (int)segment.GetRegister()) - 1)); *offset = -(int)genCountBits(higherPrespills) * TARGET_POINTER_SIZE; - // This might be a split parameter, so there may be other prespills as well. + // Adjust for a potential split (we currently always expect all + // split structs to be fully prespilled, but this makes the + // general and matches the logic below). *offset -= segment.Offset; return true; } @@ -6081,15 +6083,18 @@ bool Compiler::lvaGetRelativeOffsetToCallerAllocatedSpaceForParameter(unsigned l // - LA64/RISCV64 both allow splitting of 16-byte structs across 1 register and stack // - The Swift ABI can split parameters across multiple register and multiple stack segments // - // Of these we handle Swift separately (by reassembling structs - // entirely on the local stack frame). However, all other ABIs - // admit a simple strategy to reassemble the struct on the stack - // frame: we consider the local itself to start right before the - // "virtual 0", such that spilling the register parts will end up - // with the local fully reassembled and contiguous, without having - // to move any of the stack segments. The subtraction of the - // segment offset accomplishes that here. - + // Of these, Swift and RISCV64/LA64 are handled separately, by + // reassembling the split structs entirely on the local stack + // frame. Thus the offsets returned here and assigned inside + // lvaAssignVirtualFrameOffsetsToArgs are overwritten later. + // + // For ARM64 and ARM32 we use a different strategy to reassemble + // the struct on the stack frame: we consider the local itself to + // start right before the "virtual 0", such that spilling the + // register parts will end up with the local fully reassembled and + // contiguous, without having to move any of the stack segments. + // The subtraction of the segment offset accomplishes that here. + // *offset = (int)segment.GetStackOffset() - (int)segment.Offset; } From 2ce06c0e74bfa71794feff6fda4716ddd0a0e2f0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 27 Apr 2024 12:13:16 +0200 Subject: [PATCH 09/10] Missed a word --- src/coreclr/jit/lclvars.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 79e9df957fddec..1d776662cbc2f4 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -6058,7 +6058,7 @@ bool Compiler::lvaGetRelativeOffsetToCallerAllocatedSpaceForParameter(unsigned l // Adjust for a potential split (we currently always expect all // split structs to be fully prespilled, but this makes the - // general and matches the logic below). + // logic general and matches the logic below). 
*offset -= segment.Offset; return true; } From ec9fcfe42e915fc70300ddf4e3b861eff184a525 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 27 Apr 2024 12:15:10 +0200 Subject: [PATCH 10/10] Fix another comment --- src/coreclr/jit/lclvars.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 1d776662cbc2f4..b04bd7373f7211 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5950,9 +5950,9 @@ void Compiler::lvaAssignVirtualFrameOffsetsToArgs() // code in the callee to spill the argument registers as the very first // thing, and consider those to be actually passed by the caller. The // virtual 0 is actually below these prespills. - // TODO-Cleanup: Unify arm32 with everyone else. arm64/RISCV64/LA64 also - // needs a similar mechanism for split parameters, but they do not consider - // the "virtual 0" to be below the prespills, which simplifies things + // TODO-Cleanup: Unify arm32 with arm64. arm64 also needs a similar + // mechanism for split parameters in varargs, but it does not consider the + // "virtual 0" to be below the prespills, which simplifies things // considerably. regMaskTP prespilled = codeGen->regSet.rsMaskPreSpillRegs(true); JITDUMP("Prespill regs is ");