From 4c3e3d749917676c49a808a3df249faab5b2a5ae Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 28 May 2024 01:42:52 +0200 Subject: [PATCH 1/5] Fix SIMD12 spills --- src/coreclr/jit/codegenlinear.cpp | 16 +++++++- .../JitBlue/Runtime_95043/Runtime_95043.cs | 37 +++++++++++++++++++ .../Runtime_95043/Runtime_95043.csproj | 9 +++++ 3 files changed, 60 insertions(+), 2 deletions(-) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.csproj diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 75583e90dda9f2..d5f88fa10b8fee 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -937,9 +937,21 @@ void CodeGen::genSpillVar(GenTree* tree) // but we will kill the var in the reg (below). if (!varDsc->IsAlwaysAliveInMemory()) { - instruction storeIns = ins_Store(lclType, compiler->isSIMDTypeLocalAligned(varNum)); assert(varDsc->GetRegNum() == tree->GetRegNum()); - inst_TT_RV(storeIns, size, tree, tree->GetRegNum()); +#if defined(FEATURE_SIMD) + if ((lclType == TYP_SIMD12) && !compiler->lvaMapSimd12ToSimd16(varDsc)) + { + // Store SIMD12 to stack as 12 bytes + // TODO: an internal simd reg when SSE41 is not supported + GetEmitter()->emitStoreSimd12ToLclOffset(varNum, tree->AsLclVarCommon()->GetLclOffs(), + tree->GetRegNum(), tree); + } + else +#endif + { + instruction storeIns = ins_Store(lclType, compiler->isSIMDTypeLocalAligned(varNum)); + inst_TT_RV(storeIns, size, tree, tree->GetRegNum()); + } } // We should only have both GTF_SPILL (i.e. the flag causing this method to be called) and diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs b/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs new file mode 100644 index 00000000000000..f96c900f5490b0 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs @@ -0,0 +1,37 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using Xunit; + +public unsafe class Runtime_95043 +{ + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Sweep(int ecx, int* edx, Vector3 stack12) + { + for (int i = 0; i < 5; i++) + { + Console.WriteLine(ecx); + if (ecx == -1) + { + ecx = edx[0]; + } + else if (!float.IsNaN(stack12.X)) + { + edx[0] = -1; + ecx = -1; + } + } + } + + [Fact] + [MethodImpl(MethodImplOptions.NoInlining)] + public static void Test() + { + int x = 42; + Sweep(1, &x, Vector3.Zero); + Assert.Equal(-1, x); + } +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.csproj new file mode 100644 index 00000000000000..1981001110bbe0 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.csproj @@ -0,0 +1,9 @@ + + + True + true + + + + + From 6d918539c31fedb24b36315ede78e970ce7060cc Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 28 May 2024 02:22:17 +0200 Subject: [PATCH 2/5] fix pre-sse41 --- src/coreclr/jit/codegenlinear.cpp | 3 +-- src/coreclr/jit/emitxarch.cpp | 17 +++++++++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index d5f88fa10b8fee..6ceca3177b8c3b 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -942,9 +942,8 @@ void CodeGen::genSpillVar(GenTree* tree) if ((lclType == TYP_SIMD12) && !compiler->lvaMapSimd12ToSimd16(varDsc)) { // Store SIMD12 to stack as 12 bytes - // TODO: an internal simd reg when SSE41 is not supported GetEmitter()->emitStoreSimd12ToLclOffset(varNum, tree->AsLclVarCommon()->GetLclOffs(), - tree->GetRegNum(), tree); + tree->GetRegNum(), nullptr); } else #endif diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index e8ff553f80cc80..f06827421b3987 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -5620,7 +5620,7 @@ void emitter::emitIns_R(instruction ins, emitAttr attr, regNumber reg) // varNum - the variable on the stack to use as a base; // offset - the offset from the varNum; // dataReg - the src reg with SIMD12 value; -// tmpRegProvider - a tree to grab a tmp reg from if needed. +// tmpRegProvider - a tree to grab a tmp reg from if needed (can be null). // void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNumber dataReg, GenTree* tmpRegProvider) { @@ -5635,7 +5635,7 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu // Extract and store upper 4 bytes emitIns_S_R_I(INS_extractps, EA_16BYTE, varNum, offset + 8, dataReg, 2); } - else + else if (tmpRegProvider != nullptr) { regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider); assert(isFloatReg(tmpReg)); @@ -5646,6 +5646,19 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu // Store upper 4 bytes emitIns_S_R(INS_movss, EA_4BYTE, tmpReg, varNum, offset + 8); } + else + { + // We don't have temp regs - let's do two shuffles then + + // [0,1,2,3] -> [2,3,0,1] + emitIns_R_R_I(INS_pshufd, EA_16BYTE, dataReg, dataReg, 78); + + // Store upper 4 bytes + emitIns_S_R(INS_movss, EA_4BYTE, dataReg, varNum, offset + 8); + + // Restore dataReg to its previous state: [2,3,0,1] -> [0,1,2,3] + emitIns_R_R_I(INS_pshufd, EA_16BYTE, dataReg, dataReg, 78); + } } #endif // FEATURE_SIMD From ab6a85e56e671955c0be0b7b6d3b4ed3bc1aef60 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 28 May 2024 13:41:49 +0200 Subject: [PATCH 3/5] Update Runtime_95043.cs --- src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs b/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs index f96c900f5490b0..5c2a5c890165d5 100644 --- a/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs +++ b/src/tests/JIT/Regression/JitBlue/Runtime_95043/Runtime_95043.cs @@ -9,7 +9,7 @@ public unsafe class Runtime_95043 { [MethodImpl(MethodImplOptions.NoInlining)] - public static void Sweep(int ecx, int* edx, Vector3 stack12) + private static void Sweep(int ecx, int* edx, Vector3 stack12) { for (int i = 0; i < 5; i++) { From 844cf0676ec322c1c4dcc3650d01938b9a0206c2 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 1 Jun 2024 02:34:16 +0200 Subject: [PATCH 4/5] address feedback --- src/coreclr/jit/codegenlinear.cpp | 2 +- src/coreclr/jit/lsra.cpp | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 6ceca3177b8c3b..1f0ede80c7a8a2 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -939,7 +939,7 @@ void CodeGen::genSpillVar(GenTree* tree) { assert(varDsc->GetRegNum() == tree->GetRegNum()); #if defined(FEATURE_SIMD) - if ((lclType == TYP_SIMD12) && !compiler->lvaMapSimd12ToSimd16(varDsc)) + if (lclType == TYP_SIMD12) { // Store SIMD12 to stack as 12 bytes GetEmitter()->emitStoreSimd12ToLclOffset(varNum, tree->AsLclVarCommon()->GetLclOffs(), diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 5e5bbac3839f05..b696cf975910fc 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -8560,7 +8560,15 @@ void LinearScan::insertMove( // This var can't be marked lvRegister now varDsc->SetRegNum(REG_STK); - GenTree* src = compiler->gtNewLclvNode(lclNum, varDsc->TypeGet()); + var_types typ = varDsc->TypeGet(); +#if defined(FEATURE_SIMD) + if ((typ == TYP_SIMD12) && compiler->lvaMapSimd12ToSimd16(varDsc)) + { + typ = TYP_SIMD16; + } +#endif + + GenTree* src = compiler->gtNewLclvNode(lclNum, typ); SetLsraAdded(src); // There are three cases we need to handle: From 0e8283dac07675a7e2a1505460ee49db8e9a5cc4 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 11 Jun 2024 05:44:27 +0200 Subject: [PATCH 5/5] Fix arm64 --- src/coreclr/jit/emitarm64.cpp | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index ff88d9fe29c98a..8dfca521a81174 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -17064,12 +17064,28 @@ void emitter::emitStoreSimd12ToLclOffset(unsigned varNum, unsigned offset, regNu // store lower 8 bytes emitIns_S_R(INS_str, EA_8BYTE, dataReg, varNum, offset); - // Extract upper 4-bytes from data - regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider); - emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2); + if (codeGen->internalRegisters.Count(tmpRegProvider) == 0) + { + // We don't have temp regs - let's do two shuffles then + + // [0,1,2,3] -> [2,3,0,1] + emitIns_R_R_R_I(INS_ext, EA_16BYTE, dataReg, dataReg, dataReg, 8, INS_OPTS_16B); + + // store lower 4 bytes + emitIns_S_R(INS_str, EA_4BYTE, dataReg, varNum, offset + 8); + + // Restore dataReg to its previous state: [2,3,0,1] -> [0,1,2,3] + emitIns_R_R_R_I(INS_ext, EA_16BYTE, dataReg, dataReg, dataReg, 8, INS_OPTS_16B); + } + else + { + // Extract upper 4-bytes from data + regNumber tmpReg = codeGen->internalRegisters.GetSingle(tmpRegProvider); + emitIns_R_R_I(INS_mov, EA_4BYTE, tmpReg, dataReg, 2); - // 4-byte write - emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8); + // 4-byte write + emitIns_S_R(INS_str, EA_4BYTE, tmpReg, varNum, offset + 8); + } } #endif // FEATURE_SIMD