Skip to content

Commit 21c52fa

Browse files
authored
Arm64: Implement LoadAndInsertScalar APIs (#93197)
* Add APIs for LoadVector*x2 * Add implementation for LoadVector*x2 * Add APIs for LoadVector*x3 * Add implementation for LoadVector*x3 * Add APIs for LoadVector*x4 * Add implementation for LoadVector*x4 * Add test cases for LoadVectorx2, LoadVectorx3, LoadVectorx4 * minor rename * REVERT: Add Debug.Assert(false) to make sure test runs * Retain gtOtherReg rather than making it an array * Add APIs for LoadAndReplicateToVector64x* and LoadAndReplicateToVector128x* * Revert "REVERT: Add Debug.Assert(false) to make sure test runs" This reverts commit 92fb279. * fix the test template * Implement LoadAndReplicateToVector* APIs * Add test coverage for LoadAndReplicateToVector* APIs * fix the LoadVectorx4 template * address review comment * Add APIs for LoadAndInsertScalar() * fix one more error in LoadVectorx4Test.template * Add APIs for LoadAndInsertScalar() * Fix the API definition * wip: Implementation * feedback by Bruce * Rename the test case name * Disable test for mono * Fix the errors to make it work * fix merge conflicts * fix the typo in test case * code cleanup * fix the importing of normal LoadAndInsertScalar * Fix some more importing and lsra * fix the lsra issues * Add test for LoadAndInsertScalarx2 * Add test cases for LoadAndInsertScalarx2 and LoadAndInsertScalarx3 * jit format * fix bug * fix test build errors * fix the test errors * fix typos in x3 and x4 * address feedback from Bruce
1 parent f1b4930 commit 21c52fa

File tree

18 files changed

+2240
-9
lines changed

18 files changed

+2240
-9
lines changed

src/coreclr/jit/compiler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2833,6 +2833,7 @@ class Compiler
28332833

28342834
#ifdef TARGET_ARM64
28352835
GenTreeFieldList* gtConvertTableOpToFieldList(GenTree* op, unsigned fieldCount);
2836+
GenTreeFieldList* gtConvertParamOpToFieldList(GenTree* op, unsigned fieldCount, CORINFO_CLASS_HANDLE clsHnd);
28362837
#endif
28372838
#endif // FEATURE_HW_INTRINSICS
28382839

src/coreclr/jit/gentree.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25242,6 +25242,43 @@ GenTreeFieldList* Compiler::gtConvertTableOpToFieldList(GenTree* op, unsigned fi
2524225242
}
2524325243
return fieldList;
2524425244
}
25245+
25246+
//------------------------------------------------------------------------
25247+
// gtConvertParamOpToFieldList: Convert an operand that represents a tuple of structs into a
25248+
// field list, where each field represents a struct in the tuple.
25249+
//
25250+
// Arguments:
25251+
// op -- Operand to convert. It is accessed through AsLclVar(), so it must be a local variable node.
25252+
// fieldCount -- Number of fields or rows present. The local's size is divided evenly by this count.
25253+
// clsHnd -- Class handle of the tuple; its fields are queried to determine each field's SIMD type.
25254+
//
25255+
// Return Value:
25256+
// The GenTreeFieldList node, holding one local-field node per tuple field.
25257+
//
25258+
GenTreeFieldList* Compiler::gtConvertParamOpToFieldList(GenTree* op, unsigned fieldCount, CORINFO_CLASS_HANDLE clsHnd)
25259+
{
25260+
LclVarDsc* opVarDsc = lvaGetDesc(op->AsLclVar());
25261+
unsigned lclNum = lvaGetLclNum(opVarDsc);
25262+
// Each field is given an equal share of the local's total size; this is the stride between fields.
unsigned fieldSize = opVarDsc->lvSize() / fieldCount;
25263+
GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList();
25264+
int offset = 0;
25265+
unsigned sizeBytes = 0;
25266+
CORINFO_CLASS_HANDLE structType;
25267+
25268+
for (unsigned fieldId = 0; fieldId < fieldCount; fieldId++)
25269+
{
25270+
CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(clsHnd, fieldId);
25271+
// The converted var type is discarded; getFieldType is called to fill in structType for this field.
JitType2PreciseVarType(info.compCompHnd->getFieldType(fieldHandle, &structType));
25272+
// Only the size out-parameter is used here; the returned base JIT type is ignored.
getBaseJitTypeAndSizeOfSIMDType(structType, &sizeBytes);
25273+
var_types simdType = getSIMDTypeForSize(sizeBytes);
25274+
25275+
// Create a local-field node of the SIMD type at the current offset and append it to the list.
GenTreeLclFld* fldNode = gtNewLclFldNode(lclNum, simdType, offset);
25276+
fieldList->AddField(this, fldNode, offset, simdType);
25277+
25278+
offset += fieldSize;
25279+
}
25280+
return fieldList;
25281+
}
2524525282
#endif // TARGET_ARM64
2524625283

2524725284
GenTree* Compiler::gtNewSimdWithLowerNode(
@@ -25391,6 +25428,13 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
2539125428

2539225429
#ifdef TARGET_ARM64
2539325430
case NI_AdvSimd_LoadAndInsertScalar:
25431+
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
25432+
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
25433+
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
25434+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
25435+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
25436+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
25437+
2539425438
addr = Op(3);
2539525439
break;
2539625440
#endif // TARGET_ARM64
@@ -25751,6 +25795,7 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
2575125795
case NI_AdvSimd_Arm64_LoadPairVector64:
2575225796
case NI_AdvSimd_Arm64_LoadPairVector64NonTemporal:
2575325797
case NI_AdvSimd_LoadVector64x2:
25798+
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
2575425799
case NI_AdvSimd_LoadAndReplicateToVector64x2:
2575525800
return compiler->typGetBlkLayout(16);
2575625801

@@ -25760,17 +25805,22 @@ ClassLayout* GenTreeHWIntrinsic::GetLayout(Compiler* compiler) const
2576025805
case NI_AdvSimd_LoadVector64x4:
2576125806
case NI_AdvSimd_LoadAndReplicateToVector64x4:
2576225807
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
25808+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
25809+
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
2576325810
return compiler->typGetBlkLayout(32);
2576425811

2576525812
case NI_AdvSimd_LoadVector64x3:
25813+
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
2576625814
case NI_AdvSimd_LoadAndReplicateToVector64x3:
2576725815
return compiler->typGetBlkLayout(24);
2576825816

2576925817
case NI_AdvSimd_Arm64_LoadVector128x3:
25818+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
2577025819
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
2577125820
return compiler->typGetBlkLayout(48);
2577225821

2577325822
case NI_AdvSimd_Arm64_LoadVector128x4:
25823+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
2577425824
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
2577525825
return compiler->typGetBlkLayout(64);
2577625826

src/coreclr/jit/hwintrinsic.cpp

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,9 +1071,53 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
10711071

10721072
if (HWIntrinsicInfo::IsMultiReg(intrinsic))
10731073
{
1074-
// We don't have generic multireg APIs
10751074
assert(sizeBytes == 0);
10761075
}
1076+
1077+
#ifdef TARGET_ARM64
1078+
else if ((intrinsic == NI_AdvSimd_LoadAndInsertScalar) || (intrinsic == NI_AdvSimd_Arm64_LoadAndInsertScalar))
1079+
{
1080+
CorInfoType pSimdBaseJitType = CORINFO_TYPE_UNDEF;
1081+
var_types retFieldType = impNormStructType(sig->retTypeSigClass, &pSimdBaseJitType);
1082+
1083+
if (retFieldType == TYP_STRUCT)
1084+
{
1085+
CORINFO_CLASS_HANDLE structType;
1086+
unsigned int sizeBytes = 0;
1087+
1088+
// LoadAndInsertScalar that returns 2, 3, or 4 vectors
1089+
assert(pSimdBaseJitType == CORINFO_TYPE_UNDEF);
1090+
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sig->retTypeSigClass);
1091+
assert(fieldCount > 1);
1092+
CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(sig->retTypeClass, 0);
1093+
CorInfoType fieldType = info.compCompHnd->getFieldType(fieldHandle, &structType);
1094+
simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(structType, &sizeBytes);
1095+
switch (fieldCount)
1096+
{
1097+
case 2:
1098+
intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x2
1099+
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2;
1100+
break;
1101+
case 3:
1102+
intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x3
1103+
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3;
1104+
break;
1105+
case 4:
1106+
intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x4
1107+
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4;
1108+
break;
1109+
default:
1110+
assert("unsupported");
1111+
}
1112+
}
1113+
else
1114+
{
1115+
assert((retFieldType == TYP_SIMD8) || (retFieldType == TYP_SIMD16));
1116+
assert(isSupportedBaseType(intrinsic, simdBaseJitType));
1117+
retType = getSIMDTypeForSize(sizeBytes);
1118+
}
1119+
}
1120+
#endif
10771121
else
10781122
{
10791123
// We want to return early here for cases where retType was TYP_STRUCT as per method signature and
@@ -1130,7 +1174,9 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
11301174

11311175
#ifdef TARGET_ARM64
11321176
if ((intrinsic == NI_AdvSimd_Insert) || (intrinsic == NI_AdvSimd_InsertScalar) ||
1133-
(intrinsic == NI_AdvSimd_LoadAndInsertScalar))
1177+
((intrinsic >= NI_AdvSimd_LoadAndInsertScalar) && (intrinsic <= NI_AdvSimd_LoadAndInsertScalarVector64x4)) ||
1178+
((intrinsic >= NI_AdvSimd_Arm64_LoadAndInsertScalar) &&
1179+
(intrinsic <= NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4)))
11341180
{
11351181
assert(sig->numArgs == 3);
11361182
immOp = impStackTop(1).val;

src/coreclr/jit/hwintrinsic.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -770,18 +770,24 @@ struct HWIntrinsicInfo
770770
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
771771
case NI_AdvSimd_LoadVector64x2:
772772
case NI_AdvSimd_Arm64_LoadVector128x2:
773+
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
774+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
773775
case NI_AdvSimd_LoadAndReplicateToVector64x2:
774776
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x2:
775777
return 2;
776778

777779
case NI_AdvSimd_LoadVector64x3:
778780
case NI_AdvSimd_Arm64_LoadVector128x3:
781+
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
782+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
779783
case NI_AdvSimd_LoadAndReplicateToVector64x3:
780784
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x3:
781785
return 3;
782786

783787
case NI_AdvSimd_LoadVector64x4:
784788
case NI_AdvSimd_Arm64_LoadVector128x4:
789+
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
790+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
785791
case NI_AdvSimd_LoadAndReplicateToVector64x4:
786792
case NI_AdvSimd_Arm64_LoadAndReplicateToVector128x4:
787793
return 4;

src/coreclr/jit/hwintrinsicarm64.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,12 @@ void HWIntrinsicInfo::lookupImmBounds(
253253
case NI_AdvSimd_Insert:
254254
case NI_AdvSimd_InsertScalar:
255255
case NI_AdvSimd_LoadAndInsertScalar:
256+
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
257+
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
258+
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
259+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
260+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
261+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
256262
case NI_AdvSimd_StoreSelectedScalar:
257263
case NI_AdvSimd_Arm64_DuplicateSelectedScalarToVector128:
258264
case NI_AdvSimd_Arm64_InsertSelectedScalar:
@@ -1916,6 +1922,57 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
19161922
retNode = impStoreMultiRegValueToVar(op1, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
19171923
break;
19181924
}
1925+
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
1926+
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
1927+
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
1928+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
1929+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
1930+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
1931+
{
1932+
assert(sig->numArgs == 3);
1933+
1934+
CORINFO_ARG_LIST_HANDLE arg1 = sig->args;
1935+
CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1);
1936+
CORINFO_ARG_LIST_HANDLE arg3 = info.compCompHnd->getArgNext(arg2);
1937+
var_types argType = TYP_UNKNOWN;
1938+
CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE;
1939+
1940+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg3, &argClass)));
1941+
op3 = getArgForHWIntrinsic(argType, argClass);
1942+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg2, &argClass)));
1943+
op2 = getArgForHWIntrinsic(argType, argClass);
1944+
argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass)));
1945+
op1 = impPopStack().val;
1946+
1947+
if (op3->OperIs(GT_CAST))
1948+
{
1949+
// Although the API specifies a pointer, if what we have is a BYREF, that's what
1950+
// we really want, so throw away the cast.
1951+
if (op3->gtGetOp1()->TypeGet() == TYP_BYREF)
1952+
{
1953+
op3 = op3->gtGetOp1();
1954+
}
1955+
}
1956+
1957+
assert(HWIntrinsicInfo::IsMultiReg(intrinsic));
1958+
assert(op1->TypeGet() == TYP_STRUCT);
1959+
1960+
info.compNeedsConsecutiveRegisters = true;
1961+
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(argClass);
1962+
1963+
if (!op1->OperIs(GT_LCL_VAR))
1964+
{
1965+
unsigned tmp = lvaGrabTemp(true DEBUGARG("LoadAndInsertScalar temp tree"));
1966+
1967+
impStoreTemp(tmp, op1, CHECK_SPILL_NONE);
1968+
op1 = gtNewLclvNode(tmp, argType);
1969+
}
1970+
1971+
op1 = gtConvertParamOpToFieldList(op1, fieldCount, argClass);
1972+
op1 = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, intrinsic, simdBaseJitType, simdSize);
1973+
retNode = impStoreMultiRegValueToVar(op1, sig->retTypeSigClass DEBUGARG(CorInfoCallConvExtension::Managed));
1974+
break;
1975+
}
19191976
case NI_AdvSimd_VectorTableLookup:
19201977
case NI_AdvSimd_Arm64_VectorTableLookup:
19211978
{

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -728,6 +728,52 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
728728
}
729729
break;
730730

731+
case NI_AdvSimd_LoadAndInsertScalarVector64x2:
732+
case NI_AdvSimd_LoadAndInsertScalarVector64x3:
733+
case NI_AdvSimd_LoadAndInsertScalarVector64x4:
734+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2:
735+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3:
736+
case NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4:
737+
{
738+
assert(isRMW);
739+
unsigned fieldIdx = 0;
740+
op2Reg = intrin.op2->GetRegNum();
741+
op3Reg = intrin.op3->GetRegNum();
742+
assert(intrin.op1->OperIsFieldList());
743+
744+
GenTreeFieldList* fieldList = intrin.op1->AsFieldList();
745+
GenTree* firstField = fieldList->Uses().GetHead()->GetNode();
746+
op1Reg = firstField->GetRegNum();
747+
748+
regNumber targetFieldReg = REG_NA;
749+
regNumber op1FieldReg = REG_NA;
750+
751+
for (GenTreeFieldList::Use& use : fieldList->Uses())
752+
{
753+
GenTree* fieldNode = use.GetNode();
754+
755+
targetFieldReg = node->GetRegByIndex(fieldIdx);
756+
op1FieldReg = fieldNode->GetRegNum();
757+
758+
if (targetFieldReg != op1FieldReg)
759+
{
760+
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(fieldNode), targetFieldReg, op1FieldReg,
761+
/* canSkip */ true);
762+
}
763+
fieldIdx++;
764+
}
765+
766+
HWIntrinsicImmOpHelper helper(this, intrin.op2, node);
767+
768+
for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
769+
{
770+
const int elementIndex = helper.ImmValue();
771+
772+
GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op3Reg, elementIndex);
773+
}
774+
775+
break;
776+
}
731777
case NI_AdvSimd_Arm64_LoadPairVector128:
732778
case NI_AdvSimd_Arm64_LoadPairVector128NonTemporal:
733779
case NI_AdvSimd_Arm64_LoadPairVector64:

0 commit comments

Comments
 (0)