From 2472edf7b688ae92200580edb8bdb9bede911fe3 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Thu, 5 Oct 2023 13:41:58 +0100 Subject: [PATCH] Add UZP1 TEST_IMG: ubuntu/dotnet-build TEST_CMD: echo # This patch is not expected to build Jira: ENTLLT-6871 Change-Id: I0b6034296d36a044a847b65f1f4cf508ad3494f7 --- src/coreclr/jit/emitarm64.cpp | 82 ++++++++++++++++++++++++++ src/coreclr/jit/emitfmtsarm64.h | 2 + src/coreclr/jit/hwintrinsiclistarm64.h | 9 +++ src/coreclr/jit/instrsarm64.h | 4 +- 4 files changed, 96 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index 5e0b4f2e78a959..fa3d8ba725eb79 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -924,6 +924,30 @@ void emitter::emitInsSanityCheck(instrDesc* id) assert(isVectorRegister(id->idReg3())); break; + case IF_DV_3H: // DV_3H ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (SVE vector) + // Copied this from IF_DV_3E! + assert(isValidVectorElemsize(id->idOpSize())); + assert(insOptsNone(id->idInsOpt())); + assert(isSVEVectorRegister(id->idReg1())); + assert(isSVEVectorRegister(id->idReg2())); + assert(isSVEVectorRegister(id->idReg3())); + elemsize = id->idOpSize(); + index = emitGetInsSC(id); + assert(isValidVectorIndex(EA_16BYTE, elemsize, index)); + break; + + case IF_DV_3I: // DV_3I ........XX..mmmm .......nnnn.dddd Pd Pn Pm (SVE predicate) + // Copied this from IF_DV_3E and modified! 
+ assert(isValidVectorElemsize(id->idOpSize())); + assert(insOptsNone(id->idInsOpt())); + assert(isSVEPredicateRegister(id->idReg1())); + assert(isSVEPredicateRegister(id->idReg2())); + assert(isSVEPredicateRegister(id->idReg3())); + elemsize = id->idOpSize(); + index = emitGetInsSC(id); + assert(isValidVectorIndex(EA_16BYTE, elemsize, index)); + break; + case IF_DV_4A: // DR_4A .........X.mmmmm .aaaaannnnnddddd Rd Rn Rm Ra (scalar) assert(isValidGeneralDatasize(id->idOpSize())); assert(isVectorRegister(id->idReg1())); @@ -1020,6 +1044,8 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc* id) case IF_DV_3EI: // DV_3EI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) case IF_DV_3F: // DV_3F .Q......XX.mmmmm ......nnnnnddddd Vd Vn Vm (vector) case IF_DV_3G: // DV_3G .Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector) + case IF_DV_3H: // DV_3H ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (SVE vector) + case IF_DV_3I: // DV_3I ........XX..mmmm .......nnnn.dddd Pd Pn Pm (SVE predicate) case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) // Tracked GC pointers cannot be placed into the SIMD registers. 
return false; @@ -1725,6 +1751,7 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) const static insFormat formatEncode3H[3] = {IF_DR_3A, IF_DV_3A, IF_DV_3AI}; const static insFormat formatEncode3I[3] = {IF_DR_2E, IF_DR_2F, IF_DV_2M}; const static insFormat formatEncode3J[3] = {IF_LS_2D, IF_LS_3F, IF_LS_2E}; + const static insFormat formatEncode3K[3] = {IF_DV_3A, IF_DV_3H, IF_DV_3I}; const static insFormat formatEncode2A[2] = {IF_DR_2E, IF_DR_2F}; const static insFormat formatEncode2B[2] = {IF_DR_3A, IF_DR_3B}; const static insFormat formatEncode2C[2] = {IF_DR_3A, IF_DI_2D}; @@ -2047,6 +2074,17 @@ emitter::code_t emitter::emitInsCode(instruction ins, insFormat fmt) } break; + case IF_EN3K: + for (index = 0; index < 3; index++) + { + if (fmt == formatEncode3K[index]) + { + encoding_found = true; + break; + } + } + break; + case IF_EN2A: for (index = 0; index < 2; index++) { @@ -11748,6 +11786,26 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) dst += emitOutput_Instr(dst, code); break; + case IF_DV_3H: // DV_3H ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (SVE vector) + code = emitInsCode(ins, fmt); + elemsize = id->idOpSize(); + code |= insEncodeElemsize(elemsize); // XX + code |= insEncodeReg_Zd(id->idReg1()); // ddddd + code |= insEncodeReg_Zn(id->idReg2()); // nnnnn + code |= insEncodeReg_Zm(id->idReg3()); // mmmmm + dst += emitOutput_Instr(dst, code); + break; + + case IF_DV_3I: // DV_3I ........XX..mmmm .......nnnn.dddd Pd Pn Pm (SVE predicate) + code = emitInsCode(ins, fmt); + elemsize = id->idOpSize(); + code |= insEncodeElemsize(elemsize); // XX + code |= insEncodeReg_Pd(id->idReg1()); // dddd + code |= insEncodeReg_Pn(id->idReg2()); // nnnn + code |= insEncodeReg_Pm(id->idReg3()); // mmmm + dst += emitOutput_Instr(dst, code); + break; + case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) code = emitInsCode(ins, fmt); elemsize = id->idOpSize(); @@ -13926,6 +13984,20 @@ void 
emitter::emitDispInsHelp( emitDispImm(emitGetInsSC(id), false); break; + case IF_DV_3H: // DV_3H ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (SVE vector) + emitDispSVEVectorReg(id->idReg1(), id->idInsOpt(), true); + emitDispSVEVectorReg(id->idReg2(), id->idInsOpt(), true); + emitDispSVEVectorReg(id->idReg3(), id->idInsOpt(), true); + emitDispImm(emitGetInsSC(id), false); + break; + + case IF_DV_3I: // DV_3I ........XX..mmmm .......nnnn.dddd Pd Pn Pm (SVE predicate) + emitDispSVEPredicateReg(id->idReg1(), id->idInsOpt(), true); + emitDispSVEPredicateReg(id->idReg2(), id->idInsOpt(), true); + emitDispSVEPredicateReg(id->idReg3(), id->idInsOpt(), true); + emitDispImm(emitGetInsSC(id), false); + break; + case IF_DV_4A: // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Va Vn Vm (scalar) emitDispReg(id->idReg1(), size, true); emitDispReg(id->idReg2(), size, true); @@ -15894,6 +15966,16 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins result.insLatency = PERFSCORE_LATENCY_2C; break; + case IF_DV_3H: // uzp1 + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + + case IF_DV_3I: // uzp1 + result.insThroughput = PERFSCORE_THROUGHPUT_2X; + result.insLatency = PERFSCORE_LATENCY_2C; + break; + case IF_DV_2L: // abs, neg, cmeq, cmge, cmgt, cmle, cmlt (scalar) case IF_DV_2M: // (vector) // abs, neg, mvn, not, cmeq, cmge, cmgt, cmle, cmlt, diff --git a/src/coreclr/jit/emitfmtsarm64.h b/src/coreclr/jit/emitfmtsarm64.h index 31bbde6afc47af..980ad2bf6c53f2 100644 --- a/src/coreclr/jit/emitfmtsarm64.h +++ b/src/coreclr/jit/emitfmtsarm64.h @@ -220,6 +220,8 @@ IF_DEF(DV_3E, IS_NONE, NONE) // DV_3E ........XX.mmmmm ......nnnnnddddd V IF_DEF(DV_3EI, IS_NONE, NONE) // DV_3EI ........XXLMmmmm ....H.nnnnnddddd Vd Vn Vm[] (scalar by element) IF_DEF(DV_3F, IS_NONE, NONE) // DV_3F ...........mmmmm ......nnnnnddddd Qd Sn Vm (Qd used as both source and destination) IF_DEF(DV_3G, IS_NONE, NONE) // DV_3G 
.Q.........mmmmm .iiii.nnnnnddddd Vd Vn Vm imm (vector) +IF_DEF(DV_3H, IS_NONE, NONE) // DV_3H ........XX.mmmmm ......nnnnnddddd Vd Vn Vm (SVE vector) +IF_DEF(DV_3I, IS_NONE, NONE) // DV_3I ........XX..mmmm .......nnnn.dddd Pd Pn Pm (SVE predicate) IF_DEF(DV_4A, IS_NONE, NONE) // DV_4A .........X.mmmmm .aaaaannnnnddddd Vd Vn Vm Va (scalar) diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index da77e3fdfabd54..6a601357e67ec5 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -764,6 +764,15 @@ HARDWARE_INTRINSIC(Sha256, HashUpdate2, HARDWARE_INTRINSIC(Sha256, ScheduleUpdate0, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha256su0, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sha256, ScheduleUpdate1, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sha256su1, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_HasRMWSemantics) + +// *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags +// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} +// 
*************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** +// SVE Intrinsics +HARDWARE_INTRINSIC(SVE_Arm64, UnzipEven, -1, 2, true, {INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1, INS_uzp1}, HW_Category_SVE, HW_Flag_NoFlag) + + #endif // FEATURE_HW_INTRINSIC #undef HARDWARE_INTRINSIC diff --git a/src/coreclr/jit/instrsarm64.h b/src/coreclr/jit/instrsarm64.h index 39d729b52de92b..818ae052df7e8c 100644 --- a/src/coreclr/jit/instrsarm64.h +++ b/src/coreclr/jit/instrsarm64.h @@ -1680,8 +1680,10 @@ INST1(fminnmv, "fminnmv", 0, IF_DV_2R, 0x2EB0C800) INST1(fminv, "fminv", 0, IF_DV_2R, 0x2EB0F800) // fminv Vd,Vn DV_2R 0Q1011101X110000 111110nnnnnddddd 2EB0 F800 Vd,Vn (vector) -INST1(uzp1, "uzp1", 0, IF_DV_3A, 0x0E001800) +INST3(uzp1, "uzp1", 0, IF_EN3n, 0x0E001800, 0x05206800, 0x05204800) // uzp1 Vd,Vn,Vm DV_3A 0Q001110XX0mmmmm 000110nnnnnddddd 0E00 1800 Vd,Vn,Vm (vector) + // uzp1 Zd,Zn,Zm DV_3H 00000101XX1mmmmm 011010nnnnnddddd 0520 6800 Zd,Zn,Zm (SVE vector) + // uzp1 Pd,Pn,Pm DV_3I 00000101XX10mmmm 0100100nnnn0dddd 0520 4800 Pd,Pn,Pm (SVE predicate) INST1(uzp2, "uzp2", 0, IF_DV_3A, 0x0E005800) // upz2 Vd,Vn,Vm DV_3A 0Q001110XX0mmmmm 010110nnnnnddddd 0E00 5800 Vd,Vn,Vm (vector)