@@ -615,6 +615,12 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
615615 all (typeIsLegalIntOrFPVec (0 , IntOrFPVecTys, ST),
616616 typeIsLegalIntOrFPVec (1 , IntOrFPVecTys, ST))));
617617
618+ getActionDefinitionsBuilder (G_INSERT_SUBVECTOR)
619+ .customIf (all (typeIsLegalBoolVec (0 , BoolVecTys, ST),
620+ typeIsLegalBoolVec (1 , BoolVecTys, ST)))
621+ .customIf (all (typeIsLegalIntOrFPVec (0 , IntOrFPVecTys, ST),
622+ typeIsLegalIntOrFPVec (1 , IntOrFPVecTys, ST)));
623+
618624 getLegacyLegalizerInfo ().computeTables ();
619625}
620626
@@ -834,9 +840,7 @@ static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
834840// / Gets the two common "VL" operands: an all-ones mask and the vector length.
835841// / VecTy is a scalable vector type.
836842static std::pair<MachineInstrBuilder, MachineInstrBuilder>
837- buildDefaultVLOps (const DstOp &Dst, MachineIRBuilder &MIB,
838- MachineRegisterInfo &MRI) {
839- LLT VecTy = Dst.getLLTTy (MRI);
843+ buildDefaultVLOps (LLT VecTy, MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
840844 assert (VecTy.isScalableVector () && " Expecting scalable container type" );
841845 const RISCVSubtarget &STI = MIB.getMF ().getSubtarget <RISCVSubtarget>();
842846 LLT XLenTy (STI.getXLenVT ());
@@ -890,7 +894,7 @@ bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
890894 // Handle case of s64 element vectors on rv32
891895 if (XLenTy.getSizeInBits () == 32 &&
892896 VecTy.getElementType ().getSizeInBits () == 64 ) {
893- auto [_, VL] = buildDefaultVLOps (Dst, MIB, MRI);
897+ auto [_, VL] = buildDefaultVLOps (MRI. getType ( Dst) , MIB, MRI);
894898 buildSplatSplitS64WithVL (Dst, MIB.buildUndef (VecTy), SplatVal, VL, MIB,
895899 MRI);
896900 MI.eraseFromParent ();
@@ -1025,6 +1029,134 @@ bool RISCVLegalizerInfo::legalizeExtractSubvector(MachineInstr &MI,
10251029 return true ;
10261030}
10271031
1032+ bool RISCVLegalizerInfo::legalizeInsertSubvector (MachineInstr &MI,
1033+ LegalizerHelper &Helper,
1034+ MachineIRBuilder &MIB) const {
1035+ GInsertSubvector &IS = cast<GInsertSubvector>(MI);
1036+
1037+ MachineRegisterInfo &MRI = *MIB.getMRI ();
1038+
1039+ Register Dst = IS.getReg (0 );
1040+ Register BigVec = IS.getBigVec ();
1041+ Register LitVec = IS.getSubVec ();
1042+ uint64_t Idx = IS.getIndexImm ();
1043+
1044+ LLT BigTy = MRI.getType (BigVec);
1045+ LLT LitTy = MRI.getType (LitVec);
1046+
1047+ if (Idx == 0 ||
1048+ MRI.getVRegDef (BigVec)->getOpcode () == TargetOpcode::G_IMPLICIT_DEF)
1049+ return true ;
1050+
1051+ // We don't have the ability to slide mask vectors up indexed by their i1
1052+ // elements; the smallest we can do is i8. Often we are able to bitcast to
1053+ // equivalent i8 vectors. Otherwise, we can must zeroextend to equivalent i8
1054+ // vectors and truncate down after the insert.
1055+ if (LitTy.getElementType () == LLT::scalar (1 )) {
1056+ auto BigTyMinElts = BigTy.getElementCount ().getKnownMinValue ();
1057+ auto LitTyMinElts = LitTy.getElementCount ().getKnownMinValue ();
1058+ if (BigTyMinElts >= 8 && LitTyMinElts >= 8 )
1059+ return Helper.bitcast (
1060+ IS, 0 ,
1061+ LLT::vector (BigTy.getElementCount ().divideCoefficientBy (8 ), 8 ));
1062+
1063+ // We can't slide this mask vector up indexed by its i1 elements.
1064+ // This poses a problem when we wish to insert a scalable vector which
1065+ // can't be re-expressed as a larger type. Just choose the slow path and
1066+ // extend to a larger type, then truncate back down.
1067+ LLT ExtBigTy = BigTy.changeElementType (LLT::scalar (8 ));
1068+ return Helper.widenScalar (IS, 0 , ExtBigTy);
1069+ }
1070+
1071+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo ();
1072+ unsigned SubRegIdx, RemIdx;
1073+ std::tie (SubRegIdx, RemIdx) =
1074+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs (
1075+ getMVTForLLT (BigTy), getMVTForLLT (LitTy), Idx, TRI);
1076+
1077+ TypeSize VecRegSize = TypeSize::getScalable (RISCV::RVVBitsPerBlock);
1078+ assert (isPowerOf2_64 (
1079+ STI.expandVScale (LitTy.getSizeInBits ()).getKnownMinValue ()));
1080+ bool ExactlyVecRegSized =
1081+ STI.expandVScale (LitTy.getSizeInBits ())
1082+ .isKnownMultipleOf (STI.expandVScale (VecRegSize));
1083+
1084+ // If the Idx has been completely eliminated and this subvector's size is a
1085+ // vector register or a multiple thereof, or the surrounding elements are
1086+ // undef, then this is a subvector insert which naturally aligns to a vector
1087+ // register. These can easily be handled using subregister manipulation.
1088+ if (RemIdx == 0 && ExactlyVecRegSized)
1089+ return true ;
1090+
1091+ // If the subvector is smaller than a vector register, then the insertion
1092+ // must preserve the undisturbed elements of the register. We do this by
1093+ // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
1094+ // (which resolves to a subregister copy), performing a VSLIDEUP to place the
1095+ // subvector within the vector register, and an INSERT_SUBVECTOR of that
1096+ // LMUL=1 type back into the larger vector (resolving to another subregister
1097+ // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
1098+ // to avoid allocating a large register group to hold our subvector.
1099+
1100+ // VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
1101+ // OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
1102+ // (in our case undisturbed). This means we can set up a subvector insertion
1103+ // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
1104+ // size of the subvector.
1105+ const LLT XLenTy (STI.getXLenVT ());
1106+ LLT InterLitTy = BigTy;
1107+ Register AlignedExtract = BigVec;
1108+ unsigned AlignedIdx = Idx - RemIdx;
1109+ if (TypeSize::isKnownGT (BigTy.getSizeInBits (),
1110+ getLMUL1Ty (BigTy).getSizeInBits ())) {
1111+ InterLitTy = getLMUL1Ty (BigTy);
1112+ // Extract a subvector equal to the nearest full vector register type. This
1113+ // should resolve to a G_EXTRACT on a subreg.
1114+ AlignedExtract =
1115+ MIB.buildExtractSubvector (InterLitTy, BigVec, AlignedIdx).getReg (0 );
1116+ }
1117+
1118+ auto Insert = MIB.buildInsertSubvector (InterLitTy, MIB.buildUndef (InterLitTy),
1119+ LitVec, 0 );
1120+
1121+ auto [Mask, _] = buildDefaultVLOps (BigTy, MIB, MRI);
1122+ auto VL = MIB.buildVScale (XLenTy, LitTy.getElementCount ().getKnownMinValue ());
1123+
1124+ // If we're inserting into the lowest elements, use a tail undisturbed
1125+ // vmv.v.v.
1126+ MachineInstrBuilder Inserted;
1127+ bool NeedInsertSubvec =
1128+ TypeSize::isKnownGT (BigTy.getSizeInBits (), InterLitTy.getSizeInBits ());
1129+ Register InsertedDst =
1130+ NeedInsertSubvec ? MRI.createGenericVirtualRegister (InterLitTy) : Dst;
1131+ if (RemIdx == 0 ) {
1132+ Inserted = MIB.buildInstr (RISCV::G_VMV_V_V_VL, {InsertedDst},
1133+ {AlignedExtract, Insert, VL});
1134+ } else {
1135+ auto SlideupAmt = MIB.buildVScale (XLenTy, RemIdx);
1136+ // Construct the vector length corresponding to RemIdx + length(LitTy).
1137+ VL = MIB.buildAdd (XLenTy, SlideupAmt, VL);
1138+ // Use tail agnostic policy if we're inserting over InterLitTy's tail.
1139+ ElementCount EndIndex =
1140+ ElementCount::getScalable (RemIdx) + LitTy.getElementCount ();
1141+ uint64_t Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
1142+ if (STI.expandVScale (EndIndex) ==
1143+ STI.expandVScale (InterLitTy.getElementCount ()))
1144+ Policy = RISCVII::TAIL_AGNOSTIC;
1145+
1146+ Inserted =
1147+ MIB.buildInstr (RISCV::G_VSLIDEUP_VL, {InsertedDst},
1148+ {AlignedExtract, Insert, SlideupAmt, Mask, VL, Policy});
1149+ }
1150+
1151+ // If required, insert this subvector back into the correct vector register.
1152+ // This should resolve to an INSERT_SUBREG instruction.
1153+ if (NeedInsertSubvec)
1154+ MIB.buildInsertSubvector (Dst, BigVec, Inserted, AlignedIdx);
1155+
1156+ MI.eraseFromParent ();
1157+ return true ;
1158+ }
1159+
10281160bool RISCVLegalizerInfo::legalizeCustom (
10291161 LegalizerHelper &Helper, MachineInstr &MI,
10301162 LostDebugLocObserver &LocObserver) const {
@@ -1092,6 +1224,8 @@ bool RISCVLegalizerInfo::legalizeCustom(
10921224 return legalizeSplatVector (MI, MIRBuilder);
10931225 case TargetOpcode::G_EXTRACT_SUBVECTOR:
10941226 return legalizeExtractSubvector (MI, MIRBuilder);
1227+ case TargetOpcode::G_INSERT_SUBVECTOR:
1228+ return legalizeInsertSubvector (MI, Helper, MIRBuilder);
10951229 case TargetOpcode::G_LOAD:
10961230 case TargetOpcode::G_STORE:
10971231 return legalizeLoadStore (MI, Helper, MIRBuilder);
0 commit comments