@@ -223,8 +223,8 @@ namespace {
223223 void HoistPostRA (MachineInstr *MI, unsigned Def, MachineLoop *CurLoop,
224224 MachineBasicBlock *CurPreheader);
225225
226- void ProcessMI (MachineInstr *MI, BitVector &PhysRegDefs ,
227- BitVector &PhysRegClobbers, SmallSet<int , 32 > &StoredFIs,
226+ void ProcessMI (MachineInstr *MI, BitVector &RUDefs, BitVector &RUClobbers ,
227+ SmallSet<int , 32 > &StoredFIs,
228228 SmallVectorImpl<CandidateInfo> &Candidates,
229229 MachineLoop *CurLoop);
230230
@@ -423,10 +423,47 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
423423 return false ;
424424}
425425
426+ static void applyBitsNotInRegMaskToRegUnitsMask (const TargetRegisterInfo &TRI,
427+ BitVector &RUs,
428+ const uint32_t *Mask) {
429+ // Iterate over the RegMask raw to avoid constructing a BitVector, which is
430+ // expensive as it implies dynamically allocating memory.
431+ //
432+ // We also work backwards.
433+ const unsigned NumRegs = TRI.getNumRegs ();
434+ const unsigned MaskWords = (NumRegs + 31 ) / 32 ;
435+ for (unsigned K = 0 ; K < MaskWords; ++K) {
436+ // We want to set the bits that aren't in RegMask, so flip it.
437+ uint32_t Word = ~Mask[K];
438+
439+ // Iterate all set bits, starting from the right.
440+ while (Word) {
441+ const unsigned SetBitIdx = countr_zero (Word);
442+
443+ // The bits are numbered from the LSB in each word.
444+ const unsigned PhysReg = (K * 32 ) + SetBitIdx;
445+
446+ // Clear the bit at SetBitIdx. Doing it this way appears to generate less
447+ // instructions on x86. This works because negating a number will flip all
448+ // the bits after SetBitIdx. So (Word & -Word) == (1 << SetBitIdx), but
449+ // faster.
450+ Word ^= Word & -Word;
451+
452+ if (PhysReg == NumRegs)
453+ return ;
454+
455+ if (PhysReg) {
456+ for (MCRegUnitIterator RUI (PhysReg, &TRI); RUI.isValid (); ++RUI)
457+ RUs.set (*RUI);
458+ }
459+ }
460+ }
461+ }
462+
426463// / Examine the instruction for potentai LICM candidate. Also
427464// / gather register def and frame object update information.
428- void MachineLICMBase::ProcessMI (MachineInstr *MI, BitVector &PhysRegDefs ,
429- BitVector &PhysRegClobbers ,
465+ void MachineLICMBase::ProcessMI (MachineInstr *MI, BitVector &RUDefs ,
466+ BitVector &RUClobbers ,
430467 SmallSet<int , 32 > &StoredFIs,
431468 SmallVectorImpl<CandidateInfo> &Candidates,
432469 MachineLoop *CurLoop) {
@@ -448,7 +485,7 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
448485 // We can't hoist an instruction defining a physreg that is clobbered in
449486 // the loop.
450487 if (MO.isRegMask ()) {
451- PhysRegClobbers. setBitsNotInMask ( MO.getRegMask ());
488+ applyBitsNotInRegMaskToRegUnitsMask (*TRI, RUClobbers, MO.getRegMask ());
452489 continue ;
453490 }
454491
@@ -460,16 +497,22 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
460497 assert (Reg.isPhysical () && " Not expecting virtual register!" );
461498
462499 if (!MO.isDef ()) {
463- if (Reg && (PhysRegDefs.test (Reg) || PhysRegClobbers.test (Reg)))
464- // If it's using a non-loop-invariant register, then it's obviously not
465- // safe to hoist.
466- HasNonInvariantUse = true ;
500+ if (!HasNonInvariantUse) {
501+ for (MCRegUnitIterator RUI (Reg, TRI); RUI.isValid (); ++RUI) {
502+ // If it's using a non-loop-invariant register, then it's obviously
503+ // not safe to hoist.
504+ if (RUDefs.test (*RUI) || RUClobbers.test (*RUI)) {
505+ HasNonInvariantUse = true ;
506+ break ;
507+ }
508+ }
509+ }
467510 continue ;
468511 }
469512
470513 if (MO.isImplicit ()) {
471- for (MCRegAliasIterator AI (Reg, TRI, true ); AI .isValid (); ++AI )
472- PhysRegClobbers .set (*AI );
514+ for (MCRegUnitIterator RUI (Reg, TRI); RUI .isValid (); ++RUI )
515+ RUClobbers .set (*RUI );
473516 if (!MO.isDead ())
474517 // Non-dead implicit def? This cannot be hoisted.
475518 RuledOut = true ;
@@ -488,19 +531,18 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
488531 // If we have already seen another instruction that defines the same
489532 // register, then this is not safe. Two defs is indicated by setting a
490533 // PhysRegClobbers bit.
491- for (MCRegAliasIterator AS (Reg, TRI, true ); AS.isValid (); ++AS) {
492- if (PhysRegDefs.test (*AS))
493- PhysRegClobbers.set (*AS);
534+ for (MCRegUnitIterator RUI (Reg, TRI); RUI.isValid (); ++RUI) {
535+ if (RUDefs.test (*RUI)) {
536+ RUClobbers.set (*RUI);
537+ RuledOut = true ;
538+ } else if (RUClobbers.test (*RUI)) {
539+ // MI defined register is seen defined by another instruction in
540+ // the loop, it cannot be a LICM candidate.
541+ RuledOut = true ;
542+ }
543+
544+ RUDefs.set (*RUI);
494545 }
495- // Need a second loop because MCRegAliasIterator can visit the same
496- // register twice.
497- for (MCRegAliasIterator AS (Reg, TRI, true ); AS.isValid (); ++AS)
498- PhysRegDefs.set (*AS);
499-
500- if (PhysRegClobbers.test (Reg))
501- // MI defined register is seen defined by another instruction in
502- // the loop, it cannot be a LICM candidate.
503- RuledOut = true ;
504546 }
505547
506548 // Only consider reloads for now and remats which do not have register
@@ -521,9 +563,9 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
521563 if (!Preheader)
522564 return ;
523565
524- unsigned NumRegs = TRI->getNumRegs ();
525- BitVector PhysRegDefs (NumRegs ); // Regs defined once in the loop.
526- BitVector PhysRegClobbers (NumRegs ); // Regs defined more than once.
566+ unsigned NumRegUnits = TRI->getNumRegUnits ();
567+ BitVector RUDefs (NumRegUnits ); // RUs defined once in the loop.
568+ BitVector RUClobbers (NumRegUnits ); // RUs defined more than once.
527569
528570 SmallVector<CandidateInfo, 32 > Candidates;
529571 SmallSet<int , 32 > StoredFIs;
@@ -540,22 +582,21 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
540582 // FIXME: That means a reload that're reused in successor block(s) will not
541583 // be LICM'ed.
542584 for (const auto &LI : BB->liveins ()) {
543- for (MCRegAliasIterator AI (LI.PhysReg , TRI, true ); AI .isValid (); ++AI )
544- PhysRegDefs .set (*AI );
585+ for (MCRegUnitIterator RUI (LI.PhysReg , TRI); RUI .isValid (); ++RUI )
586+ RUDefs .set (*RUI );
545587 }
546588
547589 // Funclet entry blocks will clobber all registers
548590 if (const uint32_t *Mask = BB->getBeginClobberMask (TRI))
549- PhysRegClobbers. setBitsNotInMask ( Mask);
591+ applyBitsNotInRegMaskToRegUnitsMask (*TRI, RUClobbers, Mask);
550592
551593 SpeculationState = SpeculateUnknown;
552594 for (MachineInstr &MI : *BB)
553- ProcessMI (&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates,
554- CurLoop);
595+ ProcessMI (&MI, RUDefs, RUClobbers, StoredFIs, Candidates, CurLoop);
555596 }
556597
557598 // Gather the registers read / clobbered by the terminator.
558- BitVector TermRegs (NumRegs );
599+ BitVector TermRUs (NumRegUnits );
559600 MachineBasicBlock::iterator TI = Preheader->getFirstTerminator ();
560601 if (TI != Preheader->end ()) {
561602 for (const MachineOperand &MO : TI->operands ()) {
@@ -564,8 +605,8 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
564605 Register Reg = MO.getReg ();
565606 if (!Reg)
566607 continue ;
567- for (MCRegAliasIterator AI (Reg, TRI, true ); AI .isValid (); ++AI )
568- TermRegs .set (*AI );
608+ for (MCRegUnitIterator RUI (Reg, TRI); RUI .isValid (); ++RUI )
609+ TermRUs .set (*RUI );
569610 }
570611 }
571612
@@ -583,24 +624,36 @@ void MachineLICMBase::HoistRegionPostRA(MachineLoop *CurLoop,
583624 continue ;
584625
585626 unsigned Def = Candidate.Def ;
586- if (!PhysRegClobbers.test (Def) && !TermRegs.test (Def)) {
587- bool Safe = true ;
588- MachineInstr *MI = Candidate.MI ;
589- for (const MachineOperand &MO : MI->all_uses ()) {
590- if (!MO.getReg ())
591- continue ;
592- Register Reg = MO.getReg ();
593- if (PhysRegDefs.test (Reg) ||
594- PhysRegClobbers.test (Reg)) {
627+ bool Safe = true ;
628+ for (MCRegUnitIterator RUI (Def, TRI); RUI.isValid (); ++RUI) {
629+ if (RUClobbers.test (*RUI) || TermRUs.test (*RUI)) {
630+ Safe = false ;
631+ break ;
632+ }
633+ }
634+
635+ if (!Safe)
636+ continue ;
637+
638+ MachineInstr *MI = Candidate.MI ;
639+ for (const MachineOperand &MO : MI->all_uses ()) {
640+ if (!MO.getReg ())
641+ continue ;
642+ for (MCRegUnitIterator RUI (MO.getReg (), TRI); RUI.isValid (); ++RUI) {
643+ if (RUDefs.test (*RUI) || RUClobbers.test (*RUI)) {
595644 // If it's using a non-loop-invariant register, then it's obviously
596645 // not safe to hoist.
597646 Safe = false ;
598647 break ;
599648 }
600649 }
601- if (Safe)
602- HoistPostRA (MI, Candidate.Def , CurLoop, CurPreheader);
650+
651+ if (!Safe)
652+ break ;
603653 }
654+
655+ if (Safe)
656+ HoistPostRA (MI, Candidate.Def , CurLoop, CurPreheader);
604657 }
605658}
606659
0 commit comments