@@ -98,22 +98,22 @@ class SIMemOpInfo final {
9898 bool IsCrossAddressSpaceOrdering = false ;
9999 bool IsVolatile = false ;
100100 bool IsNonTemporal = false ;
101-
102- SIMemOpInfo (AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
103- SIAtomicScope Scope = SIAtomicScope::SYSTEM,
104- SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC ,
105- SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL ,
106- bool IsCrossAddressSpaceOrdering = true ,
107- AtomicOrdering FailureOrdering =
108- AtomicOrdering::SequentiallyConsistent ,
109- bool IsVolatile = false ,
110- bool IsNonTemporal = false )
111- : Ordering(Ordering), FailureOrdering(FailureOrdering),
112- Scope (Scope ), OrderingAddrSpace(OrderingAddrSpace ),
113- InstrAddrSpace(InstrAddrSpace),
114- IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
115- IsVolatile(IsVolatile),
116- IsNonTemporal(IsNonTemporal ) {
101+ bool IsLastUse = false ;
102+
103+ SIMemOpInfo (
104+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent ,
105+ SIAtomicScope Scope = SIAtomicScope::SYSTEM ,
106+ SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC ,
107+ SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
108+ bool IsCrossAddressSpaceOrdering = true ,
109+ AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent ,
110+ bool IsVolatile = false , bool IsNonTemporal = false ,
111+ bool IsLastUse = false )
112+ : Ordering(Ordering ), FailureOrdering(FailureOrdering), Scope(Scope ),
113+ OrderingAddrSpace (OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
114+ IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
115+ IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal ),
116+ IsLastUse(IsLastUse ) {
117117
118118 if (Ordering == AtomicOrdering::NotAtomic) {
119119 assert (Scope == SIAtomicScope::NONE &&
@@ -201,6 +201,10 @@ class SIMemOpInfo final {
201201 return IsNonTemporal;
202202 }
203203
204+ // / \returns True if the memory access of the machine instruction used to
205+ // / create this SIMemOpInfo is marked as last use, false otherwise.
206+ bool isLastUse () const { return IsLastUse; }
207+
204208 // / \returns True if ordering constraint of the machine instruction used to
205209 // / create this SIMemOpInfo is unordered or higher, false otherwise.
206210 bool isAtomic () const {
@@ -305,12 +309,13 @@ class SICacheControl {
305309 SIAtomicAddrSpace AddrSpace) const = 0;
306310
307311 // / Update \p MI memory instruction of kind \p Op associated with address
308- // / spaces \p AddrSpace to indicate it is volatile and/or nontemporal. Return
309- // / true iff the instruction was modified.
312+ // / spaces \p AddrSpace to indicate it is volatile and/or
313+ // / nontemporal/last-use. Return true iff the instruction was modified.
310314 virtual bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
311315 SIAtomicAddrSpace AddrSpace,
312316 SIMemOp Op, bool IsVolatile,
313- bool IsNonTemporal) const = 0;
317+ bool IsNonTemporal,
318+ bool IsLastUse = false ) const = 0; // NOTE(review): a virtual function's default argument is chosen by the static type at the call site; the override definitions restate `= false` separately, so keep them in sync.
314319
315320 virtual bool expandSystemScopeStore (MachineBasicBlock::iterator &MI) const {
316321 return false ;
@@ -394,8 +399,8 @@ class SIGfx6CacheControl : public SICacheControl {
394399
395400 bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
396401 SIAtomicAddrSpace AddrSpace, SIMemOp Op,
397- bool IsVolatile,
398- bool IsNonTemporal ) const override ;
402+ bool IsVolatile, bool IsNonTemporal,
403+ bool IsLastUse ) const override ;
399404
400405 bool insertWait (MachineBasicBlock::iterator &MI,
401406 SIAtomicScope Scope,
@@ -447,8 +452,8 @@ class SIGfx90ACacheControl : public SIGfx7CacheControl {
447452
448453 bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
449454 SIAtomicAddrSpace AddrSpace, SIMemOp Op,
450- bool IsVolatile,
451- bool IsNonTemporal ) const override ;
455+ bool IsVolatile, bool IsNonTemporal,
456+ bool IsLastUse ) const override ;
452457
453458 bool insertWait (MachineBasicBlock::iterator &MI,
454459 SIAtomicScope Scope,
@@ -508,8 +513,8 @@ class SIGfx940CacheControl : public SIGfx90ACacheControl {
508513
509514 bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
510515 SIAtomicAddrSpace AddrSpace, SIMemOp Op,
511- bool IsVolatile,
512- bool IsNonTemporal ) const override ;
516+ bool IsVolatile, bool IsNonTemporal,
517+ bool IsLastUse ) const override ;
513518
514519 bool insertAcquire (MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
515520 SIAtomicAddrSpace AddrSpace, Position Pos) const override ;
@@ -552,8 +557,8 @@ class SIGfx10CacheControl : public SIGfx7CacheControl {
552557
553558 bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
554559 SIAtomicAddrSpace AddrSpace, SIMemOp Op,
555- bool IsVolatile,
556- bool IsNonTemporal ) const override ;
560+ bool IsVolatile, bool IsNonTemporal,
561+ bool IsLastUse ) const override ;
557562
558563 bool insertWait (MachineBasicBlock::iterator &MI,
559564 SIAtomicScope Scope,
@@ -578,8 +583,8 @@ class SIGfx11CacheControl : public SIGfx10CacheControl {
578583
579584 bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
580585 SIAtomicAddrSpace AddrSpace, SIMemOp Op,
581- bool IsVolatile,
582- bool IsNonTemporal ) const override ;
586+ bool IsVolatile, bool IsNonTemporal,
587+ bool IsLastUse ) const override ;
583588};
584589
585590class SIGfx12CacheControl : public SIGfx11CacheControl {
@@ -614,8 +619,8 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
614619
615620 bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
616621 SIAtomicAddrSpace AddrSpace, SIMemOp Op,
617- bool IsVolatile,
618- bool IsNonTemporal ) const override ;
622+ bool IsVolatile, bool IsNonTemporal,
623+ bool IsLastUse ) const override ;
619624
620625 bool expandSystemScopeStore (MachineBasicBlock::iterator &MI) const override ;
621626};
@@ -745,12 +750,14 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
745750 SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
746751 bool IsNonTemporal = true ;
747752 bool IsVolatile = false ;
753+ bool IsLastUse = false ;
748754
749755 // Validator should check whether or not MMOs cover the entire set of
750756 // locations accessed by the memory instruction.
751757 for (const auto &MMO : MI->memoperands ()) {
752758 IsNonTemporal &= MMO->isNonTemporal ();
753759 IsVolatile |= MMO->isVolatile ();
760+ IsLastUse |= MMO->getFlags () & MOLastUse;
754761 InstrAddrSpace |=
755762 toSIAtomicAddrSpace (MMO->getPointerInfo ().getAddrSpace ());
756763 AtomicOrdering OpOrdering = MMO->getSuccessOrdering ();
@@ -792,7 +799,7 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
792799 }
793800 return SIMemOpInfo (Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
794801 IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
795- IsNonTemporal);
802+ IsNonTemporal, IsLastUse );
796803}
797804
798805std::optional<SIMemOpInfo>
@@ -969,7 +976,7 @@ bool SIGfx6CacheControl::enableRMWCacheBypass(
969976
970977bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal (
971978 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
972- bool IsVolatile, bool IsNonTemporal) const {
979+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
973980 // Only handle load and store, not atomic read-modify-write instructions. The
974981 // latter use glc to indicate if the atomic returns a result and so must not
975982 // be used for cache control.
@@ -1322,7 +1329,7 @@ bool SIGfx90ACacheControl::enableRMWCacheBypass(
13221329
13231330bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal (
13241331 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1325- bool IsVolatile, bool IsNonTemporal) const {
1332+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
13261333 // Only handle load and store, not atomic read-modify-write instructions. The
13271334 // latter use glc to indicate if the atomic returns a result and so must not
13281335 // be used for cache control.
@@ -1624,7 +1631,7 @@ bool SIGfx940CacheControl::enableRMWCacheBypass(
16241631
16251632bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal (
16261633 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1627- bool IsVolatile, bool IsNonTemporal) const {
1634+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
16281635 // Only handle load and store, not atomic read-modify-write instructions. The
16291636 // latter use glc to indicate if the atomic returns a result and so must not
16301637 // be used for cache control.
@@ -1856,7 +1863,7 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
18561863
18571864bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal (
18581865 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1859- bool IsVolatile, bool IsNonTemporal) const {
1866+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
18601867
18611868 // Only handle load and store, not atomic read-modify-write instructions. The
18621869 // latter use glc to indicate if the atomic returns a result and so must not
@@ -2127,7 +2134,7 @@ bool SIGfx11CacheControl::enableLoadCacheBypass(
21272134
21282135bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal (
21292136 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
2130- bool IsVolatile, bool IsNonTemporal) const {
2137+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
21312138
21322139 // Only handle load and store, not atomic read-modify-write instructions. The
21332140 // latter use glc to indicate if the atomic returns a result and so must not
@@ -2379,7 +2386,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
23792386
23802387bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal (
23812388 MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
2382- bool IsVolatile, bool IsNonTemporal) const {
2389+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
23832390
23842391 // Only handle load and store, not atomic read-modify-write instructions.
23852392 assert (MI->mayLoad () ^ MI->mayStore ());
@@ -2392,7 +2399,10 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
23922399
23932400 bool Changed = false ;
23942401
2395- if (IsNonTemporal) {
2402+ if (IsLastUse) {
2403+ // Set last-use hint.
2404+ Changed |= setTH (MI, AMDGPU::CPol::TH_LU);
2405+ } else if (IsNonTemporal) {
23962406 // Set non-temporal hint for all cache levels.
23972407 Changed |= setTH (MI, AMDGPU::CPol::TH_NT);
23982408 }
@@ -2472,11 +2482,12 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
24722482 }
24732483
24742484 // Atomic instructions already bypass caches to the scope specified by the
2475- // SyncScope operand. Only non-atomic volatile and nontemporal instructions
2476- // need additional treatment.
2477- Changed |= CC->enableVolatileAndOrNonTemporal (MI, MOI.getInstrAddrSpace (),
2478- SIMemOp::LOAD, MOI.isVolatile (),
2479- MOI.isNonTemporal ());
2485+ // SyncScope operand. Only non-atomic volatile and nontemporal/last-use
2486+ // instructions need additional treatment.
2487+ Changed |= CC->enableVolatileAndOrNonTemporal (
2488+ MI, MOI.getInstrAddrSpace (), SIMemOp::LOAD, MOI.isVolatile (),
2489+ MOI.isNonTemporal (), MOI.isLastUse ());
2490+
24802491 return Changed;
24812492}
24822493
0 commit comments