- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
Add SME2 builtins for zero { zt0 } #72274
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
| @llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-clang Author: Matthew Devereau (MDevereau) ChangesFull diff: https://github.com/llvm/llvm-project/pull/72274.diff 8 Files Affected: 
 diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index fb3f54ecff95080..189a0cafdc3370e 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -305,4 +305,9 @@ defm SVSUB : ZAAddSub<"sub">;
 let TargetGuard = "sme2" in {
   def SVLDR_ZT : Inst<"svldr_zt", "viQ", "", MergeNone, "aarch64_sme_ldr_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
   def SVSTR_ZT : Inst<"svstr_zt", "vi%", "", MergeNone, "aarch64_sme_str_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
+
+//
+// Zero ZT0
+//
+  def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>;
 }
diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
new file mode 100644
index 000000000000000..4ea26119301cab2
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_zero_zt.c
@@ -0,0 +1,32 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme_draft_spec_subject_to_change.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1) A1
+#else
+#define SVE_ACLE_FUNC(A1) A1
+#endif
+
+// CHECK-LABEL: @test_svzero_zt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.zt(i32 0)
+// CHECK-NEXT:    ret void
+//
+// CPP-CHECK-LABEL: @_Z14test_svzero_ztv(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    tail call void @llvm.aarch64.sme.zero.zt(i32 0)
+// CPP-CHECK-NEXT:    ret void
+//
+void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za  {
+  svzero_zt(0);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 9164604f7d78cbc..2d6065edbd3554e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3457,4 +3457,8 @@ let TargetPrefix = "aarch64" in {
   def int_aarch64_sme_ldr_zt : SME_LDR_STR_Intrinsic;
   def int_aarch64_sme_str_zt : SME_LDR_STR_Intrinsic;
 
+  //
+  //  Zero ZT0
+  //
+  def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrWriteMem]>;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 289c2f4409f9532..7f477a524f04709 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2746,19 +2746,25 @@ AArch64TargetLowering::EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const {
   return BB;
 }
 
-MachineBasicBlock *AArch64TargetLowering::EmitZTSpillFill(MachineInstr &MI,
-                                                          MachineBasicBlock *BB,
-                                                          bool IsSpill) const {
+MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr &MI,
+                                                      MachineBasicBlock *BB,
+                                                      unsigned Opcode,
+                                                      bool IsZTDest) const {
   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
   MachineInstrBuilder MIB;
-  if (IsSpill) {
-    MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::STR_TX));
-    MIB.addReg(MI.getOperand(0).getReg());
-  } else
-    MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::LDR_TX),
+
+  if (IsZTDest)
+    MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode),
                   MI.getOperand(0).getReg());
-  MIB.add(MI.getOperand(1)); // Base
-  MI.eraseFromParent();      // The pseudo is gone now.
+  else {
+    MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opcode));
+    MIB.addReg(MI.getOperand(0).getReg());
+  }
+
+  for (unsigned I = 1; I < MI.getNumOperands(); ++I)
+    MIB.add(MI.getOperand(I));
+
+  MI.eraseFromParent(); // The pseudo is gone now.
   return BB;
 }
 
@@ -2879,11 +2885,13 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
   case AArch64::LDR_ZA_PSEUDO:
     return EmitFill(MI, BB);
   case AArch64::LDR_TX_PSEUDO:
-    return EmitZTSpillFill(MI, BB, /*IsSpill=*/false);
+    return EmitZTInstr(MI, BB, AArch64::LDR_TX, /*IsZTDest=*/true);
   case AArch64::STR_TX_PSEUDO:
-    return EmitZTSpillFill(MI, BB, /*IsSpill=*/true);
+    return EmitZTInstr(MI, BB, AArch64::STR_TX, /*IsZTDest=*/false);
   case AArch64::ZERO_M_PSEUDO:
     return EmitZero(MI, BB);
+  case AArch64::ZERO_T_PSEUDO:
+    return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*IsZTDest=*/true);
   }
 }
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index d96e5977de0f4c7..d49f5f35f573142 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -608,8 +608,8 @@ class AArch64TargetLowering : public TargetLowering {
   MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
                                  MachineInstr &MI, MachineBasicBlock *BB,
                                  bool HasTile) const;
-  MachineBasicBlock *EmitZTSpillFill(MachineInstr &MI, MachineBasicBlock *BB,
-                                     bool IsSpill) const;
+  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
+                                 unsigned Opcode, bool IsZTDest) const;
   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
 
   MachineBasicBlock *
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index fcfa5f82a3809c2..84ec88d4fd49b69 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -539,7 +539,7 @@ defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001, int_aarch64_sme_smops
 defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100, int_aarch64_sme_umopa_za32>;
 defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops_za32>;
 
-def ZERO_T : sme2_zero_zt<"zero", 0b0001>;
+defm ZERO_T : sme2_zero_zt<"zero", 0b0001>;
 
 defm LDR_TX : sme2_spill_fill_vector<"ldr", 0b01111100, int_aarch64_sme_ldr_zt>;
 defm STR_TX : sme2_spill_fill_vector<"str", 0b11111100, int_aarch64_sme_str_zt>;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index f54d898aa69f7cd..bfad3f1b540b8d1 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -3114,6 +3114,17 @@ class sme2_zero_zt<string mnemonic, bits<4> opc>
   let Inst{3-0}  = opc;
 }
 
+multiclass sme2_zero_zt<string mnemonic, bits<4> opc> {
+  def NAME : sme2_zero_zt<mnemonic, opc>;
+  def NAME # _PSEUDO
+        : Pseudo<(outs), (ins ZTR:$ZT), []>, Sched<[]> {
+    // Translated to actual instruction in AArch64ISelLowering.cpp
+    let usesCustomInserter = 1;
+  }
+  def : Pat<(int_aarch64_sme_zero_zt (imm_to_zt untyped:$zt)),
+          (!cast<Instruction>(NAME # _PSEUDO) $zt)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SME2 lookup table load/store
 class sme2_spill_fill_vector<string mnemonic, bits<8> opc>
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll
new file mode 100644
index 000000000000000..14a4dba2466bf3e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-zero-zt.ll
@@ -0,0 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 < %s | FileCheck %s
+
+define void @zero_zt0() {
+; CHECK-LABEL: zero_zt0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    zero { zt0 }
+; CHECK-NEXT:    ret
+    call void @llvm.aarch64.sme.zero.zt(i32 0)
+    ret void
+}
+
+declare void @llvm.aarch64.sme.zero.zt(i32)
 | 
e034691    to
    e6240fa      
    Compare
  
            
          
                clang/include/clang/Basic/arm_sme.td
              
                Outdated
          
        
      There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA, IsPreservesZA], [ImmCheck<0, ImmCheck0_0>]>; | |
| def SVZERO_ZT : Inst<"svzero_zt", "vi", "", MergeNone, "aarch64_sme_zero_zt", [IsOverloadNone, IsStreamingCompatible, IsSharedZA], [ImmCheck<0, ImmCheck0_0>]>; | 
While this will change with PR: Generalise the SME state management attributes, in the current version of the spec ZT is considered part of ZA, so zeroing ZT0 is not preserving ZA.
See https://github.com/ARM-software/acle/blob/main/main/acle.md#__arm_preserves_za:
ZT state is also considered preserved when a function is marked with arm_preserves_za.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we write this as:
 MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->getOpcode())
           .addReg(MI.getOperand(0).getReg(), IsZTDest ? RegState::Define : 0);
 for (unsigned I = 1; I < MI.getNumOperands(); ++I)
     MIB.add(MI.getOperand(I));
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've rebased this patch which includes a refactor of this function from 5fe7ae8. Your suggestion is still an improve to that though so I've added it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit:
| bool IsZTDest) const { | |
| bool Op0IsDef) const { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.
Patch by: Kerry McLaughlin [email protected]
e6240fa    to
    a9a5e7c      
    Compare
  
    There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with comment addressed.
| // CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero.zt(i32 0) | ||
| // CPP-CHECK-NEXT: ret void | ||
| // | ||
| void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { | |
| void test_svzero_zt(void) __arm_streaming_compatible __arm_shared_za __arm_preserves_za { | 
This should actually lead to a diagnostic (but I don't think we've implemented those yet), since svzero_zt doesn't preserve ZA.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed it. I didn't see this fail when testing it locally when I expected it to, so what you say is correct.
Patch by: Kerry McLaughlin [email protected]