Index: llvm/include/llvm/IR/IntrinsicsAArch64.td =================================================================== --- llvm/include/llvm/IR/IntrinsicsAArch64.td +++ llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2650,6 +2650,7 @@ def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic; def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic; + def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i64_ty]>; // // Counting elements Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -567,6 +567,7 @@ MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg, MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2376,6 +2376,22 @@ return BB; } +MachineBasicBlock *AArch64TargetLowering::EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + MachineInstrBuilder MIB = + BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::ZERO_M)); + MIB.add(MI.getOperand(0)); // Mask + + unsigned Mask = MI.getOperand(0).getImm(); + for (unsigned I = 0; I < 8; I++) { + if (Mask & (1 << I)) + MIB.addDef(AArch64::ZAD0 + I, RegState::ImplicitDefine); + } + + MI.eraseFromParent(); // The pseudo is gone now. 
+ return BB; +} + MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { switch (MI.getOpcode()) { @@ -2458,6 +2474,8 @@ case AArch64::INSERT_MXIPZ_V_PSEUDO_Q: return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_Q, AArch64::ZAQ0, MI, BB); + case AArch64::ZERO_M_PSEUDO: + return EmitZero(MI, BB); } } Index: llvm/lib/Target/AArch64/SMEInstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/SMEInstrFormats.td +++ llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -947,8 +947,11 @@ // SME Zero //===----------------------------------------------------------------------===// +// NOTE: This definition isn't really correct because there are outputs, i.e. +// the tile registers being zeroed. We fix this up in a custom inserter that +// marks the appropriate registers as being implicitly defined. class sme_zero_inst<string mnemonic> - : I<(outs MatrixTileList:$imm), (ins), + : I<(outs), (ins MatrixTileList:$imm), mnemonic, "\t$imm", "", []>, Sched<[]> { bits<8> imm; let Inst{31-8} = 0b110000000000100000000000; @@ -973,6 +976,15 @@ def : InstAlias<"zero\t\\{za0.s,za1.s,za3.s\\}", (!cast<Instruction>(NAME) 0b10111011), 1>; def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11011101), 1>; def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast<Instruction>(NAME) 0b11101110), 1>; + + def NAME # _PSEUDO : Pseudo<(outs), (ins i64imm:$tilelist), []>, + Sched<[]> { + // Translated to the actual instructions in AArch64ISelLowering.cpp + let usesCustomInserter = 1; + } + + def : Pat<(int_aarch64_sme_zero imm:$imm), + (!cast<Instruction>(NAME # _PSEUDO) imm:$imm)>; } //===----------------------------------------------------------------------===// Index: llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll @@ -0,0 +1,524 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s + + +define void @zero() { +; CHECK-LABEL: zero: +; CHECK: // %bb.0: +; CHECK-NEXT: zero {} +; CHECK-NEXT: zero {za0.d} +; CHECK-NEXT: zero {za1.d} +; CHECK-NEXT: zero {za0.d, za1.d} +; CHECK-NEXT: zero {za2.d} +; CHECK-NEXT: zero {za0.d, za2.d} +; CHECK-NEXT: zero {za1.d, za2.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d} +; CHECK-NEXT: zero {za3.d} +; CHECK-NEXT: zero {za0.d, za3.d} +; CHECK-NEXT: zero {za1.d, za3.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d} +; CHECK-NEXT: zero {za2.d, za3.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d} +; CHECK-NEXT: zero {za4.d} +; CHECK-NEXT: zero {za0.s} +; CHECK-NEXT: zero {za1.d, za4.d} +; CHECK-NEXT: zero {za0.d, za1.d, za4.d} +; CHECK-NEXT: zero {za2.d, za4.d} +; CHECK-NEXT: zero {za0.d, za2.d, za4.d} +; CHECK-NEXT: zero {za1.d, za2.d, za4.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za4.d} +; CHECK-NEXT: zero {za3.d, za4.d} +; CHECK-NEXT: zero {za0.d, za3.d, za4.d} +; CHECK-NEXT: zero {za1.d, za3.d, za4.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za4.d} +; CHECK-NEXT: zero {za2.d, za3.d, za4.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za4.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za4.d} +; CHECK-NEXT: zero {za5.d} +; CHECK-NEXT: zero {za0.d, za5.d} +; CHECK-NEXT: zero {za1.s} +; CHECK-NEXT: zero {za0.d, za1.d, za5.d} +; CHECK-NEXT: zero {za2.d, za5.d} +; CHECK-NEXT: zero {za0.d, za2.d, za5.d} +; CHECK-NEXT: zero {za1.d, za2.d, za5.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za5.d} +; CHECK-NEXT: zero {za3.d, za5.d} +; CHECK-NEXT: zero {za0.d, za3.d, za5.d} +; CHECK-NEXT: zero {za1.d, za3.d, za5.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za5.d} +; CHECK-NEXT: zero {za2.d, za3.d, za5.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za5.d} +; CHECK-NEXT: 
zero {za1.d, za2.d, za3.d, za5.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za5.d} +; CHECK-NEXT: zero {za4.d, za5.d} +; CHECK-NEXT: zero {za0.d, za4.d, za5.d} +; CHECK-NEXT: zero {za1.d, za4.d, za5.d} +; CHECK-NEXT: zero {za0.s,za1.s} +; CHECK-NEXT: zero {za2.d, za4.d, za5.d} +; CHECK-NEXT: zero {za0.d, za2.d, za4.d, za5.d} +; CHECK-NEXT: zero {za1.d, za2.d, za4.d, za5.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za4.d, za5.d} +; CHECK-NEXT: zero {za3.d, za4.d, za5.d} +; CHECK-NEXT: zero {za0.d, za3.d, za4.d, za5.d} +; CHECK-NEXT: zero {za1.d, za3.d, za4.d, za5.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za4.d, za5.d} +; CHECK-NEXT: zero {za2.d, za3.d, za4.d, za5.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za4.d, za5.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d, za5.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za4.d, za5.d} +; CHECK-NEXT: zero {za6.d} +; CHECK-NEXT: zero {za0.d, za6.d} +; CHECK-NEXT: zero {za1.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za6.d} +; CHECK-NEXT: zero {za2.s} +; CHECK-NEXT: zero {za0.d, za2.d, za6.d} +; CHECK-NEXT: zero {za1.d, za2.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za6.d} +; CHECK-NEXT: zero {za3.d, za6.d} +; CHECK-NEXT: zero {za0.d, za3.d, za6.d} +; CHECK-NEXT: zero {za1.d, za3.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za6.d} +; CHECK-NEXT: zero {za2.d, za3.d, za6.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za6.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za6.d} +; CHECK-NEXT: zero {za4.d, za6.d} +; CHECK-NEXT: zero {za0.d, za4.d, za6.d} +; CHECK-NEXT: zero {za1.d, za4.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za4.d, za6.d} +; CHECK-NEXT: zero {za2.d, za4.d, za6.d} +; CHECK-NEXT: zero {za0.h} +; CHECK-NEXT: zero {za1.d, za2.d, za4.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za4.d, za6.d} +; CHECK-NEXT: zero {za3.d, za4.d, za6.d} +; CHECK-NEXT: zero {za0.d, za3.d, za4.d, za6.d} +; CHECK-NEXT: zero {za1.d, za3.d, za4.d, 
za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za4.d, za6.d} +; CHECK-NEXT: zero {za2.d, za3.d, za4.d, za6.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za4.d, za6.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za4.d, za6.d} +; CHECK-NEXT: zero {za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za5.d, za6.d} +; CHECK-NEXT: zero {za1.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za5.d, za6.d} +; CHECK-NEXT: zero {za2.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za2.d, za5.d, za6.d} +; CHECK-NEXT: zero {za1.s,za2.s} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za5.d, za6.d} +; CHECK-NEXT: zero {za3.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za3.d, za5.d, za6.d} +; CHECK-NEXT: zero {za1.d, za3.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za5.d, za6.d} +; CHECK-NEXT: zero {za2.d, za3.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za5.d, za6.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za5.d, za6.d} +; CHECK-NEXT: zero {za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za1.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za2.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za2.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za1.d, za2.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.s,za1.s,za2.s} +; CHECK-NEXT: zero {za3.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za3.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za1.d, za3.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za2.d, za3.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za4.d, za5.d, za6.d} +; CHECK-NEXT: zero {za7.d} +; CHECK-NEXT: zero {za0.d, za7.d} +; 
CHECK-NEXT: zero {za1.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za7.d} +; CHECK-NEXT: zero {za2.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za7.d} +; CHECK-NEXT: zero {za3.s} +; CHECK-NEXT: zero {za0.d, za3.d, za7.d} +; CHECK-NEXT: zero {za1.d, za3.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za7.d} +; CHECK-NEXT: zero {za2.d, za3.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za7.d} +; CHECK-NEXT: zero {za4.d, za7.d} +; CHECK-NEXT: zero {za0.d, za4.d, za7.d} +; CHECK-NEXT: zero {za1.d, za4.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za4.d, za7.d} +; CHECK-NEXT: zero {za2.d, za4.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za4.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za4.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za4.d, za7.d} +; CHECK-NEXT: zero {za3.d, za4.d, za7.d} +; CHECK-NEXT: zero {za0.s,za3.s} +; CHECK-NEXT: zero {za1.d, za3.d, za4.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za4.d, za7.d} +; CHECK-NEXT: zero {za2.d, za3.d, za4.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za4.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za4.d, za7.d} +; CHECK-NEXT: zero {za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za5.d, za7.d} +; CHECK-NEXT: zero {za1.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za5.d, za7.d} +; CHECK-NEXT: zero {za2.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za5.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za5.d, za7.d} +; CHECK-NEXT: zero {za3.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za3.d, za5.d, za7.d} +; CHECK-NEXT: zero {za1.h} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za5.d, za7.d} +; CHECK-NEXT: zero {za2.d, za3.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, 
za5.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za5.d, za7.d} +; CHECK-NEXT: zero {za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za1.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za2.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za3.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za3.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za1.d, za3.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.s,za1.s,za3.s} +; CHECK-NEXT: zero {za2.d, za3.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za4.d, za5.d, za7.d} +; CHECK-NEXT: zero {za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za6.d, za7.d} +; CHECK-NEXT: zero {za2.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za6.d, za7.d} +; CHECK-NEXT: zero {za3.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za3.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za3.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za6.d, za7.d} +; CHECK-NEXT: zero {za2.s,za3.s} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za6.d, za7.d} +; CHECK-NEXT: zero {za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za2.d, 
za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za3.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za3.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za3.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za2.d, za3.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.s,za2.s,za3.s} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za4.d, za6.d, za7.d} +; CHECK-NEXT: zero {za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za2.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za3.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za3.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za3.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za2.d, za3.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.s,za2.s,za3.s} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za3.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za2.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za2.d, za4.d, za5.d, za6.d, za7.d} 
+; CHECK-NEXT: zero {za3.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za3.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za3.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za1.d, za3.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za2.d, za3.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za0.d, za2.d, za3.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d, za5.d, za6.d, za7.d} +; CHECK-NEXT: zero {za} +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.zero(i64 0) + call void @llvm.aarch64.sme.zero(i64 1) + call void @llvm.aarch64.sme.zero(i64 2) + call void @llvm.aarch64.sme.zero(i64 3) + call void @llvm.aarch64.sme.zero(i64 4) + call void @llvm.aarch64.sme.zero(i64 5) + call void @llvm.aarch64.sme.zero(i64 6) + call void @llvm.aarch64.sme.zero(i64 7) + call void @llvm.aarch64.sme.zero(i64 8) + call void @llvm.aarch64.sme.zero(i64 9) + call void @llvm.aarch64.sme.zero(i64 10) + call void @llvm.aarch64.sme.zero(i64 11) + call void @llvm.aarch64.sme.zero(i64 12) + call void @llvm.aarch64.sme.zero(i64 13) + call void @llvm.aarch64.sme.zero(i64 14) + call void @llvm.aarch64.sme.zero(i64 15) + call void @llvm.aarch64.sme.zero(i64 16) + call void @llvm.aarch64.sme.zero(i64 17) + call void @llvm.aarch64.sme.zero(i64 18) + call void @llvm.aarch64.sme.zero(i64 19) + call void @llvm.aarch64.sme.zero(i64 20) + call void @llvm.aarch64.sme.zero(i64 21) + call void @llvm.aarch64.sme.zero(i64 22) + call void @llvm.aarch64.sme.zero(i64 23) + call void @llvm.aarch64.sme.zero(i64 24) + call void @llvm.aarch64.sme.zero(i64 25) + call void @llvm.aarch64.sme.zero(i64 26) + call void @llvm.aarch64.sme.zero(i64 27) + call void @llvm.aarch64.sme.zero(i64 28) + call void @llvm.aarch64.sme.zero(i64 29) + call void @llvm.aarch64.sme.zero(i64 30) + call void @llvm.aarch64.sme.zero(i64 31) + call void @llvm.aarch64.sme.zero(i64 32) + call void @llvm.aarch64.sme.zero(i64 33) + call void @llvm.aarch64.sme.zero(i64 34) + 
call void @llvm.aarch64.sme.zero(i64 35) + call void @llvm.aarch64.sme.zero(i64 36) + call void @llvm.aarch64.sme.zero(i64 37) + call void @llvm.aarch64.sme.zero(i64 38) + call void @llvm.aarch64.sme.zero(i64 39) + call void @llvm.aarch64.sme.zero(i64 40) + call void @llvm.aarch64.sme.zero(i64 41) + call void @llvm.aarch64.sme.zero(i64 42) + call void @llvm.aarch64.sme.zero(i64 43) + call void @llvm.aarch64.sme.zero(i64 44) + call void @llvm.aarch64.sme.zero(i64 45) + call void @llvm.aarch64.sme.zero(i64 46) + call void @llvm.aarch64.sme.zero(i64 47) + call void @llvm.aarch64.sme.zero(i64 48) + call void @llvm.aarch64.sme.zero(i64 49) + call void @llvm.aarch64.sme.zero(i64 50) + call void @llvm.aarch64.sme.zero(i64 51) + call void @llvm.aarch64.sme.zero(i64 52) + call void @llvm.aarch64.sme.zero(i64 53) + call void @llvm.aarch64.sme.zero(i64 54) + call void @llvm.aarch64.sme.zero(i64 55) + call void @llvm.aarch64.sme.zero(i64 56) + call void @llvm.aarch64.sme.zero(i64 57) + call void @llvm.aarch64.sme.zero(i64 58) + call void @llvm.aarch64.sme.zero(i64 59) + call void @llvm.aarch64.sme.zero(i64 60) + call void @llvm.aarch64.sme.zero(i64 61) + call void @llvm.aarch64.sme.zero(i64 62) + call void @llvm.aarch64.sme.zero(i64 63) + call void @llvm.aarch64.sme.zero(i64 64) + call void @llvm.aarch64.sme.zero(i64 65) + call void @llvm.aarch64.sme.zero(i64 66) + call void @llvm.aarch64.sme.zero(i64 67) + call void @llvm.aarch64.sme.zero(i64 68) + call void @llvm.aarch64.sme.zero(i64 69) + call void @llvm.aarch64.sme.zero(i64 70) + call void @llvm.aarch64.sme.zero(i64 71) + call void @llvm.aarch64.sme.zero(i64 72) + call void @llvm.aarch64.sme.zero(i64 73) + call void @llvm.aarch64.sme.zero(i64 74) + call void @llvm.aarch64.sme.zero(i64 75) + call void @llvm.aarch64.sme.zero(i64 76) + call void @llvm.aarch64.sme.zero(i64 77) + call void @llvm.aarch64.sme.zero(i64 78) + call void @llvm.aarch64.sme.zero(i64 79) + call void @llvm.aarch64.sme.zero(i64 80) + call void 
@llvm.aarch64.sme.zero(i64 81) + call void @llvm.aarch64.sme.zero(i64 82) + call void @llvm.aarch64.sme.zero(i64 83) + call void @llvm.aarch64.sme.zero(i64 84) + call void @llvm.aarch64.sme.zero(i64 85) + call void @llvm.aarch64.sme.zero(i64 86) + call void @llvm.aarch64.sme.zero(i64 87) + call void @llvm.aarch64.sme.zero(i64 88) + call void @llvm.aarch64.sme.zero(i64 89) + call void @llvm.aarch64.sme.zero(i64 90) + call void @llvm.aarch64.sme.zero(i64 91) + call void @llvm.aarch64.sme.zero(i64 92) + call void @llvm.aarch64.sme.zero(i64 93) + call void @llvm.aarch64.sme.zero(i64 94) + call void @llvm.aarch64.sme.zero(i64 95) + call void @llvm.aarch64.sme.zero(i64 96) + call void @llvm.aarch64.sme.zero(i64 97) + call void @llvm.aarch64.sme.zero(i64 98) + call void @llvm.aarch64.sme.zero(i64 99) + call void @llvm.aarch64.sme.zero(i64 100) + call void @llvm.aarch64.sme.zero(i64 101) + call void @llvm.aarch64.sme.zero(i64 102) + call void @llvm.aarch64.sme.zero(i64 103) + call void @llvm.aarch64.sme.zero(i64 104) + call void @llvm.aarch64.sme.zero(i64 105) + call void @llvm.aarch64.sme.zero(i64 106) + call void @llvm.aarch64.sme.zero(i64 107) + call void @llvm.aarch64.sme.zero(i64 108) + call void @llvm.aarch64.sme.zero(i64 109) + call void @llvm.aarch64.sme.zero(i64 110) + call void @llvm.aarch64.sme.zero(i64 111) + call void @llvm.aarch64.sme.zero(i64 112) + call void @llvm.aarch64.sme.zero(i64 113) + call void @llvm.aarch64.sme.zero(i64 114) + call void @llvm.aarch64.sme.zero(i64 115) + call void @llvm.aarch64.sme.zero(i64 116) + call void @llvm.aarch64.sme.zero(i64 117) + call void @llvm.aarch64.sme.zero(i64 118) + call void @llvm.aarch64.sme.zero(i64 119) + call void @llvm.aarch64.sme.zero(i64 120) + call void @llvm.aarch64.sme.zero(i64 121) + call void @llvm.aarch64.sme.zero(i64 122) + call void @llvm.aarch64.sme.zero(i64 123) + call void @llvm.aarch64.sme.zero(i64 124) + call void @llvm.aarch64.sme.zero(i64 125) + call void @llvm.aarch64.sme.zero(i64 126) + call 
void @llvm.aarch64.sme.zero(i64 127) + call void @llvm.aarch64.sme.zero(i64 128) + call void @llvm.aarch64.sme.zero(i64 129) + call void @llvm.aarch64.sme.zero(i64 130) + call void @llvm.aarch64.sme.zero(i64 131) + call void @llvm.aarch64.sme.zero(i64 132) + call void @llvm.aarch64.sme.zero(i64 133) + call void @llvm.aarch64.sme.zero(i64 134) + call void @llvm.aarch64.sme.zero(i64 135) + call void @llvm.aarch64.sme.zero(i64 136) + call void @llvm.aarch64.sme.zero(i64 137) + call void @llvm.aarch64.sme.zero(i64 138) + call void @llvm.aarch64.sme.zero(i64 139) + call void @llvm.aarch64.sme.zero(i64 140) + call void @llvm.aarch64.sme.zero(i64 141) + call void @llvm.aarch64.sme.zero(i64 142) + call void @llvm.aarch64.sme.zero(i64 143) + call void @llvm.aarch64.sme.zero(i64 144) + call void @llvm.aarch64.sme.zero(i64 145) + call void @llvm.aarch64.sme.zero(i64 146) + call void @llvm.aarch64.sme.zero(i64 147) + call void @llvm.aarch64.sme.zero(i64 148) + call void @llvm.aarch64.sme.zero(i64 149) + call void @llvm.aarch64.sme.zero(i64 150) + call void @llvm.aarch64.sme.zero(i64 151) + call void @llvm.aarch64.sme.zero(i64 152) + call void @llvm.aarch64.sme.zero(i64 153) + call void @llvm.aarch64.sme.zero(i64 154) + call void @llvm.aarch64.sme.zero(i64 155) + call void @llvm.aarch64.sme.zero(i64 156) + call void @llvm.aarch64.sme.zero(i64 157) + call void @llvm.aarch64.sme.zero(i64 158) + call void @llvm.aarch64.sme.zero(i64 159) + call void @llvm.aarch64.sme.zero(i64 160) + call void @llvm.aarch64.sme.zero(i64 161) + call void @llvm.aarch64.sme.zero(i64 162) + call void @llvm.aarch64.sme.zero(i64 163) + call void @llvm.aarch64.sme.zero(i64 164) + call void @llvm.aarch64.sme.zero(i64 165) + call void @llvm.aarch64.sme.zero(i64 166) + call void @llvm.aarch64.sme.zero(i64 167) + call void @llvm.aarch64.sme.zero(i64 168) + call void @llvm.aarch64.sme.zero(i64 169) + call void @llvm.aarch64.sme.zero(i64 170) + call void @llvm.aarch64.sme.zero(i64 171) + call void 
@llvm.aarch64.sme.zero(i64 172) + call void @llvm.aarch64.sme.zero(i64 173) + call void @llvm.aarch64.sme.zero(i64 174) + call void @llvm.aarch64.sme.zero(i64 175) + call void @llvm.aarch64.sme.zero(i64 176) + call void @llvm.aarch64.sme.zero(i64 177) + call void @llvm.aarch64.sme.zero(i64 178) + call void @llvm.aarch64.sme.zero(i64 179) + call void @llvm.aarch64.sme.zero(i64 180) + call void @llvm.aarch64.sme.zero(i64 181) + call void @llvm.aarch64.sme.zero(i64 182) + call void @llvm.aarch64.sme.zero(i64 183) + call void @llvm.aarch64.sme.zero(i64 184) + call void @llvm.aarch64.sme.zero(i64 185) + call void @llvm.aarch64.sme.zero(i64 186) + call void @llvm.aarch64.sme.zero(i64 187) + call void @llvm.aarch64.sme.zero(i64 188) + call void @llvm.aarch64.sme.zero(i64 189) + call void @llvm.aarch64.sme.zero(i64 190) + call void @llvm.aarch64.sme.zero(i64 191) + call void @llvm.aarch64.sme.zero(i64 192) + call void @llvm.aarch64.sme.zero(i64 193) + call void @llvm.aarch64.sme.zero(i64 194) + call void @llvm.aarch64.sme.zero(i64 195) + call void @llvm.aarch64.sme.zero(i64 196) + call void @llvm.aarch64.sme.zero(i64 197) + call void @llvm.aarch64.sme.zero(i64 198) + call void @llvm.aarch64.sme.zero(i64 199) + call void @llvm.aarch64.sme.zero(i64 200) + call void @llvm.aarch64.sme.zero(i64 201) + call void @llvm.aarch64.sme.zero(i64 202) + call void @llvm.aarch64.sme.zero(i64 203) + call void @llvm.aarch64.sme.zero(i64 204) + call void @llvm.aarch64.sme.zero(i64 205) + call void @llvm.aarch64.sme.zero(i64 206) + call void @llvm.aarch64.sme.zero(i64 207) + call void @llvm.aarch64.sme.zero(i64 208) + call void @llvm.aarch64.sme.zero(i64 209) + call void @llvm.aarch64.sme.zero(i64 210) + call void @llvm.aarch64.sme.zero(i64 211) + call void @llvm.aarch64.sme.zero(i64 212) + call void @llvm.aarch64.sme.zero(i64 213) + call void @llvm.aarch64.sme.zero(i64 214) + call void @llvm.aarch64.sme.zero(i64 215) + call void @llvm.aarch64.sme.zero(i64 216) + call void 
@llvm.aarch64.sme.zero(i64 217) + call void @llvm.aarch64.sme.zero(i64 218) + call void @llvm.aarch64.sme.zero(i64 219) + call void @llvm.aarch64.sme.zero(i64 220) + call void @llvm.aarch64.sme.zero(i64 221) + call void @llvm.aarch64.sme.zero(i64 222) + call void @llvm.aarch64.sme.zero(i64 223) + call void @llvm.aarch64.sme.zero(i64 224) + call void @llvm.aarch64.sme.zero(i64 225) + call void @llvm.aarch64.sme.zero(i64 226) + call void @llvm.aarch64.sme.zero(i64 227) + call void @llvm.aarch64.sme.zero(i64 228) + call void @llvm.aarch64.sme.zero(i64 229) + call void @llvm.aarch64.sme.zero(i64 230) + call void @llvm.aarch64.sme.zero(i64 231) + call void @llvm.aarch64.sme.zero(i64 232) + call void @llvm.aarch64.sme.zero(i64 233) + call void @llvm.aarch64.sme.zero(i64 234) + call void @llvm.aarch64.sme.zero(i64 235) + call void @llvm.aarch64.sme.zero(i64 236) + call void @llvm.aarch64.sme.zero(i64 237) + call void @llvm.aarch64.sme.zero(i64 238) + call void @llvm.aarch64.sme.zero(i64 239) + call void @llvm.aarch64.sme.zero(i64 240) + call void @llvm.aarch64.sme.zero(i64 241) + call void @llvm.aarch64.sme.zero(i64 242) + call void @llvm.aarch64.sme.zero(i64 243) + call void @llvm.aarch64.sme.zero(i64 244) + call void @llvm.aarch64.sme.zero(i64 245) + call void @llvm.aarch64.sme.zero(i64 246) + call void @llvm.aarch64.sme.zero(i64 247) + call void @llvm.aarch64.sme.zero(i64 248) + call void @llvm.aarch64.sme.zero(i64 249) + call void @llvm.aarch64.sme.zero(i64 250) + call void @llvm.aarch64.sme.zero(i64 251) + call void @llvm.aarch64.sme.zero(i64 252) + call void @llvm.aarch64.sme.zero(i64 253) + call void @llvm.aarch64.sme.zero(i64 254) + call void @llvm.aarch64.sme.zero(i64 255) + ret void +} + +declare void @llvm.aarch64.sme.zero(i64)