diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2652,6 +2652,29 @@
   def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i64_ty]>;
 
+  class SME_OuterProduct_Intrinsic
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i64_ty,
+           LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+           LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+           LLVMMatchType<0>,
+           llvm_anyvector_ty]>;
+
+  def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic;
+
+  def int_aarch64_sme_mopa_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_mops_wide : SME_OuterProduct_Intrinsic;
+
+  def int_aarch64_sme_smopa_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_smops_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_umopa_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_umops_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_sumopa_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_sumops_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic;
+  def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic;
+
   //
   // Counting elements
   //
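Note: each intrinsic takes the ZA tile index as an i64 immediate, two governing predicates, and the two vector multiplicands; the overloaded vector type fixes the element size, and the predicates must have a matching element count. A minimal IR use (function name illustrative), mirroring the tests added at the end of this patch:

declare void @llvm.aarch64.sme.mopa.nxv4f32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

; Expected to lower to: fmopa za0.s, p0/m, p1/m, z0.s, z1.s
define void @example_fmopa(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
  call void @llvm.aarch64.sme.mopa.nxv4f32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
  ret void
}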
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -563,7 +563,8 @@
                               MachineInstr &MI, MachineBasicBlock *BB) const;
   MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
-
+  MachineBasicBlock *EmitMopa(unsigned Opc, unsigned BaseReg, MachineInstr &MI,
+                              MachineBasicBlock *BB) const;
   MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
                                             MachineInstr &MI,
                                             MachineBasicBlock *BB) const;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2372,6 +2372,23 @@
   return BB;
 }
 
+MachineBasicBlock *
+AArch64TargetLowering::EmitMopa(unsigned Opc, unsigned BaseReg,
+                                MachineInstr &MI, MachineBasicBlock *BB) const {
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
+
+  MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
+  MIB.addReg(BaseReg + MI.getOperand(0).getImm());
+  MIB.add(MI.getOperand(1)); // pn
+  MIB.add(MI.getOperand(2)); // pm
+  MIB.add(MI.getOperand(3)); // zn
+  MIB.add(MI.getOperand(4)); // zm
+
+  MI.eraseFromParent(); // The pseudo is gone now.
+  return BB;
+}
+
 MachineBasicBlock *
 AArch64TargetLowering::EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
                                               MachineInstr &MI,
@@ -2459,6 +2476,54 @@
     return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB);
   case AArch64::LDR_ZA_PSEUDO:
     return EmitFill(MI, BB);
+  case AArch64::BFMOPA_MPPZZ_PSEUDO:
+    return EmitMopa(AArch64::BFMOPA_MPPZZ, AArch64::ZAS0, MI, BB);
+  case AArch64::BFMOPS_MPPZZ_PSEUDO:
+    return EmitMopa(AArch64::BFMOPS_MPPZZ, AArch64::ZAS0, MI, BB);
+  case AArch64::FMOPAL_MPPZZ_PSEUDO:
+    return EmitMopa(AArch64::FMOPAL_MPPZZ, AArch64::ZAS0, MI, BB);
+  case AArch64::FMOPSL_MPPZZ_PSEUDO:
+    return EmitMopa(AArch64::FMOPSL_MPPZZ, AArch64::ZAS0, MI, BB);
+  case AArch64::FMOPA_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::FMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::FMOPS_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::FMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::FMOPA_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::FMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::FMOPS_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::FMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::SMOPA_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::SMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::SMOPS_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::SMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::UMOPA_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::UMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::UMOPS_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::UMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::SUMOPA_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::SUMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::SUMOPS_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::SUMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::USMOPA_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::USMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::USMOPS_MPPZZ_S_PSEUDO:
+    return EmitMopa(AArch64::USMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::SMOPA_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::SMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::SMOPS_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::SMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::UMOPA_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::UMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::UMOPS_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::UMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::SUMOPA_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::SUMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::SUMOPS_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::SUMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::USMOPA_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::USMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::USMOPS_MPPZZ_D_PSEUDO:
+    return EmitMopa(AArch64::USMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB);
   case AArch64::INSERT_MXIPZ_H_PSEUDO_B:
     return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_B, AArch64::ZAB0,
                                   MI, BB);
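Note: EmitMopa expands the pseudo once the tile index is a known immediate: operand 0 is added to the tile base register it is given (ZAS0 for 32-bit tiles, ZAD0 for 64-bit tiles), and the resulting tile register is both defined and used, since the outer product accumulates into it. As an illustrative sketch (cf. the smopa_s test below), tile index 2 with a 32-bit accumulator resolves to ZAS0 + 2 = ZAS2:

declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)

; Expected to lower to: smopa za2.s, p0/m, p1/m, z0.b, z1.b
define void @example_tile2(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
  call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
  ret void
}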
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -38,41 +38,41 @@
 // Outer products
 //===----------------------------------------------------------------------===//
 
-defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa">;
-defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops">;
+defm BFMOPA_MPPZZ : sme_bf16_outer_product<0b0, "bfmopa", int_aarch64_sme_mopa_wide>;
+defm BFMOPS_MPPZZ : sme_bf16_outer_product<0b1, "bfmops", int_aarch64_sme_mops_wide>;
 
-def FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa">;
-def FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops">;
+defm FMOPA_MPPZZ_S : sme_outer_product_fp32<0b0, "fmopa", int_aarch64_sme_mopa>;
+defm FMOPS_MPPZZ_S : sme_outer_product_fp32<0b1, "fmops", int_aarch64_sme_mops>;
 }
 
 let Predicates = [HasSMEF64] in {
-def FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa">;
-def FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops">;
+defm FMOPA_MPPZZ_D : sme_outer_product_fp64<0b0, "fmopa", int_aarch64_sme_mopa>;
+defm FMOPS_MPPZZ_D : sme_outer_product_fp64<0b1, "fmops", int_aarch64_sme_mops>;
 }
 
 let Predicates = [HasSME] in {
-defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa">;
-defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops">;
-
-def SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa">;
-def SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops">;
-def UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa">;
-def UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops">;
-def SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa">;
-def SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops">;
-def USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa">;
-def USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops">;
+defm FMOPAL_MPPZZ : sme_f16_outer_product<0b0, "fmopa", int_aarch64_sme_mopa_wide>;
+defm FMOPSL_MPPZZ : sme_f16_outer_product<0b1, "fmops", int_aarch64_sme_mops_wide>;
+
+defm SMOPA_MPPZZ_S : sme_int_outer_product_i32<0b000, "smopa", int_aarch64_sme_smopa_wide>;
+defm SMOPS_MPPZZ_S : sme_int_outer_product_i32<0b001, "smops", int_aarch64_sme_smops_wide>;
+defm UMOPA_MPPZZ_S : sme_int_outer_product_i32<0b110, "umopa", int_aarch64_sme_umopa_wide>;
+defm UMOPS_MPPZZ_S : sme_int_outer_product_i32<0b111, "umops", int_aarch64_sme_umops_wide>;
+defm SUMOPA_MPPZZ_S : sme_int_outer_product_i32<0b010, "sumopa", int_aarch64_sme_sumopa_wide>;
+defm SUMOPS_MPPZZ_S : sme_int_outer_product_i32<0b011, "sumops", int_aarch64_sme_sumops_wide>;
+defm USMOPA_MPPZZ_S : sme_int_outer_product_i32<0b100, "usmopa", int_aarch64_sme_usmopa_wide>;
+defm USMOPS_MPPZZ_S : sme_int_outer_product_i32<0b101, "usmops", int_aarch64_sme_usmops_wide>;
 }
 
 let Predicates = [HasSMEI64] in {
-def SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa">;
-def SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops">;
-def UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa">;
-def UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops">;
-def SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa">;
-def SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops">;
-def USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa">;
-def USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops">;
+defm SMOPA_MPPZZ_D : sme_int_outer_product_i64<0b000, "smopa", int_aarch64_sme_smopa_wide>;
+defm SMOPS_MPPZZ_D : sme_int_outer_product_i64<0b001, "smops", int_aarch64_sme_smops_wide>;
+defm UMOPA_MPPZZ_D : sme_int_outer_product_i64<0b110, "umopa", int_aarch64_sme_umopa_wide>;
+defm UMOPS_MPPZZ_D : sme_int_outer_product_i64<0b111, "umops", int_aarch64_sme_umops_wide>;
+defm SUMOPA_MPPZZ_D : sme_int_outer_product_i64<0b010, "sumopa", int_aarch64_sme_sumopa_wide>;
+defm SUMOPS_MPPZZ_D : sme_int_outer_product_i64<0b011, "sumops", int_aarch64_sme_sumops_wide>;
+defm USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa", int_aarch64_sme_usmopa_wide>;
+defm USMOPS_MPPZZ_D : sme_int_outer_product_i64<0b101, "usmops", int_aarch64_sme_usmops_wide>;
 }
 
 let Predicates = [HasSME] in {
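Note: the *_wide intrinsics select the widening instruction forms, where the source elements are narrower than the accumulator tile; for example bf16 sources accumulate into a 32-bit tile (cf. the bfmopa test below):

declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)

; Expected to lower to: bfmopa za0.s, p0/m, p1/m, z0.h, z1.h (.h sources, .s tile)
define void @example_widening(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
  call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
  ret void
}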
diff --git a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
--- a/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedTSV110.td
@@ -25,7 +25,8 @@
   let CompleteModel = 1;
 
   list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
-                                                    PAUnsupported.F);
+                                                    PAUnsupported.F,
+                                                    SMEUnsupported.F);
 }
 
 // Define each kind of processor resource and number available on the TSV110,
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -28,6 +28,14 @@
 // SME Outer Products
 //===----------------------------------------------------------------------===//
 
+class sme_outer_product_pseudo<ZPRRegOp zpr_ty>
+    : Pseudo<(outs), (ins i64imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm,
+                          zpr_ty:$zn, zpr_ty:$zm), []>,
+      Sched<[]> {
+  // Translated to the actual instructions in AArch64ISelLowering.cpp
+  let usesCustomInserter = 1;
+}
+
 class sme_fp_outer_product_inst<bit S, bit sz, MatrixTileOperand za_ty,
                                 ZPRRegOp zpr_ty, string mnemonic>
     : I<(outs za_ty:$ZAda),
@@ -52,17 +60,31 @@
   let Constraints = "$ZAda = $_ZAda";
 }
 
-class sme_outer_product_fp32<bit S, string mnemonic>
-    : sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
-  bits<2> ZAda;
-  let Inst{1-0} = ZAda;
-  let Inst{2} = 0b0;
+multiclass sme_outer_product_fp32<bit S, string mnemonic, SDPatternOperator op> {
+  def NAME : sme_fp_outer_product_inst<S, 0b0, TileOp32, ZPR32, mnemonic> {
+    bits<2> ZAda;
+    let Inst{1-0} = ZAda;
+    let Inst{2} = 0b0;
+  }
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32>;
+
+  def : Pat<(op imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+                (nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
 }
 
-class sme_outer_product_fp64<bit S, string mnemonic>
-    : sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
-  bits<3> ZAda;
-  let Inst{2-0} = ZAda;
+multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
+  def NAME : sme_fp_outer_product_inst<S, 0b1, TileOp64, ZPR64, mnemonic> {
+    bits<3> ZAda;
+    let Inst{2-0} = ZAda;
+  }
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64>;
+
+  def : Pat<(op imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+                (nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
 }
 
 class sme_int_outer_product_inst<bits<3> opc, bit sz, MatrixTileOperand za_ty,
 
-class sme_int_outer_product_i32<bits<3> opc, string mnemonic>
-    : sme_int_outer_product_inst<opc, 0b0, TileOp32, ZPR8, mnemonic> {
-  bits<2> ZAda;
-  let Inst{1-0} = ZAda;
-  let Inst{2} = 0b0;
+multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
+                                     SDPatternOperator op> {
+  def NAME : sme_int_outer_product_inst<opc, 0b0, TileOp32, ZPR8, mnemonic> {
+    bits<2> ZAda;
+    let Inst{1-0} = ZAda;
+    let Inst{2} = 0b0;
+  }
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8>;
+
+  def : Pat<(op imm0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
+                (nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
 }
 
-class sme_int_outer_product_i64<bits<3> opc, string mnemonic>
-    : sme_int_outer_product_inst<opc, 0b1, TileOp64, ZPR16, mnemonic> {
-  bits<3> ZAda;
-  let Inst{2-0} = ZAda;
+multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
+                                     SDPatternOperator op> {
+  def NAME : sme_int_outer_product_inst<opc, 0b1, TileOp64, ZPR16, mnemonic> {
+    bits<3> ZAda;
+    let Inst{2-0} = ZAda;
+  }
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
+
+  def : Pat<(op imm0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+                (nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>;
 }
 
 class sme_outer_product_widening_inst<bit op, bit S, string mnemonic>
@@ -131,12 +169,24 @@
   let Constraints = "$ZAda = $_ZAda";
 }
 
-multiclass sme_bf16_outer_product<bit S, string mnemonic> {
-  def : sme_outer_product_widening_inst<0b0, S, mnemonic>;
+multiclass sme_bf16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
+  def NAME : sme_outer_product_widening_inst<0b0, S, mnemonic>;
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
+
+  def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+                (nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
 }
 
-multiclass sme_f16_outer_product<bit S, string mnemonic> {
-  def : sme_outer_product_widening_inst<0b1, S, mnemonic>;
+multiclass sme_f16_outer_product<bit S, string mnemonic, SDPatternOperator op> {
+  def NAME : sme_outer_product_widening_inst<0b1, S, mnemonic>;
+
+  def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16>;
+
+  def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
+                (nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)),
+            (!cast<Instruction>(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>;
 }
 
 //===----------------------------------------------------------------------===//
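Note: the patterns only match immediate tile indices valid for the accumulator size -- imm0_3 (za0.s-za3.s) for 32-bit tiles and imm0_7 (za0.d-za7.d) for 64-bit tiles -- and the 64-bit integer/double-precision forms additionally require +sme-i64/+sme-f64, as exercised by the tests below. An illustrative example using the highest 64-bit tile:

declare void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)

; With +sme-i64, expected to lower to: usmopa za7.d, p0/m, p1/m, z0.h, z1.h
define void @example_highest_tile(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
  call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
  ret void
}

attributes #0 = { "target-features"="+sme-i64" }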
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+define void @bfmopa(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmopa:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmopa za0.s, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret void
+}
+
+define void @fmopa(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: fmopa:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmopa za1.s, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+  ret void
+}
+
+define void @smopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: smopa_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smopa za2.s, p0/m, p1/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret void
+}
+
+define void @smopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
+; CHECK-LABEL: smopa_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smopa za0.d, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret void
+}
+
+define void @umopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: umopa_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umopa za3.s, p0/m, p1/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret void
+}
+
+define void @umopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
+; CHECK-LABEL: umopa_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umopa za1.d, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret void
+}
+
+define void @fmopa_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: fmopa_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmopa za0.s, p0/m, p1/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.mopa.nxv4f32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+  ret void
+}
+
+define void @fmopa_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm) #1 {
+; CHECK-LABEL: fmopa_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmopa za2.d, p0/m, p1/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.mopa.nxv2f64(i64 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+  ret void
+}
+
+define void @sumopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: sumopa_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sumopa za1.s, p0/m, p1/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret void
+}
+
+define void @sumopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
+; CHECK-LABEL: sumopa_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sumopa za3.d, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret void
+}
+
+define void @usmopa_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: usmopa_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usmopa za2.s, p0/m, p1/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret void
+}
+
+define void @usmopa_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
+; CHECK-LABEL: usmopa_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usmopa za7.d, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret void
+}
+
+attributes #0 = { "target-features"="+sme-i64" }
+attributes #1 = { "target-features"="+sme-f64" }
+
+declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.mopa.nxv4f32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.mopa.nxv2f64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll
@@ -0,0 +1,126 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+define void @bfmops(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmops za0.s, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret void
+}
+
+define void @fmops(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm) {
+; CHECK-LABEL: fmops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmops za1.s, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.mops.wide.nxv8f16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x half> %zn, <vscale x 8 x half> %zm)
+  ret void
+}
+
+define void @smops_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: smops_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smops za2.s, p0/m, p1/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.smops.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret void
+}
+
+define void @smops_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
+; CHECK-LABEL: smops_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smops za0.d, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.smops.wide.nxv8i16(i64 0, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret void
+}
+
+define void @umops_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: umops_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umops za3.s, p0/m, p1/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.umops.wide.nxv16i8(i64 3, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret void
+}
+
+define void @umops_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
+; CHECK-LABEL: umops_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umops za1.d, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.umops.wide.nxv8i16(i64 1, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret void
+}
+
+define void @fmops_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm) {
+; CHECK-LABEL: fmops_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmops za0.s, p0/m, p1/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.mops.nxv4f32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x float> %zn, <vscale x 4 x float> %zm)
+  ret void
+}
+
+define void @fmops_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm) #1 {
+; CHECK-LABEL: fmops_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmops za2.d, p0/m, p1/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.mops.nxv2f64(i64 2, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x double> %zn, <vscale x 2 x double> %zm)
+  ret void
+}
+
+define void @sumops_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: sumops_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sumops za1.s, p0/m, p1/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64 1, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret void
+}
+
+define void @sumops_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
+; CHECK-LABEL: sumops_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sumops za3.d, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64 3, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret void
+}
+
+define void @usmops_s(<vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
+; CHECK-LABEL: usmops_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usmops za2.s, p0/m, p1/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64 2, <vscale x 16 x i1> %pn, <vscale x 16 x i1> %pm, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
+  ret void
+}
+
+define void @usmops_d(<vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) #0 {
+; CHECK-LABEL: usmops_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    usmops za7.d, p0/m, p1/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64 7, <vscale x 8 x i1> %pn, <vscale x 8 x i1> %pm, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
+  ret void
+}
+
+attributes #0 = { "target-features"="+sme-i64" }
+attributes #1 = { "target-features"="+sme-f64" }
+
+declare void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare void @llvm.aarch64.sme.mops.wide.nxv8f16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare void @llvm.aarch64.sme.mops.nxv4f32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare void @llvm.aarch64.sme.mops.nxv2f64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare void @llvm.aarch64.sme.smops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.smops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.umops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.umops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64, <vscale x 8 x i1>, <vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)