Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2663,6 +2663,16 @@
   def int_aarch64_sme_usmopa_wide : SME_OuterProduct_Intrinsic;
   def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic;
 
+  class SME_AddVectorToTile_Intrinsic
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i64_ty,
+           LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+           LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+           llvm_anyvector_ty]>;
+
+  def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic;
+  def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic;
+
   //
   // Counting elements
   //
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -570,6 +570,9 @@
                                             MachineInstr &MI,
                                             MachineBasicBlock *BB) const;
   MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
+  MachineBasicBlock *EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
+                                         MachineInstr &MI,
+                                         MachineBasicBlock *BB) const;
 
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2426,6 +2426,27 @@
   return BB;
 }
 
+/// Replace the ADDHA/ADDVA pseudo \p MI with the real instruction \p Opc.
+/// The immediate in operand 0 of \p MI is added to \p BaseReg to pick the
+/// tile register, which is emitted both as the def and as a use, followed by
+/// operands 1-3 of the pseudo (pn, pm, zn). \p MI is erased; \p BB is returned.
+MachineBasicBlock *
+AArch64TargetLowering::EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
+                                           MachineInstr &MI,
+                                           MachineBasicBlock *BB) const {
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
+
+  MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
+  MIB.addReg(BaseReg + MI.getOperand(0).getImm());
+  MIB.add(MI.getOperand(1)); // pn
+  MIB.add(MI.getOperand(2)); // pm
+  MIB.add(MI.getOperand(3)); // zn
+
+  MI.eraseFromParent(); // The pseudo is gone now.
+  return BB;
+}
+
 MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *BB) const {
   switch (MI.getOpcode()) {
@@ -2558,6 +2579,14 @@
                                   BB);
   case AArch64::ZERO_M_PSEUDO:
     return EmitZero(MI, BB);
+  case AArch64::ADDHA_MPPZ_PSEUDO_S:
+    return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::ADDVA_MPPZ_PSEUDO_S:
+    return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::ADDHA_MPPZ_PSEUDO_D:
+    return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::ADDVA_MPPZ_PSEUDO_D:
+    return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_D, AArch64::ZAD0, MI, BB);
   }
 }
 
Index: llvm/lib/Target/AArch64/SMEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -227,6 +227,42 @@
   let Inst{2-0} = ZAda;
 }
 
+class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty>
+    : Pseudo<(outs),
+             (ins i64imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
+      Sched<[]> {
+  // Translated to the actual instructions in AArch64ISelLowering.cpp
+  let usesCustomInserter = 1;
+}
+
+def ADDHA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
+def ADDVA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
+
+let Predicates = [HasSMEI64] in {
+def ADDHA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
+def ADDVA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
+}
+
+def : Pat<(int_aarch64_sme_addha
+            imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+            (nxv4i32 ZPR32:$zn)),
+          (ADDHA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+def : Pat<(int_aarch64_sme_addva
+            imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+            (nxv4i32 ZPR32:$zn)),
+          (ADDVA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+
+let Predicates = [HasSMEI64] in {
+def : Pat<(int_aarch64_sme_addha
+            imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+            (nxv2i64 ZPR64:$zn)),
+          (ADDHA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+def : Pat<(int_aarch64_sme_addva
+            imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+            (nxv2i64 ZPR64:$zn)),
+          (ADDVA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SME Contiguous Loads
 //===----------------------------------------------------------------------===//
Index: llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme-i64 -verify-machineinstrs < %s | FileCheck %s
+
+define void @addha_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn) {
+; CHECK-LABEL: addha_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addha za0.s, p0/m, p1/m, z0.s
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.addha.nxv4i32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
+  ret void
+}
+
+define void @addva_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn) {
+; CHECK-LABEL: addva_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addva za3.s, p0/m, p1/m, z0.s
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.addva.nxv4i32(i64 3, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
+  ret void
+}
+
+define void @addha_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn) {
+; CHECK-LABEL: addha_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addha za0.d, p0/m, p1/m, z0.d
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.addha.nxv2i64(i64 0, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
+  ret void
+}
+
+define void @addva_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn) {
+; CHECK-LABEL: addva_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addva za7.d, p0/m, p1/m, z0.d
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.addva.nxv2i64(i64 7, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
+  ret void
+}
+
+declare void @llvm.aarch64.sme.addha.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.addha.nxv2i64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.addva.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.addva.nxv2i64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)