Index: llvm/include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2649,4 +2649,14 @@
   def int_aarch64_sme_readq_vert : SME_TileToVector_Intrinsic;
   def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic;
   def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic;
+
+  class SME_AddVectorToTile_Intrinsic
+      : DefaultAttrsIntrinsic<[],
+          [llvm_i64_ty,
+           LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+           LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+           llvm_anyvector_ty]>;
+
+  def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic;
+  def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic;
 }
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -564,6 +564,9 @@
   MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg,
                                             MachineInstr &MI,
                                             MachineBasicBlock *BB) const;
+  MachineBasicBlock *EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
+                                         MachineInstr &MI,
+                                         MachineBasicBlock *BB) const;
 
   MachineBasicBlock *
   EmitInstrWithCustomInserter(MachineInstr &MI,
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2375,6 +2375,23 @@
   return BB;
 }
 
+MachineBasicBlock *
+AArch64TargetLowering::EmitAddVectorToTile(unsigned Opc, unsigned BaseReg,
+                                           MachineInstr &MI,
+                                           MachineBasicBlock *BB) const {
+  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+  MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc));
+
+  MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define);
+  MIB.addReg(BaseReg + MI.getOperand(0).getImm());
+  MIB.add(MI.getOperand(1)); // pn
+  MIB.add(MI.getOperand(2)); // pm
+  MIB.add(MI.getOperand(3)); // zn
+
+  MI.eraseFromParent(); // The pseudo is gone now.
+  return BB;
+}
+
 MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *BB) const {
   switch (MI.getOpcode()) {
@@ -2457,6 +2474,14 @@
   case AArch64::INSERT_MXIPZ_V_PSEUDO_Q:
     return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_Q, AArch64::ZAQ0, MI,
                                   BB);
+  case AArch64::ADDHA_MPPZ_PSEUDO_S:
+    return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::ADDVA_MPPZ_PSEUDO_S:
+    return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_S, AArch64::ZAS0, MI, BB);
+  case AArch64::ADDHA_MPPZ_PSEUDO_D:
+    return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_D, AArch64::ZAD0, MI, BB);
+  case AArch64::ADDVA_MPPZ_PSEUDO_D:
+    return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_D, AArch64::ZAD0, MI, BB);
   }
 }
 
Index: llvm/lib/Target/AArch64/SMEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -177,6 +177,36 @@
   let Inst{2-0} = ZAda;
 }
 
+class sme_add_vector_to_tile_pseudo<ZPRRegOp zpr_ty>
+    : Pseudo<(outs),
+             (ins i64imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>,
+      Sched<[]> {
+  // Translated to the actual instructions in AArch64ISelLowering.cpp.
+  let usesCustomInserter = 1;
+}
+
+def ADDHA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
+def ADDVA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo<ZPR32>;
+def ADDHA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
+def ADDVA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo<ZPR64>;
+
+def : Pat<(int_aarch64_sme_addha
+            imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+            (nxv4i32 ZPR32:$zn)),
+          (ADDHA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+def : Pat<(int_aarch64_sme_addva
+            imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
+            (nxv4i32 ZPR32:$zn)),
+          (ADDVA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>;
+def : Pat<(int_aarch64_sme_addha
+            imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+            (nxv2i64 ZPR64:$zn)),
+          (ADDHA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+def : Pat<(int_aarch64_sme_addva
+            imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
+            (nxv2i64 ZPR64:$zn)),
+          (ADDVA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>;
+
 //===----------------------------------------------------------------------===//
 // SME Contiguous Loads
 //===----------------------------------------------------------------------===//
Index: llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+define void @addha_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn) {
+; CHECK-LABEL: addha_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addha za0.s, p0/m, p1/m, z0.s
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.addha.nxv4i32(i64 0, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
+  ret void
+}
+
+define void @addva_s(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn) {
+; CHECK-LABEL: addva_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addva za3.s, p0/m, p1/m, z0.s
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.addva.nxv4i32(i64 3, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
+  ret void
+}
+
+define void @addha_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn) {
+; CHECK-LABEL: addha_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addha za0.d, p0/m, p1/m, z0.d
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.addha.nxv2i64(i64 0, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
+  ret void
+}
+
+define void @addva_d(<vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn) {
+; CHECK-LABEL: addva_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    addva za7.d, p0/m, p1/m, z0.d
+; CHECK-NEXT:    ret
+  call void @llvm.aarch64.sme.addva.nxv2i64(i64 7, <vscale x 2 x i1> %pn, <vscale x 2 x i1> %pm, <vscale x 2 x i64> %zn)
+  ret void
+}
+
+declare void @llvm.aarch64.sme.addha.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.addha.nxv2i64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sme.addva.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare void @llvm.aarch64.sme.addva.nxv2i64(i64, <vscale x 2 x i1>, <vscale x 2 x i1>, <vscale x 2 x i64>)
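
Usage sketch (not part of the patch): the IR below shows how a front end might call one of the new intrinsics directly, beyond the tile indices covered by the test. The function name @acc_rows and the tile index 1 are illustrative assumptions; the intrinsic signature is the one declared above. The tile operand must be a constant (0-3 for 32-bit tiles, 0-7 for 64-bit tiles, matching the imm0_3/imm0_7 constraints in the selection patterns), and both predicates must have the same element count as the data vector. With -mattr=+sme this is expected to select ADDHA_MPPZ_PSEUDO_S and, via the custom inserter, emit "addha za1.s, p0/m, p1/m, z0.s".

; Illustrative only: adds each active element of %zn to the corresponding
; element of every active horizontal slice (row) of the 32-bit tile ZA1.
define void @acc_rows(<vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn) {
  call void @llvm.aarch64.sme.addha.nxv4i32(i64 1, <vscale x 4 x i1> %pn, <vscale x 4 x i1> %pm, <vscale x 4 x i32> %zn)
  ret void
}

declare void @llvm.aarch64.sme.addha.nxv4i32(i64, <vscale x 4 x i1>, <vscale x 4 x i1>, <vscale x 4 x i32>)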