Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -108,6 +108,7 @@
 Optional<Instruction *>
 ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+  using namespace PatternMatch;
   Intrinsic::ID IID = II.getIntrinsicID();
   switch (IID) {
   default:
@@ -210,6 +211,28 @@
     }
     break;
   }
+  case Intrinsic::arm_mve_vmldava: {
+    Instruction *I = cast<Instruction>(&II);
+    if (I->hasOneUse()) {
+      auto *User = cast<Instruction>(*I->user_begin());
+      Value *OpZ;
+      if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) &&
+          match(I->getOperand(3), m_Zero())) {
+        Value *OpX = I->getOperand(4);
+        Value *OpY = I->getOperand(5);
+        Type *OpTy = OpX->getType();
+
+        IC.Builder.SetInsertPoint(User);
+        Value *V = IC.Builder.CreateIntrinsic(
+            Intrinsic::arm_mve_vmldava, {OpTy},
+            {I->getOperand(0), I->getOperand(1), I->getOperand(2), OpZ, OpX, OpY});
+
+        IC.replaceInstUsesWith(*User, V);
+        return IC.eraseInstFromFunction(*User);
+      }
+    }
+    return None;
+  }
   }
   return None;
 }
Index: llvm/test/Transforms/InstCombine/ARM/vmldava.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/ARM/vmldava.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc i32 @test_vmladavaq_s32(i32 %z, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: test_vmladavaq_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmlava.s32 r0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 %z, <4 x i32> %x, <4 x i32> %y)
+  ret i32 %0
+}
+
+define arm_aapcs_vfpcc i32 @test_vmladavaq_s16(i32 %z, <8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: test_vmladavaq_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmlava.s16 r0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %z, <8 x i16> %x, <8 x i16> %y)
+  ret i32 %0
+}
+
+define arm_aapcs_vfpcc i32 @test_vmladavaq_s8(i32 %z, <16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: test_vmladavaq_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmlava.s8 r0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 0, i32 0, i32 %z, <16 x i8> %x, <16 x i8> %y)
+  ret i32 %0
+}
+
+define arm_aapcs_vfpcc i32 @test_vmladavaq_u32(i32 %z, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: test_vmladavaq_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmlava.u32 r0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 1, i32 0, i32 0, i32 %z, <4 x i32> %x, <4 x i32> %y)
+  ret i32 %0
+}
+
+define arm_aapcs_vfpcc i32 @test_vmladavaq_u16(i32 %z, <8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: test_vmladavaq_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmlava.u16 r0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 1, i32 0, i32 0, i32 %z, <8 x i16> %x, <8 x i16> %y)
+  ret i32 %0
+}
+
+define arm_aapcs_vfpcc i32 @test_vmladavaq_u8(i32 %z, <16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: test_vmladavaq_u8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmlava.u8 r0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v16i8(i32 1, i32 0, i32 0, i32 %z, <16 x i8> %x, <16 x i8> %y)
+  ret i32 %0
+}
+
+define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s32(i32 %z, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: test_vmlsdavaq_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmlsdava.s32 r0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 1, i32 0, i32 %z, <4 x i32> %x, <4 x i32> %y)
+  ret i32 %0
+}
+
+define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s16(i32 %z, <8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: test_vmlsdavaq_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmlsdava.s16 r0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 1, i32 0, i32 %z, <8 x i16> %x, <8 x i16> %y)
+  ret i32 %0
+}
+
+define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s8(i32 %z, <16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: test_vmlsdavaq_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmlsdava.s8 r0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 1, i32 0, i32 %z, <16 x i8> %x, <16 x i8> %y)
+  ret i32 %0
+}
+
+declare i32 @llvm.arm.mve.vmldava.v4i32(i32, i32, i32, i32, <4 x i32>, <4 x i32>)
+declare i32 @llvm.arm.mve.vmldava.v8i16(i32, i32, i32, i32, <8 x i16>, <8 x i16>)
+declare i32 @llvm.arm.mve.vmldava.v16i8(i32, i32, i32, i32, <16 x i8>, <16 x i8>)
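
For reference, a minimal IR sketch of the shape the new combine targets; it is not part of
the patch, and the function names (@before_fold, @after_fold) and value names (%acc, %sum)
are illustrative. Running e.g. `opt -passes=instcombine -S` over @before_fold is expected to
produce the shape of @after_fold: an add of a vmldava whose accumulator operand (operand 3)
is zero folds into a single vmldava that takes the other add operand as its accumulator.

  ; Illustrative input: the intrinsic accumulates from zero, then the result is added to %z.
  define i32 @before_fold(i32 %z, <4 x i32> %x, <4 x i32> %y) {
  entry:
    %acc = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 0, <4 x i32> %x, <4 x i32> %y)
    %sum = add i32 %acc, %z
    ret i32 %sum
  }

  ; Illustrative expected result: %z has become the accumulator operand and the add is gone.
  define i32 @after_fold(i32 %z, <4 x i32> %x, <4 x i32> %y) {
  entry:
    %acc = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 %z, <4 x i32> %x, <4 x i32> %y)
    ret i32 %acc
  }

  declare i32 @llvm.arm.mve.vmldava.v4i32(i32, i32, i32, i32, <4 x i32>, <4 x i32>)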