Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -108,6 +108,7 @@
 
 Optional<Instruction *>
 ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
+  using namespace PatternMatch;
   Intrinsic::ID IID = II.getIntrinsicID();
   switch (IID) {
   default:
@@ -210,6 +211,34 @@
     }
     break;
   }
+  case Intrinsic::arm_mve_vmldava: {
+    // Fold a scalar add of the reduction result into the intrinsic's
+    // accumulator operand (operand 3):
+    //   add(z, vmldava(f0, f1, f2, 0, x, y)) -> vmldava(f0, f1, f2, z, x, y)
+    // The three flag operands (0-2) are carried through unchanged.
+    Instruction *I = cast<Instruction>(&II);
+    if (I->hasOneUse()) {
+      auto *User = cast<Instruction>(*I->user_begin());
+      Value *OpZ;
+      // Only fold when the current accumulator is zero, so the add's other
+      // operand can replace it without changing the result.
+      if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) &&
+          match(I->getOperand(3), m_Zero())) {
+        Value *OpX = I->getOperand(4);
+        Value *OpY = I->getOperand(5);
+        Type *OpTy = OpX->getType();
+
+        IC.Builder.SetInsertPoint(User);
+        Value *V =
+            IC.Builder.CreateIntrinsic(Intrinsic::arm_mve_vmldava, {OpTy},
+                                       {I->getOperand(0), I->getOperand(1),
+                                        I->getOperand(2), OpZ, OpX, OpY});
+
+        IC.replaceInstUsesWith(*User, V);
+        return IC.eraseInstFromFunction(*User);
+      }
+    }
+    return None;
+  }
   }
   return None;
 }
Index: llvm/test/Transforms/InstCombine/ARM/vmldava.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/InstCombine/ARM/vmldava.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S -mtriple=arm -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define arm_aapcs_vfpcc i32 @test_vmladavaq_s32(i32 %z, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @test_vmladavaq_s32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 %z, <4 x i32> %x, <4 x i32> %y)
+; CHECK-NEXT: ret i32 %0
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 0, i32 0, i32 0, <4 x i32> %x, <4 x i32> %y)
+  %1 = add nsw i32 %0, %z
+  ret i32 %1
+}
+
+define 
arm_aapcs_vfpcc i32 @test_vmladavaq_s16(i32 %z, <8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: @test_vmladavaq_s16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 %z, <8 x i16> %x, <8 x i16> %y)
+; CHECK-NEXT: ret i32 %0
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 0, i32 0, i32 0, <8 x i16> %x, <8 x i16> %y)
+  %1 = add nsw i32 %0, %z
+  ret i32 %1
+}
+
+define arm_aapcs_vfpcc i32 @test_vmladavaq_s8(i32 %z, <16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_vmladavaq_s8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 0, i32 0, i32 %z, <16 x i8> %x, <16 x i8> %y)
+; CHECK-NEXT: ret i32 %0
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 0, i32 0, i32 0, <16 x i8> %x, <16 x i8> %y)
+  %1 = add nsw i32 %0, %z
+  ret i32 %1
+}
+
+; NOTE(review): the variants below pass 1 as the first intrinsic operand
+; (presumably the "unsigned" flag — the _u name suffix suggests so); the CHECK
+; lines confirm all three flag operands are carried through the fold unchanged.
+define arm_aapcs_vfpcc i32 @test_vmladavaq_u32(i32 %z, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @test_vmladavaq_u32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 1, i32 0, i32 0, i32 %z, <4 x i32> %x, <4 x i32> %y)
+; CHECK-NEXT: ret i32 %0
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 1, i32 0, i32 0, i32 0, <4 x i32> %x, <4 x i32> %y)
+  %1 = add nsw i32 %0, %z
+  ret i32 %1
+}
+
+define arm_aapcs_vfpcc i32 @test_vmladavaq_u16(i32 %z, <8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: @test_vmladavaq_u16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 1, i32 0, i32 0, i32 %z, <8 x i16> %x, <8 x i16> %y)
+; CHECK-NEXT: ret i32 %0
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 1, i32 0, i32 0, i32 0, <8 x i16> %x, <8 x i16> %y)
+  %1 = add nsw i32 %0, %z
+  ret i32 %1
+}
+
+define arm_aapcs_vfpcc i32 @test_vmladavaq_u8(i32 %z, <16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_vmladavaq_u8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 1, i32 0, i32 0, i32 %z, <16 x i8> 
%x, <16 x i8> %y)
+; CHECK-NEXT: ret i32 %0
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v16i8(i32 1, i32 0, i32 0, i32 0, <16 x i8> %x, <16 x i8> %y)
+  %1 = add nsw i32 %0, %z
+  ret i32 %1
+}
+
+; NOTE(review): the variants below pass 1 as the second intrinsic operand
+; (the _vmlsdavaq names suggest the subtract form); the CHECK lines confirm
+; the scalar add still folds into the zero accumulator operand.
+define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s32(i32 %z, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @test_vmlsdavaq_s32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 1, i32 0, i32 %z, <4 x i32> %x, <4 x i32> %y)
+; CHECK-NEXT: ret i32 %0
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v4i32(i32 0, i32 1, i32 0, i32 0, <4 x i32> %x, <4 x i32> %y)
+  %1 = add nsw i32 %0, %z
+  ret i32 %1
+}
+
+define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s16(i32 %z, <8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: @test_vmlsdavaq_s16(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 1, i32 0, i32 %z, <8 x i16> %x, <8 x i16> %y)
+; CHECK-NEXT: ret i32 %0
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v8i16(i32 0, i32 1, i32 0, i32 0, <8 x i16> %x, <8 x i16> %y)
+  %1 = add nsw i32 %0, %z
+  ret i32 %1
+}
+
+define arm_aapcs_vfpcc i32 @test_vmlsdavaq_s8(i32 %z, <16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: @test_vmlsdavaq_s8(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 1, i32 0, i32 %z, <16 x i8> %x, <16 x i8> %y)
+; CHECK-NEXT: ret i32 %0
+entry:
+  %0 = tail call i32 @llvm.arm.mve.vmldava.v16i8(i32 0, i32 1, i32 0, i32 0, <16 x i8> %x, <16 x i8> %y)
+  %1 = add nsw i32 %0, %z
+  ret i32 %1
+}
+
+declare i32 @llvm.arm.mve.vmldava.v4i32(i32, i32, i32, i32, <4 x i32>, <4 x i32>)
+declare i32 @llvm.arm.mve.vmldava.v8i16(i32, i32, i32, i32, <8 x i16>, <8 x i16>)
+declare i32 @llvm.arm.mve.vmldava.v16i8(i32, i32, i32, i32, <16 x i8>, <16 x i8>)