diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -715,6 +715,20 @@
     return match(SplatValue, m_FPOne()) || match(SplatValue, m_One());
   };
 
+  auto IsFMul = [](auto *I) {
+    auto *IntrI = dyn_cast<IntrinsicInst>(I);
+    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_fmul)
+      return false;
+    return true;
+  };
+
+  auto IsFAdd = [](auto *I) {
+    auto *IntrI = dyn_cast<IntrinsicInst>(I);
+    if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_fadd)
+      return false;
+    return true;
+  };
+
   // The OpMultiplier variable should always point to the dup (if any), so
   // swap if necessary.
   if (IsUnitDup(OpMultiplicand) || IsUnitDupX(OpMultiplicand))
@@ -734,6 +748,12 @@
       OpMultiplicand->takeName(&II);
       return IC.replaceInstUsesWith(II, OpMultiplicand);
     }
+  } else if (IsFAdd(&II)) {
+    auto instr = Builder.CreateFAdd(OpMultiplicand, OpMultiplier);
+    return IC.replaceInstUsesWith(II, instr);
+  } else if (IsFMul(&II)) {
+    auto instr = Builder.CreateFMul(OpMultiplicand, OpMultiplier);
+    return IC.replaceInstUsesWith(II, instr);
   }
 
   return None;
@@ -823,6 +843,7 @@
     return instCombineSVEPTest(IC, II);
   case Intrinsic::aarch64_sve_mul:
   case Intrinsic::aarch64_sve_fmul:
+  case Intrinsic::aarch64_sve_fadd:
     return instCombineSVEVectorMul(IC, II);
   case Intrinsic::aarch64_sve_tbl:
     return instCombineSVETBL(IC, II);
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fmul-idempotency.ll
@@ -66,7 +66,7 @@
 define <vscale x 8 x half> @non_idempotent_fmul_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) #0 {
 ; CHECK-LABEL: @non_idempotent_fmul_f16(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 0xH4000)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <vscale x 8 x half> [[TMP1]], [[A:%.*]]
 ; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
 ;
   %1 = call <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half 2.0)
@@ -77,7 +77,7 @@
 define <vscale x 4 x float> @non_idempotent_fmul_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) #0 {
 ; CHECK-LABEL: @non_idempotent_fmul_f32(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.000000e+00)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <vscale x 4 x float> [[TMP1]], [[A:%.*]]
 ; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
 ;
   %1 = call <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float 2.0)
@@ -88,7 +88,7 @@
 define <vscale x 2 x double> @non_idempotent_fmul_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a) #0 {
 ; CHECK-LABEL: @non_idempotent_fmul_f64(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.000000e+00)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[TMP1]])
+; CHECK-NEXT:    [[TMP2:%.*]] = fmul <vscale x 2 x double> [[TMP1]], [[A:%.*]]
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
 ;
   %1 = call <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double 2.0)