diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1234,10 +1234,13 @@
 static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
   switch (Intrinsic) {
   case Intrinsic::aarch64_sve_fmul:
+  case Intrinsic::aarch64_sve_fmul_u:
     return Instruction::BinaryOps::FMul;
   case Intrinsic::aarch64_sve_fadd:
+  case Intrinsic::aarch64_sve_fadd_u:
     return Instruction::BinaryOps::FAdd;
   case Intrinsic::aarch64_sve_fsub:
+  case Intrinsic::aarch64_sve_fsub_u:
     return Instruction::BinaryOps::FSub;
   default:
     return Instruction::BinaryOpsEnd;
@@ -1675,14 +1678,20 @@
     return instCombineSVEPTest(IC, II);
   case Intrinsic::aarch64_sve_mul:
   case Intrinsic::aarch64_sve_fmul:
+  case Intrinsic::aarch64_sve_fmul_u:
     return instCombineSVEVectorMul(IC, II);
   case Intrinsic::aarch64_sve_fadd:
   case Intrinsic::aarch64_sve_add:
     return instCombineSVEVectorAdd(IC, II);
-  case Intrinsic::aarch64_sve_fadd_u:
-    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
-                                             Intrinsic::aarch64_sve_fmla_u>(
-        IC, II, true);
+  case Intrinsic::aarch64_sve_fadd_u: {
+    auto Replacement = instCombineSVEVectorBinOp(IC, II);
+    if (Replacement == std::nullopt)
+      return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
+                                               Intrinsic::aarch64_sve_fmla_u>(
+          IC, II, true);
+
+    return Replacement;
+  }
   case Intrinsic::aarch64_sve_add_u:
     return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
                                              Intrinsic::aarch64_sve_mla_u>(
@@ -1690,10 +1699,14 @@
   case Intrinsic::aarch64_sve_fsub:
   case Intrinsic::aarch64_sve_sub:
     return instCombineSVEVectorSub(IC, II);
-  case Intrinsic::aarch64_sve_fsub_u:
-    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
-                                             Intrinsic::aarch64_sve_fmls_u>(
-        IC, II, true);
+  case Intrinsic::aarch64_sve_fsub_u: {
+    auto Replacement = instCombineSVEVectorBinOp(IC, II);
+    if (Replacement == std::nullopt)
+      return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
+                                               Intrinsic::aarch64_sve_fmls_u>(
+          IC, II, true);
+    return Replacement;
+  }
   case Intrinsic::aarch64_sve_sub_u:
     return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
                                              Intrinsic::aarch64_sve_mls_u>(
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
@@ -6,7 +6,7 @@
 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
 
-; SVE intrinsics fmul and fadd should be replaced with regular fmul and fadd
+; SVE intrinsics fmul, fmul_u, fadd, fadd_u, fsub and fsub_u should be replaced with regular fmul, fadd and fsub.
 declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
 ; CHECK-LABEL: @replace_fmul_intrinsic_half
@@ -37,6 +37,36 @@
   ret <vscale x 2 x double> %2
 }
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmul_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: @replace_fmul_u_intrinsic_half
+; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b
+; CHECK-NEXT: ret <vscale x 8 x half> %1
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmul_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: @replace_fmul_u_intrinsic_float
+; CHECK-NEXT: %1 = fmul fast <vscale x 4 x float> %a, %b
+; CHECK-NEXT: ret <vscale x 4 x float> %1
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmul_u_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fmul_u_intrinsic_double
+; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
 ; CHECK-LABEL: @replace_fadd_intrinsic_half
@@ -67,6 +97,36 @@
   ret <vscale x 2 x double> %2
 }
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fadd_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: @replace_fadd_u_intrinsic_half
+; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b
+; CHECK-NEXT: ret <vscale x 8 x half> %1
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fadd_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: @replace_fadd_u_intrinsic_float
+; CHECK-NEXT: %1 = fadd fast <vscale x 4 x float> %a, %b
+; CHECK-NEXT: ret <vscale x 4 x float> %1
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fadd_u_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fadd_u_intrinsic_double
+; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
 ; CHECK-LABEL: @replace_fsub_intrinsic_half
@@ -87,7 +147,6 @@
   ret <vscale x 4 x float> %2
 }
 
-
 declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @replace_fsub_intrinsic_double
@@ -117,4 +176,44 @@
   ret <vscale x 2 x double> %2
 }
 
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fsub_u_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: @replace_fsub_u_intrinsic_half
+; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b
+; CHECK-NEXT: ret <vscale x 8 x half> %1
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fsub_u_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: @replace_fsub_u_intrinsic_float
+; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b
+; CHECK-NEXT: ret <vscale x 4 x float> %1
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+define <vscale x 2 x double> @no_replace_on_non_ptrue_all_u(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @no_replace_on_non_ptrue_all_u
+; CHECK-NEXT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT: %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+; CHECK-NEXT: ret <vscale x 2 x double> %2
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fsub_u_intrinsic_no_fast_flag(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fsub_u_intrinsic_no_fast_flag
+; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
 attributes #0 = { "target-features"="+sve" }
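
Illustrative note, not part of the patch: the fold exercised by these tests fires only when the governing predicate is a ptrue with the all pattern (i32 31), and any fast-math flags on the intrinsic call carry over to the resulting IR binop. A minimal before/after sketch mirroring the fsub_u tests above (the value names %p, %r, %a and %b are placeholders):

  ; before instcombine
  %p = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %r = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> %p, <vscale x 2 x double> %a, <vscale x 2 x double> %b)

  ; after instcombine
  %r = fsub fast <vscale x 2 x double> %a, %b

With a non-all ptrue pattern, such as the i32 5 used in @no_replace_on_non_ptrue_all_u, the intrinsic call is left as is.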