diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -695,6 +695,33 @@
   return None;
 }
 
+static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
+  switch (Intrinsic) {
+  case Intrinsic::aarch64_sve_fmul:
+    return Instruction::BinaryOps::FMul;
+  case Intrinsic::aarch64_sve_fadd:
+    return Instruction::BinaryOps::FAdd;
+  case Intrinsic::aarch64_sve_fsub:
+    return Instruction::BinaryOps::FSub;
+  default:
+    return Instruction::BinaryOpsEnd;
+  }
+}
+
+static Optional<Instruction *> instCombineSVEVectorBinOp(InstCombiner &IC,
+                                                         IntrinsicInst &II) {
+  auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
+  if (BinOpCode == Instruction::BinaryOpsEnd ||
+      !match(II.getOperand(0),
+             m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                 m_ConstantInt<AArch64SVEPredPattern::all>())))
+    return None;
+  IRBuilder<> Builder(II.getContext());
+  Builder.SetInsertPoint(&II);
+  return IC.replaceInstUsesWith(
+      II, Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2)));
+}
+
 static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
                                                        IntrinsicInst &II) {
   auto *OpPredicate = II.getOperand(0);
@@ -744,7 +771,7 @@
     }
   }
 
-  return None;
+  return instCombineSVEVectorBinOp(IC, II);
 }
 
 static Optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
@@ -871,6 +898,9 @@
   case Intrinsic::aarch64_sve_mul:
   case Intrinsic::aarch64_sve_fmul:
     return instCombineSVEVectorMul(IC, II);
+  case Intrinsic::aarch64_sve_fadd:
+  case Intrinsic::aarch64_sve_fsub:
+    return instCombineSVEVectorBinOp(IC, II);
   case Intrinsic::aarch64_sve_tbl:
     return instCombineSVETBL(IC, II);
   case Intrinsic::aarch64_sve_uunpkhi:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
@@ -0,0 +1,111 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+; SVE intrinsics fmul, fadd and fsub should be replaced with regular fmul, fadd and fsub
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: @replace_fmul_intrinsic_half
+; CHECK-NEXT: %1 = fmul <vscale x 8 x half> %a, %b
+; CHECK-NEXT: ret <vscale x 8 x half> %1
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: @replace_fmul_intrinsic_float
+; CHECK-NEXT: %1 = fmul <vscale x 4 x float> %a, %b
+; CHECK-NEXT: ret <vscale x 4 x float> %1
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fmul_intrinsic_double
+; CHECK-NEXT: %1 = fmul <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: @replace_fadd_intrinsic_half
+; CHECK-NEXT: %1 = fadd <vscale x 8 x half> %a, %b
+; CHECK-NEXT: ret <vscale x 8 x half> %1
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: @replace_fadd_intrinsic_float
+; CHECK-NEXT: %1 = fadd <vscale x 4 x float> %a, %b
+; CHECK-NEXT: ret <vscale x 4 x float> %1
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fadd_intrinsic_double
+; CHECK-NEXT: %1 = fadd <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: @replace_fsub_intrinsic_half
+; CHECK-NEXT: %1 = fsub <vscale x 8 x half> %a, %b
+; CHECK-NEXT: ret <vscale x 8 x half> %1
+  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %2
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: @replace_fsub_intrinsic_float
+; CHECK-NEXT: %1 = fsub <vscale x 4 x float> %a, %b
+; CHECK-NEXT: ret <vscale x 4 x float> %1
+  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %2
+}
+
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fsub_intrinsic_double
+; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
+; CHECK-NEXT: ret <vscale x 2 x double> %1
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
+define <vscale x 2 x double> @no_replace_on_non_ptrue_all(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @no_replace_on_non_ptrue_all
+; CHECK-NEXT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+; CHECK-NEXT: %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+; CHECK-NEXT: ret <vscale x 2 x double> %2
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
+  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
+attributes #0 = { "target-features"="+sve" }