diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -710,16 +710,18 @@
 
 static Optional<Instruction *> instCombineSVEVectorBinOp(InstCombiner &IC,
                                                          IntrinsicInst &II) {
+  auto *OpPredicate = II.getOperand(0);
   auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
   if (BinOpCode == Instruction::BinaryOpsEnd ||
-      !match(II.getOperand(0),
-             m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
-                 m_ConstantInt<AArch64SVEPredPattern::all>())))
+      !match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                              m_ConstantInt<AArch64SVEPredPattern::all>())))
     return None;
   IRBuilder<> Builder(II.getContext());
   Builder.SetInsertPoint(&II);
-  return IC.replaceInstUsesWith(
-      II, Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2)));
+  Builder.setFastMathFlags(II.getFastMathFlags());
+  auto BinOp =
+      Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2));
+  return IC.replaceInstUsesWith(II, BinOp);
 }
 
 static Optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-fma-binops.ll
@@ -10,7 +10,7 @@
 declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
 ; CHECK-LABEL: @replace_fmul_intrinsic_half
-; CHECK-NEXT:  %1 = fmul <vscale x 8 x half> %a, %b
+; CHECK-NEXT:  %1 = fmul fast <vscale x 8 x half> %a, %b
 ; CHECK-NEXT:  ret <vscale x 8 x half> %1
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -20,7 +20,7 @@
 declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
 ; CHECK-LABEL: @replace_fmul_intrinsic_float
-; CHECK-NEXT:  %1 = fmul <vscale x 4 x float> %a, %b
+; CHECK-NEXT:  %1 = fmul fast <vscale x 4 x float> %a, %b
 ; CHECK-NEXT:  ret <vscale x 4 x float> %1
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -30,7 +30,7 @@
 declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @replace_fmul_intrinsic_double
-; CHECK-NEXT:  %1 = fmul <vscale x 2 x double> %a, %b
+; CHECK-NEXT:  %1 = fmul fast <vscale x 2 x double> %a, %b
 ; CHECK-NEXT:  ret <vscale x 2 x double> %1
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -40,7 +40,7 @@
 declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
 ; CHECK-LABEL: @replace_fadd_intrinsic_half
-; CHECK-NEXT:  %1 = fadd <vscale x 8 x half> %a, %b
+; CHECK-NEXT:  %1 = fadd fast <vscale x 8 x half> %a, %b
 ; CHECK-NEXT:  ret <vscale x 8 x half> %1
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -50,7 +50,7 @@
 declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
 ; CHECK-LABEL: @replace_fadd_intrinsic_float
-; CHECK-NEXT:  %1 = fadd <vscale x 4 x float> %a, %b
+; CHECK-NEXT:  %1 = fadd fast <vscale x 4 x float> %a, %b
 ; CHECK-NEXT:  ret <vscale x 4 x float> %1
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -60,7 +60,7 @@
 declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @replace_fadd_intrinsic_double
-; CHECK-NEXT:  %1 = fadd <vscale x 2 x double> %a, %b
+; CHECK-NEXT:  %1 = fadd fast <vscale x 2 x double> %a, %b
 ; CHECK-NEXT:  ret <vscale x 2 x double> %1
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -70,7 +70,7 @@
 declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
 define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
 ; CHECK-LABEL: @replace_fsub_intrinsic_half
-; CHECK-NEXT:  %1 = fsub <vscale x 8 x half> %a, %b
+; CHECK-NEXT:  %1 = fsub fast <vscale x 8 x half> %a, %b
 ; CHECK-NEXT:  ret <vscale x 8 x half> %1
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
@@ -80,7 +80,7 @@
 declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
 ; CHECK-LABEL: @replace_fsub_intrinsic_float
-; CHECK-NEXT:  %1 = fsub <vscale x 4 x float> %a, %b
+; CHECK-NEXT:  %1 = fsub fast <vscale x 4 x float> %a, %b
 ; CHECK-NEXT:  ret <vscale x 4 x float> %1
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
@@ -91,7 +91,7 @@
 declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
 ; CHECK-LABEL: @replace_fsub_intrinsic_double
-; CHECK-NEXT:  %1 = fsub <vscale x 2 x double> %a, %b
+; CHECK-NEXT:  %1 = fsub fast <vscale x 2 x double> %a, %b
 ; CHECK-NEXT:  ret <vscale x 2 x double> %1
   %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
@@ -108,4 +108,13 @@
   ret <vscale x 2 x double> %2
 }
 
+define <vscale x 2 x double> @replace_fsub_intrinsic_no_fast_flag(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: @replace_fsub_intrinsic_no_fast_flag
+; CHECK-NEXT:  %1 = fsub <vscale x 2 x double> %a, %b
+; CHECK-NEXT:  ret <vscale x 2 x double> %1
+  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+  %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %2
+}
+
 attributes #0 = { "target-features"="+sve" }