diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -619,6 +619,18 @@
 }
 inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; }
 
+struct is_finitenonzero {
+  bool isValue(const APFloat &C) { return C.isFiniteNonZero(); }
+};
+/// Match a finite non-zero FP constant.
+/// For vectors, this includes constants with undefined elements.
+inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() {
+  return cstfp_pred_ty<is_finitenonzero>();
+}
+inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) {
+  return V;
+}
+
 struct is_any_zero_fp {
   bool isValue(const APFloat &C) { return C.isZero(); }
 };
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -823,6 +823,39 @@
     }
     break;
   }
+  case Intrinsic::amdgcn_fmul_legacy: {
+    Value *Op0 = II.getArgOperand(0);
+    Value *Op1 = II.getArgOperand(1);
+
+    // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
+    // infinity, gives +0.0.
+    // TODO: Move to InstSimplify?
+    if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
+        match(Op1, PatternMatch::m_AnyZeroFP()))
+      return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
+
+    // If we can prove we don't have one of the special cases then we can use a
+    // normal fmul instruction instead.
+    auto *TLI = &IC.getTargetLibraryInfo();
+    bool CanSimplifyToMul = false;
+    // TODO: Create and use isKnownFiniteNonZero instead of just matching
+    // constants here.
+    if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
+        match(Op1, PatternMatch::m_FiniteNonZero())) {
+      // One operand is not zero or infinity or NaN.
+      CanSimplifyToMul = true;
+    } else if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
+               isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
+      // Neither operand is infinity or NaN.
+      CanSimplifyToMul = true;
+    }
+    if (CanSimplifyToMul) {
+      auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
+      FMul->takeName(&II);
+      return IC.replaceInstUsesWith(II, FMul);
+    }
+    break;
+  }
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll b/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll
@@ -0,0 +1,46 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -instcombine -S | FileCheck %s
+
+; Simplify to +0.0.
+define float @test_zero(float %x) {
+; CHECK-LABEL: @test_zero(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %call = call float @llvm.amdgcn.fmul.legacy(float %x, float 0.0)
+  ret float %call
+}
+
+; Simplify to +0.0.
+define float @test_negzero(float %y) {
+; CHECK-LABEL: @test_negzero(
+; CHECK-NEXT:    ret float 0.000000e+00
+;
+  %call = call float @llvm.amdgcn.fmul.legacy(float -0.0, float %y)
+  ret float %call
+}
+
+; Combine to fmul because the constant is finite and non-zero.
+define float @test_const(float %x) {
+; CHECK-LABEL: @test_const(
+; CHECK-NEXT:    [[CALL:%.*]] = fmul float [[X:%.*]], 9.950000e+01
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %call = call float @llvm.amdgcn.fmul.legacy(float %x, float 99.5)
+  ret float %call
+}
+
+; Combine to fmul because neither argument can be infinity or NaN.
+define float @test_finite(i32 %x, i32 %y) {
+; CHECK-LABEL: @test_finite(
+; CHECK-NEXT:    [[XF:%.*]] = sitofp i32 [[X:%.*]] to float
+; CHECK-NEXT:    [[YF:%.*]] = sitofp i32 [[Y:%.*]] to float
+; CHECK-NEXT:    [[CALL:%.*]] = fmul float [[XF]], [[YF]]
+; CHECK-NEXT:    ret float [[CALL]]
+;
+  %xf = sitofp i32 %x to float
+  %yf = sitofp i32 %y to float
+  %call = call float @llvm.amdgcn.fmul.legacy(float %xf, float %yf)
+  ret float %call
+}
+
+declare float @llvm.amdgcn.fmul.legacy(float, float)