Index: llvm/include/llvm/Analysis/ValueTracking.h =================================================================== --- llvm/include/llvm/Analysis/ValueTracking.h +++ llvm/include/llvm/Analysis/ValueTracking.h @@ -375,6 +375,17 @@ return Known.isKnownNeverInfinity(); } +/// Return true if the floating-point value can never contain a NaN or infinity. +inline bool isKnownNeverInfOrNaN( + const Value *V, const DataLayout &DL, const TargetLibraryInfo *TLI, + unsigned Depth = 0, AssumptionCache *AC = nullptr, + const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr, + OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) { + KnownFPClass Known = computeKnownFPClass(V, DL, fcInf | fcNan, Depth, TLI, AC, + CtxI, DT, ORE, UseInstrInfo); + return Known.isKnownNeverNaN() && Known.isKnownNeverInfinity(); +} + /// Return true if the floating-point scalar value is not a NaN or if the /// floating-point vector value has no NaN elements. Return false if a value /// could ever be NaN. Index: llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp @@ -328,7 +328,8 @@ }); } -bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, +bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Instruction &I, + const Value *Op0, const Value *Op1, InstCombiner &IC) const { // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or // infinity, gives +0.0. 
If we can prove we don't have one of the special @@ -341,12 +342,13 @@ return true; } - // TODO: Use computeKnownFPClass auto *TLI = &IC.getTargetLibraryInfo(); - if (isKnownNeverInfinity(Op0, IC.getDataLayout(), TLI) && - isKnownNeverNaN(Op0, IC.getDataLayout(), TLI) && - isKnownNeverInfinity(Op1, IC.getDataLayout(), TLI) && - isKnownNeverNaN(Op1, IC.getDataLayout(), TLI)) { + if (isKnownNeverInfOrNaN(Op0, IC.getDataLayout(), TLI, 0, + &IC.getAssumptionCache(), &I, &IC.getDominatorTree(), + &IC.getOptimizationRemarkEmitter()) && + isKnownNeverInfOrNaN(Op1, IC.getDataLayout(), TLI, 0, + &IC.getAssumptionCache(), &I, &IC.getDominatorTree(), + &IC.getOptimizationRemarkEmitter())) { // Neither operand is infinity or NaN. return true; } @@ -1010,7 +1012,7 @@ // If we can prove we don't have one of the special cases then we can use a // normal fmul instruction instead. - if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) { + if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) { auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II); FMul->takeName(&II); return IC.replaceInstUsesWith(II, FMul); @@ -1037,7 +1039,7 @@ // If we can prove we don't have one of the special cases then we can use a // normal fma instead. 
- if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) { + if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) { II.setCalledOperand(Intrinsic::getDeclaration( II.getModule(), Intrinsic::fma, II.getType())); return &II; Index: llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -187,8 +187,8 @@ Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const; - bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1, - InstCombiner &IC) const; + bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0, + const Value *Op1, InstCombiner &IC) const; std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const; std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( Index: llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll =================================================================== --- llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll +++ llvm/test/Transforms/InstCombine/AMDGPU/fma_legacy.ll @@ -83,4 +83,28 @@ ret float %call } +; Combine to fma because neither argument can be infinity or NaN based on assumptions +define float @test_finite_assumed(float %x, float %y, float %z) { +; CHECK-LABEL: @test_finite_assumed( +; CHECK-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[IS_FINITE_X:%.*]] = fcmp one float [[FABS_X]], 0x7FF0000000000000 +; CHECK-NEXT: [[FABS_Y:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]]) +; CHECK-NEXT: [[IS_FINITE_Y:%.*]] = fcmp one float [[FABS_Y]], 0x7FF0000000000000 +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_FINITE_X]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_FINITE_Y]]) +; CHECK-NEXT: [[CALL:%.*]] = call float @llvm.fma.f32(float [[X]], float [[Y]], float [[Z:%.*]]) +; CHECK-NEXT: ret float [[CALL]] +; + %fabs.x = call float @llvm.fabs.f32(float %x) + 
%is.finite.x = fcmp one float %fabs.x, 0x7FF0000000000000 + %fabs.y = call float @llvm.fabs.f32(float %y) + %is.finite.y = fcmp one float %fabs.y, 0x7FF0000000000000 + call void @llvm.assume(i1 %is.finite.x) + call void @llvm.assume(i1 %is.finite.y) + %call = call float @llvm.amdgcn.fma.legacy(float %x, float %y, float %z) + ret float %call +} + declare float @llvm.amdgcn.fma.legacy(float, float, float) +declare float @llvm.fabs.f32(float) +declare void @llvm.assume(i1 noundef) Index: llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll =================================================================== --- llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll +++ llvm/test/Transforms/InstCombine/AMDGPU/fmul_legacy.ll @@ -53,4 +53,28 @@ ret float %call } +; Combine to fmul because neither argument can be infinity or NaN based on assumptions +define float @test_finite_assumed(float %x, float %y) { +; CHECK-LABEL: @test_finite_assumed( +; CHECK-NEXT: [[FABS_X:%.*]] = call float @llvm.fabs.f32(float [[X:%.*]]) +; CHECK-NEXT: [[IS_FINITE_X:%.*]] = fcmp one float [[FABS_X]], 0x7FF0000000000000 +; CHECK-NEXT: [[FABS_Y:%.*]] = call float @llvm.fabs.f32(float [[Y:%.*]]) +; CHECK-NEXT: [[IS_FINITE_Y:%.*]] = fcmp one float [[FABS_Y]], 0x7FF0000000000000 +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_FINITE_X]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[IS_FINITE_Y]]) +; CHECK-NEXT: [[CALL:%.*]] = fmul float [[X]], [[Y]] +; CHECK-NEXT: ret float [[CALL]] +; + %fabs.x = call float @llvm.fabs.f32(float %x) + %is.finite.x = fcmp one float %fabs.x, 0x7FF0000000000000 + %fabs.y = call float @llvm.fabs.f32(float %y) + %is.finite.y = fcmp one float %fabs.y, 0x7FF0000000000000 + call void @llvm.assume(i1 %is.finite.x) + call void @llvm.assume(i1 %is.finite.y) + %call = call float @llvm.amdgcn.fmul.legacy(float %x, float %y) + ret float %call +} + declare float @llvm.amdgcn.fmul.legacy(float, float) +declare float @llvm.fabs.f32(float) +declare void @llvm.assume(i1 noundef)