Index: lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5229,6 +5229,63 @@
   return new ICmpInst(Pred, LHSI->getOperand(0), RHSInt);
 }
 
+/// Fold (C / X) < 0.0 --> X < 0.0 if possible. Swap predicate if necessary.
+Instruction *InstCombiner::foldFCmpReciprocalAndZero(FCmpInst &I,
+                                                     Instruction *LHSI,
+                                                     Constant *RHSC) {
+  // When C is not 0.0 and infinities are not allowed:
+  // (C / X) < 0.0 is a sign-bit test of X
+  // (C / X) < 0.0 --> X < 0.0 (if C is positive)
+  // (C / X) < 0.0 --> X > 0.0 (if C is negative, swap the predicate)
+  //
+  // Proof:
+  // Multiply (C / X) < 0.0 by X * X / C.
+  // - X is non-zero; if it were zero, C / X would be infinite and the 'ninf'
+  //   flag would be violated.
+  // - C defines the sign of X * X / C. Thus it also defines whether to swap
+  //   the predicate. C is also non-zero by definition.
+  //
+  // Thus X * X / C is non-zero and the transformation is valid. [qed]
+  //
+  // NOTE(review): the proof reasons over real numbers. C / X can round to
+  // +/-0.0 for a tiny C and a huge finite X, where (C / X) > 0.0 and X > 0.0
+  // differ -- confirm that this is acceptable under 'ninf' alone.
+
+  FCmpInst::Predicate Pred = I.getPredicate();
+
+  // Check that predicates are valid.
+  if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
+      (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
+    return nullptr;
+
+  // Check that RHS operand is zero.
+  if (!match(RHSC, m_AnyZeroFP()))
+    return nullptr;
+
+  // Check fastmath flags ('ninf').
+  if (!LHSI->hasNoInfs() || !I.hasNoInfs())
+    return nullptr;
+
+  // Check the properties of the dividend. It must not be zero to avoid a
+  // division by zero (see Proof).
+  const APFloat *C;
+  if (!match(LHSI->getOperand(0), m_APFloat(C)))
+    return nullptr;
+
+  if (C->isZero())
+    return nullptr;
+
+  // Get swapped predicate if necessary.
+  if (C->isNegative())
+    Pred = I.getSwappedPredicate();
+
+  // Finally emit the new fcmp.
+  Value *X = LHSI->getOperand(1);
+  FCmpInst *NewFCI = new FCmpInst(Pred, X, RHSC);
+  NewFCI->setFastMathFlags(I.getFastMathFlags());
+  return NewFCI;
+}
+
 Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
   bool Changed = false;
 
@@ -5363,6 +5420,10 @@
                             ConstantExpr::getFNeg(RHSC));
       break;
     }
+    case Instruction::FDiv:
+      if (Instruction *NV = foldFCmpReciprocalAndZero(I, LHSI, RHSC))
+        return NV;
+      break;
     case Instruction::Load:
       if (GetElementPtrInst *GEP =
               dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
Index: lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- lib/Transforms/InstCombine/InstCombineInternal.h
+++ lib/Transforms/InstCombine/InstCombineInternal.h
@@ -843,6 +843,8 @@
                                  ConstantInt *AndCst = nullptr);
   Instruction *foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
                                     Constant *RHSC);
+  Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
+                                         Constant *RHSC);
   Instruction *foldICmpAddOpConst(Value *X, const APInt &C,
                                   ICmpInst::Predicate Pred);
   Instruction *foldICmpWithCastAndCast(ICmpInst &ICI);
Index: test/Transforms/InstCombine/fcmp.ll
===================================================================
--- test/Transforms/InstCombine/fcmp.ll
+++ test/Transforms/InstCombine/fcmp.ll
@@ -377,3 +377,93 @@
   ret i1 %cmp
 }
 
+; Can fold 1.0 / X < 0.0 --> X < 0 with ninf
+define i1 @test20_recipX_olt_0(float %X) {
+; CHECK-LABEL: @test20_recipX_olt_0(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf olt float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %div = fdiv ninf float 1.0, %X
+  %cmp = fcmp ninf olt float %div, 0.0
+  ret i1 %cmp
+}
+
+; Can fold -2.0 / X <= 0.0 --> X >= 0 with ninf
+define i1 @test21_recipX_ole_0(float %X) {
+; CHECK-LABEL: @test21_recipX_ole_0(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf oge float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %div = fdiv ninf float -2.0, %X
+  %cmp = fcmp ninf ole float %div, 0.0
+  ret i1 %cmp
+}
+
+; Can fold 2.0 / X > 0.0 --> X > 0 with ninf
+define i1 @test22_recipX_ogt_0(float %X) {
+; CHECK-LABEL: @test22_recipX_ogt_0(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf ogt float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %div = fdiv ninf float 2.0, %X
+  %cmp = fcmp ninf ogt float %div, 0.0
+  ret i1 %cmp
+}
+
+; Can fold -1.0 / X >= 0.0 --> X <= 0 with ninf
+define i1 @test23_recipX_oge_0(float %X) {
+; CHECK-LABEL: @test23_recipX_oge_0(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf ole float [[X:%.*]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %div = fdiv ninf float -1.0, %X
+  %cmp = fcmp ninf oge float %div, 0.0
+  ret i1 %cmp
+}
+
+; Do not fold 2.0 / X > 0.0 when ninf is missing on the fcmp
+define i1 @test24_recipX_noninf_cmp(float %X) {
+; CHECK-LABEL: @test24_recipX_noninf_cmp(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv ninf float 2.000000e+00, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[DIV]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %div = fdiv ninf float 2.0, %X
+  %cmp = fcmp ogt float %div, 0.0
+  ret i1 %cmp
+}
+
+; Do not fold 2.0 / X > 0.0 when ninf is missing on the fdiv
+define i1 @test25_recipX_noninf_div(float %X) {
+; CHECK-LABEL: @test25_recipX_noninf_div(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv float 2.000000e+00, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf ogt float [[DIV]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %div = fdiv float 2.0, %X
+  %cmp = fcmp ninf ogt float %div, 0.0
+  ret i1 %cmp
+}
+
+; Do not fold 2.0 / X > 0.0 with unordered predicates
+define i1 @test26_recipX_unorderd(float %X) {
+; CHECK-LABEL: @test26_recipX_unorderd(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv ninf float 2.000000e+00, [[X:%.*]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf ugt float [[DIV]], 0.000000e+00
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %div = fdiv ninf float 2.0, %X
+  %cmp = fcmp ninf ugt float %div, 0.0
+  ret i1 %cmp
+}
+
+; Fold <-1.0, -1.0> / X > <-0.0, -0.0>
+define <2 x i1> @test27_recipX_gt_vecsplat(<2 x float> %X) {
+; CHECK-LABEL: @test27_recipX_gt_vecsplat(
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp ninf olt <2 x float> [[X:%.*]], <float -0.000000e+00, float -0.000000e+00>
+; CHECK-NEXT:    ret <2 x i1> [[CMP]]
+;
+  %div = fdiv ninf <2 x float> <float -1.0, float -1.0>, %X
+  %cmp = fcmp ninf ogt <2 x float> %div, <float -0.0, float -0.0>
+  ret <2 x i1> %cmp
+}