Index: lib/Transforms/InstCombine/InstCombineCompares.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5272,6 +5272,65 @@
                               ConstantExpr::getFNeg(RHSC));
         break;
       }
+      case Instruction::FDiv: {
+        // Assume C != 0 is a constant and a and d are floating point variables.
+        //  1: a != 0        ... because a is the denominator of a division,
+        //                       this is implicitly given by the flag 'ninf'
+        //  2: d = C / a
+        //  3: (d < 0)
+        //
+        // To simplify 3: execute the following steps
+        //
+        //  4: (C / a < 0)   ... substitute d by C / a
+        //  5: (C * a < 0)   ... multiply by a*a (positive, since a != 0)
+        //  6:               ... divide by C (the sign of C decides 7.1 / 7.2)
+        //  7.1: (a < 0)     ... if C > 0
+        //  7.2: (a > 0)     ... if C < 0
+        //
+        // This transformation works for the ordered variants of <=, <, >, >=
+        // NOTE(review): a NaN C is not rejected below; confirm it cannot occur.
+
+        // Check that the RHS operand matches the form in (3:).
+        ConstantFP *RHSCfp = dyn_cast<ConstantFP>(RHSC);
+        if (!RHSCfp)
+          break;
+
+        if (!RHSCfp->isZero())
+          break;
+
+        // Check that predicates are valid.
+        FCmpInst::Predicate Pred = I.getPredicate();
+
+        if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
+            (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
+          break;
+
+        // Check fastmath flags ('ninf', 'arcp'). This is a requirement for (1:,
+        // 5:, 6:).
+        if (!LHSI->hasAllowReciprocal() || !LHSI->hasNoInfs() ||
+            !I.hasAllowReciprocal() || !I.hasNoInfs())
+          break;
+
+        // Check if the dividend (operand 0) of the division is constant.
+        ConstantFP *Const = dyn_cast<ConstantFP>(LHSI->getOperand(0));
+        if (!Const)
+          break;
+
+        // The dividend must not be zero (precondition).
+        if (Const->isZero())
+          break;
+
+        // If the dividend is negative swap the predicate (7.2:).
+        if (Const->isNegative())
+          Pred = I.getSwappedPredicate();
+
+        // Finally emit the new fcmp on the divisor (operand 1).
+        auto *NewFCI = new FCmpInst(Pred, LHSI->getOperand(1), RHSC);
+        NewFCI->setFastMathFlags(I.getFastMathFlags());
+        NewFCI->copyMetadata(I);
+
+        return NewFCI;
+      }
       case Instruction::Load:
         if (GetElementPtrInst *GEP =
             dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
Index: test/Transforms/InstCombine/fcmp.ll
===================================================================
--- test/Transforms/InstCombine/fcmp.ll
+++ test/Transforms/InstCombine/fcmp.ll
@@ -377,3 +377,119 @@
   ret i1 %cmp
 }
 
+; Can fold with ninf and arcp
+; %2 = fdiv ninf arcp double 1.0, %1
+; %3 = fcmp ninf arcp olt double %2, 0.0
+; =>
+; %3 = fcmp ninf arcp olt double %1, 0.0
+define <25 x i1> @test20_recip(double %arg_d, float %arg_f) {
+; CHECK-LABEL: @test20_recip(
+; CHECK-SAME: double [[AD:%.*]], float [[AF:%.*]])
+;
+; DoubleTy with all allowed predicates. Note: the constant is the LHS of each fcmp here, so the expected predicate is the swapped one
+;
+; CHECK: %cmp1 = fcmp ninf arcp ogt double [[AD]], 0.000000e+00
+; CHECK: %cmp2 = fcmp ninf arcp olt double [[AD]], 0.000000e+00
+; CHECK: %cmp3 = fcmp ninf arcp oge double [[AD]], 0.000000e+00
+; CHECK: %cmp4 = fcmp ninf arcp ole double [[AD]], 0.000000e+00
+
+  %div_dp = fdiv ninf arcp double 1.0, %arg_d
+
+  %cmp1 = fcmp ninf arcp olt double 0.0, %div_dp
+  %cmp2 = fcmp ninf arcp ogt double 0.0, %div_dp
+  %cmp3 = fcmp ninf arcp ole double 0.0, %div_dp
+  %cmp4 = fcmp ninf arcp oge double 0.0, %div_dp
+  %res1 = insertelement <25 x i1> undef, i1 %cmp1, i32 0
+  %res2 = insertelement <25 x i1> %res1, i1 %cmp2, i32 1
+  %res3 = insertelement <25 x i1> %res2, i1 %cmp3, i32 2
+  %res4 = insertelement <25 x i1> %res3, i1 %cmp4, i32 3
+
+; FloatTy with all allowed predicates
+;
+; CHECK: %cmp5 = fcmp ninf arcp olt float [[AF]], 0.000000e+00
+; CHECK: %cmp6 = fcmp ninf arcp ogt float [[AF]], 0.000000e+00
+; CHECK: %cmp7 = fcmp ninf arcp ole float [[AF]], 0.000000e+00
+; CHECK: %cmp8 = fcmp ninf arcp oge float [[AF]], 0.000000e+00
+
+  %div_fp = fdiv ninf arcp float 2.0, %arg_f
+
+  %cmp5 = fcmp ninf arcp olt float %div_fp, 0.0
+  %cmp6 = fcmp ninf arcp ogt float %div_fp, 0.0
+  %cmp7 = fcmp ninf arcp ole float %div_fp, 0.0
+  %cmp8 = fcmp ninf arcp oge float %div_fp, 0.0
+  %res5 = insertelement <25 x i1> %res4, i1 %cmp5, i32 4
+  %res6 = insertelement <25 x i1> %res5, i1 %cmp6, i32 5
+  %res7 = insertelement <25 x i1> %res6, i1 %cmp7, i32 6
+  %res8 = insertelement <25 x i1> %res7, i1 %cmp8, i32 7
+
+; Negative dividend: the predicate gets swapped
+;
+; CHECK: %cmp9 = fcmp ninf arcp olt float [[AF]], 0.000000e+00
+; CHECK: %cmp10 = fcmp ninf arcp ogt float [[AF]], 0.000000e+00
+; CHECK: %cmp11 = fcmp ninf arcp ole float [[AF]], 0.000000e+00
+; CHECK: %cmp12 = fcmp ninf arcp oge float [[AF]], 0.000000e+00
+; CHECK: %cmp13 = fcmp ninf arcp ogt double [[AD]], 0.000000e+00
+; CHECK: %cmp14 = fcmp ninf arcp olt double [[AD]], 0.000000e+00
+; CHECK: %cmp15 = fcmp ninf arcp oge double [[AD]], 0.000000e+00
+; CHECK: %cmp16 = fcmp ninf arcp ole double [[AD]], 0.000000e+00
+
+  %div_dn = fdiv ninf arcp double -1.0, %arg_d
+  %div_fn = fdiv ninf arcp float -3.0, %arg_f
+
+  %cmp9 = fcmp ninf arcp olt float 0.0, %div_fn
+  %cmp10 = fcmp ninf arcp ogt float 0.0, %div_fn
+  %cmp11 = fcmp ninf arcp ole float 0.0, %div_fn
+  %cmp12 = fcmp ninf arcp oge float 0.0, %div_fn
+  %cmp13 = fcmp ninf arcp olt double %div_dn, 0.0
+  %cmp14 = fcmp ninf arcp ogt double %div_dn, 0.0
+  %cmp15 = fcmp ninf arcp ole double %div_dn, 0.0
+  %cmp16 = fcmp ninf arcp oge double %div_dn, 0.0
+  %res9 = insertelement <25 x i1> %res8, i1 %cmp9, i32 8
+  %res10 = insertelement <25 x i1> %res9, i1 %cmp10, i32 9
+  %res11 = insertelement <25 x i1> %res10, i1 %cmp11, i32 10
+  %res12 = insertelement <25 x i1> %res11, i1 %cmp12, i32 11
+  %res13 = insertelement <25 x i1> %res12, i1 %cmp13, i32 12
+  %res14 = insertelement <25 x i1> %res13, i1 %cmp14, i32 13
+  %res15 = insertelement <25 x i1> %res14, i1 %cmp15, i32 14
+  %res16 = insertelement <25 x i1> %res15, i1 %cmp16, i32 15
+
+; Non-constant dividend or missing fast-math flags: the fold must not fire
+;
+  %div_inv1 = fdiv ninf arcp float %arg_f, 3.0
+  %div_inv2 = fdiv ninf float 1.0, %arg_f
+  %div_inv3 = fdiv arcp float 1.0, %arg_f
+
+; CHECK: %cmpI0 = fcmp ninf arcp ogt float %div_inv1, 0.000000e+00
+; CHECK: %cmpI1 = fcmp ninf arcp olt float %div_inv2, 0.000000e+00
+; CHECK: %cmpI2 = fcmp ninf arcp ole float %div_inv3, 0.000000e+00
+  %cmpI0 = fcmp ninf arcp olt float 0.0, %div_inv1
+  %cmpI1 = fcmp ninf arcp ogt float 0.0, %div_inv2
+  %cmpI2 = fcmp ninf arcp ole float %div_inv3, 0.0
+; CHECK: %cmpI3 = fcmp arcp ole float %div_fp, 0.000000e+00
+; CHECK: %cmpI4 = fcmp ninf olt float %div_fp, 0.000000e+00
+  %cmpI3 = fcmp arcp oge float 0.0, %div_fp
+  %cmpI4 = fcmp ninf olt float %div_fp, 0.0
+  %resI0 = insertelement <25 x i1> %res16, i1 %cmpI0, i32 16
+  %resI1 = insertelement <25 x i1> %resI0, i1 %cmpI1, i32 17
+  %resI2 = insertelement <25 x i1> %resI1, i1 %cmpI2, i32 18
+  %resI3 = insertelement <25 x i1> %resI2, i1 %cmpI3, i32 19
+  %resI4 = insertelement <25 x i1> %resI3, i1 %cmpI4, i32 20
+
+; Unordered predicates: the fold must not fire
+;
+; CHECK: %cmpI5 = fcmp ninf arcp ugt float %div_fp, 0.000000e+00
+; CHECK: %cmpI6 = fcmp ninf arcp ult float %div_fp, 0.000000e+00
+; CHECK: %cmpI7 = fcmp ninf arcp uge float %div_fp, 0.000000e+00
+; CHECK: %cmpI8 = fcmp ninf arcp ule float %div_fp, 0.000000e+00
+  %cmpI5 = fcmp ninf arcp ult float 0.0, %div_fp
+  %cmpI6 = fcmp ninf arcp ugt float 0.0, %div_fp
+  %cmpI7 = fcmp ninf arcp ule float 0.0, %div_fp
+  %cmpI8 = fcmp ninf arcp uge float 0.0, %div_fp
+  %resI5 = insertelement <25 x i1> %resI4, i1 %cmpI5, i32 21
+  %resI6 = insertelement <25 x i1> %resI5, i1 %cmpI6, i32 22
+  %resI7 = insertelement <25 x i1> %resI6, i1 %cmpI7, i32 23
+  %resI8 = insertelement <25 x i1> %resI7, i1 %cmpI8, i32 24
+
+; CHECK: ret <25 x i1> %resI8
+  ret <25 x i1> %resI8
+}