Index: lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCompares.cpp +++ lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -5272,6 +5272,59 @@ ConstantExpr::getFNeg(RHSC)); break; } + case Instruction::FDiv: { + // Assume C != 0 is a constant and a and d are floating point variables. + // 1: a != 0 ... Because a is the divisor (C / a), + // this is implicitly given by the flag 'ninf' + // 2: d = C / a + // 3: (d < 0) + // + // To simplify 3: execute the following steps + // + // 4: (C / a < 0) ... substitute d by C / a + // 5: (C*a < 0) ... multiply by a*a (note a*a is positive for a in + // float) + // 6: ... divide by C + // 7.1: (a < 0) ... if C > 0 + // 7.2: (a > 0) ... if C < 0 + // + // This transformation works for the ordered variants of <=, <, >, >= + + // Check that predicates are valid. + if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) && + (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE)) + break; + + // Check that RHS operand matches the form in (3:) + if (!match(RHSC, m_AnyZeroFP())) + break; + + // Check fastmath flags ('ninf'). This is a requirement for 1: and 5:. + if (!LHSI->hasNoInfs() || !I.hasNoInfs()) + break; + + // Check the properties of the dividend + const APFloat *C; + if (!match(LHSI->getOperand(0), m_APFloat(C))) + break; + + // Division by zero is not allowed (see 6:). NOTE(review): a NaN C is not filtered here - confirm it cannot occur. + if (C->isZero()) + break; + + // Get the new predicate (see 7:) + FCmpInst::Predicate NewPred; + if (C->isNegative()) + NewPred = I.getSwappedPredicate(); + else + NewPred = I.getPredicate(); + + // Finally emit the new fcmp. 
+ auto *a = LHSI->getOperand(1); + auto NewFCI = new FCmpInst(NewPred, a, RHSC); + NewFCI->setFastMathFlags(I.getFastMathFlags()); + return NewFCI; + } case Instruction::Load: if (GetElementPtrInst *GEP = dyn_cast(LHSI->getOperand(0))) { Index: test/Transforms/InstCombine/fcmp.ll =================================================================== --- test/Transforms/InstCombine/fcmp.ll +++ test/Transforms/InstCombine/fcmp.ll @@ -377,3 +377,135 @@ ret i1 %cmp } +; Can fold with ninf (arcp is not required) +; %2 = fdiv ninf double 1.0, %1 +; %3 = fcmp ninf olt double %2, 0.0 +; => +; %3 = fcmp ninf olt double %1, 0.0 +define i1 @test20_recip(double %arg_d, float %arg_f) { +; CHECK-LABEL: @test20_recip( +; CHECK-SAME: double [[AD:%.*]], float [[AF:%.*]]) +; +; DoubleTy with all allowed predicates. Note: fcmp args are swapped +; +; CHECK: %cmp1 = fcmp ninf ogt double [[AD]], 0.000000e+00 +; CHECK: %cmp2 = fcmp ninf olt double [[AD]], 0.000000e+00 +; CHECK: %cmp3 = fcmp ninf oge double [[AD]], 0.000000e+00 +; CHECK: %cmp4 = fcmp ninf ole double [[AD]], 0.000000e+00 + + %div_dp = fdiv ninf double 1.0, %arg_d + + %cmp1 = fcmp ninf olt double 0.0, %div_dp + %cmp2 = fcmp ninf ogt double 0.0, %div_dp + %cmp3 = fcmp ninf ole double 0.0, %div_dp + %cmp4 = fcmp ninf oge double 0.0, %div_dp + %res2 = xor i1 %cmp1, %cmp2 + %res3 = xor i1 %res2, %cmp3 + %res4 = xor i1 %res3, %cmp4 + +; FloatTy with all allowed predicates +; +; CHECK: %cmp5 = fcmp ninf olt float [[AF]], 0.000000e+00 +; CHECK: %cmp6 = fcmp ninf ogt float [[AF]], 0.000000e+00 +; CHECK: %cmp7 = fcmp ninf ole float [[AF]], 0.000000e+00 +; CHECK: %cmp8 = fcmp ninf oge float [[AF]], 0.000000e+00 + + %div_fp = fdiv ninf float 2.0, %arg_f + + %cmp5 = fcmp ninf olt float %div_fp, 0.0 + %cmp6 = fcmp ninf ogt float %div_fp, 0.0 + %cmp7 = fcmp ninf ole float %div_fp, 0.0 + %cmp8 = fcmp ninf oge float %div_fp, 0.0 + %res5 = xor i1 %res4, %cmp5 + %res6 = xor i1 %res5, %cmp6 + %res7 = xor i1 %res6, %cmp7 + %res8 = xor i1 %res7, %cmp8 + +; 
Negative dividend: predicate gets swapped +; +; CHECK: %cmp9 = fcmp ninf olt float [[AF]], 0.000000e+00 +; CHECK: %cmp10 = fcmp ninf ogt float [[AF]], 0.000000e+00 +; CHECK: %cmp11 = fcmp ninf ole float [[AF]], 0.000000e+00 +; CHECK: %cmp12 = fcmp ninf oge float [[AF]], 0.000000e+00 +; CHECK: %cmp13 = fcmp ninf ogt double [[AD]], 0.000000e+00 +; CHECK: %cmp14 = fcmp ninf olt double [[AD]], 0.000000e+00 +; CHECK: %cmp15 = fcmp ninf oge double [[AD]], 0.000000e+00 +; CHECK: %cmp16 = fcmp ninf ole double [[AD]], 0.000000e+00 + + %div_dn = fdiv ninf double -1.0, %arg_d + %div_fn = fdiv ninf float -3.0, %arg_f + + %cmp9 = fcmp ninf olt float 0.0, %div_fn + %cmp10 = fcmp ninf ogt float 0.0, %div_fn + %cmp11 = fcmp ninf ole float 0.0, %div_fn + %cmp12 = fcmp ninf oge float 0.0, %div_fn + %cmp13 = fcmp ninf olt double %div_dn, 0.0 + %cmp14 = fcmp ninf ogt double %div_dn, 0.0 + %cmp15 = fcmp ninf ole double %div_dn, 0.0 + %cmp16 = fcmp ninf oge double %div_dn, 0.0 + %res9 = xor i1 %res8, %cmp9 + %res10 = xor i1 %res9, %cmp10 + %res11 = xor i1 %res10, %cmp11 + %res12 = xor i1 %res11, %cmp12 + %res13 = xor i1 %res12, %cmp13 + %res14 = xor i1 %res13, %cmp14 + %res15 = xor i1 %res14, %cmp15 + %res16 = xor i1 %res15, %cmp16 + +; Must not fold: non-constant dividend (%div_inv1) or missing 'ninf' on the fdiv (%div_inv2) +; + %div_inv1 = fdiv ninf float %arg_f, 3.0 + %div_inv2 = fdiv float 1.0, %arg_f + +; CHECK: %cmpI0 = fcmp ninf ogt float %div_inv1, 0.000000e+00 +; CHECK: %cmpI1 = fcmp ninf ogt float %div_inv2, 0.000000e+00 + %cmpI0 = fcmp ninf olt float 0.0, %div_inv1 + %cmpI1 = fcmp ninf ogt float %div_inv2, 0.0 + %resI0 = xor i1 %res16, %cmpI0 + %resI1 = xor i1 %resI0, %cmpI1 + +; Unordered predicates +; +; CHECK: %cmpO0 = fcmp ninf ugt float %div_fp, 0.000000e+00 +; CHECK: %cmpO1 = fcmp ninf ult float %div_fp, 0.000000e+00 +; CHECK: %cmpO2 = fcmp ninf uge float %div_fp, 0.000000e+00 +; CHECK: %cmpO3 = fcmp ninf ule float %div_fp, 0.000000e+00 + %cmpO0 = fcmp ninf ult float 0.0, %div_fp + %cmpO1 = fcmp ninf ugt float 0.0, %div_fp + %cmpO2 
= fcmp ninf ule float 0.0, %div_fp + %cmpO3 = fcmp ninf uge float 0.0, %div_fp + %resO0 = xor i1 %resI1, %cmpO0 + %resO1 = xor i1 %resO0, %cmpO1 + %resO2 = xor i1 %resO1, %cmpO2 + %resO3 = xor i1 %resO2, %cmpO3 + +; CHECK: ret i1 %resO3 + ret i1 %resO3 +} + +; vector tests for test20_recip +define < 2 x i1> @test21_recip_vec(<2 x float> %arg_f) { +; CHECK-LABEL: @test21_recip_vec( +; CHECK-SAME: <2 x float> [[AF:%.*]]) + +; CHECK: %resV0 = fcmp ninf oge <2 x float> %arg_f, zeroinitializer + %divV0 = fdiv ninf <2 x float> , %arg_f + %resV0 = fcmp ninf oge <2 x float> %divV0, zeroinitializer +; CHECK: %resV1 = fcmp ninf ole <2 x float> %arg_f, zeroinitializer + %divV1 = fdiv ninf <2 x float> , %arg_f + %resV1 = fcmp ninf oge <2 x float> %divV1, zeroinitializer +; CHECK: %resV2 = fcmp ninf oge <2 x float> %divV2, zeroinitializer + %divV2 = fdiv ninf <2 x float> zeroinitializer, %arg_f + %resV2 = fcmp ninf oge <2 x float> %divV2, zeroinitializer +; CHECK: %resV3 = fcmp ninf oge <2 x float> %divV3, zeroinitializer + %divV3 = fdiv ninf <2 x float> , %arg_f + %resV3 = fcmp ninf oge <2 x float> %divV3, zeroinitializer + +; prevent DCE + %res1 = xor <2 x i1> %resV0, %resV1 + %res2 = xor <2 x i1> %res1, %resV2 + %res3 = xor <2 x i1> %res2, %resV3 + +; CHECK: ret <2 x i1> %res3 + ret <2 x i1> %res3 +}