Index: lib/Transforms/InstCombine/InstCombine.h =================================================================== --- lib/Transforms/InstCombine/InstCombine.h +++ lib/Transforms/InstCombine/InstCombine.h @@ -193,6 +193,8 @@ ConstantInt *CI1, ConstantInt *CI2); Instruction *FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A, ConstantInt *CI1, ConstantInt *CI2); + Instruction *FoldUICmpCstShlCst(ICmpInst &I, Value *Op, Value *A, + ConstantInt *CI1, ConstantInt *CI2); Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI, ICmpInst::Predicate Pred); Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS, Index: lib/Transforms/InstCombine/InstCombineCompares.cpp =================================================================== --- lib/Transforms/InstCombine/InstCombineCompares.cpp +++ lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1135,6 +1135,78 @@ return getConstant(false); } +/// FoldUICmpCstShlCst - Handle "(icmp ugt/uge/ult/ule (shl const2, A), const1)" +/// -> (icmp eq/ne A, LeadingZeros(const2) - LeadingZeros(const1)). +Instruction *InstCombiner::FoldUICmpCstShlCst(ICmpInst &I, Value *Op, Value *A, + ConstantInt *CI1, + ConstantInt *CI2) { + assert(I.isUnsigned() && "Cannot fold icmp sgt/sge/slt/sle"); + + APInt AP1 = CI1->getValue(); + APInt AP2 = CI2->getValue(); + + Instruction *Result = nullptr; + + // Don't bother doing any work for cases which InstSimplify handles. + if (AP2 == 0 || AP1 == 0) + return nullptr; + + if (AP1 == AP2) { + if (I.getPredicate() == I.ICMP_UGT) { + Result = + new ICmpInst(I.ICMP_NE, A, ConstantInt::getNullValue(A->getType())); + } else if (I.getPredicate() == I.ICMP_UGE) { + Result = ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), true)); + } else if (I.getPredicate() == I.ICMP_ULT) { + Result = ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), false)); + } else { + Result = + new ICmpInst(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType())); + } + return Result; + } + + // Get the distance between the highest bits that are set. + int AP2TrailingZeros = AP2.countTrailingZeros(); + int Shift = AP2.countLeadingZeros() - AP1.countLeadingZeros(); + + if (AP1.ult(AP2)) { + if (I.getPredicate() == I.ICMP_UGT || I.getPredicate() == I.ICMP_UGE) { + if (AP2TrailingZeros == 0) + Result = ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), true)); + else + Result = + new ICmpInst(I.ICMP_ULT, A, + ConstantInt::get(A->getType(), AP2.getBitWidth() - + AP2TrailingZeros)); + } else { + if (AP2TrailingZeros == 0) + Result = ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), false)); + else + Result = + new ICmpInst(I.ICMP_UGE, A, + ConstantInt::get(A->getType(), AP2.getBitWidth() - + AP2TrailingZeros)); + } + } else if (Shift >= 0 && AP2TrailingZeros == 0) { + if (AP2.shl(Shift) == AP1) { + Result = new ICmpInst(I.getPredicate(), A, + ConstantInt::get(A->getType(), Shift)); + } else { + if (AP2.shl(Shift).ugt(AP1)) + Shift--; + if (I.getPredicate() == I.ICMP_UGT || I.getPredicate() == I.ICMP_UGE) + Result = + new ICmpInst(I.ICMP_UGT, A, ConstantInt::get(A->getType(), Shift)); + else + Result = new ICmpInst(I.ICMP_ULT, A, + ConstantInt::get(A->getType(), Shift + 1)); + } + } + + return Result; +} + /// visitICmpInstWithInstAndIntCst - Handle "icmp (instr, intcst)". /// Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, @@ -1311,7 +1383,7 @@ cast(ConstantExpr::getShl(AndCst, ShAmt)); ConstantInt *ShiftedRHSCst = cast(ConstantExpr::getShl(RHS, ShAmt)); - + if (!ShiftedAndCst->isNegative() && !ShiftedRHSCst->isNegative()) CanFold = true; } @@ -2591,18 +2663,23 @@ Builder->getInt(CI->getValue()-1)); } - if (I.isEquality()) { - ConstantInt *CI2; - if (match(Op0, m_AShr(m_ConstantInt(CI2), m_Value(A))) || - match(Op0, m_LShr(m_ConstantInt(CI2), m_Value(A)))) { + ConstantInt *CI2; + if (match(Op0, m_AShr(m_ConstantInt(CI2), m_Value(A))) || + match(Op0, m_LShr(m_ConstantInt(CI2), m_Value(A)))) { + if (I.isEquality()) { // (icmp eq/ne (ashr/lshr const2, A), const1) if (Instruction *Inst = FoldICmpCstShrCst(I, Op0, A, CI, CI2)) return Inst; } - if (match(Op0, m_Shl(m_ConstantInt(CI2), m_Value(A)))) { + } else if (match(Op0, m_Shl(m_ConstantInt(CI2), m_Value(A)))) { + if (I.isEquality()) { // (icmp eq/ne (shl const2, A), const1) if (Instruction *Inst = FoldICmpCstShlCst(I, Op0, A, CI, CI2)) return Inst; + } else if (I.isUnsigned()) { + // (icmp ult/ule/ugt/uge (shl const2, A), const1) + if (Instruction *Inst = FoldUICmpCstShlCst(I, Op0, A, CI, CI2)) + return Inst; } } Index: test/Transforms/InstCombine/icmp.ll =================================================================== --- test/Transforms/InstCombine/icmp.ll +++ test/Transforms/InstCombine/icmp.ll @@ -1511,3 +1511,145 @@ %cmp = icmp sle i32 %add, 0 ret i1 %cmp } + +define i1 @shl_ugt_both_equal(i32 %a) { +; CHECK-LABEL: @shl_ugt_both_equal( +; CHECK-NEXT: %cmp = icmp ne i32 %a, 0 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 4895, %a + %cmp = icmp ugt i32 %shl, 4895 + ret i1 %cmp +} + +define i1 @shl_uge_both_equal(i32 %a) { +; CHECK-LABEL: @shl_uge_both_equal( +; CHECK-NEXT: ret i1 true + %shl = shl i32 4895, %a + %cmp = icmp uge i32 %shl, 4895 + ret i1 %cmp +} + +define i1 @shl_ult_both_equal(i32 %a) { +; CHECK-LABEL: @shl_ult_both_equal( +; CHECK-NEXT: ret i1 false + %shl = shl i32 4895, %a + %cmp = icmp ult i32 %shl, 4895 + ret i1 %cmp +} + +define i1 @shl_ule_both_equal(i32 %a) { +; CHECK-LABEL: @shl_ule_both_equal( +; CHECK-NEXT: %cmp = icmp eq i32 %a, 0 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 4895, %a + %cmp = icmp ule i32 %shl, 4895 + ret i1 %cmp +} + +define i1 @shl_ugt_ap2_greater(i32 %a) { +; CHECK-LABEL: @shl_ugt_ap2_greater( +; CHECK-NEXT: %cmp = icmp ult i32 %a, 31 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 498, %a + %cmp = icmp ugt i32 %shl, 123 + ret i1 %cmp +} + +define i1 @shl_uge_ap2_greater(i32 %a) { +; CHECK-LABEL: @shl_uge_ap2_greater( +; CHECK-NEXT: %cmp = icmp ult i32 %a, 31 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 498, %a + %cmp = icmp uge i32 %shl, 123 + ret i1 %cmp +} + +define i1 @shl_ult_ap2_greater(i32 %a) { +; CHECK-LABEL: @shl_ult_ap2_greater( +; CHECK-NEXT: %cmp = icmp ugt i32 %a, 30 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 498, %a + %cmp = icmp ult i32 %shl, 123 + ret i1 %cmp +} + +define i1 @shl_ule_ap2_greater(i32 %a) { +; CHECK-LABEL: @shl_ule_ap2_greater( +; CHECK-NEXT: %cmp = icmp ugt i32 %a, 30 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 498, %a + %cmp = icmp ule i32 %shl, 123 + ret i1 %cmp +} + +define i1 @shl_ugt_ap1_greater_1(i32 %a) { +; CHECK-LABEL: @shl_ugt_ap1_greater_1( +; CHECK-NEXT: %cmp = icmp ne i32 %a, 0 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 75, %a + %cmp = icmp ugt i32 %shl, 108 + ret i1 %cmp +} + +define i1 @shl_uge_ap1_greater_1(i32 %a) { +; CHECK-LABEL: @shl_uge_ap1_greater_1( +; CHECK-NEXT: %cmp = icmp ne i32 %a, 0 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 75, %a + %cmp = icmp uge i32 %shl, 108 + ret i1 %cmp +} + +define i1 @shl_ult_ap1_greater_1(i32 %a) { +; CHECK-LABEL: @shl_ult_ap1_greater_1( +; CHECK-NEXT: %cmp = icmp eq i32 %a, 0 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 75, %a + %cmp = icmp ult i32 %shl, 108 + ret i1 %cmp +} + +define i1 @shl_ule_ap1_greater_1(i32 %a) { +; CHECK-LABEL: @shl_ule_ap1_greater_1( +; CHECK-NEXT: %cmp = icmp eq i32 %a, 0 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 75, %a + %cmp = icmp ule i32 %shl, 108 + ret i1 %cmp +} + +define i1 @shl_ugt_ap1_greater_2(i32 %a) { +; CHECK-LABEL: @shl_ugt_ap1_greater_2( +; CHECK-NEXT: %cmp = icmp ugt i32 %a, 1 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 75, %a + %cmp = icmp ugt i32 %shl, 150 + ret i1 %cmp +} + +define i1 @shl_uge_ap1_greater_2(i32 %a) { +; CHECK-LABEL: @shl_uge_ap1_greater_2( +; CHECK-NEXT: %cmp = icmp ne i32 %a, 0 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 75, %a + %cmp = icmp uge i32 %shl, 150 + ret i1 %cmp +} + +define i1 @shl_ult_ap1_greater_2(i32 %a) { +; CHECK-LABEL: @shl_ult_ap1_greater_2( +; CHECK-NEXT: %cmp = icmp eq i32 %a, 0 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 75, %a + %cmp = icmp ult i32 %shl, 150 + ret i1 %cmp +} + +define i1 @shl_ule_ap1_greater_2(i32 %a) { +; CHECK-LABEL: @shl_ule_ap1_greater_2( +; CHECK-NEXT: %cmp = icmp ult i32 %a, 2 +; CHECK-NEXT: ret i1 %cmp + %shl = shl i32 75, %a + %cmp = icmp ule i32 %shl, 150 + ret i1 %cmp +}