diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3335,6 +3335,44 @@ return nullptr; } +static Instruction *foldCtpopPow2Test(ICmpInst &I, IntrinsicInst *CtpopLhs, + const APInt &CRhs, + InstCombiner::BuilderTy &Builder, + const SimplifyQuery &Q) { + assert(CtpopLhs->getIntrinsicID() == Intrinsic::ctpop && + "Non-ctpop intrin in ctpop fold"); + if (!CtpopLhs->hasOneUse()) + return nullptr; + + // Power of 2 test: + // isPow2OrZero : ctpop(X) u< 2 + // isPow2 : ctpop(X) == 1 + // NotPow2OrZero: ctpop(X) u> 1 + // NotPow2 : ctpop(X) != 1 + // If we know any bit of X can be folded to: + // IsPow2 : X & (~Bit) == 0 + // NotPow2 : X & (~Bit) != 0 + const ICmpInst::Predicate Pred = I.getPredicate(); + if (((I.isEquality() || Pred == ICmpInst::ICMP_UGT) && CRhs == 1) || + (Pred == ICmpInst::ICMP_ULT && CRhs == 2)) { + Value *Op = CtpopLhs->getArgOperand(0); + KnownBits OpKnown = llvm::computeKnownBits(Op, Q.DL, + /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT); + // No need to check for count > 1, that should be already constant folded. + if (OpKnown.countMinPopulation() == 1) { + Value *And = Builder.CreateAnd( + Op, Constant::getIntegerValue(Op->getType(), ~(OpKnown.One))); + return new ICmpInst( + (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_ULT) + ? ICmpInst::ICMP_EQ + : ICmpInst::ICMP_NE, + And, Constant::getNullValue(Op->getType())); + } + } + + return nullptr; +} + /// Fold an equality icmp with LLVM intrinsic and constant operand. Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant( ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) { @@ -3376,8 +3414,8 @@ APInt Mask1 = IsTrailing ? APInt::getLowBitsSet(BitWidth, Num + 1) : APInt::getHighBitsSet(BitWidth, Num + 1); APInt Mask2 = IsTrailing - ? APInt::getOneBitSet(BitWidth, Num) - : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1); + ? APInt::getOneBitSet(BitWidth, Num) + : APInt::getOneBitSet(BitWidth, BitWidth - Num - 1); return new ICmpInst(Pred, Builder.CreateAnd(II->getArgOperand(0), Mask1), ConstantInt::get(Ty, Mask2)); } @@ -3392,6 +3430,9 @@ return new ICmpInst(Pred, II->getArgOperand(0), IsZero ? Constant::getNullValue(Ty) : Constant::getAllOnesValue(Ty)); + const SimplifyQuery Q = SQ.getWithInstruction(&Cmp); + if (Instruction *R = foldCtpopPow2Test(Cmp, II, C, Builder, Q)) + return R; break; } @@ -3600,6 +3641,11 @@ if (C == BitWidth && Pred == ICmpInst::ICMP_ULT) return CmpInst::Create(Instruction::ICmp, ICmpInst::ICMP_NE, X, ConstantInt::getAllOnesValue(Ty)); + + const SimplifyQuery Q = SQ.getWithInstruction(&Cmp); + if (Instruction *R = foldCtpopPow2Test(Cmp, II, C, Builder, Q)) + return R; + break; } case Intrinsic::ctlz: { diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll --- a/llvm/test/Transforms/InstCombine/ispow2.ll +++ b/llvm/test/Transforms/InstCombine/ispow2.ll @@ -1213,9 +1213,8 @@ declare <2 x i32> @llvm.ctpop.2xi32(<2 x i32>) define i1 @is_pow2_nz_known_bits(i32 %xin) { ; CHECK-LABEL: @is_pow2_nz_known_bits( -; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], 64 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3:![0-9]+]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[CNT]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[XIN:%.*]], -65 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %x = or i32 %xin, 64 @@ -1227,7 +1226,7 @@ define i1 @is_pow2_nz_known_bits_fail_multiuse(i32 %xin) { ; CHECK-LABEL: @is_pow2_nz_known_bits_fail_multiuse( ; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], 64 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]] +; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3:![0-9]+]] ; CHECK-NEXT: call void @use.i32(i32 [[CNT]]) ; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[CNT]], 1 ; CHECK-NEXT: ret i1 [[R]] @@ -1241,9 +1240,7 @@ define i1 @not_pow2_nz_known_bits(i32 %xin) { ; CHECK-LABEL: @not_pow2_nz_known_bits( -; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], 1 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[CNT]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i32 [[XIN:%.*]], 1 ; CHECK-NEXT: ret i1 [[R]] ; %x = or i32 %xin, 1 @@ -1267,9 +1264,8 @@ define i1 @is_pow2_or_z_known_bits(i32 %xin) { ; CHECK-LABEL: @is_pow2_or_z_known_bits( -; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], -2147483648 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]] -; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[CNT]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[XIN:%.*]], 2147483647 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %x = or i32 %xin, 2147483648 @@ -1280,9 +1276,8 @@ define <2 x i1> @not_pow2_or_z_known_bits(<2 x i32> %xin) { ; CHECK-LABEL: @not_pow2_or_z_known_bits( -; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[XIN:%.*]], -; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X]]), !range [[RNG3]] -; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i32> [[CNT]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[XIN:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; %x = or <2 x i32> %xin,