diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -6769,6 +6769,35 @@ if (Instruction *Res = foldICmpPow2Test(I, Builder)) return Res; + { + // Power of 2 test: + // isPow2OrZero : ctpop(X) u< 2 + // isPow2 : ctpop(X) == 1 + // NotPow2OrZero: ctpop(X) u> 1 + // NotPow2 : ctpop(X) != 1 + // If we know any bit of X can be folded to: + // IsPow2 : X & (~Bit) == 0 + // NotPow2 : X & (~Bit) != 0 + Value *A; + const APInt *C; + if (match(Op0, m_OneUse(m_Intrinsic(m_Value(A)))) && + match(Op1, m_APInt(C)) && + (((I.isEquality() || Pred == ICmpInst::ICMP_UGT) && *C == 1) || + (Pred == ICmpInst::ICMP_ULT && *C == 2))) { + KnownBits AKnown = computeKnownBits(A, /*Depth*/ 0, &I); + // No need to check for count > 1, that should be already constant folded. + if (AKnown.countMinPopulation() == 1) { + Value *And = Builder.CreateAnd( + A, Constant::getIntegerValue(A->getType(), ~(AKnown.One))); + return new ICmpInst( + (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_ULT) + ? ICmpInst::ICMP_EQ + : ICmpInst::ICMP_NE, + And, Constant::getNullValue(A->getType())); + } + } + } + if (Instruction *Res = foldICmpOfUAddOv(I)) return Res; diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll --- a/llvm/test/Transforms/InstCombine/ispow2.ll +++ b/llvm/test/Transforms/InstCombine/ispow2.ll @@ -1213,9 +1213,8 @@ declare <2 x i32> @llvm.ctpop.2xi32(<2 x i32>) define i1 @is_pow2_nz_known_bits(i32 %xin) { ; CHECK-LABEL: @is_pow2_nz_known_bits( -; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], 64 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3:![0-9]+]] -; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[CNT]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[XIN:%.*]], -65 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %x = or i32 %xin, 64 @@ -1227,7 +1226,7 @@ define i1 @is_pow2_nz_known_bits_fail_multiuse(i32 %xin) { ; CHECK-LABEL: @is_pow2_nz_known_bits_fail_multiuse( ; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], 64 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]] +; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3:![0-9]+]] ; CHECK-NEXT: call void @use.i32(i32 [[CNT]]) ; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[CNT]], 1 ; CHECK-NEXT: ret i1 [[R]] @@ -1241,9 +1240,7 @@ define i1 @not_pow2_nz_known_bits(i32 %xin) { ; CHECK-LABEL: @not_pow2_nz_known_bits( -; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], 1 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[CNT]], 1 +; CHECK-NEXT: [[R:%.*]] = icmp ugt i32 [[XIN:%.*]], 1 ; CHECK-NEXT: ret i1 [[R]] ; %x = or i32 %xin, 1 @@ -1267,9 +1264,8 @@ define i1 @is_pow2_or_z_known_bits(i32 %xin) { ; CHECK-LABEL: @is_pow2_or_z_known_bits( -; CHECK-NEXT: [[X:%.*]] = or i32 [[XIN:%.*]], -2147483648 -; CHECK-NEXT: [[CNT:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range [[RNG3]] -; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[CNT]], 2 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[XIN:%.*]], 2147483647 +; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %x = or i32 %xin, 2147483648 @@ -1280,9 +1276,8 @@ define <2 x i1> @not_pow2_or_z_known_bits(<2 x i32> %xin) { ; CHECK-LABEL: @not_pow2_or_z_known_bits( -; CHECK-NEXT: [[X:%.*]] = or <2 x i32> [[XIN:%.*]], -; CHECK-NEXT: [[CNT:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X]]), !range [[RNG3]] -; CHECK-NEXT: [[R:%.*]] = icmp ugt <2 x i32> [[CNT]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[XIN:%.*]], +; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i32> [[TMP1]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[R]] ; %x = or <2 x i32> %xin,