diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -3829,23 +3829,21 @@ return new ICmpInst(Pred, A, B); // Canonicalize checking for a power-of-2-or-zero value: - // (A & -A) == A --> (A & (A - 1)) == 0 - // (-A & A) == A --> (A & (A - 1)) == 0 - // A == (A & -A) --> (A & (A - 1)) == 0 - // A == (-A & A) --> (A & (A - 1)) == 0 - // TODO: This could be reduced by using the ctpop intrinsic. + // (A & -A) == A --> ctpop(A) < 2 (four commuted variants) + // (-A & A) != A --> ctpop(A) > 1 (four commuted variants) A = nullptr; - if (match(Op0, m_OneUse(m_c_And(m_OneUse(m_Neg(m_Specific(Op1))), - m_Specific(Op1))))) + if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1))))) A = Op1; - else if (match(Op1, m_OneUse(m_c_And(m_OneUse(m_Neg(m_Specific(Op0))), - m_Specific(Op0))))) + else if (match(Op1, + m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0))))) A = Op0; + if (A) { Type *Ty = A->getType(); - Value *Dec = Builder.CreateAdd(A, ConstantInt::getAllOnesValue(Ty)); - Value *And = Builder.CreateAnd(A, Dec); - return new ICmpInst(Pred, And, ConstantInt::getNullValue(Ty)); + CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A); + return Pred == ICmpInst::ICMP_EQ + ? 
new ICmpInst(ICmpInst::ICMP_ULT, CtPop, ConstantInt::get(Ty, 2)) + : new ICmpInst(ICmpInst::ICMP_UGT, CtPop, ConstantInt::get(Ty, 1)); } return nullptr; diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll --- a/llvm/test/Transforms/InstCombine/ispow2.ll +++ b/llvm/test/Transforms/InstCombine/ispow2.ll @@ -3,9 +3,8 @@ define i1 @is_pow2or0_negate_op(i32 %x) { ; CHECK-LABEL: @is_pow2or0_negate_op( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; %neg = sub i32 0, %x @@ -16,9 +15,8 @@ define <2 x i1> @is_pow2or0_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @is_pow2or0_negate_op_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1> -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[X]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i32> [[TMP1]], <i32 2, i32 2> ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %neg = sub <2 x i32> zeroinitializer, %x @@ -55,9 +53,8 @@ define i1 @isnot_pow2or0_negate_op(i32 %x) { ; CHECK-LABEL: @isnot_pow2or0_negate_op( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; %neg = sub i32 0, %x @@ -68,9 +65,8 @@ define <2 x i1> @isnot_pow2or0_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @isnot_pow2or0_negate_op_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1> -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> 
[[TMP1]], [[X]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> [[TMP1]], <i32 1, i32 1> ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %neg = sub <2 x i32> zeroinitializer, %x @@ -108,9 +104,8 @@ define i1 @is_pow2or0_negate_op_commute1(i32 %p) { ; CHECK-LABEL: @is_pow2or0_negate_op_commute1( ; CHECK-NEXT: [[X:%.*]] = srem i32 42, [[P:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], [[TMP1]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range !1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; %x = srem i32 42, %p ; thwart complexity-based canonicalization @@ -125,9 +120,8 @@ define i1 @isnot_pow2or0_negate_op_commute2(i32 %p) { ; CHECK-LABEL: @isnot_pow2or0_negate_op_commute2( ; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[P:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], [[TMP1]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range !2 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; %x = urem i32 42, %p ; thwart complexity-based canonicalization @@ -140,9 +134,8 @@ define i1 @isnot_pow2or0_negate_op_commute3(i32 %p) { ; CHECK-LABEL: @isnot_pow2or0_negate_op_commute3( ; CHECK-NEXT: [[X:%.*]] = urem i32 42, [[P:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], [[TMP1]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range !2 +; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP1]], 1 ; CHECK-NEXT: ret i1 [[CMP]] ; %x = urem i32 42, %p ; thwart complexity-based canonicalization @@ -158,8 +151,8 @@ ; 
CHECK-LABEL: @is_pow2or0_negate_op_extra_use1( ; CHECK-NEXT: [[NEG:%.*]] = sub i32 0, [[X:%.*]] ; CHECK-NEXT: call void @use(i32 [[NEG]]) -; CHECK-NEXT: [[AND:%.*]] = and i32 [[NEG]], [[X]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], [[X]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X]]), !range !0 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; %neg = sub i32 0, %x @@ -428,12 +421,9 @@ define i1 @is_pow2_negate_op(i32 %x) { ; CHECK-LABEL: @is_pow2_negate_op( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0 -; CHECK-NEXT: [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] ; %neg = sub i32 0, %x %and = and i32 %neg, %x @@ -445,12 +435,9 @@ define <2 x i1> @is_pow2_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @is_pow2_negate_op_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1> -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[X]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[NOTZERO:%.*]] = icmp ne <2 x i32> [[X]], zeroinitializer -; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[CMP]], [[NOTZERO]] -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], <i32 1, i32 1> +; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %neg = sub <2 x i32> zeroinitializer, %x %and = and <2 x i32> %neg, %x @@ -496,12 +483,9 @@ define i1 @isnot_pow2_negate_op(i32 %x) { ; CHECK-LABEL: @isnot_pow2_negate_op( -; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -1 -; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[X]] -; CHECK-NEXT: [[CMP:%.*]] = icmp 
ne i32 [[TMP2]], 0 -; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq i32 [[X]], 0 -; CHECK-NEXT: [[R:%.*]] = or i1 [[CMP]], [[ISZERO]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range !0 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 1 +; CHECK-NEXT: ret i1 [[TMP2]] ; %neg = sub i32 0, %x %and = and i32 %neg, %x @@ -513,12 +497,9 @@ define <2 x i1> @isnot_pow2_negate_op_vec(<2 x i32> %x) { ; CHECK-LABEL: @isnot_pow2_negate_op_vec( -; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[X:%.*]], <i32 -1, i32 -1> -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[X]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer -; CHECK-NEXT: [[ISZERO:%.*]] = icmp eq <2 x i32> [[X]], zeroinitializer -; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[ISZERO]], [[CMP]] -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[X:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], <i32 1, i32 1> +; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %neg = sub <2 x i32> zeroinitializer, %x %and = and <2 x i32> %neg, %x