diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -565,8 +565,11 @@ } // ctpop(~x & (x - 1)) -> cttz(x, false) + // ctpop((x & -x) - 1) -> cttz(x, false) if (match(Op0, - m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) { + m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes()))) || + match(Op0, + m_Add(m_c_And(m_Value(X), m_Neg(m_Deferred(X))), m_AllOnes()))) { Function *F = Intrinsic::getDeclaration(II.getModule(), Intrinsic::cttz, Ty); return CallInst::Create(F, {X, IC.Builder.getFalse()}); diff --git a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll --- a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll +++ b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll @@ -94,11 +94,8 @@ ; __builtin_popcount((i & -i) - 1) -> __builtin_cttz(i, false) define i32 @ctpop3(i32 %0) { ; CHECK-LABEL: @ctpop3( -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 0, [[TMP0:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP3]], -1 -; CHECK-NEXT: [[TMP5:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[TMP4]]), !range [[RNG0]] -; CHECK-NEXT: ret i32 [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0:%.*]], i1 false), !range [[RNG0]] +; CHECK-NEXT: ret i32 [[TMP2]] ; %2 = sub i32 0, %0 %3 = and i32 %2, %0 @@ -109,11 +106,8 @@ define <2 x i32> @ctpop3v(<2 x i32> %0) { ; CHECK-LABEL: @ctpop3v( -; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], <i32 -1, i32 -1> -; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP4]]) -; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32>
[[TMP0:%.*]], i1 false) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %2 = sub <2 x i32> zeroinitializer, %0 %3 = and <2 x i32> %2, %0 @@ -124,11 +118,8 @@ define <2 x i32> @ctpop3v_undef(<2 x i32> %0) { ; CHECK-LABEL: @ctpop3v_undef( -; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[TMP0]] -; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[TMP3]], <i32 -1, i32 undef> -; CHECK-NEXT: [[TMP5:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP4]]) -; CHECK-NEXT: ret <2 x i32> [[TMP5]] +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false) +; CHECK-NEXT: ret <2 x i32> [[TMP2]] ; %2 = sub <2 x i32> zeroinitializer, %0 %3 = and <2 x i32> %2, %0