diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1415,6 +1415,20 @@
     return &I;
   }
 
+  // Canonicalize ((A & -A) - 1) --> (~A & (A - 1))
+  // Forms all commutable operations, and simplifies ctpop -> cttz folds.
+  if (match(&I,
+            m_Add(m_OneUse(m_c_And(m_Value(A), m_OneUse(m_Neg(m_Deferred(A))))),
+                  m_AllOnes()))) {
+    Constant *AllOnes = ConstantInt::getAllOnesValue(RHS->getType());
+    // Build the operands in separate statements: C++ leaves the evaluation
+    // order of call arguments unspecified, so nesting the Builder calls would
+    // make the order of the emitted add/xor depend on the host compiler.
+    Value *Dec = Builder.CreateAdd(A, AllOnes);
+    Value *Not = Builder.CreateXor(A, AllOnes);
+    return BinaryOperator::CreateAnd(Not, Dec);
+  }
+
   // TODO(jingyue): Consider willNotOverflowSignedAdd and
   // willNotOverflowUnsignedAdd to reduce the number of invocations of
   // computeKnownBits.
diff --git a/llvm/test/Transforms/InstCombine/add-mask-neg.ll b/llvm/test/Transforms/InstCombine/add-mask-neg.ll
--- a/llvm/test/Transforms/InstCombine/add-mask-neg.ll
+++ b/llvm/test/Transforms/InstCombine/add-mask-neg.ll
@@ -2,14 +2,14 @@
 ; RUN: opt < %s -S -passes=instcombine | FileCheck %s
 
 ;
-; TODO: Canonicalize ((X & -X) - 1) --> (~X & (X - 1))
+; Canonicalize ((X & -X) - 1) --> (~X & (X - 1))
 ;
 
 define i32 @dec_mask_neg_i32(i32 %X) {
 ; CHECK-LABEL: @dec_mask_neg_i32(
-; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = and i32 [[NEG]], [[X]]
-; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[MASK]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[X]], -1
+; CHECK-NEXT:    [[DEC:%.*]] = and i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[DEC]]
 ;
   %neg = sub i32 0, %X
@@ -21,9 +21,9 @@
 define i32 @dec_mask_commute_neg_i32(i32 %A) {
 ; CHECK-LABEL: @dec_mask_commute_neg_i32(
 ; CHECK-NEXT:    [[X:%.*]] = sdiv i32 42, [[A:%.*]]
-; CHECK-NEXT:    [[NEG:%.*]] = sub nsw i32 0, [[X]]
-; CHECK-NEXT:    [[MASK:%.*]] = and i32 [[X]], [[NEG]]
-; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[MASK]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[X]], -1
+; CHECK-NEXT:    [[DEC:%.*]] = and i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[DEC]]
 ;
   %X = sdiv i32 42, %A ; thwart complexity-based canonicalization
@@ -35,9 +35,9 @@
 
 define i32 @dec_commute_mask_neg_i32(i32 %X) {
 ; CHECK-LABEL: @dec_commute_mask_neg_i32(
-; CHECK-NEXT:    [[NEG:%.*]] = sub i32 0, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = and i32 [[NEG]], [[X]]
-; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[MASK]], -1
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[X]], -1
+; CHECK-NEXT:    [[DEC:%.*]] = and i32 [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret i32 [[DEC]]
 ;
   %neg = sub i32 0, %X
@@ -78,9 +78,9 @@
 
 define <2 x i32> @dec_mask_neg_v2i32(<2 x i32> %X) {
 ; CHECK-LABEL: @dec_mask_neg_v2i32(
-; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[NEG]], [[X]]
-; CHECK-NEXT:    [[DEC:%.*]] = add <2 x i32> [[MASK]],
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[X]],
+; CHECK-NEXT:    [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[DEC]]
 ;
   %neg = sub <2 x i32> zeroinitializer, %X
@@ -91,9 +91,9 @@
 
 define <2 x i32> @dec_mask_neg_v2i32_undef(<2 x i32> %X) {
 ; CHECK-LABEL: @dec_mask_neg_v2i32_undef(
-; CHECK-NEXT:    [[NEG:%.*]] = sub <2 x i32> zeroinitializer, [[X:%.*]]
-; CHECK-NEXT:    [[MASK:%.*]] = and <2 x i32> [[NEG]], [[X]]
-; CHECK-NEXT:    [[DEC:%.*]] = add <2 x i32> [[MASK]],
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X:%.*]],
+; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[X]],
+; CHECK-NEXT:    [[DEC:%.*]] = and <2 x i32> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i32> [[DEC]]
 ;
   %neg = sub <2 x i32> zeroinitializer, %X
diff --git a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
--- a/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
+++ b/llvm/test/Transforms/InstCombine/ctpop-cttz.ll
@@ -94,11 +94,8 @@
 ; __builtin_popcount((i & -i) - 1) -> __builtin_cttz(i, false)
 define i32 @ctpop3(i32 %0) {
 ; CHECK-LABEL: @ctpop3(
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 0, [[TMP0:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], -1
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[TMP4]]), !range [[RNG0]]
-; CHECK-NEXT:    ret i32 [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.cttz.i32(i32 [[TMP0:%.*]], i1 false), !range [[RNG0]]
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %2 = sub i32 0, %0
   %3 = and i32 %2, %0
@@ -109,11 +106,8 @@
 
 define <2 x i32> @ctpop3v(<2 x i32> %0) {
 ; CHECK-LABEL: @ctpop3v(
-; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP3]],
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP4]])
-; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false)
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %2 = sub <2 x i32> zeroinitializer, %0
   %3 = and <2 x i32> %2, %0
@@ -124,11 +118,8 @@
 
 define <2 x i32> @ctpop3v_undef(<2 x i32> %0) {
 ; CHECK-LABEL: @ctpop3v_undef(
-; CHECK-NEXT:    [[TMP2:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0:%.*]]
-; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], [[TMP0]]
-; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP3]],
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> [[TMP4]])
-; CHECK-NEXT:    ret <2 x i32> [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[TMP0:%.*]], i1 false)
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %2 = sub <2 x i32> zeroinitializer, %0
   %3 = and <2 x i32> %2, %0