diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1077,6 +1077,20 @@ computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); + Value *X = nullptr, *Y = nullptr; + // and(x, -x) is a common idiom for clearing all but lowest set bit. If we + // have a single known bit in x, we can clear all bits above it. + // TODO: instcombine often reassociates independent `and` which can hide + // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x). + if (!Known.One.isZero() || !Known2.One.isZero()) { + if (match(I, m_c_BinOp(m_Value(X), m_Neg(m_Deferred(X))))) { + // -(-x) == x so pick whichever we can get a better result with. + if (Known.countMaxTrailingZeros() <= Known2.countMaxTrailingZeros()) + Known = Known.blsi(); + else + Known = Known2.blsi(); + } + } Known &= Known2; // and(x, add (x, -1)) is a common idiom that always clears the low bit; @@ -1084,7 +1098,6 @@ // matching the form add(x, add(x, y)) where y is odd. // TODO: This could be generalized to clearing any bit set in y where the // following bit is known to be unset in y. - Value *X = nullptr, *Y = nullptr; if (!Known.Zero[0] && !Known.One[0] && match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) { Known2.resetAll(); @@ -1100,12 +1113,26 @@ Known |= Known2; break; - case Instruction::Xor: + case Instruction::Xor: { computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - Known ^= Known2; - break; + Value *X = nullptr; + // xor(x, x + -1) is a common idiom that will clear all bits above + // the lowest set bit. We can safely say any bit past the lowest + // known one must be zero. + // TODO: `x + -1` is often shrunk `x + C` which `C` is minimum bits needed + // for demanded. This can cause us to miss this pattern. Expand to account + // for `x + -1` in the context of demanded bits. + if ((!Known.One.isZero() || !Known2.One.isZero()) && + match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_AllOnes())))) { + // Known2 is confusingly LHS. + const KnownBits &XBits = I->getOperand(0) == X ? Known2 : Known; + Known = XBits.blsmsk(); + } else { + Known ^= Known2; + } + } break; case Instruction::Mul: { bool NSW = Q.IIQ.hasNoSignedWrap(cast(I)); computeKnownBitsMul(I->getOperand(0), I->getOperand(1), NSW, DemandedElts, diff --git a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll --- a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll @@ -27,11 +27,7 @@ define <2 x i1> @blsmsk_ne_is_true_diff_vec(<2 x i32> %x) { ; CHECK-LABEL: @blsmsk_ne_is_true_diff_vec( -; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[X2:%.*]] = add nsw <2 x i32> [[X1]], -; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = icmp ne <2 x i32> [[X3]], -; CHECK-NEXT: ret <2 x i1> [[Z]] +; CHECK-NEXT: ret <2 x i1> ; %x1 = or <2 x i32> %x, %x2 = sub <2 x i32> %x1, @@ -72,11 +68,7 @@ define i1 @blsmsk_signed_is_false(i32 %x) { ; CHECK-LABEL: @blsmsk_signed_is_false( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 10 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp slt i32 [[X3]], 0 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 false ; %x1 = or i32 %x, 10 %x2 = sub i32 %x1, 1 @@ -87,11 +79,7 @@ define i32 @blsmsk_add_eval(i32 %x) { ; CHECK-LABEL: @blsmsk_add_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 9 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 9 %x2 = sub i32 %x1, 1 @@ -105,7 +93,7 @@ ; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[X2:%.*]] = add nsw <2 x i32> [[X1]], ; CHECK-NEXT: [[X3:%.*]] = xor <2 x i32> [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = add <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %x1 = or <2 x i32> %x, @@ -118,9 +106,9 @@ define i32 @blsmsk_sub_eval(i32 %x) { ; CHECK-LABEL: @blsmsk_sub_eval( ; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 9 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 +; CHECK-NEXT: [[X2:%.*]] = add i32 [[X1]], 31 ; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], -32 +; CHECK-NEXT: [[Z:%.*]] = or i32 [[X3]], -32 ; CHECK-NEXT: ret i32 [[Z]] ; %x1 = or i32 %x, 9 @@ -132,11 +120,7 @@ define i32 @blsmsk_or_eval(i32 %x) { ; CHECK-LABEL: @blsmsk_or_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 129 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = or i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 129 %x2 = sub i32 %x1, 1 @@ -162,11 +146,7 @@ define i32 @blsmsk_xor_eval(i32 %x) { ; CHECK-LABEL: @blsmsk_xor_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 255 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z1:%.*]] = or i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z1]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 255 %x2 = sub i32 %x1, 1 @@ -232,10 +212,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp eq i32 [[X3]], 8 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 false ; %lb = and i32 %x, 4 %cmp = icmp ne i32 %lb, 0 @@ -294,10 +271,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -337,9 +311,9 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 +; CHECK-NEXT: [[X2:%.*]] = add i32 [[X]], 31 ; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], -32 +; CHECK-NEXT: [[Z:%.*]] = or i32 [[X3]], -32 ; CHECK-NEXT: ret i32 [[Z]] ; %lb = and i32 %x, 1 @@ -356,10 +330,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = or i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -484,11 +455,7 @@ define i32 @blsi_add_eval(i32 %x) { ; CHECK-LABEL: @blsi_add_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 9 -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X1]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 9 %x2 = sub i32 0, %x1 @@ -499,11 +466,7 @@ define i32 @blsi_sub_eval(i32 %x) { ; CHECK-LABEL: @blsi_sub_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 33 -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X1]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], -32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 -31 ; %x1 = or i32 %x, 33 %x2 = sub i32 0, %x1 @@ -517,7 +480,7 @@ ; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], ; CHECK-NEXT: [[X2:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]] ; CHECK-NEXT: [[X3:%.*]] = and <2 x i32> [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = add <2 x i32> [[X3]], +; CHECK-NEXT: [[Z:%.*]] = or <2 x i32> [[X3]], ; CHECK-NEXT: ret <2 x i32> [[Z]] ; %x1 = or <2 x i32> %x, @@ -529,11 +492,7 @@ define i32 @blsi_or_eval(i32 %x) { ; CHECK-LABEL: @blsi_or_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 129 -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X1]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = or i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 129 %x2 = sub i32 0, %x1 @@ -628,10 +587,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp ne i32 [[X3]], 8 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 true ; %lb = and i32 %x, 4 %cmp = icmp ne i32 %lb, 0 @@ -719,10 +675,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = xor i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -738,10 +691,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = and i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 0 ; %lb = and i32 %x, 8 %cmp = icmp ne i32 %lb, 0 @@ -839,7 +789,7 @@ ; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 256 ; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 ; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], 32 +; CHECK-NEXT: [[Z:%.*]] = add nuw nsw i32 [[X3]], 32 ; CHECK-NEXT: ret i32 [[Z]] ; %x1 = or i32 %x, 256 diff --git a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll --- a/llvm/test/Transforms/InstCombine/ctpop-pow2.ll +++ b/llvm/test/Transforms/InstCombine/ctpop-pow2.ll @@ -144,7 +144,10 @@ define <2 x i32> @ctpop_x_and_negx_vec_nz(<2 x i32> %x) { ; CHECK-LABEL: @ctpop_x_and_negx_vec_nz( -; CHECK-NEXT: ret <2 x i32> +; CHECK-NEXT: [[X1:%.*]] = or <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[SUB:%.*]] = sub nsw <2 x i32> zeroinitializer, [[X1]] +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[X1]], [[SUB]] +; CHECK-NEXT: ret <2 x i32> [[AND]] ; %x1 = or <2 x i32> %x, %sub = sub <2 x i32> , %x1 diff --git a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll --- a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll +++ b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll @@ -44,8 +44,7 @@ ; CHECK-NEXT: [[X1:%.*]] = or i8 [[X:%.*]], 1 ; CHECK-NEXT: [[V0:%.*]] = sub i8 0, [[X1]] ; CHECK-NEXT: [[V1:%.*]] = and i8 [[X1]], [[V0]] -; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[V1]]) -; CHECK-NEXT: ret i8 [[CNT]] +; CHECK-NEXT: ret i8 [[V1]] ; %x1 = or i8 %x, 1 %v0 = sub i8 0, %x1