diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1083,6 +1083,22 @@ computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); + // blsi is a common idiom: and(x, -x) clears all but the lowest set bit. + // If we have a single known bit in x, we can clear all bits + // above it. + // TODO: instcombine often reassociates independent `and`, which can hide + // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x). + if (!Known.One.isZero() || !Known2.One.isZero()) { + if (match(I->getOperand(0), m_Neg(m_Specific(I->getOperand(1)))) || + match(I->getOperand(1), m_Neg(m_Specific(I->getOperand(0))))) { + unsigned MinBit = std::min(Known.One.countTrailingZeros(), + Known2.One.countTrailingZeros()); + APInt Mask = APInt::getZero(Known.One.getBitWidth()); + Mask.setBits(MinBit + 1, Mask.getBitWidth()); + Known.Zero |= Mask; + Known.One &= (~Mask); + } + } Known &= Known2; // and(x, add (x, -1)) is a common idiom that always clears the low bit; @@ -1110,7 +1126,27 @@ computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q); - Known ^= Known2; + // blsmsk is a common idiom: xor(x, x + -1) clears all bits above + // the lowest set bit. We can safely say any bit past the lowest known one + // must be zero. + // TODO: `x + -1` is often shrunk to `x + C` where `C` is the minimum bits + // needed for the demanded bits. This can cause us to miss this pattern. + // Expand to account for `x + -1` in the context of demanded bits. 
+ if ((!Known.One.isZero() || !Known2.One.isZero()) && + (match(I->getOperand(0), + m_c_Add(m_AllOnes(), m_Specific(I->getOperand(1)))) || + match(I->getOperand(1), + m_c_Add(m_AllOnes(), m_Specific(I->getOperand(0)))))) { + unsigned MinBit = std::min(Known.One.countTrailingZeros(), + Known2.One.countTrailingZeros()); + APInt Mask = APInt::getZero(Known.One.getBitWidth()); + Mask.setBits(MinBit + 1, Mask.getBitWidth()); + Known ^= Known2; + Known.Zero |= Mask; + Known.One &= (~Mask); + } else { + Known ^= Known2; + } break; case Instruction::Mul: { bool NSW = Q.IIQ.hasNoSignedWrap(cast(I)); diff --git a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll --- a/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll +++ b/llvm/test/Analysis/ValueTracking/knownbits-bmi-pattern.ll @@ -58,11 +58,7 @@ define i1 @blsmsk_signed_is_false(i32 %x) { ; CHECK-LABEL: @blsmsk_signed_is_false( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 10 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp slt i32 [[X3]], 0 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 false ; %x1 = or i32 %x, 10 %x2 = sub i32 %x1, 1 @@ -74,11 +70,7 @@ define i32 @blsmsk_add_eval(i32 %x) { ; CHECK-LABEL: @blsmsk_add_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 9 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 9 %x2 = sub i32 %x1, 1 @@ -90,9 +82,9 @@ define i32 @blsmsk_sub_eval(i32 %x) { ; CHECK-LABEL: @blsmsk_sub_eval( ; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 9 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 +; CHECK-NEXT: [[X2:%.*]] = add i32 [[X1]], 31 ; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], -32 +; CHECK-NEXT: [[Z:%.*]] = 
or i32 [[X3]], -32 ; CHECK-NEXT: ret i32 [[Z]] ; %x1 = or i32 %x, 9 @@ -105,11 +97,7 @@ define i32 @blsmsk_or_eval(i32 %x) { ; CHECK-LABEL: @blsmsk_or_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 129 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X1]] -; CHECK-NEXT: [[Z:%.*]] = or i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 129 %x2 = sub i32 %x1, 1 @@ -120,11 +108,7 @@ define i32 @blsmsk_xor_eval(i32 %x) { ; CHECK-LABEL: @blsmsk_xor_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 255 -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X1]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X1]] -; CHECK-NEXT: [[Z1:%.*]] = or i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z1]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 255 %x2 = sub i32 %x1, 1 @@ -165,10 +149,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp eq i32 [[X3]], 8 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 false ; %lb = and i32 %x, 4 %cmp = icmp ne i32 %lb, 0 @@ -184,10 +165,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 2 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp ne i32 [[X3]], 8 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 true ; %lb = and i32 %x, 2 %cmp = icmp ne i32 %lb, 0 @@ -242,10 +220,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 2 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp slt i32 [[X3]], 
0 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 false ; %lb = and i32 %x, 2 %cmp = icmp ne i32 %lb, 0 @@ -262,10 +237,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -281,9 +253,9 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 +; CHECK-NEXT: [[X2:%.*]] = add i32 [[X]], 31 ; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], -32 +; CHECK-NEXT: [[Z:%.*]] = or i32 [[X3]], -32 ; CHECK-NEXT: ret i32 [[Z]] ; %lb = and i32 %x, 1 @@ -301,10 +273,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = or i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -320,10 +289,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[X]], -1 -; CHECK-NEXT: [[X3:%.*]] = xor i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = xor i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -428,11 +394,7 @@ define i1 @blsi_signed_is_false(i32 %x) { ; CHECK-LABEL: @blsi_signed_is_false( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 10 -; CHECK-NEXT: [[X2:%.*]] = sub nsw 
i32 0, [[X1]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp slt i32 [[X3]], 0 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 false ; %x1 = or i32 %x, 10 %x2 = sub i32 0, %x1 @@ -444,11 +406,7 @@ define i32 @blsi_add_eval(i32 %x) { ; CHECK-LABEL: @blsi_add_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 9 -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X1]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 9 %x2 = sub i32 0, %x1 @@ -459,11 +417,7 @@ define i32 @blsi_sub_eval(i32 %x) { ; CHECK-LABEL: @blsi_sub_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 33 -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X1]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], -32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 -31 ; %x1 = or i32 %x, 33 %x2 = sub i32 0, %x1 @@ -475,11 +429,7 @@ define i32 @blsi_or_eval(i32 %x) { ; CHECK-LABEL: @blsi_or_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 129 -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X1]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z:%.*]] = or i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 129 %x2 = sub i32 0, %x1 @@ -490,11 +440,7 @@ define i32 @blsi_xor_eval(i32 %x) { ; CHECK-LABEL: @blsi_xor_eval( -; CHECK-NEXT: [[X1:%.*]] = or i32 [[X:%.*]], 255 -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X1]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X1]], [[X2]] -; CHECK-NEXT: [[Z1:%.*]] = or i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z1]] +; CHECK-NEXT: ret i32 33 ; %x1 = or i32 %x, 255 %x2 = sub i32 0, %x1 @@ -551,10 +497,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 2 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 
[[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp eq i32 [[X3]], 8 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 false ; %lb = and i32 %x, 2 %cmp = icmp ne i32 %lb, 0 @@ -570,10 +513,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp ne i32 [[X3]], 8 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 true ; %lb = and i32 %x, 4 %cmp = icmp ne i32 %lb, 0 @@ -627,10 +567,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = icmp slt i32 [[X3]], 0 -; CHECK-NEXT: ret i1 [[Z]] +; CHECK-NEXT: ret i1 false ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -647,10 +584,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -666,10 +600,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = add i32 [[X3]], -32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 -31 ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -686,10 +617,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 
[[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = or i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -705,10 +633,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = xor i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 33 ; %lb = and i32 %x, 1 %cmp = icmp ne i32 %lb, 0 @@ -725,10 +650,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 8 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = and i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 0 ; %lb = and i32 %x, 8 %cmp = icmp ne i32 %lb, 0 @@ -744,10 +666,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = and i32 [[X3]], 32 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 0 ; %lb = and i32 %x, 4 %cmp = icmp ne i32 %lb, 0 @@ -764,10 +683,7 @@ ; CHECK-NEXT: [[LB:%.*]] = and i32 [[X:%.*]], 2 ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[LB]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[X2:%.*]] = sub nsw i32 0, [[X]] -; CHECK-NEXT: [[X3:%.*]] = and i32 [[X2]], [[X]] -; CHECK-NEXT: [[Z:%.*]] = and i32 [[X3]], 240 -; CHECK-NEXT: ret i32 [[Z]] +; CHECK-NEXT: ret i32 0 ; %lb = and i32 %x, 2 %cmp = icmp ne i32 %lb, 0 diff --git a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll 
b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll --- a/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll +++ b/llvm/test/Transforms/InstSimplify/ctpop-pow2.ll @@ -44,8 +44,7 @@ ; CHECK-NEXT: [[X1:%.*]] = or i8 [[X:%.*]], 1 ; CHECK-NEXT: [[V0:%.*]] = sub i8 0, [[X1]] ; CHECK-NEXT: [[V1:%.*]] = and i8 [[X1]], [[V0]] -; CHECK-NEXT: [[CNT:%.*]] = call i8 @llvm.ctpop.i8(i8 [[V1]]) -; CHECK-NEXT: ret i8 [[CNT]] +; CHECK-NEXT: ret i8 [[V1]] ; %x1 = or i8 %x, 1 %v0 = sub i8 0, %x1