Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2349,15 +2349,24 @@ // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. + // Output known-0 bits are also known if the top bits of each input are + // known to be clear. For example, if one input has the top 10 bits clear + // and the other has the top 8 bits clear, we know the top 7 bits of the + // output must be clear. computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); - unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + unsigned KnownZeroHigh = KnownZero2.countLeadingOnes(); + unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); - KnownZeroOut = std::min(KnownZeroOut, + KnownZeroHigh = std::min(KnownZeroHigh, + KnownZero2.countLeadingOnes()); + KnownZeroLow = std::min(KnownZeroLow, KnownZero2.countTrailingOnes()); if (Op.getOpcode() == ISD::ADD) { - KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow); + if (KnownZeroHigh > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1); break; } @@ -2365,8 +2374,8 @@ // information if we know (at least) that the low two bits are clear. We // then return to the caller that the low bit is unknown but that other bits // are known zero. 
- if (KnownZeroOut >= 2) // ADDE - KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut); + if (KnownZeroLow >= 2) // ADDE + KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow); break; } case ISD::SREM: Index: test/CodeGen/X86/add-and-knownbits.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/add-and-knownbits.ll @@ -0,0 +1,14 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +; Make sure the 'and' is folded away, since we can statically +; prove the add will not overflow the 17 low bits. +; CHECK-LABEL: add_and +; CHECK: leal 65535(%rdi), %eax +; CHECK-NEXT: retq +define i32 @add_and(i16 zeroext %a) { + %b = zext i16 %a to i32 + %c = add i32 %b, 65535 + %d = and i32 %c, 131071 + ret i32 %d +} + Index: test/CodeGen/X86/win64_frame.ll =================================================================== --- test/CodeGen/X86/win64_frame.ll +++ test/CodeGen/X86/win64_frame.ll @@ -100,8 +100,9 @@ alloca i32, i32 %a ; CHECK: movl %ecx, %eax - ; CHECK: leaq 15(,%rax,4), %rax - ; CHECK: andq $-16, %rax + ; CHECK: leaq 15(,%rax,4), %rcx + ; CHECK: movabsq $34359738352, %rax + ; CHECK: andq %rcx, %rax ; CHECK: callq __chkstk ; CHECK: subq %rax, %rsp