diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5083,6 +5083,24 @@
   if (Depth >= MaxRecursionDepth)
     return false; // Limit search depth.
 
+  // An extend copies its source into the low bits (ANY_EXTEND included), so a
+  // non-zero source implies a non-zero result: query the pre-extension value.
+  auto PeekThroughCasts = [](SDValue V) {
+    while (true) {
+      switch (V.getOpcode()) {
+      case ISD::ZERO_EXTEND:
+      case ISD::SIGN_EXTEND:
+      case ISD::ANY_EXTEND:
+        V = V.getOperand(0);
+        break;
+      default:
+        return V;
+      }
+    }
+  };
+
+  Op = PeekThroughCasts(Op);
+
   assert(!Op.getValueType().isFloatingPoint() &&
          "Floating point types unsupported - use isKnownNeverZeroFloat");
 
@@ -5106,21 +5124,32 @@
     return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
            isKnownNeverZero(Op.getOperand(2), Depth + 1);
 
-  case ISD::SHL:
+  case ISD::SHL: {
     if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
       return isKnownNeverZero(Op.getOperand(0), Depth + 1);
-
-    // 1 << X is never zero. TODO: This can be expanded if we can bound X.
-    // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero()
-    if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0])
+    KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+    // 1 << X is never zero.
+    if (ValKnown.One[0])
       return true;
-    break;
+    // If a known-one bit survives the largest possible shift amount it also
+    // survives every smaller one, so the result is non-zero. The bound is
+    // strict: a shift by the full bit width is out of range.
+    APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+    if (MaxCnt.ult(ValKnown.getBitWidth()) &&
+        !ValKnown.One.shl(MaxCnt).isZero())
+      return true;
+    break;
+  }
 
   case ISD::UADDSAT:
   case ISD::UMAX:
     return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
            isKnownNeverZero(Op.getOperand(0), Depth + 1);
 
+    // smin/smax/umin all return one of their operands, so the result is
+    // non-zero when both operands are.
+  case ISD::SMAX:
+  case ISD::SMIN:
   case ISD::UMIN:
     return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
            isKnownNeverZero(Op.getOperand(0), Depth + 1);
@@ -5134,16 +5163,22 @@
     return isKnownNeverZero(Op.getOperand(0), Depth + 1);
 
   case ISD::SRA:
-  case ISD::SRL:
+  case ISD::SRL: {
     if (Op->getFlags().hasExact())
       return isKnownNeverZero(Op.getOperand(0), Depth + 1);
-    // Signed >> X is never zero. TODO: This can be expanded if we can bound X.
-    // The expression is really
-    // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero()
-    if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative())
+    KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1);
+    // Signed >> X is never zero.
+    if (ValKnown.isNegative())
+      return true;
+    // If a known-one bit survives the largest possible shift amount it also
+    // survives every smaller one, so the result is non-zero. The bound is
+    // strict: a shift by the full bit width is out of range.
+    APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue();
+    if (MaxCnt.ult(ValKnown.getBitWidth()) &&
+        !ValKnown.One.lshr(MaxCnt).isZero())
       return true;
     break;
-
+  }
   case ISD::UDIV:
   case ISD::SDIV:
     // div exact can only produce a zero if the dividend is zero.
@@ -5176,10 +5211,6 @@
           isKnownNeverZero(Op.getOperand(0), Depth + 1))
         return true;
     break;
-
-  case ISD::ZERO_EXTEND:
-  case ISD::SIGN_EXTEND:
-    return isKnownNeverZero(Op.getOperand(0), Depth + 1);
   }
 
   return computeKnownBits(Op, Depth).isNonZero();
diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll
--- a/llvm/test/CodeGen/X86/known-never-zero.ll
+++ b/llvm/test/CodeGen/X86/known-never-zero.ll
@@ -258,9 +258,7 @@
 ; CHECK-NEXT:    addl $4, %esi
 ; CHECK-NEXT:    cmpl %esi, %eax
 ; CHECK-NEXT:    cmovll %eax, %esi
-; CHECK-NEXT:    bsfl %esi, %ecx
-; CHECK-NEXT:    movl $32, %eax
-; CHECK-NEXT:    cmovnel %ecx, %eax
+; CHECK-NEXT:    rep bsfl %esi, %eax
 ; CHECK-NEXT:    retq
   %x = shl nuw i32 4, %xx
   %y = add nuw nsw i32 %yy, 4
@@ -298,9 +296,7 @@
 ; CHECK-NEXT:    addl $4, %esi
 ; CHECK-NEXT:    cmpl %esi, %eax
 ; CHECK-NEXT:    cmovgl %eax, %esi
-; CHECK-NEXT:    bsfl %esi, %ecx
-; CHECK-NEXT:    movl $32, %eax
-; CHECK-NEXT:    cmovnel %ecx, %eax
+; CHECK-NEXT:    rep bsfl %esi, %eax
 ; CHECK-NEXT:    retq
   %x = shl nuw i32 4, %xx
   %y = add nuw nsw i32 %yy, 4