diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5065,14 +5065,95 @@ [](ConstantSDNode *C) { return !C->isZero(); })) return true; - // TODO: Recognize more cases here. + // TODO: Recognize more cases here. Most of the cases are also incomplete to + // some degree. switch (Op.getOpcode()) { - default: break; + default: + break; + case ISD::OR: - if (isKnownNeverZero(Op.getOperand(1), Depth + 1) || - isKnownNeverZero(Op.getOperand(0), Depth + 1)) + return isKnownNeverZero(Op.getOperand(1), Depth + 1) || + isKnownNeverZero(Op.getOperand(0), Depth + 1); + + case ISD::VSELECT: + case ISD::SELECT: + return isKnownNeverZero(Op.getOperand(1), Depth + 1) && + isKnownNeverZero(Op.getOperand(2), Depth + 1); + + case ISD::SHL: + if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()) + return isKnownNeverZero(Op.getOperand(0), Depth + 1); + + // 1 << X is never zero. TODO: This can be expanded if we can bound X. + // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero() + if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0]) + return true; + break; + + case ISD::UADDSAT: + case ISD::UMAX: + return isKnownNeverZero(Op.getOperand(1), Depth + 1) || + isKnownNeverZero(Op.getOperand(0), Depth + 1); + + case ISD::UMIN: + return isKnownNeverZero(Op.getOperand(1), Depth + 1) && + isKnownNeverZero(Op.getOperand(0), Depth + 1); + + case ISD::ROTL: + case ISD::ROTR: + case ISD::BITREVERSE: + case ISD::BSWAP: + case ISD::CTPOP: + case ISD::ABS: + return isKnownNeverZero(Op.getOperand(0), Depth + 1); + + case ISD::SRA: + case ISD::SRL: + if (Op->getFlags().hasExact()) + return isKnownNeverZero(Op.getOperand(0), Depth + 1); + // Signed >> X is never zero. TODO: This can be expanded if we can bound X. + // The expression is really + // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero() + if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative()) return true; break; + + case ISD::UDIV: + case ISD::SDIV: + // div exact can only produce a zero if the dividend is zero. + // TODO: For udiv this is also true if Op1 u<= Op0 + if (Op->getFlags().hasExact()) + return isKnownNeverZero(Op.getOperand(0), Depth + 1); + break; + + case ISD::ADD: + if (Op->getFlags().hasNoUnsignedWrap()) + if (isKnownNeverZero(Op.getOperand(1), Depth + 1) || + isKnownNeverZero(Op.getOperand(0), Depth + 1)) + return true; + // TODO: There are a lot more cases we can prove for add. + break; + + case ISD::SUB: { + if (isNullConstant(Op.getOperand(0))) + return isKnownNeverZero(Op.getOperand(1), Depth + 1); + + std::optional ne = + KnownBits::ne(computeKnownBits(Op.getOperand(0), Depth + 1), + computeKnownBits(Op.getOperand(1), Depth + 1)); + return ne && *ne; + } + + case ISD::MUL: + if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()) + if (isKnownNeverZero(Op.getOperand(1), Depth + 1) && + isKnownNeverZero(Op.getOperand(0), Depth + 1)) + return true; + break; + + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + return isKnownNeverZero(Op.getOperand(0), Depth + 1); } return computeKnownBits(Op, Depth).isNonZero(); diff --git a/llvm/test/CodeGen/X86/divrem-by-select.ll b/llvm/test/CodeGen/X86/divrem-by-select.ll --- a/llvm/test/CodeGen/X86/divrem-by-select.ll +++ b/llvm/test/CodeGen/X86/divrem-by-select.ll @@ -67,20 +67,16 @@ ; CHECK-X64-V4: # %bb.0: ; CHECK-X64-V4-NEXT: vpsllq $63, %xmm0, %xmm0 ; CHECK-X64-V4-NEXT: vpmovq2m %xmm0, %k1 -; CHECK-X64-V4-NEXT: vpbroadcastq {{.*#+}} xmm0 = [1,1] -; CHECK-X64-V4-NEXT: vpbroadcastq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} -; CHECK-X64-V4-NEXT: vpextrq $1, %xmm0, %rcx -; CHECK-X64-V4-NEXT: vpextrq $1, %xmm1, %rax -; CHECK-X64-V4-NEXT: xorl %edx, %edx -; CHECK-X64-V4-NEXT: divq %rcx -; CHECK-X64-V4-NEXT: movq %rax, %rcx -; CHECK-X64-V4-NEXT: vmovq %xmm0, %rsi -; CHECK-X64-V4-NEXT: vmovq %xmm1, %rax -; CHECK-X64-V4-NEXT: xorl %edx, %edx -; CHECK-X64-V4-NEXT: divq %rsi +; CHECK-X64-V4-NEXT: vpextrq $1, %xmm1, %rdx +; CHECK-X64-V4-NEXT: movabsq $-3689348814741910323, %rax # imm = 0xCCCCCCCCCCCCCCCD +; CHECK-X64-V4-NEXT: mulxq %rax, %rcx, %rcx ; CHECK-X64-V4-NEXT: vmovq %rcx, %xmm0 -; CHECK-X64-V4-NEXT: vmovq %rax, %xmm1 -; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; CHECK-X64-V4-NEXT: vmovq %xmm1, %rdx +; CHECK-X64-V4-NEXT: mulxq %rax, %rax, %rax +; CHECK-X64-V4-NEXT: vmovq %rax, %xmm2 +; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0] +; CHECK-X64-V4-NEXT: vpsrlq $3, %xmm0, %xmm1 {%k1} +; CHECK-X64-V4-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-X64-V4-NEXT: retq ;; Fails at the moment because `10` is even so there is no common @@ -115,23 +111,23 @@ ; ; CHECK-X64-V4-LABEL: udiv_indentity_non_zero: ; CHECK-X64-V4: # %bb.0: -; CHECK-X64-V4-NEXT: vpsllq $63, %xmm0, %xmm0 -; CHECK-X64-V4-NEXT: vpmovq2m %xmm0, %k1 -; CHECK-X64-V4-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 -; CHECK-X64-V4-NEXT: vpbroadcastq {{.*#+}} xmm3 = [1,1] -; CHECK-X64-V4-NEXT: vpsubq %xmm0, %xmm2, %xmm3 {%k1} -; CHECK-X64-V4-NEXT: vpextrq $1, %xmm3, %rcx +; CHECK-X64-V4-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3 +; CHECK-X64-V4-NEXT: vpsubq %xmm3, %xmm2, %xmm2 +; CHECK-X64-V4-NEXT: vpextrq $1, %xmm2, %rcx ; CHECK-X64-V4-NEXT: vpextrq $1, %xmm1, %rax ; CHECK-X64-V4-NEXT: xorl %edx, %edx ; CHECK-X64-V4-NEXT: divq %rcx ; CHECK-X64-V4-NEXT: movq %rax, %rcx -; CHECK-X64-V4-NEXT: vmovq %xmm3, %rsi +; CHECK-X64-V4-NEXT: vmovq %xmm2, %rsi ; CHECK-X64-V4-NEXT: vmovq %xmm1, %rax ; CHECK-X64-V4-NEXT: xorl %edx, %edx ; CHECK-X64-V4-NEXT: divq %rsi +; CHECK-X64-V4-NEXT: vpsllq $63, %xmm0, %xmm0 +; CHECK-X64-V4-NEXT: vpmovq2m %xmm0, %k1 ; CHECK-X64-V4-NEXT: vmovq %rcx, %xmm0 -; CHECK-X64-V4-NEXT: vmovq %rax, %xmm1 -; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; CHECK-X64-V4-NEXT: vmovq %rax, %xmm2 +; CHECK-X64-V4-NEXT: vpunpcklqdq {{.*#+}} xmm1 {%k1} = xmm2[0],xmm0[0] +; CHECK-X64-V4-NEXT: vmovdqa %xmm1, %xmm0 ; CHECK-X64-V4-NEXT: retq %non_zero = add nsw nuw <2 x i64> %y, %d = select <2 x i1> %c, <2 x i64> %non_zero, <2 x i64> diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll --- a/llvm/test/CodeGen/X86/known-never-zero.ll +++ b/llvm/test/CodeGen/X86/known-never-zero.ll @@ -47,9 +47,7 @@ ; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: movl $122, %eax ; CHECK-NEXT: cmovnel %esi, %eax -; CHECK-NEXT: bsfl %eax, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %eax, %eax ; CHECK-NEXT: retq %y = or i32 %x, 1 %z = select i1 %c, i32 %y, i32 122 @@ -85,9 +83,7 @@ ; CHECK-NEXT: movl $123, %eax ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: bsfl %eax, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %eax, %eax ; CHECK-NEXT: retq %z = shl i32 123, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -101,9 +97,7 @@ ; CHECK-NEXT: orl $256, %esi # imm = 0x100 ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %esi -; CHECK-NEXT: bsfl %esi, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %esi, %eax ; CHECK-NEXT: retq %y = or i32 %yy, 256 %z = shl nsw i32 %y, %x @@ -118,9 +112,7 @@ ; CHECK-NEXT: orl $256, %esi # imm = 0x100 ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %esi -; CHECK-NEXT: bsfl %esi, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %esi, %eax ; CHECK-NEXT: retq %y = or i32 %yy, 256 %z = shl nuw i32 %y, %x @@ -153,9 +145,7 @@ ; CHECK-NEXT: incl %edi ; CHECK-NEXT: movl $-1, %eax ; CHECK-NEXT: cmovnel %edi, %eax -; CHECK-NEXT: bsfl %eax, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %eax, %eax ; CHECK-NEXT: retq %z = call i32 @llvm.uadd.sat.i32(i32 %x, i32 1) %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -190,9 +180,7 @@ ; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: cmpl %eax, %edi ; CHECK-NEXT: cmoval %edi, %eax -; CHECK-NEXT: bsfl %eax, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %eax, %eax ; CHECK-NEXT: retq %yy = shl nuw i32 4, %y %z = call i32 @llvm.umax.i32(i32 %x, i32 %yy) @@ -228,9 +216,7 @@ ; CHECK-NEXT: addl $4, %esi ; CHECK-NEXT: cmpl %esi, %eax ; CHECK-NEXT: cmovbl %eax, %esi -; CHECK-NEXT: bsfl %esi, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %esi, %eax ; CHECK-NEXT: retq %x = shl nuw i32 4, %xx %y = add nuw nsw i32 %yy, 4 @@ -371,9 +357,7 @@ ; CHECK-NEXT: orl $256, %esi # imm = 0x100 ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: sarl %cl, %esi -; CHECK-NEXT: bsfl %esi, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %esi, %eax ; CHECK-NEXT: retq %y = or i32 %yy, 256 %z = ashr exact i32 %y, %x @@ -407,9 +391,7 @@ ; CHECK-NEXT: movl $-2147360405, %eax # imm = 0x8001E16B ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, %eax -; CHECK-NEXT: bsfl %eax, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %eax, %eax ; CHECK-NEXT: retq %z = lshr i32 2147606891, %x %r = call i32 @llvm.cttz.i32(i32 %z, i1 false) @@ -423,9 +405,7 @@ ; CHECK-NEXT: orl $256, %esi # imm = 0x100 ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shrl %cl, %esi -; CHECK-NEXT: bsfl %esi, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %esi, %eax ; CHECK-NEXT: retq %y = or i32 %yy, 256 %z = lshr exact i32 %y, %x @@ -459,9 +439,7 @@ ; CHECK-NEXT: orl $64, %eax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: divl %esi -; CHECK-NEXT: bsfl %eax, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %eax, %eax ; CHECK-NEXT: retq %x = or i32 %xx, 64 %z = udiv exact i32 %x, %y @@ -495,9 +473,7 @@ ; CHECK-NEXT: orl $64, %eax ; CHECK-NEXT: cltd ; CHECK-NEXT: idivl %esi -; CHECK-NEXT: bsfl %eax, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %eax, %eax ; CHECK-NEXT: retq %x = or i32 %xx, 64 %z = sdiv exact i32 %x, %y @@ -529,9 +505,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: orl $1, %edi ; CHECK-NEXT: addl %esi, %edi -; CHECK-NEXT: bsfl %edi, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %edi, %eax ; CHECK-NEXT: retq %x = or i32 %xx, 1 %z = add nuw i32 %x, %y @@ -565,9 +539,7 @@ ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: negl %eax -; CHECK-NEXT: bsfl %eax, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %eax, %eax ; CHECK-NEXT: retq %x = shl nuw nsw i32 256, %xx %z = sub i32 0, %x @@ -582,9 +554,7 @@ ; CHECK-NEXT: orl $64, %eax ; CHECK-NEXT: andl $-65, %edi ; CHECK-NEXT: subl %eax, %edi -; CHECK-NEXT: bsfl %edi, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %edi, %eax ; CHECK-NEXT: retq %x = or i32 %xx, 64 %y = and i32 %xx, -65 @@ -745,9 +715,7 @@ ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: movzwl %ax, %eax -; CHECK-NEXT: bsfl %eax, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %eax, %eax ; CHECK-NEXT: retq %x = shl nuw nsw i16 256, %xx %z = zext i16 %x to i32 @@ -780,9 +748,7 @@ ; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx ; CHECK-NEXT: shll %cl, %eax ; CHECK-NEXT: cwtl -; CHECK-NEXT: bsfl %eax, %ecx -; CHECK-NEXT: movl $32, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK-NEXT: rep bsfl %eax, %eax ; CHECK-NEXT: retq %x = shl nuw nsw i16 256, %xx %z = sext i16 %x to i32