Index: llvm/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.h +++ llvm/lib/Target/X86/X86ISelLowering.h @@ -1037,8 +1037,6 @@ bool isCheapToSpeculateCtlz() const override; - bool isCtlzFast() const override; - bool hasBitPreservingFPLogic(EVT VT) const override; bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override { Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5720,10 +5720,6 @@ return true; } -bool X86TargetLowering::isCtlzFast() const { - return Subtarget.hasFastLZCNT(); -} - bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial( const Instruction &AndI) const { return true; @@ -47563,113 +47559,6 @@ return DAG.getBitcast(VT, Mask); } -// Helper function for combineOrCmpEqZeroToCtlzSrl -// Transforms: -// seteq(cmp x, 0) -// into: -// srl(ctlz x), log2(bitsize(x)) -// Input pattern is checked by caller. -static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, - SelectionDAG &DAG) { - SDValue Cmp = Op.getOperand(1); - EVT VT = Cmp.getOperand(0).getValueType(); - unsigned Log2b = Log2_32(VT.getSizeInBits()); - SDLoc dl(Op); - SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0)); - // The result of the shift is true or false, and on X86, the 32-bit - // encoding of shr and lzcnt is more desirable. - SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32); - SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc, - DAG.getConstant(Log2b, dl, MVT::i8)); - return DAG.getZExtOrTrunc(Scc, dl, ExtTy); -} - -// Try to transform: -// zext(or(setcc(eq, (cmp x, 0)), setcc(eq, (cmp y, 0)))) -// into: -// srl(or(ctlz(x), ctlz(y)), log2(bitsize(x)) -// Will also attempt to match more generic cases, eg: -// zext(or(or(setcc(eq, cmp 0), setcc(eq, cmp 0)), setcc(eq, cmp 0))) -// Only applies if the target supports the FastLZCNT feature. -static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI, - const X86Subtarget &Subtarget) { - if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast()) - return SDValue(); - - auto isORCandidate = [](SDValue N) { - return (N->getOpcode() == ISD::OR && N->hasOneUse()); - }; - - // Check the zero extend is extending to 32-bit or more. The code generated by - // srl(ctlz) for 16-bit or less variants of the pattern would require extra - // instructions to clear the upper bits. - if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) || - !isORCandidate(N->getOperand(0))) - return SDValue(); - - // Check the node matches: setcc(eq, cmp 0) - auto isSetCCCandidate = [](SDValue N) { - return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() && - X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E && - N->getOperand(1).getOpcode() == X86ISD::CMP && - isNullConstant(N->getOperand(1).getOperand(1)) && - N->getOperand(1).getValueType().bitsGE(MVT::i32); - }; - - SDNode *OR = N->getOperand(0).getNode(); - SDValue LHS = OR->getOperand(0); - SDValue RHS = OR->getOperand(1); - - // Save nodes matching or(or, setcc(eq, cmp 0)). - SmallVector ORNodes; - while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) || - (isORCandidate(RHS) && isSetCCCandidate(LHS)))) { - ORNodes.push_back(OR); - OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode(); - LHS = OR->getOperand(0); - RHS = OR->getOperand(1); - } - - // The last OR node should match or(setcc(eq, cmp 0), setcc(eq, cmp 0)). - if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) || - !isORCandidate(SDValue(OR, 0))) - return SDValue(); - - // We have a or(setcc(eq, cmp 0), setcc(eq, cmp 0)) pattern, try to lower it - // to - // or(srl(ctlz),srl(ctlz)). - // The dag combiner can then fold it into: - // srl(or(ctlz, ctlz)). - EVT VT = OR->getValueType(0); - SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG); - SDValue Ret, NewRHS; - if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG))) - Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS); - - if (!Ret) - return SDValue(); - - // Try to lower nodes matching the or(or, setcc(eq, cmp 0)) pattern. - while (ORNodes.size() > 0) { - OR = ORNodes.pop_back_val(); - LHS = OR->getOperand(0); - RHS = OR->getOperand(1); - // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or). - if (RHS->getOpcode() == ISD::OR) - std::swap(LHS, RHS); - NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG); - if (!NewRHS) - return SDValue(); - Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS); - } - - if (Ret) - Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); - - return Ret; -} - static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R, SDValue And1_L, SDValue And1_R, SDLoc DL, SelectionDAG &DAG) { @@ -51168,9 +51057,6 @@ if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget)) return NewAdd; - if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget)) - return R; - // TODO: Combine with any target/faux shuffle. if (N0.getOpcode() == X86ISD::PACKUS && N0.getValueSizeInBits() == 128 && VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) { Index: llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll =================================================================== --- llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll +++ llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; Test patterns which generates lzcnt instructions. ; Eg: zext(or(setcc(cmp), setcc(cmp))) -> shr(or(lzcnt, lzcnt)) -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL --check-prefix=FASTLZCNT %s -; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL --check-prefix=NOFASTLZCNT %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 | FileCheck --check-prefix=ALL %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=btver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 | FileCheck --check-prefix=ALL %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver1 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 | FileCheck --check-prefix=ALL %s +; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=znver2 -mattr=-fast-lzcnt | FileCheck --check-prefix=ALL %s ; Test one 32-bit input, output is 32-bit, no transformations expected. define i32 @test_zext_cmp0(i32 %a) { @@ -24,23 +24,15 @@ ; Test two 32-bit inputs, output is 32-bit. define i32 @test_zext_cmp1(i32 %a, i32 %b) { -; FASTLZCNT-LABEL: test_zext_cmp1: -; FASTLZCNT: # %bb.0: -; FASTLZCNT-NEXT: lzcntl %edi, %ecx -; FASTLZCNT-NEXT: lzcntl %esi, %eax -; FASTLZCNT-NEXT: orl %ecx, %eax -; FASTLZCNT-NEXT: shrl $5, %eax -; FASTLZCNT-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp1: -; NOFASTLZCNT: # %bb.0: -; NOFASTLZCNT-NEXT: testl %edi, %edi -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: testl %esi, %esi -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: movzbl %cl, %eax -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp1: +; ALL: # %bb.0: +; ALL-NEXT: testl %edi, %edi +; ALL-NEXT: sete %al +; ALL-NEXT: testl %esi, %esi +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: movzbl %cl, %eax +; ALL-NEXT: retq %cmp = icmp eq i32 %a, 0 %cmp1 = icmp eq i32 %b, 0 %or = or i1 %cmp, %cmp1 @@ -50,23 +42,15 @@ ; Test two 64-bit inputs, output is 64-bit. define i64 @test_zext_cmp2(i64 %a, i64 %b) { -; FASTLZCNT-LABEL: test_zext_cmp2: -; FASTLZCNT: # %bb.0: -; FASTLZCNT-NEXT: lzcntq %rdi, %rcx -; FASTLZCNT-NEXT: lzcntq %rsi, %rax -; FASTLZCNT-NEXT: orl %ecx, %eax -; FASTLZCNT-NEXT: shrl $6, %eax -; FASTLZCNT-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp2: -; NOFASTLZCNT: # %bb.0: -; NOFASTLZCNT-NEXT: testq %rdi, %rdi -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: testq %rsi, %rsi -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: movzbl %cl, %eax -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp2: +; ALL: # %bb.0: +; ALL-NEXT: testq %rdi, %rdi +; ALL-NEXT: sete %al +; ALL-NEXT: testq %rsi, %rsi +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: movzbl %cl, %eax +; ALL-NEXT: retq %cmp = icmp eq i64 %a, 0 %cmp1 = icmp eq i64 %b, 0 %or = or i1 %cmp, %cmp1 @@ -97,23 +81,15 @@ ; Test two 32-bit inputs, output is 64-bit. define i64 @test_zext_cmp4(i32 %a, i32 %b) { -; FASTLZCNT-LABEL: test_zext_cmp4: -; FASTLZCNT: # %bb.0: # %entry -; FASTLZCNT-NEXT: lzcntl %edi, %ecx -; FASTLZCNT-NEXT: lzcntl %esi, %eax -; FASTLZCNT-NEXT: orl %ecx, %eax -; FASTLZCNT-NEXT: shrl $5, %eax -; FASTLZCNT-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp4: -; NOFASTLZCNT: # %bb.0: # %entry -; NOFASTLZCNT-NEXT: testl %edi, %edi -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: testl %esi, %esi -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: movzbl %cl, %eax -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp4: +; ALL: # %bb.0: # %entry +; ALL-NEXT: testl %edi, %edi +; ALL-NEXT: sete %al +; ALL-NEXT: testl %esi, %esi +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: movzbl %cl, %eax +; ALL-NEXT: retq entry: %cmp = icmp eq i32 %a, 0 %cmp1 = icmp eq i32 %b, 0 @@ -124,24 +100,15 @@ ; Test two 64-bit inputs, output is 32-bit. define i32 @test_zext_cmp5(i64 %a, i64 %b) { -; FASTLZCNT-LABEL: test_zext_cmp5: -; FASTLZCNT: # %bb.0: # %entry -; FASTLZCNT-NEXT: lzcntq %rdi, %rcx -; FASTLZCNT-NEXT: lzcntq %rsi, %rax -; FASTLZCNT-NEXT: orl %ecx, %eax -; FASTLZCNT-NEXT: shrl $6, %eax -; FASTLZCNT-NEXT: # kill: def $eax killed $eax killed $rax -; FASTLZCNT-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp5: -; NOFASTLZCNT: # %bb.0: # %entry -; NOFASTLZCNT-NEXT: testq %rdi, %rdi -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: testq %rsi, %rsi -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: movzbl %cl, %eax -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp5: +; ALL: # %bb.0: # %entry +; ALL-NEXT: testq %rdi, %rdi +; ALL-NEXT: sete %al +; ALL-NEXT: testq %rsi, %rsi +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: movzbl %cl, %eax +; ALL-NEXT: retq entry: %cmp = icmp eq i64 %a, 0 %cmp1 = icmp eq i64 %b, 0 @@ -152,28 +119,18 @@ ; Test three 32-bit inputs, output is 32-bit. define i32 @test_zext_cmp6(i32 %a, i32 %b, i32 %c) { -; FASTLZCNT-LABEL: test_zext_cmp6: -; FASTLZCNT: # %bb.0: # %entry -; FASTLZCNT-NEXT: lzcntl %edi, %eax -; FASTLZCNT-NEXT: lzcntl %esi, %ecx -; FASTLZCNT-NEXT: orl %eax, %ecx -; FASTLZCNT-NEXT: lzcntl %edx, %eax -; FASTLZCNT-NEXT: orl %ecx, %eax -; FASTLZCNT-NEXT: shrl $5, %eax -; FASTLZCNT-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp6: -; NOFASTLZCNT: # %bb.0: # %entry -; NOFASTLZCNT-NEXT: testl %edi, %edi -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: testl %esi, %esi -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: testl %edx, %edx -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: orb %cl, %al -; NOFASTLZCNT-NEXT: movzbl %al, %eax -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp6: +; ALL: # %bb.0: # %entry +; ALL-NEXT: testl %edi, %edi +; ALL-NEXT: sete %al +; ALL-NEXT: testl %esi, %esi +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: testl %edx, %edx +; ALL-NEXT: sete %al +; ALL-NEXT: orb %cl, %al +; ALL-NEXT: movzbl %al, %eax +; ALL-NEXT: retq entry: %cmp = icmp eq i32 %a, 0 %cmp1 = icmp eq i32 %b, 0 @@ -187,28 +144,18 @@ ; Test three 32-bit inputs, output is 32-bit, but compared to test_zext_cmp6 test, ; %.cmp2 inputs' order is inverted. define i32 @test_zext_cmp7(i32 %a, i32 %b, i32 %c) { -; FASTLZCNT-LABEL: test_zext_cmp7: -; FASTLZCNT: # %bb.0: # %entry -; FASTLZCNT-NEXT: lzcntl %edi, %eax -; FASTLZCNT-NEXT: lzcntl %esi, %ecx -; FASTLZCNT-NEXT: orl %eax, %ecx -; FASTLZCNT-NEXT: lzcntl %edx, %eax -; FASTLZCNT-NEXT: orl %ecx, %eax -; FASTLZCNT-NEXT: shrl $5, %eax -; FASTLZCNT-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp7: -; NOFASTLZCNT: # %bb.0: # %entry -; NOFASTLZCNT-NEXT: testl %edi, %edi -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: testl %esi, %esi -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: testl %edx, %edx -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: orb %cl, %al -; NOFASTLZCNT-NEXT: movzbl %al, %eax -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp7: +; ALL: # %bb.0: # %entry +; ALL-NEXT: testl %edi, %edi +; ALL-NEXT: sete %al +; ALL-NEXT: testl %esi, %esi +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: testl %edx, %edx +; ALL-NEXT: sete %al +; ALL-NEXT: orb %cl, %al +; ALL-NEXT: movzbl %al, %eax +; ALL-NEXT: retq entry: %cmp = icmp eq i32 %a, 0 %cmp1 = icmp eq i32 %b, 0 @@ -221,33 +168,21 @@ ; Test four 32-bit inputs, output is 32-bit. define i32 @test_zext_cmp8(i32 %a, i32 %b, i32 %c, i32 %d) { -; FASTLZCNT-LABEL: test_zext_cmp8: -; FASTLZCNT: # %bb.0: # %entry -; FASTLZCNT-NEXT: lzcntl %edi, %eax -; FASTLZCNT-NEXT: lzcntl %esi, %esi -; FASTLZCNT-NEXT: lzcntl %edx, %edx -; FASTLZCNT-NEXT: orl %eax, %esi -; FASTLZCNT-NEXT: lzcntl %ecx, %eax -; FASTLZCNT-NEXT: orl %edx, %eax -; FASTLZCNT-NEXT: orl %esi, %eax -; FASTLZCNT-NEXT: shrl $5, %eax -; FASTLZCNT-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp8: -; NOFASTLZCNT: # %bb.0: # %entry -; NOFASTLZCNT-NEXT: testl %edi, %edi -; NOFASTLZCNT-NEXT: sete %dil -; NOFASTLZCNT-NEXT: testl %esi, %esi -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: orb %dil, %al -; NOFASTLZCNT-NEXT: testl %edx, %edx -; NOFASTLZCNT-NEXT: sete %dl -; NOFASTLZCNT-NEXT: testl %ecx, %ecx -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %dl, %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: movzbl %cl, %eax -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp8: +; ALL: # %bb.0: # %entry +; ALL-NEXT: testl %edi, %edi +; ALL-NEXT: sete %dil +; ALL-NEXT: testl %esi, %esi +; ALL-NEXT: sete %al +; ALL-NEXT: orb %dil, %al +; ALL-NEXT: testl %edx, %edx +; ALL-NEXT: sete %dl +; ALL-NEXT: testl %ecx, %ecx +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %dl, %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: movzbl %cl, %eax +; ALL-NEXT: retq entry: %cmp = icmp eq i32 %a, 0 %cmp1 = icmp eq i32 %b, 0 @@ -262,25 +197,15 @@ ; Test one 32-bit input, one 64-bit input, output is 32-bit. define i32 @test_zext_cmp9(i32 %a, i64 %b) { -; FASTLZCNT-LABEL: test_zext_cmp9: -; FASTLZCNT: # %bb.0: # %entry -; FASTLZCNT-NEXT: lzcntq %rsi, %rax -; FASTLZCNT-NEXT: lzcntl %edi, %ecx -; FASTLZCNT-NEXT: shrl $5, %ecx -; FASTLZCNT-NEXT: shrl $6, %eax -; FASTLZCNT-NEXT: orl %ecx, %eax -; FASTLZCNT-NEXT: # kill: def $eax killed $eax killed $rax -; FASTLZCNT-NEXT: retq -; -; NOFASTLZCNT-LABEL: test_zext_cmp9: -; NOFASTLZCNT: # %bb.0: # %entry -; NOFASTLZCNT-NEXT: testl %edi, %edi -; NOFASTLZCNT-NEXT: sete %al -; NOFASTLZCNT-NEXT: testq %rsi, %rsi -; NOFASTLZCNT-NEXT: sete %cl -; NOFASTLZCNT-NEXT: orb %al, %cl -; NOFASTLZCNT-NEXT: movzbl %cl, %eax -; NOFASTLZCNT-NEXT: retq +; ALL-LABEL: test_zext_cmp9: +; ALL: # %bb.0: # %entry +; ALL-NEXT: testl %edi, %edi +; ALL-NEXT: sete %al +; ALL-NEXT: testq %rsi, %rsi +; ALL-NEXT: sete %cl +; ALL-NEXT: orb %al, %cl +; ALL-NEXT: movzbl %cl, %eax +; ALL-NEXT: retq entry: %cmp = icmp eq i32 %a, 0 %cmp1 = icmp eq i64 %b, 0