diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -48821,15 +48821,29 @@ } } - // Look for a truncate with a single use. - if (Op.getOpcode() != ISD::TRUNCATE || !Op.hasOneUse()) - return SDValue(); - - Op = Op.getOperand(0); + // Look for a truncate. + if (Op.getOpcode() != ISD::TRUNCATE) + return SDValue(); + + // See if we can compare with zero against the truncation source, + // which should help using the Z flag from many ops. Only do this for + // i32/64 truncation sources to prevent i16 ops. + SDValue TruncSrc = Op.getOperand(0); + EVT TruncSrcVT = TruncSrc.getValueType(); + APInt UpperBits = + APInt::getBitsSetFrom(TruncSrcVT.getSizeInBits(), VT.getSizeInBits()); + if (TruncSrcVT.getSizeInBits() >= 32 && + DAG.getTargetLoweringInfo().isTypeLegal(TruncSrcVT) && + DAG.MaskedValueIsZero(TruncSrc, UpperBits) && + onlyZeroFlagUsed(SDValue(N, 0))) { + return DAG.getNode(X86ISD::CMP, dl, MVT::i32, TruncSrc, + DAG.getConstant(0, dl, TruncSrcVT)); + } - // Arithmetic op can only have one use. - if (!Op.hasOneUse()) - return SDValue(); + // After this the truncate and arithmetic op must have a single use. + if (!Op.hasOneUse() || !TruncSrc.hasOneUse()) + return SDValue(); + Op = TruncSrc; unsigned NewOpc; switch (Op.getOpcode()) { diff --git a/llvm/test/CodeGen/X86/and-with-overflow.ll b/llvm/test/CodeGen/X86/and-with-overflow.ll --- a/llvm/test/CodeGen/X86/and-with-overflow.ll +++ b/llvm/test/CodeGen/X86/and-with-overflow.ll @@ -75,7 +75,6 @@ ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: andl $-17, %eax -; X64-NEXT: testw %ax, %ax ; X64-NEXT: cmovel %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr34137.ll b/llvm/test/CodeGen/X86/pr34137.ll --- a/llvm/test/CodeGen/X86/pr34137.ll +++ b/llvm/test/CodeGen/X86/pr34137.ll @@ -11,12 +11,10 @@ ; CHECK-NEXT: movzwl {{.*}}(%rip), %eax ; CHECK-NEXT: movzwl {{.*}}(%rip), %ecx ; CHECK-NEXT: andl %eax, %ecx -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: andl %ecx, %edx -; CHECK-NEXT: movzwl %dx, %edx -; CHECK-NEXT: movl %edx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: xorl %edx, %edx -; CHECK-NEXT: testw %cx, %ax +; CHECK-NEXT: andl %eax, %ecx +; CHECK-NEXT: movzwl %cx, %ecx +; CHECK-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: sete %dl ; CHECK-NEXT: andl %eax, %edx ; CHECK-NEXT: movq %rdx, {{.*}}(%rip) diff --git a/llvm/test/CodeGen/X86/pr49028.ll b/llvm/test/CodeGen/X86/pr49028.ll --- a/llvm/test/CodeGen/X86/pr49028.ll +++ b/llvm/test/CodeGen/X86/pr49028.ll @@ -8,7 +8,6 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl %eax -; X86-NEXT: testw %ax, %ax ; X86-NEXT: sete (%ecx) ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl @@ -17,7 +16,6 @@ ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: shrl %eax -; X64-NEXT: testw %ax, %ax ; X64-NEXT: sete (%rsi) ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll --- a/llvm/test/CodeGen/X86/setcc-logic.ll +++ b/llvm/test/CodeGen/X86/setcc-logic.ll @@ -646,12 +646,18 @@ } define i1 @or_cmp_eq_i16(i16 zeroext %x, i16 zeroext %y) { -; CHECK-LABEL: or_cmp_eq_i16: -; CHECK: # %bb.0: -; CHECK-NEXT: notl %edi -; CHECK-NEXT: testw %si, %di -; CHECK-NEXT: sete %al -; CHECK-NEXT: retq +; NOBMI-LABEL: or_cmp_eq_i16: +; NOBMI: # %bb.0: +; NOBMI-NEXT: notl %edi +; NOBMI-NEXT: testl %esi, %edi +; NOBMI-NEXT: sete %al +; NOBMI-NEXT: retq +; +; BMI-LABEL: or_cmp_eq_i16: +; BMI: # %bb.0: +; BMI-NEXT: andnl %esi, %edi, %eax +; BMI-NEXT: sete %al +; BMI-NEXT: retq %o = or i16 %x, %y %c = icmp eq i16 %x, %o ret i1 %c diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll --- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll +++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll @@ -887,7 +887,7 @@ ; SSE-NEXT: cmpneqps %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; @@ -994,7 +994,7 @@ ; SSE: # %bb.0: ; SSE-NEXT: pcmpgtw %xmm0, %xmm1 ; SSE-NEXT: pmovmskb %xmm1, %eax -; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; @@ -1002,7 +1002,7 @@ ; AVX: # %bb.0: ; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: testw %ax, %ax +; AVX-NEXT: testl %eax, %eax ; AVX-NEXT: setne %al ; AVX-NEXT: retq ; @@ -1119,7 +1119,7 @@ ; SSE-NEXT: pcmpeqd %xmm0, %xmm2 ; SSE-NEXT: packssdw %xmm3, %xmm2 ; SSE-NEXT: pmovmskb %xmm2, %eax -; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll --- a/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-and-cmp.ll @@ -470,7 +470,7 @@ ; SSE-NEXT: psrld $16, %xmm1 ; SSE-NEXT: pand %xmm0, %xmm1 ; SSE-NEXT: movd %xmm1, %eax -; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: sete %al ; SSE-NEXT: retq ; @@ -479,7 +479,7 @@ ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: testw %ax, %ax +; AVX-NEXT: testl %eax, %eax ; AVX-NEXT: sete %al ; AVX-NEXT: retq %1 = call i16 @llvm.vector.reduce.and.v2i16(<2 x i16> %a0) @@ -496,7 +496,7 @@ ; SSE-NEXT: psrld $16, %xmm0 ; SSE-NEXT: pand %xmm1, %xmm0 ; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; @@ -507,7 +507,7 @@ ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: testw %ax, %ax +; AVX-NEXT: testl %eax, %eax ; AVX-NEXT: setne %al ; AVX-NEXT: retq %1 = call i16 @llvm.vector.reduce.and.v4i16(<4 x i16> %a0) @@ -526,7 +526,7 @@ ; SSE-NEXT: psrld $16, %xmm1 ; SSE-NEXT: pand %xmm0, %xmm1 ; SSE-NEXT: movd %xmm1, %eax -; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: sete %al ; SSE-NEXT: retq ; @@ -539,7 +539,7 @@ ; AVX-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: testw %ax, %ax +; AVX-NEXT: testl %eax, %eax ; AVX-NEXT: sete %al ; AVX-NEXT: retq %1 = call i16 @llvm.vector.reduce.and.v8i16(<8 x i16> %a0) @@ -559,7 +559,7 @@ ; SSE-NEXT: psrld $16, %xmm1 ; SSE-NEXT: pand %xmm0, %xmm1 ; SSE-NEXT: movd %xmm1, %eax -; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; @@ -574,7 +574,7 @@ ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: testw %ax, %ax +; AVX1-NEXT: testl %eax, %eax ; AVX1-NEXT: setne %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -590,7 +590,7 @@ ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: testw %ax, %ax +; AVX2-NEXT: testl %eax, %eax ; AVX2-NEXT: setne %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -606,7 +606,7 @@ ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax -; AVX512-NEXT: testw %ax, %ax +; AVX512-NEXT: testl %eax, %eax ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -629,7 +629,7 @@ ; SSE-NEXT: psrld $16, %xmm0 ; SSE-NEXT: pand %xmm1, %xmm0 ; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: sete %al ; SSE-NEXT: retq ; @@ -645,7 +645,7 @@ ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: testw %ax, %ax +; AVX1-NEXT: testl %eax, %eax ; AVX1-NEXT: sete %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -662,7 +662,7 @@ ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: testw %ax, %ax +; AVX2-NEXT: testl %eax, %eax ; AVX2-NEXT: sete %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -680,7 +680,7 @@ ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax -; AVX512-NEXT: testw %ax, %ax +; AVX512-NEXT: testl %eax, %eax ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -707,7 +707,7 @@ ; SSE-NEXT: psrld $16, %xmm0 ; SSE-NEXT: pand %xmm1, %xmm0 ; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; @@ -725,7 +725,7 @@ ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: testw %ax, %ax +; AVX1-NEXT: testl %eax, %eax ; AVX1-NEXT: setne %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -744,7 +744,7 @@ ; AVX2-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: testw %ax, %ax +; AVX2-NEXT: testl %eax, %eax ; AVX2-NEXT: setne %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -763,7 +763,7 @@ ; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax -; AVX512-NEXT: testw %ax, %ax +; AVX512-NEXT: testl %eax, %eax ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -810,7 +810,7 @@ ; SSE-NEXT: psrlw $8, %xmm0 ; SSE-NEXT: pand %xmm1, %xmm0 ; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: testb %al, %al +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; @@ -821,7 +821,7 @@ ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: testb %al, %al +; AVX-NEXT: testl %eax, %eax ; AVX-NEXT: setne %al ; AVX-NEXT: retq %1 = call i8 @llvm.vector.reduce.and.v4i8(<4 x i8> %a0) @@ -841,7 +841,7 @@ ; SSE-NEXT: psrlw $8, %xmm1 ; SSE-NEXT: pand %xmm0, %xmm1 ; SSE-NEXT: movd %xmm1, %eax -; SSE-NEXT: testb %al, %al +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: sete %al ; SSE-NEXT: retq ; @@ -854,7 +854,7 @@ ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: testb %al, %al +; AVX-NEXT: testl %eax, %eax ; AVX-NEXT: sete %al ; AVX-NEXT: retq %1 = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a0) @@ -876,7 +876,7 @@ ; SSE-NEXT: psrlw $8, %xmm0 ; SSE-NEXT: pand %xmm1, %xmm0 ; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: testb %al, %al +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; @@ -891,7 +891,7 @@ ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: testb %al, %al +; AVX-NEXT: testl %eax, %eax ; AVX-NEXT: setne %al ; AVX-NEXT: retq %1 = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a0) @@ -914,7 +914,7 @@ ; SSE-NEXT: psrlw $8, %xmm0 ; SSE-NEXT: pand %xmm1, %xmm0 ; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: testb %al, %al +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: sete %al ; SSE-NEXT: retq ; @@ -931,7 +931,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: testb %al, %al +; AVX1-NEXT: testl %eax, %eax ; AVX1-NEXT: sete %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -949,7 +949,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: testb %al, %al +; AVX2-NEXT: testl %eax, %eax ; AVX2-NEXT: sete %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -967,7 +967,7 @@ ; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovd %xmm0, %eax -; AVX512-NEXT: testb %al, %al +; AVX512-NEXT: testl %eax, %eax ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -993,7 +993,7 @@ ; SSE-NEXT: psrlw $8, %xmm1 ; SSE-NEXT: pand %xmm0, %xmm1 ; SSE-NEXT: movd %xmm1, %eax -; SSE-NEXT: testb %al, %al +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ; @@ -1011,7 +1011,7 @@ ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: testb %al, %al +; AVX1-NEXT: testl %eax, %eax ; AVX1-NEXT: setne %al ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -1030,7 +1030,7 @@ ; AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vmovd %xmm0, %eax -; AVX2-NEXT: testb %al, %al +; AVX2-NEXT: testl %eax, %eax ; AVX2-NEXT: setne %al ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1080,7 +1080,7 @@ ; SSE-NEXT: psrlw $8, %xmm1 ; SSE-NEXT: pand %xmm0, %xmm1 ; SSE-NEXT: movd %xmm1, %eax -; SSE-NEXT: testb %al, %al +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: sete %al ; SSE-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll --- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll @@ -1139,7 +1139,7 @@ ; SSE-NEXT: pcmpeqd %xmm2, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 ; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: testw %ax, %ax +; SSE-NEXT: testl %eax, %eax ; SSE-NEXT: setne %al ; SSE-NEXT: retq ;