Index: include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- include/llvm/CodeGen/ISDOpcodes.h
+++ include/llvm/CodeGen/ISDOpcodes.h
@@ -949,6 +949,17 @@
     return Code == SETUGT || Code == SETUGE || Code == SETULT || Code == SETULE;
   }
 
+  /// If the condition code is an unsigned integer predicate, make it signed.
+  inline CondCode getSignedIntCondCode(CondCode Code) {
+    switch (Code) {
+    case SETUGT: return SETGT;
+    case SETUGE: return SETGE;
+    case SETULT: return SETLT;
+    case SETULE: return SETLE;
+    default: return Code;
+    }
+  }
+
   /// Return true if the specified condition returns true if the two operands to
   /// the condition are equal. Note that if one of the two operands is a NaN,
   /// this value is meaningless.
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -17173,6 +17173,12 @@
   // operations may be required for some comparisons.
   unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
                                                             : X86ISD::PCMPGT;
+
+  // If both operands are known non-negative, then an unsigned compare is the
+  // same as a signed compare and there's no need to flip signbits.
+  if (DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1))
+    Cond = ISD::getSignedIntCondCode(Cond);
+
   bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
               Cond == ISD::SETGE || Cond == ISD::SETUGE;
   bool Invert = Cond == ISD::SETNE ||
Index: test/CodeGen/X86/avx512-cvt.ll
===================================================================
--- test/CodeGen/X86/avx512-cvt.ll
+++ test/CodeGen/X86/avx512-cvt.ll
@@ -1549,8 +1549,6 @@
 ; NOVL:       # BB#0:
 ; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; NOVL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; NOVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NOVL-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; NOVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; NOVL-NEXT:    vpextrb $8, %xmm0, %eax
 ; NOVL-NEXT:    andl $1, %eax
@@ -1579,8 +1577,6 @@
 ; NOVL:       # BB#0:
 ; NOVL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; NOVL-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
-; NOVL-NEXT:    vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
-; NOVL-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; NOVL-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; NOVL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; NOVL-NEXT:    retq
Index: test/CodeGen/X86/vector-unsigned-cmp.ll
===================================================================
--- test/CodeGen/X86/vector-unsigned-cmp.ll
+++ test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -13,7 +13,7 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlq $1, %xmm0
 ; SSE-NEXT:    psrlq $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
 ; SSE-NEXT:    pxor %xmm2, %xmm1
 ; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    movdqa %xmm0, %xmm2
@@ -30,9 +30,6 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
   %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
   %cmp = icmp ugt <2 x i64> %sh1, %sh2
@@ -46,7 +43,7 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlq $1, %xmm0
 ; SSE-NEXT:    psrlq $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
 ; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    pxor %xmm2, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm2
@@ -63,9 +60,6 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
   %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
@@ -79,7 +73,7 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlq $1, %xmm0
 ; SSE-NEXT:    psrlq $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
 ; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    pxor %xmm2, %xmm1
 ; SSE-NEXT:    movdqa %xmm1, %xmm2
@@ -98,9 +92,6 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtq %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
@@ -116,7 +107,7 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlq $1, %xmm0
 ; SSE-NEXT:    psrlq $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
 ; SSE-NEXT:    pxor %xmm2, %xmm1
 ; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    movdqa %xmm0, %xmm2
@@ -135,9 +126,6 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlq $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlq $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
-; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpgtq %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
@@ -153,31 +141,15 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrld $1, %xmm0
 ; SSE-NEXT:    psrld $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE-NEXT:    pxor %xmm2, %xmm1
-; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: ugt_v4i32:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: ugt_v4i32:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
-; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    retq
+; AVX-LABEL: ugt_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
   %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
   %cmp = icmp ugt <4 x i32> %sh1, %sh2
@@ -189,32 +161,16 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrld $1, %xmm0
 ; SSE-NEXT:    psrld $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE-NEXT:    pxor %xmm2, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm2
-; SSE-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: ult_v4i32:
-; AVX1:       # BB#0:
-; AVX1-NEXT:    vpsrld $1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: ult_v4i32:
-; AVX2:       # BB#0:
-; AVX2-NEXT:    vpsrld $1, %xmm0, %xmm0
-; AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
-; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm2
-; AVX2-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX2-NEXT:    retq
+; AVX-LABEL: ult_v4i32:
+; AVX:       # BB#0:
+; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
   %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
   %cmp = icmp ult <4 x i32> %sh1, %sh2
@@ -222,32 +178,22 @@
 }
 
 define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
-; SSE2-LABEL: uge_v4i32:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    psrld $1, %xmm0
-; SSE2-NEXT:    psrld $1, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: uge_v4i32:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    psrld $1, %xmm0
-; SSE41-NEXT:    psrld $1, %xmm1
-; SSE41-NEXT:    pmaxud %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: uge_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrld $1, %xmm0
+; SSE-NEXT:    psrld $1, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: uge_v4i32:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
-; AVX-NEXT:    vpmaxud %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
@@ -256,32 +202,22 @@
 }
 
 define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
-; SSE2-LABEL: ule_v4i32:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    psrld $1, %xmm0
-; SSE2-NEXT:    psrld $1, %xmm1
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT:    pxor %xmm2, %xmm1
-; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
-; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT:    pxor %xmm1, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: ule_v4i32:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    psrld $1, %xmm0
-; SSE41-NEXT:    psrld $1, %xmm1
-; SSE41-NEXT:    pminud %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: ule_v4i32:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrld $1, %xmm0
+; SSE-NEXT:    psrld $1, %xmm1
+; SSE-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ule_v4i32:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrld $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrld $1, %xmm1, %xmm1
-; AVX-NEXT:    vpminud %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
@@ -294,9 +230,6 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlw $1, %xmm0
 ; SSE-NEXT:    psrlw $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE-NEXT:    pxor %xmm2, %xmm1
-; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -304,9 +237,6 @@
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -320,20 +250,14 @@
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlw $1, %xmm0
 ; SSE-NEXT:    psrlw $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE-NEXT:    pxor %xmm2, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm2
-; SSE-NEXT:    pcmpgtw %xmm0, %xmm2
-; SSE-NEXT:    movdqa %xmm2, %xmm0
+; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ult_v8i16:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -343,29 +267,22 @@
 }
 
 define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) {
-; SSE2-LABEL: uge_v8i16:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    psrlw $1, %xmm0
-; SSE2-NEXT:    psrlw $1, %xmm1
-; SSE2-NEXT:    psubusw %xmm0, %xmm1
-; SSE2-NEXT:    pxor %xmm0, %xmm0
-; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: uge_v8i16:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    psrlw $1, %xmm0
-; SSE41-NEXT:    psrlw $1, %xmm1
-; SSE41-NEXT:    pmaxuw %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: uge_v8i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrlw $1, %xmm0
+; SSE-NEXT:    psrlw $1, %xmm1
+; SSE-NEXT:    pcmpgtw %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: uge_v8i16:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpgtw %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -374,29 +291,22 @@
 }
 
 define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) {
-; SSE2-LABEL: ule_v8i16:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    psrlw $1, %xmm0
-; SSE2-NEXT:    psrlw $1, %xmm1
-; SSE2-NEXT:    psubusw %xmm1, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm1
-; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: ule_v8i16:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    psrlw $1, %xmm0
-; SSE41-NEXT:    psrlw $1, %xmm1
-; SSE41-NEXT:    pminuw %xmm0, %xmm1
-; SSE41-NEXT:    pcmpeqw %xmm1, %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: ule_v8i16:
+; SSE:       # BB#0:
+; SSE-NEXT:    psrlw $1, %xmm0
+; SSE-NEXT:    psrlw $1, %xmm1
+; SSE-NEXT:    pcmpgtw %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ule_v8i16:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
@@ -408,22 +318,20 @@
 ; SSE-LABEL: ugt_v16i8:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlw $1, %xmm0
-; SSE-NEXT:    pand {{.*}}(%rip), %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT:    pand %xmm2, %xmm0
 ; SSE-NEXT:    psrlw $1, %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT:    por %xmm2, %xmm1
-; SSE-NEXT:    pxor %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm1
 ; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ugt_v16i8:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
-; AVX-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX-NEXT:    vpor %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -436,11 +344,10 @@
 ; SSE-LABEL: ult_v16i8:
 ; SSE:       # BB#0:
 ; SSE-NEXT:    psrlw $1, %xmm0
+; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT:    pand %xmm2, %xmm0
 ; SSE-NEXT:    psrlw $1, %xmm1
-; SSE-NEXT:    pand {{.*}}(%rip), %xmm1
-; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; SSE-NEXT:    por %xmm2, %xmm0
-; SSE-NEXT:    pxor %xmm1, %xmm2
+; SSE-NEXT:    pand %xmm1, %xmm2
 ; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
 ; SSE-NEXT:    movdqa %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
@@ -448,11 +355,10 @@
 ; AVX-LABEL: ult_v16i8:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; AVX-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -468,9 +374,10 @@
 ; SSE-NEXT:    movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
 ; SSE-NEXT:    pand %xmm2, %xmm0
 ; SSE-NEXT:    psrlw $1, %xmm1
-; SSE-NEXT:    pand %xmm2, %xmm1
-; SSE-NEXT:    pmaxub %xmm0, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm2
+; SSE-NEXT:    pcmpgtb %xmm0, %xmm2
+; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE-NEXT:    pxor %xmm2, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: uge_v16i8:
@@ -480,8 +387,9 @@
 ; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
 ; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -497,8 +405,9 @@
 ; SSE-NEXT:    pand %xmm2, %xmm0
 ; SSE-NEXT:    psrlw $1, %xmm1
 ; SSE-NEXT:    pand %xmm2, %xmm1
-; SSE-NEXT:    pminub %xmm0, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE-NEXT:    pcmpgtb %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE-NEXT:    pxor %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: ule_v16i8:
@@ -508,8 +417,9 @@
 ; AVX-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    vpsrlw $1, %xmm1, %xmm1
 ; AVX-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm1
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
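
The change rests on a simple equivalence: when both operands are known non-negative, every unsigned predicate agrees with its signed counterpart, so getSignedIntCondCode() lets LowerVSETCC drop the sign-bit-flipping constants before PCMPGT. A minimal standalone C++ sketch of that equivalence follows; it is not part of the patch and uses no LLVM APIs, and it clears the sign bit with a logical shift right by 1 the same way the tests above do, checking all i8 pairs exhaustively.

// Standalone sanity check: once both operands have a clear sign bit,
// the unsigned predicates coincide with their signed counterparts,
// which is the precondition DAG.SignBitIsZero() proves at compile time.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (int x = 0; x < 256; ++x) {
    for (int y = 0; y < 256; ++y) {
      // Clear the sign bit the same way the tests do: lshr by 1.
      uint8_t a = static_cast<uint8_t>(x) >> 1;
      uint8_t b = static_cast<uint8_t>(y) >> 1;
      int8_t sa = static_cast<int8_t>(a);
      int8_t sb = static_cast<int8_t>(b);
      assert((a > b) == (sa > sb));   // SETUGT == SETGT
      assert((a >= b) == (sa >= sb)); // SETUGE == SETGE
      assert((a < b) == (sa < sb));   // SETULT == SETLT
      assert((a <= b) == (sa <= sb)); // SETULE == SETLE
    }
  }
  std::puts("unsigned and signed predicates agree when sign bits are zero");
  return 0;
}

The equality predicates (SETEQ/SETNE) need no such rewrite, which is why getSignedIntCondCode() returns them, and any already-signed code, unchanged through the default case.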