diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21597,6 +21597,17 @@ return DAG.getBitcast(VT, Result); } + if (!FlipSigns && !Invert && ISD::isBuildVectorAllOnes(Op1.getNode())) { + Op0 = DAG.getBitcast(MVT::v4i32, Op0); + Op1 = DAG.getConstant(-1, dl, MVT::v4i32); + + SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1); + static const int MaskHi[] = { 1, 1, 3, 3 }; + SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); + + return DAG.getBitcast(VT, Result); + } + // Since SSE has no unsigned integer comparisons, we need to flip the sign // bits of the inputs before performing those operations. The lower // compare is always unsigned. @@ -40814,8 +40825,8 @@ default: return SDValue(); case MVT::v16i8: case MVT::v8i16: - case MVT::v4i32: if (!Subtarget.hasSSE2()) return SDValue(); break; - case MVT::v2i64: if (!Subtarget.hasSSE42()) return SDValue(); break; + case MVT::v4i32: + case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break; case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: @@ -40839,7 +40850,7 @@ // Create a greater-than comparison against -1. We don't use the more obvious // greater-than-or-equal-to-zero because SSE/AVX don't have that instruction. - return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); + return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT); } /// Detect patterns of truncation with unsigned saturation: diff --git a/llvm/test/CodeGen/X86/vector-pcmp.ll b/llvm/test/CodeGen/X86/vector-pcmp.ll --- a/llvm/test/CodeGen/X86/vector-pcmp.ll +++ b/llvm/test/CodeGen/X86/vector-pcmp.ll @@ -61,10 +61,9 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %x) { ; SSE2-LABEL: test_pcmpgtq: ; SSE2: # %bb.0: -; SSE2-NEXT: psrad $31, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_pcmpgtq: @@ -187,13 +186,11 @@ define <4 x i64> @test_pcmpgtq_256(<4 x i64> %x) { ; SSE2-LABEL: test_pcmpgtq_256: ; SSE2: # %bb.0: -; SSE2-NEXT: psrad $31, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; SSE2-NEXT: psrad $31, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: pxor %xmm2, %xmm1 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE2-NEXT: pcmpgtd %xmm2, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSE2-NEXT: retq ; ; SSE42-LABEL: test_pcmpgtq_256: