Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19745,6 +19745,34 @@
   return DAG.getBuildVector(VT, DL, NewVecC);
 }
 
+/// Given a simple buildvector constant, return a new vector constant with each
+/// element incremented. If incrementing would result in overflow or this
+/// is not a simple vector constant, return an empty value.
+static SDValue incrementVectorConstant(SDValue V, SelectionDAG &DAG) {
+  auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
+  if (!BV)
+    return SDValue();
+
+  MVT VT = V.getSimpleValueType();
+  MVT EltVT = VT.getVectorElementType();
+  unsigned NumElts = VT.getVectorNumElements();
+  SmallVector<SDValue, 8> NewVecC;
+  SDLoc DL(V);
+  for (unsigned i = 0; i < NumElts; ++i) {
+    auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+    if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT)
+      return SDValue();
+
+    // Avoid overflow.
+    if (Elt->getAPIntValue().isMaxValue())
+      return SDValue();
+
+    NewVecC.push_back(DAG.getConstant(Elt->getAPIntValue() + 1, DL, EltVT));
+  }
+
+  return DAG.getBuildVector(VT, DL, NewVecC);
+}
+
 /// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
 /// Op0 u<= Op1:
 ///   t = psubus Op0, Op1
@@ -19777,6 +19805,18 @@
     Op1 = ULEOp1;
     break;
   }
+  case ISD::SETUGT: {
+    // If the comparison is against a constant, we can turn this into a setuge.
+    // This is beneficial because materializing a constant 0 for the PCMPEQ is
+    // probably cheaper than XOR+PCMPGT using 2 different vector constants:
+    // cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
+    SDValue UGEOp1 = incrementVectorConstant(Op1, DAG);
+    if (!UGEOp1)
+      return SDValue();
+    Op1 = Op0;
+    Op0 = UGEOp1;
+    break;
+  }
   // Psubus is better than flip-sign because it requires no inversion.
   case ISD::SETUGE:
     std::swap(Op0, Op1);
Index: llvm/test/CodeGen/X86/vec_setcc-2.ll
===================================================================
--- llvm/test/CodeGen/X86/vec_setcc-2.ll
+++ llvm/test/CodeGen/X86/vec_setcc-2.ll
@@ -194,8 +194,10 @@
 define <8 x i1> @ugt_v8i16_splat(<8 x i16> %x) {
 ; SSE2-LABEL: ugt_v8i16_splat:
 ; SSE2:       ## %bb.0:
-; SSE2-NEXT:    pxor {{.*}}(%rip), %xmm0
-; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm0
+; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [243,243,243,243,243,243,243,243]
+; SSE2-NEXT:    psubusw %xmm0, %xmm1
+; SSE2-NEXT:    pxor %xmm0, %xmm0
+; SSE2-NEXT:    pcmpeqw %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: ugt_v8i16_splat:
@@ -541,9 +543,10 @@
 define <8 x i16> @PR39859(<8 x i16> %x, <8 x i16> %y) {
 ; SSE2-LABEL: PR39859:
 ; SSE2:       ## %bb.0:
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE2-NEXT:    pxor %xmm0, %xmm2
-; SSE2-NEXT:    pcmpgtw {{.*}}(%rip), %xmm2
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [43,43,43,43,43,43,43,43]
+; SSE2-NEXT:    psubusw %xmm0, %xmm3
+; SSE2-NEXT:    pxor %xmm2, %xmm2
+; SSE2-NEXT:    pcmpeqw %xmm3, %xmm2
 ; SSE2-NEXT:    pand %xmm2, %xmm1
 ; SSE2-NEXT:    pandn %xmm0, %xmm2
 ; SSE2-NEXT:    por %xmm1, %xmm2