Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -26332,16 +26332,33 @@
     SDLoc DL(N);
 
     // Now we know we at least have a plendvb with the mask val.  See if
-    // we can form a psignb/w/d.
-    // psign = x.type == y.type == mask.type && y = sub(0, x);
+    // we can form a psignb/w/d, which computes, per element:
+    //   (src < 0 ? -dst : src > 0 ? dst : 0)
+    //
+    // The blend mask is either:
+    // - all ones: treated as negative by PSIGN, which yields -dst (sub 0, x).
+    // - all zeroes: PSIGN would yield 0, which is unwanted.  Instead, OR the
+    //   PSIGN source with 1 so that it becomes:
+    // - non-zero, positive: PSIGN yields dst (x).
+    //
+    // In other words, we can combine:
+    //   or (and m, (sub 0, x)), (pandn m, x)
+    // into:
+    //   psign x, (or m, 1)
     if (Y.getOpcode() == ISD::SUB && Y.getOperand(1) == X &&
         ISD::isBuildVectorAllZeros(Y.getOperand(0).getNode()) &&
         X.getValueType() == MaskVT && Y.getValueType() == MaskVT) {
       assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
              "Unsupported VT for PSIGN");
-      Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask.getOperand(0));
+
+      // getConstant with a vector type yields a splat of the scalar value.
+      SDValue Ones = DAG.getConstant(1, DL, MaskVT);
+
+      Mask = DAG.getNode(ISD::OR, DL, MaskVT, Mask.getOperand(0), Ones);
+      Mask = DAG.getNode(X86ISD::PSIGN, DL, MaskVT, X, Mask);
       return DAG.getBitcast(VT, Mask);
     }
+
     // PBLENDVB only available on SSE 4.1
     if (!Subtarget.hasSSE41())
       return SDValue();
Index: test/CodeGen/X86/avx2-logic.ll
===================================================================
--- test/CodeGen/X86/avx2-logic.ll
+++ test/CodeGen/X86/avx2-logic.ll
@@ -72,6 +72,8 @@
 define <8 x i32> @signd(<8 x i32> %a, <8 x i32> %b) nounwind {
 ; CHECK-LABEL: signd:
 ; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm2
+; CHECK-NEXT: vpor %ymm2, %ymm1, %ymm1
 ; CHECK-NEXT: vpsignd %ymm1, %ymm0, %ymm0
 ; CHECK-NEXT: retq
 entry:
Index: test/CodeGen/X86/vec-sign.ll
===================================================================
--- test/CodeGen/X86/vec-sign.ll
+++ test/CodeGen/X86/vec-sign.ll
@@ -4,6 +4,7 @@
 define <4 x i32> @signd(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; CHECK-LABEL: signd:
 ; CHECK: # BB#0: # %entry
+; CHECK-NEXT: por {{.*}}(%rip), %xmm1
 ; CHECK-NEXT: psignd %xmm1, %xmm0
 ; CHECK-NEXT: retq
 entry: