diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7036,7 +7036,8 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG) { V = peekThroughBitcasts(V); if (V.getOpcode() == ISD::XOR && - ISD::isBuildVectorAllOnes(V.getOperand(1).getNode())) + (ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()) || + isAllOnesConstant(V.getOperand(1)))) return V.getOperand(0); if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR && (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) { @@ -48117,12 +48118,28 @@ if (V.getOpcode() == X86ISD::VBROADCAST) { SDValue Src = V.getOperand(0); EVT SrcVT = Src.getValueType(); - if (!SrcVT.isVector()) + if (!SrcVT.isVector() && !(Src.hasOneUse() && V.hasOneUse())) return SDValue(); if (SDValue Not = IsNOT(Src, DAG)) return DAG.getNode(X86ISD::VBROADCAST, SDLoc(V), VT, DAG.getBitcast(SrcVT, Not)); } + // Fold PSHUFD(SCALAR_TO_VECTOR(NOT(Y))) -> PSHUFD(SCALAR_TO_VECTOR(Y)) + SDValue Shuffle = peekThroughOneUseBitcasts(V); + if (Shuffle.getOpcode() == X86ISD::PSHUFD) { + SDValue Src = Shuffle.getOperand(0); + SDValue S2V = peekThroughBitcasts(Src); + if (S2V.getOpcode() != ISD::SCALAR_TO_VECTOR || !S2V.hasOneUse()) + return SDValue(); + if (SDValue Not = IsNOT(S2V.getOperand(0), DAG)) { + SDValue NotS2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(S2V), + S2V.getValueType(), Not); + SDValue NotSrc = DAG.getBitcast(Src.getValueType(), NotS2V); + return DAG.getNode(X86ISD::PSHUFD, SDLoc(Shuffle), + Shuffle.getValueType(), NotSrc, + Shuffle.getOperand(1)); + } + } return SDValue(); }; diff --git a/llvm/test/CodeGen/X86/combine-bitselect.ll b/llvm/test/CodeGen/X86/combine-bitselect.ll --- a/llvm/test/CodeGen/X86/combine-bitselect.ll +++ b/llvm/test/CodeGen/X86/combine-bitselect.ll @@ -565,6 +565,55 @@ ret <4 x i64> %7 } +define <4 x i32> @bitselect_v4i32_broadcast_rrr(i32 %a0, <4 x i32> %a1) { +; SSE-LABEL: bitselect_v4i32_broadcast_rrr: +; SSE: # %bb.0: +; SSE-NEXT: movd %edi, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] +; SSE-NEXT: pandn %xmm0, %xmm1 +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: retq +; +; XOP-LABEL: bitselect_v4i32_broadcast_rrr: +; XOP: # %bb.0: +; XOP-NEXT: vmovd %edi, %xmm1 +; XOP-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] +; XOP-NEXT: vpandn %xmm0, %xmm1, %xmm0 +; XOP-NEXT: retq +; +; AVX1-LABEL: bitselect_v4i32_broadcast_rrr: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovd %edi, %xmm1 +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0] +; AVX1-NEXT: vpandn %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: bitselect_v4i32_broadcast_rrr: +; AVX2: # %bb.0: +; AVX2-NEXT: vmovd %edi, %xmm1 +; AVX2-NEXT: vpbroadcastd %xmm1, %xmm1 +; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: bitselect_v4i32_broadcast_rrr: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vmovd %edi, %xmm1 +; AVX512F-NEXT: vpbroadcastd %xmm1, %xmm1 +; AVX512F-NEXT: vpandn %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: bitselect_v4i32_broadcast_rrr: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpbroadcastd %edi, %xmm1 +; AVX512VL-NEXT: vpandn %xmm0, %xmm1, %xmm0 +; AVX512VL-NEXT: retq + %1 = xor i32 %a0, -1 + %2 = insertelement <4 x i32> undef, i32 %1, i64 0 + %3 = shufflevector <4 x i32> %2, <4 x i32> poison, <4 x i32> zeroinitializer + %4 = and <4 x i32> %3, %a1 + ret <4 x i32> %4 +} + define <4 x i64> @bitselect_v4i64_broadcast_rrm(<4 x i64> %a0, <4 x i64> %a1, ptr %p2) { ; SSE-LABEL: bitselect_v4i64_broadcast_rrm: ; SSE: # %bb.0: