Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14617,6 +14617,53 @@
   return SDValue();
 }
 
+// Combine shuffles of splat-shuffles of the form:
+//   shuffle (shuffle V, undef, splat-mask), undef, M
+// by composing M with splat-mask into a single mask that is applied to the
+// splat-shuffle's own operands.
+//
+// UserMask is M, the mask of the outer (user) shuffle, and Splat is the inner
+// splat-shuffle. The caller is expected to have checked that the outer
+// shuffle's second operand is undef.
+//
+// If splat-mask contains undef elements, we need to be careful about
+// introducing undefs in the folded mask which are not the result of composing
+// the masks of the shuffles.
+//
+// Returns the splat-shuffle itself when the composed mask is identical to
+// splat-mask, and a new shuffle with the composed mask otherwise.
+static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
+                                     ShuffleVectorSDNode *Splat,
+                                     SelectionDAG &DAG) {
+  EVT VT = Splat->getValueType(0);
+  int NumElts = VT.getVectorNumElements();
+  ArrayRef<int> SplatMask = Splat->getMask();
+
+  // Build the new mask by composing the user shuffle's mask with the
+  // splat-shuffle's mask, tracking whether it ends up equal to the latter.
+  SmallVector<int, 32> NewMask;
+  bool IsSameMask = true;
+  for (int i = 0; i != NumElts; ++i) {
+    int UserIdx = UserMask[i];
+    // A lane that the user shuffle leaves undef, or that it reads from its
+    // undef second operand, may hold any value. Keep the splat-shuffle's own
+    // mask element there rather than introducing a new undef, and never index
+    // the splat mask with an out-of-range value.
+    bool IsDontCare = UserIdx < 0 || UserIdx >= NumElts;
+    int NewIdx = IsDontCare ? SplatMask[i] : SplatMask[UserIdx];
+    IsSameMask &= NewIdx == SplatMask[i];
+    NewMask.push_back(NewIdx);
+  }
+
+  // If the new composed mask is identical to the splat-shuffle's mask, simplify
+  // to the splat-shuffle itself. Otherwise create a new shuffle with the new
+  // mask.
+  if (IsSameMask)
+    return SDValue(Splat, 0);
+  return DAG.getVectorShuffle(VT, SDLoc(Splat), Splat->getOperand(0),
+                              Splat->getOperand(1), NewMask);
+}
+
 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   EVT VT = N->getValueType(0);
   unsigned NumElts = VT.getVectorNumElements();
@@ -14663,6 +14710,12 @@
     return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
   }
 
+  // A shuffle of a single vector that is a splat can always be folded.
+ if (auto *N0Shuf = dyn_cast(N0)) + if (N1->isUndef() && N0Shuf->isSplat()) + if (SDValue V = combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG)) + return V; + // If it is a splat, check if the argument vector is another splat or a // build_vector. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { Index: test/CodeGen/X86/shuffle-of-splat-multiuses.ll =================================================================== --- test/CodeGen/X86/shuffle-of-splat-multiuses.ll +++ test/CodeGen/X86/shuffle-of-splat-multiuses.ll @@ -5,9 +5,8 @@ define <2 x double> @foo2(<2 x double> %v, <2 x double> *%p) nounwind { ; AVX2-LABEL: foo2: ; AVX2: # BB#0: -; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,1] -; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0] -; AVX2-NEXT: vmovapd %xmm1, (%rdi) +; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,1] +; AVX2-NEXT: vmovapd %xmm0, (%rdi) ; AVX2-NEXT: retq %res = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> %res1 = shufflevector<2 x double> %res, <2 x double> undef, <2 x i32> @@ -18,9 +17,8 @@ define <4 x double> @foo4(<4 x double> %v, <4 x double> *%p) nounwind { ; AVX2-LABEL: foo4: ; AVX2: # BB#0: -; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,2,2,2] -; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm1[2,0,2,3] -; AVX2-NEXT: vmovapd %ymm1, (%rdi) +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2] +; AVX2-NEXT: vmovapd %ymm0, (%rdi) ; AVX2-NEXT: retq %res = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> %res1 = shufflevector<4 x double> %res, <4 x double> undef, <4 x i32> @@ -32,10 +30,8 @@ ; AVX2-LABEL: foo8: ; AVX2: # BB#0: ; AVX2-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] -; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,2,2,2] -; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = <2,0,u,u,5,1,3,7> -; AVX2-NEXT: vpermps %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovapd %ymm1, (%rdi) +; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,2] +; AVX2-NEXT: vmovapd %ymm0, (%rdi) ; AVX2-NEXT: retq %res = shufflevector <8 x float> %v, <8 x 
float> undef, <8 x i32> %res1 = shufflevector<8 x float> %res, <8 x float> undef, <8 x i32> @@ -76,9 +72,10 @@ define <4 x i32> @undef_splatmask4(<4 x i32> %v, <4 x i32>* %p) nounwind { ; AVX2-LABEL: undef_splatmask4: ; AVX2: # BB#0: -; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] -; AVX2-NEXT: vmovdqa %xmm1, (%rdi) +; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,2,3] +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; AVX2-NEXT: vmovdqa %xmm0, (%rdi) +; AVX2-NEXT: vmovdqa %xmm1, %xmm0 ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32> @@ -89,9 +86,10 @@ define <4 x i32> @undef_splatmask5(<4 x i32> %v, <4 x i32>* %p) nounwind { ; AVX2-LABEL: undef_splatmask5: ; AVX2: # BB#0: -; AVX2-NEXT: vpbroadcastq %xmm0, %xmm1 -; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,2,2,3] -; AVX2-NEXT: vmovdqa %xmm1, (%rdi) +; AVX2-NEXT: vpbroadcastd %xmm0, %xmm1 +; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 +; AVX2-NEXT: vmovdqa %xmm0, (%rdi) +; AVX2-NEXT: vmovdqa %xmm1, %xmm0 ; AVX2-NEXT: retq %res = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> %res1 = shufflevector <4 x i32> %res, <4 x i32> undef, <4 x i32>