Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3013,48 +3013,55 @@ } // Normalize the shuffle vector since mask and vector length don't match. - if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { - // Mask is longer than the source vectors and is a multiple of the source - // vectors. We can use concatenate vector to make the mask and vectors - // lengths match. - - unsigned NumConcat = MaskNumElts / SrcNumElts; - - // Check if the shuffle is some kind of concatenation of the input vectors. - bool IsConcat = true; - SmallVector ConcatSrcs(NumConcat, -1); - for (unsigned i = 0; i != MaskNumElts; ++i) { - int Idx = Mask[i]; - if (Idx < 0) - continue; - // Ensure the indices in each SrcVT sized piece are sequential and that - // the same source is used for the whole piece. - if ((Idx % SrcNumElts != (i % SrcNumElts)) || - (ConcatSrcs[i / SrcNumElts] >= 0 && - ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { - IsConcat = false; - break; + if (SrcNumElts < MaskNumElts) { + // Mask is longer than the source vectors. We can use concatenate vector to + // make the mask and vectors lengths match. + + if (MaskNumElts % SrcNumElts == 0) { + // Mask length is a multiple of the source vector length. + // Check if the shuffle is some kind of concatenation of the input + // vectors. + unsigned NumConcat = MaskNumElts / SrcNumElts; + bool IsConcat = true; + SmallVector ConcatSrcs(NumConcat, -1); + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < 0) + continue; + // Ensure the indices in each SrcVT sized piece are sequential and that + // the same source is used for the whole piece. + if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { + IsConcat = false; + break; + } + // Remember which source this index came from. + ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; } - // Remember which source this index came from. - ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; - } - // The shuffle is concatenating multiple vectors together. Just emit - // a CONCAT_VECTORS operation. - if (IsConcat) { - SmallVector ConcatOps; - for (auto Src : ConcatSrcs) { - if (Src < 0) - ConcatOps.push_back(DAG.getUNDEF(SrcVT)); - else if (Src == 0) - ConcatOps.push_back(Src1); - else - ConcatOps.push_back(Src2); + // The shuffle is concatenating multiple vectors together. Just emit + // a CONCAT_VECTORS operation. + if (IsConcat) { + SmallVector ConcatOps; + for (auto Src : ConcatSrcs) { + if (Src < 0) + ConcatOps.push_back(DAG.getUNDEF(SrcVT)); + else if (Src == 0) + ConcatOps.push_back(Src1); + else + ConcatOps.push_back(Src2); + } + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps)); + return; } - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps)); - return; } + unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts); + unsigned NumConcat = PaddedMaskNumElts / SrcNumElts; + EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), + PaddedMaskNumElts); + // Pad both vectors with undefs to make them the same length as the mask. SDValue UndefVal = DAG.getUNDEF(SrcVT); @@ -3063,10 +3070,12 @@ MOps1[0] = Src1; MOps2[0] = Src2; - Src1 = Src1.isUndef() ? DAG.getUNDEF(VT) - : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MOps1); - Src2 = Src2.isUndef() ? DAG.getUNDEF(VT) - : DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MOps2); + Src1 = Src1.isUndef() + ? DAG.getUNDEF(PaddedVT) + : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1); + Src2 = Src2.isUndef() + ? DAG.getUNDEF(PaddedVT) + : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2); // Readjust mask for new input vector length. SmallVector MappedOps; @@ -3077,7 +3086,19 @@ MappedOps.push_back(Idx); } - setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps)); + for (unsigned i = MaskNumElts; i != PaddedMaskNumElts; ++i) + MappedOps.push_back(-1); + + SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps); + + // If the concatenated vector was padded, extract a subvector with the correct + // number of elements. + if (MaskNumElts != PaddedMaskNumElts) + Result = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, VT, Result, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + setValue(&I, Result); return; } Index: test/CodeGen/X86/pr29025.ll =================================================================== --- test/CodeGen/X86/pr29025.ll +++ test/CodeGen/X86/pr29025.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i686-pc-linux -mattr=+avx < %s | FileCheck %s + +define void @foo(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <12 x i8> *%p) { +; CHECK-LABEL: foo: +; CHECK: # BB#0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: vpshufb {{.*#+}} xmm1 = zero,xmm1[0,u],zero,xmm1[4,u],zero,xmm1[8,u],zero,xmm1[12,u,u,u,u,u] +; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0],zero,xmm0[u,4],zero,xmm0[u,8],zero,xmm0[u,12],zero,xmm0[u,u,u,u,u] +; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpextrd $2, %xmm0, 8(%eax) +; CHECK-NEXT: vpextrd $1, %xmm0, 4(%eax) +; CHECK-NEXT: vmovd %xmm0, (%eax) +; CHECK-NEXT: retl + %s1 = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> + %s2 = shufflevector <4 x i8> %c, <4 x i8> undef, <8 x i32> + %r = shufflevector <8 x i8> %s1, <8 x i8> %s2, <12 x i32> + store <12 x i8> %r, <12 x i8>* %p, align 1 + ret void +}