Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -15112,6 +15112,38 @@
   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
 }
 
+static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
+                                   SDValue N1, SelectionDAG &DAG) {
+  // TODO - handle cases other than BUILD_VECTOR.
+  auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+  auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+  if (!BV0 && !BV1)
+    return SDValue();
+
+  EVT VT = SVN->getValueType(0);
+  unsigned NumElts = VT.getVectorNumElements();
+
+  bool Changed = false;
+  SmallVector<int, 8> NewMask;
+  for (unsigned i = 0; i != NumElts; ++i) {
+    int Idx = SVN->getMaskElt(i);
+    if (BV0 && 0 <= Idx && Idx < (int)NumElts &&
+        BV0->getOperand(Idx).isUndef()) {
+      Changed = true;
+      Idx = -1;
+    } else if (BV1 && Idx >= (int)NumElts &&
+               BV1->getOperand(Idx - NumElts).isUndef()) {
+      Changed = true;
+      Idx = -1;
+    }
+    NewMask.push_back(Idx);
+  }
+  if (Changed)
+    return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
+
+  return SDValue();
+}
+
 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
 // or turn a shuffle of a single concat into simpler shuffle then concat.
 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
@@ -15461,6 +15493,10 @@
     return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
   }
 
+  // Simplify shuffle mask if a referenced element is UNDEF.
+  if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
+    return V;
+
   // A shuffle of a single vector that is a splat can always be folded.
   if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
     if (N1->isUndef() && N0Shuf->isSplat())
Index: llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
+++ llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
@@ -836,14 +836,12 @@
 ; X32-LABEL: broadcast_shuffle_1000:
 ; X32:       ## BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    vbroadcastsd (%eax), %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_shuffle_1000:
 ; X64:       ## BB#0:
-; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; X64-NEXT:    retq
   %1 = load double, double* %p
   %2 = insertelement <2 x double> undef, double %1, i32 0
@@ -855,18 +853,14 @@
 ; X32-LABEL: broadcast_shuffle1032:
 ; X32:       ## BB#0:
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X32-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_shuffle1032:
 ; X64:       ## BB#0:
-; X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X64-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
-; X64-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
+; X64-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-NEXT:    retq
   %1 = load double, double* %p
   %2 = insertelement <2 x double> undef, double %1, i32 1