Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14007,6 +14007,11 @@
         // when we start sorting the vectors by type.
         return SDValue();
       }
+    } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
+               InVT1.getSizeInBits() == VT.getSizeInBits()) {
+      SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
+      ConcatOps[0] = VecIn2;
+      VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
     } else {
       // TODO: Support cases where the length mismatch isn't exactly by a
       // factor of 2.
Index: test/CodeGen/AArch64/arm64-neon-copy.ll
===================================================================
--- test/CodeGen/AArch64/arm64-neon-copy.ll
+++ test/CodeGen/AArch64/arm64-neon-copy.ll
@@ -1378,7 +1378,7 @@
 define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
 ; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
-; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
+; CHECK: zip1 {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 entry:
   %vecext = extractelement <2 x i64> %x, i32 0
   %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
   %vecext1 = extractelement <1 x i64> %y, i32 0
Index: test/CodeGen/X86/vector-shuffle-v48.ll
===================================================================
--- test/CodeGen/X86/vector-shuffle-v48.ll
+++ test/CodeGen/X86/vector-shuffle-v48.ll
@@ -3,26 +3,18 @@
 define <16 x i8> @foo(<48 x i8>* %x0, <16 x i32> %x1, <16 x i32> %x2) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vmovdqu (%rdi), %ymm4
-; CHECK-NEXT:    vmovdqu 32(%rdi), %xmm5
-; CHECK-NEXT:    vpextrb $13, %xmm5, %eax
-; CHECK-NEXT:    vpextrb $10, %xmm5, %ecx
-; CHECK-NEXT:    vpextrb $7, %xmm5, %edx
-; CHECK-NEXT:    vpextrb $4, %xmm5, %esi
-; CHECK-NEXT:    vpextrb $1, %xmm5, %edi
-; CHECK-NEXT:    vextracti128 $1, %ymm4, %xmm5
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm6 = xmm5[2,2,5,5,5,5,3,3,4,4,5,5,6,6,7,7]
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm7 = xmm4[12,12,13,13,15,15,15,15,12,12,13,13,14,14,15,15]
-; CHECK-NEXT:    vpunpcklqdq {{.*#+}} xmm6 = xmm7[0],xmm6[0]
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm4 = xmm4[0,0,1,1,3,3,3,3,6,6,9,9,9,9,7,7]
-; CHECK-NEXT:    vinserti128 $1, %xmm6, %ymm4, %ymm4
+; CHECK-NEXT:    vmovdqu 32(%rdi), %xmm8
+; CHECK-NEXT:    vmovdqu (%rdi), %ymm5
+; CHECK-NEXT:    vextracti128 $1, %ymm5, %xmm6
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm7 = xmm6[2,2,5,5,5,5,3,3,4,4,5,5,6,6,7,7]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm4 = xmm5[12,12,13,13,15,15,15,15,12,12,13,13,14,14,15,15]
+; CHECK-NEXT:    vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm7[0]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm5 = xmm5[0,0,1,1,3,3,3,3,6,6,9,9,9,9,7,7]
+; CHECK-NEXT:    vinserti128 $1, %xmm4, %ymm5, %ymm4
 ; CHECK-NEXT:    vpand {{.*}}(%rip), %ymm4, %ymm4
-; CHECK-NEXT:    vpshufb {{.*#+}} xmm5 = xmm5[8,11,14],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT:    vpinsrb $3, %edi, %xmm5, %xmm5
-; CHECK-NEXT:    vpinsrb $4, %esi, %xmm5, %xmm5
-; CHECK-NEXT:    vpinsrb $5, %edx, %xmm5, %xmm5
-; CHECK-NEXT:    vpinsrb $6, %ecx, %xmm5, %xmm5
-; CHECK-NEXT:    vpinsrb $7, %eax, %xmm5, %xmm5
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm5 = zero,zero,zero,xmm8[1,4,7,10,13,u,u,u,u,u,u,u,u]
+; CHECK-NEXT:    vpshufb {{.*#+}} xmm6 = xmm6[8,11,14],zero,zero,zero,zero,zero,xmm6[u,u,u,u,u,u,u,u]
+; CHECK-NEXT:    vpor %xmm6, %xmm5, %xmm5
 ; CHECK-NEXT:    vpmovzxbd {{.*#+}} ymm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero,xmm5[4],zero,zero,zero,xmm5[5],zero,zero,zero,xmm5[6],zero,zero,zero,xmm5[7],zero,zero,zero
 ; CHECK-NEXT:    vpmulld %ymm0, %ymm4, %ymm0
 ; CHECK-NEXT:    vpmulld %ymm1, %ymm5, %ymm1
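
For context when reviewing: the new else-if branch fires when the first input vector of a build_vector already matches the result width and the second input is exactly half that width. A minimal IR sketch of such an input, modeled on the AArch64 test above (the function name is illustrative, not part of the patch):

; Illustrative only; mirrors test_concat_v2i64_v2i64_v1i64 above.
; %x is already result-width (<2 x i64>); %y is half-width (<1 x i64>).
define <2 x i64> @concat_full_and_half(<2 x i64> %x, <1 x i64> %y) {
entry:
  %xe = extractelement <2 x i64> %x, i32 0
  %v0 = insertelement <2 x i64> undef, i64 %xe, i32 0
  %ye = extractelement <1 x i64> %y, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %ye, i32 1
  ret <2 x i64> %v1
}

Previously this shape hit the length-mismatch TODO branch and the combine gave up, leaving an element-wise lowering (ins on AArch64, vpextrb/vpinsrb on x86). With the change, VecIn2 is widened to the result type by a CONCAT_VECTORS with an undef second half, so the ordinary shuffle path applies (zip1 on AArch64, vpshufb/vpor on x86), as the updated CHECK lines show.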