diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37324,6 +37324,14 @@ SDValue Ins1 = peekThroughBitcasts(N.getOperand(1)); unsigned Imm = N.getConstantOperandVal(2); + // Handle subvector splat by tweaking values to match binary concat. + // vperm2x128 (ins ?, X, C1), undef, 0x11 -> + // vperm2x128 (ins ?, X, C1), (ins ?, X, C1), 0x31 -> concat X, X + if (Imm == 0x11 && Ins1.isUndef()) { + Imm = 0x31; + Ins1 = Ins0; + } + if (!(Imm == 0x31 && Ins0.getOpcode() == ISD::INSERT_SUBVECTOR && Ins1.getOpcode() == ISD::INSERT_SUBVECTOR && diff --git a/llvm/test/CodeGen/X86/avx-vperm2x128.ll b/llvm/test/CodeGen/X86/avx-vperm2x128.ll --- a/llvm/test/CodeGen/X86/avx-vperm2x128.ll +++ b/llvm/test/CodeGen/X86/avx-vperm2x128.ll @@ -130,7 +130,6 @@ ; AVX1-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: shuffle_v32i8_2323_domain: diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll --- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -3098,14 +3098,13 @@ define <8 x i32> @add_v8i32_02468ACE_13579BDF(<8 x i32> %a, <8 x i32> %b) { ; AVX1-LABEL: add_v8i32_02468ACE_13579BDF: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vphaddd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vphaddd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[3],ymm0[3] +; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm2[0],ymm0[3],ymm2[3] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: add_v8i32_02468ACE_13579BDF: @@ -3123,14 +3122,13 @@ define <8 x i32> @add_v8i32_8ACE0246_9BDF1357(<8 x i32> %a, <8 x i32> %b) { ; AVX1-LABEL: add_v8i32_8ACE0246_9BDF1357: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vphaddd %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vphaddd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm2, %ymm2 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] -; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm2[1],ymm0[1],ymm2[2],ymm0[2] +; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[1],ymm0[2],ymm2[2] ; AVX1-NEXT: retq ; ; AVX2OR512VL-LABEL: add_v8i32_8ACE0246_9BDF1357: