diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39978,6 +39978,12 @@
     }
   }
 
+  // For broadcasts, unless we *only* demand the 0'th element,
+  // stop attempts at simplification here, we aren't going to improve things,
+  // this is better than any potential shuffle.
+  if (isTargetShuffleSplat(Op) && !DemandedElts.isOneValue())
+    return false;
+
   // Get target/faux shuffle mask.
   APInt OpUndef, OpZero;
   SmallVector<int, 64> OpMask;
diff --git a/llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll b/llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
--- a/llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
+++ b/llvm/test/CodeGen/X86/copy-low-subvec-elt-to-high-subvec-elt.ll
@@ -284,7 +284,7 @@
 define <4 x i64> @vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; CHECK-LABEL: vec256_eltty_i64_source_subvec_0_target_subvec_mask_3_binary:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,1]
+; CHECK-NEXT:    vbroadcastsd %xmm1, %ymm1
 ; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; CHECK-NEXT:    retq
   %r = shufflevector <4 x i64> %x, <4 x i64> %y, <4 x i32>
diff --git a/llvm/test/CodeGen/X86/horizontal-sum.ll b/llvm/test/CodeGen/X86/horizontal-sum.ll
--- a/llvm/test/CodeGen/X86/horizontal-sum.ll
+++ b/llvm/test/CodeGen/X86/horizontal-sum.ll
@@ -752,9 +752,9 @@
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm4 = xmm2[3,3,3,3]
 ; AVX2-SLOW-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} xmm2 = xmm5[0,1],xmm2[2,3]
-; AVX2-SLOW-NEXT:    vpbroadcastd %xmm3, %xmm4
-; AVX2-SLOW-NEXT:    vpbroadcastq %xmm3, %xmm5
-; AVX2-SLOW-NEXT:    vpaddd %xmm4, %xmm5, %xmm4
+; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,1,1]
+; AVX2-SLOW-NEXT:    vpbroadcastd %xmm3, %xmm5
+; AVX2-SLOW-NEXT:    vpaddd %xmm5, %xmm4, %xmm4
 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[3]
 ; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[2,2,2,2]
 ; AVX2-SLOW-NEXT:    vpblendd {{.*#+}} xmm2 = xmm2[0,1,2],xmm4[3]
diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll
--- a/llvm/test/CodeGen/X86/sse41.ll
+++ b/llvm/test/CodeGen/X86/sse41.ll
@@ -645,8 +645,7 @@
 ; X86-AVX1-LABEL: pinsrd_from_shufflevector_i32:
 ; X86-AVX1:       ## %bb.0: ## %entry
 ; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
-; X86-AVX1-NEXT:    vpermilps $0, (%eax), %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0x08,0x00]
-; X86-AVX1-NEXT:    ## xmm1 = mem[0,0,0,0]
+; X86-AVX1-NEXT:    vbroadcastss (%eax), %xmm1 ## encoding: [0xc4,0xe2,0x79,0x18,0x08]
 ; X86-AVX1-NEXT:    vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
 ; X86-AVX1-NEXT:    ## xmm0 = xmm0[0,1,2],xmm1[3]
 ; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
@@ -669,8 +668,7 @@
 ;
 ; X64-AVX1-LABEL: pinsrd_from_shufflevector_i32:
 ; X64-AVX1:       ## %bb.0: ## %entry
-; X64-AVX1-NEXT:    vpermilps $0, (%rdi), %xmm1 ## encoding: [0xc4,0xe3,0x79,0x04,0x0f,0x00]
-; X64-AVX1-NEXT:    ## xmm1 = mem[0,0,0,0]
+; X64-AVX1-NEXT:    vbroadcastss (%rdi), %xmm1 ## encoding: [0xc4,0xe2,0x79,0x18,0x0f]
 ; X64-AVX1-NEXT:    vblendps $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x08]
 ; X64-AVX1-NEXT:    ## xmm0 = xmm0[0,1,2],xmm1[3]
 ; X64-AVX1-NEXT:    retq ## encoding: [0xc3]