Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -8510,6 +8510,13 @@
   SDValue V = V1;
   for (;;) {
     switch (V.getOpcode()) {
+    case ISD::BITCAST: {
+      SDValue VSrc = V.getOperand(0);
+      if (NumElts != VSrc.getSimpleValueType().getVectorNumElements())
+        break;
+      V = VSrc;
+      continue;
+    }
     case ISD::CONCAT_VECTORS: {
       int OperandSize = Mask.size() / V.getNumOperands();
       V = V.getOperand(BroadcastIdx / OperandSize);
Index: llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
+++ llvm/trunk/test/CodeGen/X86/avx-vbroadcast.ll
@@ -173,14 +173,12 @@
 ; X32-LABEL: load_splat_8i32_4i32_33333333:
 ; X32:       ## BB#0: ## %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_8i32_4i32_33333333:
 ; X64:       ## BB#0: ## %entry
-; X64-NEXT:    vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    vbroadcastss 12(%rdi), %ymm0
 ; X64-NEXT:    retq
 entry:
   %ld = load <4 x i32>, <4 x i32>* %ptr
@@ -277,16 +275,12 @@
 ; X32-LABEL: load_splat_4i64_2i64_1111:
 ; X32:       ## BB#0: ## %entry
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovaps (%eax), %xmm0
-; X32-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4i64_2i64_1111:
 ; X64:       ## BB#0: ## %entry
-; X64-NEXT:    vmovaps (%rdi), %xmm0
-; X64-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
-; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT:    vbroadcastsd 8(%rdi), %ymm0
 ; X64-NEXT:    retq
 entry:
   %ld = load <2 x i64>, <2 x i64>* %ptr
Index: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -1320,8 +1320,7 @@
 define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
 ; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
 ; AVX1:       # BB#0:
-; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-NEXT: splat_mem_v4i64_from_v2i64:
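
Note: the new ISD::BITCAST case lets the broadcast search walk through bitcasts on its way back to the underlying scalar load, but only when the cast preserves the vector's element count, so BroadcastIdx still names the same lane. As a minimal sketch of the kind of DAG shape this helps with (a hypothetical example, not one of the commit's tests; the function and pointer names are made up), IR like the following puts a same-element-count bitcast between the load and the splat shuffle, and with this patch it should be able to lower to a single vbroadcastss from the load's address on AVX:

    ; Hypothetical illustration; not taken from the commit.
    define <8 x i32> @splat_i32_through_bitcast(<4 x float>* %p) {
    entry:
      ; Load four floats, reinterpret them as four i32 lanes (same element
      ; count, so the new BITCAST case can look through this node), then
      ; splat lane 3 across a 256-bit result.
      %ld = load <4 x float>, <4 x float>* %p
      %bc = bitcast <4 x float> %ld to <4 x i32>
      %splat = shufflevector <4 x i32> %bc, <4 x i32> undef,
                             <8 x i32> <i32 3, i32 3, i32 3, i32 3,
                                        i32 3, i32 3, i32 3, i32 3>
      ret <8 x i32> %splat
    }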