Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -7380,8 +7380,9 @@
   // all the smarts here sunk into that routine. However, the current
   // lowering of BUILD_VECTOR makes that nearly impossible until the old
   // vector shuffle lowering is dead.
-  if (SDValue V2S = getScalarValueForVectorElement(
-          V2, Mask[V2Index] - Mask.size(), DAG)) {
+  SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
+                                               DAG);
+  if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
     // We need to zext the scalar if it is smaller than an i32.
     V2S = DAG.getBitcast(EltVT, V2S);
     if (EltVT == MVT::i8 || EltVT == MVT::i16) {
Index: test/CodeGen/X86/avx-shuffle-x86_32.ll
===================================================================
--- test/CodeGen/X86/avx-shuffle-x86_32.ll
+++ test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -6,3 +6,14 @@
 ; CHECK-LABEL: test1:
 ; CHECK-NOT: vinsertf128
 }
+
+define <8 x i16> @test2(<4 x i16>* %v) nounwind {
+; CHECK-LABEL: test2
+; CHECK: vmovsd
+; CHECK: vmovq
+  %v9 = load <4 x i16>, <4 x i16> * %v, align 8
+  %v10 = shufflevector <4 x i16> %v9, <4 x i16> undef, <8 x i32>
+  %v11 = shufflevector <8 x i16> , <8 x i16> %v10, <8 x i32>
+  ret <8 x i16> %v11
+}
+