Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -7380,8 +7380,9 @@
   // all the smarts here sunk into that routine. However, the current
   // lowering of BUILD_VECTOR makes that nearly impossible until the old
   // vector shuffle lowering is dead.
-  if (SDValue V2S = getScalarValueForVectorElement(
-          V2, Mask[V2Index] - Mask.size(), DAG)) {
+  SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
+                                               DAG);
+  if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
     // We need to zext the scalar if it is smaller than an i32.
     V2S = DAG.getBitcast(EltVT, V2S);
     if (EltVT == MVT::i8 || EltVT == MVT::i16) {
Index: test/CodeGen/X86/avx-shuffle-x86_32.ll
===================================================================
--- test/CodeGen/X86/avx-shuffle-x86_32.ll
+++ test/CodeGen/X86/avx-shuffle-x86_32.ll
@@ -6,3 +6,14 @@
  ; CHECK-LABEL: test1:
  ; CHECK-NOT: vinsertf128
  }
+
+define <8 x i16> @test2(<4 x i16>* %v) nounwind {
+; CHECK-LABEL: test2:
+; CHECK: vmovsd
+; CHECK: vmovq
+  %v9 = load <4 x i16>, <4 x i16> * %v, align 8 ; 64-bit load of four i16 lanes
+  %v10 = shufflevector <4 x i16> %v9, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> ; widen to 8 lanes, upper 4 undef
+  %v11 = shufflevector <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 0, i16 0>, <8 x i16> %v10, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7> ; lanes 0-3 from %v10, lanes 4-7 zero
+  ret <8 x i16> %v11
+}
+