Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -1205,6 +1205,10 @@
       setOperationAction(ISD::UMIN,            MVT::v16i16, Legal);
       setOperationAction(ISD::UMIN,            MVT::v8i32,  Legal);
 
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64,  Custom);
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32,  Custom);
+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
+
       // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
       // when we have a 256bit-wide blend with immediate.
       setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
@@ -15576,16 +15580,26 @@
   MVT InVT = In.getSimpleValueType();
   assert(VT.getSizeInBits() == InVT.getSizeInBits());
 
+  MVT SVT = VT.getVectorElementType();
   MVT InSVT = InVT.getVectorElementType();
-  assert(VT.getVectorElementType().getSizeInBits() > InSVT.getSizeInBits());
+  assert(SVT.getSizeInBits() > InSVT.getSizeInBits());
 
-  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
+  if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
     return SDValue();
   if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
     return SDValue();
+  if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
+      !(VT.is256BitVector() && Subtarget.hasInt256()))
+    return SDValue();
 
   SDLoc dl(Op);
 
+  // For 256-bit vectors, we only need the lower (128-bit) half of the input.
+  if (VT.is256BitVector())
+    In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
+                     MVT::getVectorVT(InSVT, InVT.getVectorNumElements() / 2),
+                     In, DAG.getIntPtrConstant(0, dl));
+
   // SSE41 targets can use the pmovsx* instructions directly.
   if (Subtarget.hasSSE41())
     return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
@@ -27915,10 +27929,10 @@
                        DAG.getIntPtrConstant(0, DL));
   }
 
-  // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG
-  // which ensures lowering to X86ISD::VSEXT (pmovsx*).
-  if (VT.getSizeInBits() == 128) {
-    SDValue ExOp = ExtendVecSize(DL, N0, 128);
+  // If target-size is 128-bits (or 256-bits on AVX2 target), then convert to
+  // ISD::SIGN_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::VSEXT.
+  if (VT.is128BitVector() || (VT.is256BitVector() && Subtarget.hasInt256())) {
+    SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
     return DAG.getSignExtendVectorInReg(ExOp, DL, VT);
   }
 
Index: llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
+++ llvm/trunk/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1085,9 +1085,7 @@
 ;
 ; AVX2-LABEL: sitofp_8i8_to_8f32:
 ; AVX2:       # BB#0:
-; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT:    vpslld $24, %ymm0, %ymm0
-; AVX2-NEXT:    vpsrad $24, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
 ; AVX2-NEXT:    vcvtdq2ps %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
Index: llvm/trunk/test/CodeGen/X86/vector-sext.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-sext.ll
+++ llvm/trunk/test/CodeGen/X86/vector-sext.ll
@@ -169,9 +169,7 @@
 ;
 ; AVX2-LABEL: sext_16i8_to_8i32:
 ; AVX2:       # BB#0: # %entry
-; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT:    vpslld $24, %ymm0, %ymm0
-; AVX2-NEXT:    vpsrad $24, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovsxbd %xmm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; X32-SSE41-LABEL: sext_16i8_to_8i32:
@@ -283,10 +281,7 @@
 ;
 ; AVX2-LABEL: sext_16i8_to_4i64:
 ; AVX2:       # BB#0: # %entry
-; AVX2-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX2-NEXT:    vpslld $24, %xmm0, %xmm0
-; AVX2-NEXT:    vpsrad $24, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT:    vpmovsxbq %xmm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; X32-SSE41-LABEL: sext_16i8_to_4i64:
@@ -477,10 +472,7 @@
 ;
 ; AVX2-LABEL: sext_8i16_to_4i64:
 ; AVX2:       # BB#0: # %entry
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT:    vpslld $16, %xmm0, %xmm0
-; AVX2-NEXT:    vpsrad $16, %xmm0, %xmm0
-; AVX2-NEXT:    vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT:    vpmovsxwq %xmm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; X32-SSE41-LABEL: sext_8i16_to_4i64: