Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -1205,6 +1205,10 @@ setOperationAction(ISD::UMIN, MVT::v16i16, Legal); setOperationAction(ISD::UMIN, MVT::v8i32, Legal); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom); + // The custom lowering for UINT_TO_FP for v8i32 becomes interesting // when we have a 256bit-wide blend with immediate. setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); @@ -15571,16 +15575,26 @@ MVT InVT = In.getSimpleValueType(); assert(VT.getSizeInBits() == InVT.getSizeInBits()); + MVT SVT = VT.getVectorElementType(); MVT InSVT = InVT.getVectorElementType(); - assert(VT.getVectorElementType().getSizeInBits() > InSVT.getSizeInBits()); + assert(SVT.getSizeInBits() > InSVT.getSizeInBits()); - if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) + if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16) return SDValue(); if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8) return SDValue(); + if (!(VT.is128BitVector() && Subtarget.hasSSE2()) && + !(VT.is256BitVector() && Subtarget.hasInt256())) + return SDValue(); SDLoc dl(Op); + // For 256-bit vectors, we only need the lower (128-bit) half of the input. + if (VT.is256BitVector()) + In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + MVT::getVectorVT(InSVT, InVT.getVectorNumElements() / 2), + In, DAG.getIntPtrConstant(0, dl)); + // SSE41 targets can use the pmovsx* instructions directly. if (Subtarget.hasSSE41()) return DAG.getNode(X86ISD::VSEXT, dl, VT, In); @@ -27837,10 +27851,10 @@ DAG.getIntPtrConstant(0, DL)); } - // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG - // which ensures lowering to X86ISD::VSEXT (pmovsx*). - if (VT.getSizeInBits() == 128) { - SDValue ExOp = ExtendVecSize(DL, N0, 128); + // If target-size is 128-bits (or 256-bits on AVX2 target), then convert to + // ISD::SIGN_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::VSEXT. + if (VT.is128BitVector() || (VT.is256BitVector() && Subtarget.hasInt256())) { + SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits()); return DAG.getSignExtendVectorInReg(ExOp, DL, VT); } Index: test/CodeGen/X86/vec_int_to_fp.ll =================================================================== --- test/CodeGen/X86/vec_int_to_fp.ll +++ test/CodeGen/X86/vec_int_to_fp.ll @@ -1085,9 +1085,7 @@ ; ; AVX2-LABEL: sitofp_8i8_to_8f32: ; AVX2: # BB#0: -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-NEXT: vpslld $24, %ymm0, %ymm0 -; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0 +; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 ; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0 ; AVX2-NEXT: retq %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> Index: test/CodeGen/X86/vector-sext.ll =================================================================== --- test/CodeGen/X86/vector-sext.ll +++ test/CodeGen/X86/vector-sext.ll @@ -169,9 +169,7 @@ ; ; AVX2-LABEL: sext_16i8_to_8i32: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-NEXT: vpslld $24, %ymm0, %ymm0 -; AVX2-NEXT: vpsrad $24, %ymm0, %ymm0 +; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 ; AVX2-NEXT: retq ; ; X32-SSE41-LABEL: sext_16i8_to_8i32: @@ -283,10 +281,7 @@ ; ; AVX2-LABEL: sext_16i8_to_4i64: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX2-NEXT: vpslld $24, %xmm0, %xmm0 -; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0 -; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 +; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0 ; AVX2-NEXT: retq ; ; X32-SSE41-LABEL: sext_16i8_to_4i64: @@ -477,10 +472,7 @@ ; ; AVX2-LABEL: sext_8i16_to_4i64: ; AVX2: # BB#0: # %entry -; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX2-NEXT: vpslld $16, %xmm0, %xmm0 -; AVX2-NEXT: vpsrad $16, %xmm0, %xmm0 -; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 +; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 ; AVX2-NEXT: retq ; ; X32-SSE41-LABEL: sext_8i16_to_4i64: