Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -24267,22 +24267,36 @@ } if (VT.isVector()) { - auto ExtendToVec128 = [&DAG](SDLoc DL, SDValue N) { + auto ExtendVecSize = [&DAG](SDLoc DL, SDValue N, unsigned Size) { EVT InVT = N.getValueType(); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), InVT.getScalarType(), - 128 / InVT.getScalarSizeInBits()); - SmallVector Opnds(128 / InVT.getSizeInBits(), + Size / InVT.getScalarSizeInBits()); + SmallVector Opnds(Size / InVT.getSizeInBits(), DAG.getUNDEF(InVT)); Opnds[0] = N; return DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Opnds); }; + // If target-size is less than 128-bits, extend to a type that would extend + // to 128 bits, extend that and extract the original target vector. + if (VT.getSizeInBits() < 128 && !(128 % VT.getSizeInBits()) && + (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) && + (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) { + unsigned Scale = 128 / VT.getSizeInBits(); + EVT ExVT = + EVT::getVectorVT(*DAG.getContext(), SVT, 128 / SVT.getSizeInBits()); + SDValue Ex = ExtendVecSize(DL, N0, Scale * InVT.getSizeInBits()); + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, ExVT, Ex); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SExt, + DAG.getIntPtrConstant(0, DL)); + } + // If target-size is 128-bits, then convert to ISD::SIGN_EXTEND_VECTOR_INREG // which ensures lowering to X86ISD::VSEXT (pmovsx*). 
if (VT.getSizeInBits() == 128 && (SVT == MVT::i64 || SVT == MVT::i32 || SVT == MVT::i16) && (InSVT == MVT::i32 || InSVT == MVT::i16 || InSVT == MVT::i8)) { - SDValue ExOp = ExtendToVec128(DL, N0); + SDValue ExOp = ExtendVecSize(DL, N0, 128); return DAG.getSignExtendVectorInReg(ExOp, DL, VT); } @@ -24301,7 +24315,7 @@ ++i, Offset += NumSubElts) { SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0, DAG.getIntPtrConstant(Offset, DL)); - SrcVec = ExtendToVec128(DL, SrcVec); + SrcVec = ExtendVecSize(DL, SrcVec, 128); SrcVec = DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT); Opnds.push_back(SrcVec); } @@ -24695,16 +24709,20 @@ // Now move on to more general possibilities. SDValue Op0 = N->getOperand(0); - EVT InVT = Op0->getValueType(0); + EVT InVT = Op0.getValueType(); + EVT InSVT = InVT.getScalarType(); // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32)) // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32)) - if (InVT == MVT::v8i8 || InVT == MVT::v4i8 || - InVT == MVT::v8i16 || InVT == MVT::v4i16) { - SDLoc dl(N); - MVT DstVT = MVT::getVectorVT(MVT::i32, InVT.getVectorNumElements()); - SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0); - return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P); + if (InVT.isVector()) { + if ((InSVT == MVT::i8 || InSVT == MVT::i16) && + (InVT.getVectorNumElements() % 2) == 0) { + SDLoc dl(N); + EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, + InVT.getVectorNumElements()); + SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0); + return DAG.getNode(ISD::SINT_TO_FP, dl, N->getValueType(0), P); + } } // Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have Index: test/CodeGen/X86/vec_int_to_fp.ll =================================================================== --- test/CodeGen/X86/vec_int_to_fp.ll +++ test/CodeGen/X86/vec_int_to_fp.ll @@ -50,31 +50,15 @@ define <2 x double> @sitofp_2vf64_i16(<8 x i16> %a) { ; SSE2-LABEL: sitofp_2vf64_i16: ; SSE2: # BB#0: -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm0[0,1,0,3] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,6,7] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm1, %rax -; SSE2-NEXT: movswq %ax, %rax -; SSE2-NEXT: movd %xmm0, %rcx -; SSE2-NEXT: movswq %cx, %rcx -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0 -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2sdq %rax, %xmm1 -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: sitofp_2vf64_i16: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: movswq %ax, %rax -; AVX-NEXT: vpextrq $1, %xmm0, %rcx -; AVX-NEXT: movswq %cx, %rcx -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0 -; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX-NEXT: retq %shuf = shufflevector <8 x i16> %a, <8 x i16> undef, <2 x i32> %cvt = sitofp <2 x i16> %shuf to <2 x double> @@ -86,30 +70,14 @@ ; SSE2: # BB#0: ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0,0,1,1] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] -; SSE2-NEXT: movd %xmm1, %rax -; SSE2-NEXT: movsbq %al, %rax -; SSE2-NEXT: movd %xmm0, %rcx -; SSE2-NEXT: movsbq %cl, %rcx -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2sdq %rcx, %xmm0 -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2sdq %rax, %xmm1 -; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: psrad $24, %xmm0 +; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE2-NEXT: retq ; ; AVX-LABEL: sitofp_2vf64_i8: ; AVX: # BB#0: -; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: movsbq %al, %rax -; AVX-NEXT: vpextrq $1, %xmm0, %rcx -; AVX-NEXT: movsbq %cl, %rcx -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vcvtsi2sdq %rcx, %xmm0, %xmm0 -; AVX-NEXT: vcvtsi2sdq %rax, %xmm0, %xmm1 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX-NEXT: retq %shuf = shufflevector <16 x i8> %a, <16 x i8> undef, <2 x i32> %cvt = sitofp <2 x i8> %shuf to <2 x double> Index: test/CodeGen/X86/vector-sext.ll =================================================================== --- test/CodeGen/X86/vector-sext.ll +++ test/CodeGen/X86/vector-sext.ll @@ -117,6 +117,46 @@ ret <4 x i64>%B } +define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp { +; SSE2-LABEL: sext_2i8_to_i32: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: psraw $8, %xmm0 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: retq +; +; SSSE3-LABEL: sext_2i8_to_i32: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: psraw $8, %xmm0 +; SSSE3-NEXT: movd %xmm0, %eax +; SSSE3-NEXT: retq +; +; SSE41-LABEL: sext_2i8_to_i32: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 +; SSE41-NEXT: movd %xmm0, %eax +; SSE41-NEXT: retq +; +; AVX-LABEL: sext_2i8_to_i32: +; AVX: # BB#0: # %entry +; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: retq +; +; X32-SSE41-LABEL: sext_2i8_to_i32: +; X32-SSE41: # BB#0: # %entry +; X32-SSE41: pmovsxbw %xmm0, %xmm0 +; X32-SSE41-NEXT: movd %xmm0, %eax +; X32-SSE41-NEXT: popl %edx +; X32-SSE41-NEXT: retl +entry: + %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> + %Ex = sext <2 x i8> %Shuf to <2 x i16> + %Bc = bitcast <2 x i16> %Ex to i32 + ret i32 %Bc +} + define <4 x 
i32> @load_sext_test1(<4 x i16> *%ptr) { ; SSE2-LABEL: load_sext_test1: ; SSE2: # BB#0: # %entry Index: test/CodeGen/X86/widen_conv-3.ll =================================================================== --- test/CodeGen/X86/widen_conv-3.ll +++ test/CodeGen/X86/widen_conv-3.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mattr=+sse4.2 | FileCheck %s -; CHECK: cvtsi2ss +; CHECK: cvtdq2ps ; sign to float v2i16 to v2f32