diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19181,27 +19181,28 @@ MVT IntVT = CastToInt.getSimpleValueType(); SDValue X = CastToInt.getOperand(0); - // TODO: Allow size-changing from source to dest (double -> i32 -> float) - if (X.getSimpleValueType() != VT) + MVT SrcVT = X.getSimpleValueType(); + if (SrcVT != MVT::f32 && SrcVT != MVT::f64) return SDValue(); // See if we have 128-bit vector cast instructions for this type of cast. - // We need cvttps2dq + cvtdq2ps or cvttpd2dq + cvtdq2pd. + // We need cvttps2dq/cvttpd2dq and cvtdq2ps/cvtdq2pd. if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) || IntVT != MVT::i32) return SDValue(); - unsigned NumFPEltsInXMM = 128 / VT.getScalarSizeInBits(); - unsigned NumIntEltsInXMM = 128 / IntVT.getScalarSizeInBits(); - MVT VecFPVT = MVT::getVectorVT(VT, NumFPEltsInXMM); - MVT VecIntVT = MVT::getVectorVT(IntVT, NumIntEltsInXMM); + unsigned SrcSize = SrcVT.getSizeInBits(); + unsigned IntSize = IntVT.getSizeInBits(); + unsigned VTSize = VT.getSizeInBits(); + MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize); + MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize); + MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize); // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64. - bool NeedX86Opcodes = VT.getSizeInBits() != IntVT.getSizeInBits(); unsigned ToIntOpcode = - NeedX86Opcodes ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; + SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; unsigned ToFPOpcode = - NeedX86Opcodes ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; + IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0 // @@ -19211,9 +19212,9 @@ // penalties) with cast ops. SDLoc DL(CastToFP); SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL); - SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecFPVT, X); + SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, X); SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX); - SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecFPVT, VCastToInt); + SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecVT, VCastToInt); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx); } diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll --- a/llvm/test/CodeGen/X86/ftrunc.ll +++ b/llvm/test/CodeGen/X86/ftrunc.ll @@ -301,15 +301,14 @@ define double @trunc_f32_signed32_f64_no_fast_math(float %x) { ; SSE-LABEL: trunc_f32_signed32_f64_no_fast_math: ; SSE: # %bb.0: -; SSE-NEXT: cvttss2si %xmm0, %eax -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math: ; AVX1: # %bb.0: -; AVX1-NEXT: vcvttss2si %xmm0, %eax -; AVX1-NEXT: vcvtsi2sd %eax, %xmm1, %xmm0 +; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX1-NEXT: retq %i = fptosi float %x to i32 %r = sitofp i32 %i to double @@ -319,15 +318,14 @@ define double @trunc_f32_signed32_f64_nsz(float %x) #0 { ; SSE-LABEL: trunc_f32_signed32_f64_nsz: ; SSE: # %bb.0: -; SSE-NEXT: cvttss2si %xmm0, %eax -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: trunc_f32_signed32_f64_nsz: ; AVX1: # %bb.0: -; AVX1-NEXT: vcvttss2si %xmm0, %eax -; AVX1-NEXT: vcvtsi2sd %eax, %xmm1, %xmm0 +; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 +; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX1-NEXT: retq %i = fptosi float %x to i32 %r = sitofp i32 %i to double @@ -337,15 +335,14 @@ define float @trunc_f64_signed32_f32_no_fast_math(double %x) { ; SSE-LABEL: trunc_f64_signed32_f32_no_fast_math: ; SSE: # %bb.0: -; SSE-NEXT: cvttsd2si %xmm0, %eax -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math: ; AVX1: # %bb.0: -; AVX1-NEXT: vcvttsd2si %xmm0, %eax -; AVX1-NEXT: vcvtsi2ss %eax, %xmm1, %xmm0 +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 ; AVX1-NEXT: retq %i = fptosi double %x to i32 %r = sitofp i32 %i to float @@ -355,15 +352,14 @@ define float @trunc_f64_signed32_f32_nsz(double %x) #0 { ; SSE-LABEL: trunc_f64_signed32_f32_nsz: ; SSE: # %bb.0: -; SSE-NEXT: cvttsd2si %xmm0, %eax -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 ; SSE-NEXT: retq ; ; AVX1-LABEL: trunc_f64_signed32_f32_nsz: ; AVX1: # %bb.0: -; AVX1-NEXT: vcvttsd2si %xmm0, %eax -; AVX1-NEXT: vcvtsi2ss %eax, %xmm1, %xmm0 +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 +; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 ; AVX1-NEXT: retq %i = fptosi double %x to i32 %r = sitofp i32 %i to float