diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19178,17 +19178,25 @@
   MVT IntVT = CastToInt.getSimpleValueType();
   SDValue X = CastToInt.getOperand(0);
   // TODO: Allow size-changing from source to dest (double -> i32 -> float)
-  if (X.getSimpleValueType() != VT ||
-      VT.getSizeInBits() != IntVT.getSizeInBits())
+  if (X.getSimpleValueType() != VT)
     return SDValue();
 
-  // See if we have a 128-bit vector cast op for this type of cast.
-  unsigned NumEltsInXMM = 128 / VT.getScalarSizeInBits();
-  MVT VecFPVT = MVT::getVectorVT(VT, NumEltsInXMM);
-  MVT VecIntVT = MVT::getVectorVT(IntVT, NumEltsInXMM);
-  if (!useVectorCast(CastToFP.getOpcode(), VecIntVT, VecFPVT, Subtarget))
+  // See if we have 128-bit vector cast instructions for this type of cast.
+  // We need cvttps2dq + cvtdq2ps or cvttpd2dq + cvtdq2pd.
+  if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
+      IntVT != MVT::i32)
     return SDValue();
 
+  unsigned NumFPEltsInXMM = 128 / VT.getScalarSizeInBits();
+  unsigned NumIntEltsInXMM = 128 / IntVT.getScalarSizeInBits();
+  MVT VecFPVT = MVT::getVectorVT(VT, NumFPEltsInXMM);
+  MVT VecIntVT = MVT::getVectorVT(IntVT, NumIntEltsInXMM);
+
+  // We need target-specific opcodes if this is v2f64 -> v4i32 -> v2f64.
+  bool NeedX86Opcodes = VT.getSizeInBits() != IntVT.getSizeInBits();
+  unsigned ToIntOpcode = NeedX86Opcodes ? X86ISD::CVTTP2SI : ISD::FP_TO_SINT;
+  unsigned ToFPOpcode = NeedX86Opcodes ? X86ISD::CVTSI2P : ISD::SINT_TO_FP;
+
   // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0
   //
   // We are not defining the high elements (for example, zero them) because
@@ -19198,8 +19206,8 @@
   SDLoc DL(CastToFP);
   SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
   SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecFPVT, X);
-  SDValue VCastToInt = DAG.getNode(ISD::FP_TO_SINT, DL, VecIntVT, VecX);
-  SDValue VCastToFP = DAG.getNode(ISD::SINT_TO_FP, DL, VecFPVT, VCastToInt);
+  SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX);
+  SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecFPVT, VCastToInt);
   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);
 }
 
diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll
--- a/llvm/test/CodeGen/X86/ftrunc.ll
+++ b/llvm/test/CodeGen/X86/ftrunc.ll
@@ -263,15 +263,14 @@
 define double @trunc_signed32_f64_no_fast_math(double %x) {
 ; SSE-LABEL: trunc_signed32_f64_no_fast_math:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    cvttsd2si %xmm0, %eax
-; SSE-NEXT:    xorps %xmm0, %xmm0
-; SSE-NEXT:    cvtsi2sd %eax, %xmm0
+; SSE-NEXT:    cvttpd2dq %xmm0, %xmm0
+; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: trunc_signed32_f64_no_fast_math:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vcvttsd2si %xmm0, %eax
-; AVX1-NEXT:    vcvtsi2sd %eax, %xmm1, %xmm0
+; AVX1-NEXT:    vcvttpd2dq %xmm0, %xmm0
+; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0
 ; AVX1-NEXT:    retq
   %i = fptosi double %x to i32
   %r = sitofp i32 %i to double
@@ -281,9 +280,8 @@
 define double @trunc_signed32_f64_nsz(double %x) #0 {
 ; SSE2-LABEL: trunc_signed32_f64_nsz:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    cvttsd2si %xmm0, %eax
-; SSE2-NEXT:    xorps %xmm0, %xmm0
-; SSE2-NEXT:    cvtsi2sd %eax, %xmm0
+; SSE2-NEXT:    cvttpd2dq %xmm0, %xmm0
+; SSE2-NEXT:    cvtdq2pd %xmm0, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: trunc_signed32_f64_nsz:
diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll
--- a/llvm/test/CodeGen/X86/isint.ll
+++ b/llvm/test/CodeGen/X86/isint.ll
@@ -7,8 +7,8 @@
 define i32 @isint_return(double %d) nounwind {
 ; CHECK64-LABEL: isint_return:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    cvttsd2si %xmm0, %eax
-; CHECK64-NEXT:    cvtsi2sd %eax, %xmm1
+; CHECK64-NEXT:    cvttpd2dq %xmm0, %xmm1
+; CHECK64-NEXT:    cvtdq2pd %xmm1, %xmm1
 ; CHECK64-NEXT:    cmpeqsd %xmm0, %xmm1
 ; CHECK64-NEXT:    movq %xmm1, %rax
 ; CHECK64-NEXT:    andl $1, %eax
@@ -18,8 +18,8 @@
 ; CHECK32-LABEL: isint_return:
 ; CHECK32:       # %bb.0:
 ; CHECK32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK32-NEXT:    cvttsd2si %xmm0, %eax
-; CHECK32-NEXT:    cvtsi2sd %eax, %xmm1
+; CHECK32-NEXT:    cvttpd2dq %xmm0, %xmm1
+; CHECK32-NEXT:    cvtdq2pd %xmm1, %xmm1
 ; CHECK32-NEXT:    cmpeqsd %xmm0, %xmm1
 ; CHECK32-NEXT:    movd %xmm1, %eax
 ; CHECK32-NEXT:    andl $1, %eax
@@ -62,8 +62,8 @@
 define void @isint_branch(double %d) nounwind {
 ; CHECK64-LABEL: isint_branch:
 ; CHECK64:       # %bb.0:
-; CHECK64-NEXT:    cvttsd2si %xmm0, %eax
-; CHECK64-NEXT:    cvtsi2sd %eax, %xmm1
+; CHECK64-NEXT:    cvttpd2dq %xmm0, %xmm1
+; CHECK64-NEXT:    cvtdq2pd %xmm1, %xmm1
 ; CHECK64-NEXT:    ucomisd %xmm1, %xmm0
 ; CHECK64-NEXT:    jne .LBB2_2
 ; CHECK64-NEXT:    jp .LBB2_2
@@ -77,8 +77,8 @@
 ; CHECK32-LABEL: isint_branch:
 ; CHECK32:       # %bb.0:
 ; CHECK32-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK32-NEXT:    cvttsd2si %xmm0, %eax
-; CHECK32-NEXT:    cvtsi2sd %eax, %xmm1
+; CHECK32-NEXT:    cvttpd2dq %xmm0, %xmm1
+; CHECK32-NEXT:    cvtdq2pd %xmm1, %xmm1
 ; CHECK32-NEXT:    ucomisd %xmm1, %xmm0
 ; CHECK32-NEXT:    jne .LBB2_2
 ; CHECK32-NEXT:    jp .LBB2_2
diff --git a/llvm/test/CodeGen/X86/setoeq.ll b/llvm/test/CodeGen/X86/setoeq.ll
--- a/llvm/test/CodeGen/X86/setoeq.ll
+++ b/llvm/test/CodeGen/X86/setoeq.ll
@@ -5,8 +5,8 @@
 ; CHECK-LABEL: t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    cvttsd2si %xmm0, %eax
-; CHECK-NEXT:    cvtsi2sd %eax, %xmm1
+; CHECK-NEXT:    cvttpd2dq %xmm0, %xmm1
+; CHECK-NEXT:    cvtdq2pd %xmm1, %xmm1
 ; CHECK-NEXT:    cmpeqsd %xmm0, %xmm1
 ; CHECK-NEXT:    movd %xmm1, %eax
 ; CHECK-NEXT:    andl $1, %eax
@@ -24,8 +24,8 @@
 ; CHECK-LABEL: u:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT:    cvttsd2si %xmm0, %eax
-; CHECK-NEXT:    cvtsi2sd %eax, %xmm1
+; CHECK-NEXT:    cvttpd2dq %xmm0, %xmm1
+; CHECK-NEXT:    cvtdq2pd %xmm1, %xmm1
 ; CHECK-NEXT:    cmpneqsd %xmm0, %xmm1
 ; CHECK-NEXT:    movd %xmm1, %eax
 ; CHECK-NEXT:    andl $1, %eax
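
A note on why the double -> i32 -> double swap is exact even without fast-math
(see the trunc_signed32_f64_no_fast_math test above): for every input,
including NaN and values outside i32 range, cvttpd2dq produces the same low
element as a 32-bit cvttsd2si (the integer indefinite value 0x80000000 in the
exceptional cases), so the round-tripped double is identical. Below is a
minimal standalone sanity check of that equivalence using SSE2 intrinsics; it
is not part of the patch, the helper names are illustrative, and the
exceptional inputs are left out because the plain C cast is undefined for them.

#include <immintrin.h>
#include <cassert>

// New sequence: cvttpd2dq + cvtdq2pd, staying in XMM registers.
static double truncViaVector(double X) {
  __m128d V = _mm_set_sd(X);       // <X, 0.0>, the scalar_to_vector step
  __m128i I = _mm_cvttpd_epi32(V); // cvttpd2dq: v2f64 -> v4i32, truncating
  __m128d R = _mm_cvtepi32_pd(I);  // cvtdq2pd: low two i32 lanes -> v2f64
  return _mm_cvtsd_f64(R);         // extract element 0
}

// Old sequence: cvttsd2si + cvtsi2sd through a GPR.
static double truncViaScalar(double X) { return (double)(int)X; }

int main() {
  assert(truncViaVector(42.7) == truncViaScalar(42.7)); // both 42.0
  assert(truncViaVector(-1.5) == truncViaScalar(-1.5)); // both -1.0
  return 0;
}

The f32 case round-trips through cvttps2dq + cvtdq2ps in the same way; those
two instruction pairs are exactly what the new Subtarget.hasSSE2() guard in
the C++ change admits.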