Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -16035,6 +16035,34 @@ return DAG.getMergeValues(Ops, dl); } +// Try to lower a scalar i64 -> f32/f64 SINT_TO_FP/UINT_TO_FP on a 32-bit target +// with AVX512DQ as a packed vector conversion; returns SDValue() otherwise. +static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG, + const X86Subtarget &Subtarget) { + assert((Op.getOpcode() == ISD::SINT_TO_FP || + Op.getOpcode() == ISD::UINT_TO_FP) && "Unexpected opcode!"); + SDValue Src = Op.getOperand(0); + MVT SrcVT = Src.getSimpleValueType(); + MVT VT = Op.getSimpleValueType(); + + if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() || + (VT != MVT::f32 && VT != MVT::f64)) + return SDValue(); + + // Put the i64 into a vector, convert it, and extract element 0. + + // 4 x i64 is 256 bits, so the f32 result is 128 bits; without VLX use 8. + unsigned NumElts = Subtarget.hasVLX() ? 4 : 8; + MVT VecInVT = MVT::getVectorVT(MVT::i64, NumElts); + MVT VecVT = MVT::getVectorVT(VT, NumElts); + + SDLoc dl(Op); + SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src); + SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec, + DAG.getIntPtrConstant(0, dl)); +} + SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue Src = Op.getOperand(0); @@ -16056,15 +16084,17 @@ // These are really Legal; return the operand so the caller accepts it as // Legal. 
- if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType())) + if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(VT)) return Op; - if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) && - Subtarget.is64Bit()) { + if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && Subtarget.is64Bit()) { return Op; } + if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget)) + return V; + SDValue ValueToStore = Op.getOperand(0); - if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) && + if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(VT) && !Subtarget.is64Bit()) // Bitcasting to f64 here allows us to do a single 64-bit store from // an SSE register, avoiding the store forwarding penalty that would come @@ -16415,6 +16445,9 @@ return Op; } + if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget)) + return V; + if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64) return LowerUINT_TO_FP_i64(Op, DAG, Subtarget); if (SrcVT == MVT::i32 && X86ScalarSSEf64) @@ -25191,12 +25224,14 @@ case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: { bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT; + EVT VT = N->getValueType(0); + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); - if (N->getValueType(0) == MVT::v2i32) { + if (VT == MVT::v2i32) { assert((IsSigned || Subtarget.hasAVX512()) && "Can only handle signed conversion without AVX512"); assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); - SDValue Src = N->getOperand(0); if (Src.getValueType() == MVT::v2f64) { MVT ResVT = MVT::v4i32; unsigned Opc = IsSigned ? 
X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; @@ -25217,7 +25252,7 @@ Results.push_back(Res); return; } - if (Src.getValueType() == MVT::v2f32) { + if (SrcVT == MVT::v2f32) { SDValue Idx = DAG.getIntPtrConstant(0, dl); SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, DAG.getUNDEF(MVT::v2f32)); @@ -25234,11 +25269,30 @@ return; } + if (Subtarget.hasDQI() && VT == MVT::i64 && + (SrcVT == MVT::f32 || SrcVT == MVT::f64)) { + assert(!Subtarget.is64Bit() && "i64 should be legal"); + unsigned NumElts = Subtarget.hasVLX() ? 4 : 8; + // Using a 256-bit result here to guarantee 128-bit input for f32 case. + // TODO: Use 128-bit vectors for f64 case? + // TODO: Use 128-bit vectors for f32 by using CVTTP2SI/CVTTP2UI. + MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts); + MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), NumElts); + + SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); + SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT, + DAG.getConstantFP(0.0, dl, VecInVT), Src, + ZeroIdx); + Res = DAG.getNode(N->getOpcode(), SDLoc(N), VecVT, Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx); + Results.push_back(Res); + return; + } + std::pair Vals = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true); SDValue FIST = Vals.first, StackSlot = Vals.second; if (FIST.getNode()) { - EVT VT = N->getValueType(0); // Return a load from the stack slot. 
if (StackSlot.getNode()) Results.push_back( Index: llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll +++ llvm/trunk/test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -1166,30 +1166,25 @@ define x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 { ; X32-LABEL: test_argRetMixTypes: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $16, %esp -; X32-NEXT: vmovd %edx, %xmm2 -; X32-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2 -; X32-NEXT: movl 8(%ebp), %edx +; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; X32-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; X32-NEXT: vcvtsi2sdl %eax, %xmm3, %xmm1 +; X32-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1 +; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; X32-NEXT: vcvtsi2sdl %ecx, %xmm2, %xmm1 ; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; X32-NEXT: vcvtsi2sdl %ecx, %xmm3, %xmm1 +; X32-NEXT: vmovd %edx, %xmm1 +; X32-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1 +; X32-NEXT: vcvtqq2pd %ymm1, %ymm1 ; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; X32-NEXT: vmovq %xmm2, {{[0-9]+}}(%esp) -; X32-NEXT: fildll {{[0-9]+}}(%esp) -; X32-NEXT: fstpl (%esp) -; X32-NEXT: vaddsd (%esp), %xmm0, %xmm0 -; X32-NEXT: vcvtsi2sdl %esi, %xmm3, %xmm1 +; X32-NEXT: vcvtsi2sdl %esi, %xmm2, %xmm1 ; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; X32-NEXT: vcvtsi2sdl (%edx), %xmm3, %xmm1 +; X32-NEXT: vcvtsi2sdl (%ebx), %xmm2, %xmm1 ; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; X32-NEXT: vcvttsd2si %xmm0, %eax -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp +; X32-NEXT: popl %ebx +; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; WIN64-LABEL: test_argRetMixTypes: Index: llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll +++ 
llvm/trunk/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -39,55 +39,91 @@ ; SSE2 (cvtts[ds]2si) and vanilla X87 (fnstcw+fist, 32-bit only). define i64 @f_to_u64(float %a) nounwind { -; AVX512_32_WIN-LABEL: f_to_u64: -; AVX512_32_WIN: # %bb.0: -; AVX512_32_WIN-NEXT: pushl %ebp -; AVX512_32_WIN-NEXT: movl %esp, %ebp -; AVX512_32_WIN-NEXT: andl $-8, %esp -; AVX512_32_WIN-NEXT: subl $16, %esp -; AVX512_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512_32_WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX512_32_WIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 -; AVX512_32_WIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; AVX512_32_WIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1} -; AVX512_32_WIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: flds {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: fisttpll (%esp) -; AVX512_32_WIN-NEXT: xorl %edx, %edx -; AVX512_32_WIN-NEXT: vucomiss %xmm0, %xmm1 -; AVX512_32_WIN-NEXT: setbe %dl -; AVX512_32_WIN-NEXT: shll $31, %edx -; AVX512_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx -; AVX512_32_WIN-NEXT: movl (%esp), %eax -; AVX512_32_WIN-NEXT: movl %ebp, %esp -; AVX512_32_WIN-NEXT: popl %ebp -; AVX512_32_WIN-NEXT: retl -; -; AVX512_32_LIN-LABEL: f_to_u64: -; AVX512_32_LIN: # %bb.0: -; AVX512_32_LIN-NEXT: subl $20, %esp -; AVX512_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512_32_LIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX512_32_LIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 -; AVX512_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; AVX512_32_LIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1} -; AVX512_32_LIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: flds {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fisttpll (%esp) -; AVX512_32_LIN-NEXT: xorl %edx, %edx -; AVX512_32_LIN-NEXT: vucomiss %xmm0, %xmm1 -; AVX512_32_LIN-NEXT: setbe %dl -; AVX512_32_LIN-NEXT: shll $31, %edx -; AVX512_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx -; AVX512_32_LIN-NEXT: movl (%esp), %eax -; AVX512_32_LIN-NEXT: addl $20, %esp -; 
AVX512_32_LIN-NEXT: retl +; AVX512DQVL_32_WIN-LABEL: f_to_u64: +; AVX512DQVL_32_WIN: # %bb.0: +; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQVL_32_WIN-NEXT: vcvttps2uqq %xmm0, %ymm0 +; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQVL_32_WIN-NEXT: vzeroupper +; AVX512DQVL_32_WIN-NEXT: retl +; +; AVX512DQVL_32_LIN-LABEL: f_to_u64: +; AVX512DQVL_32_LIN: # %bb.0: +; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQVL_32_LIN-NEXT: vcvttps2uqq %xmm0, %ymm0 +; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQVL_32_LIN-NEXT: vzeroupper +; AVX512DQVL_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: f_to_u64: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvttss2usi %xmm0, %rax ; AVX512_64-NEXT: retq ; +; AVX512DQ_32_WIN-LABEL: f_to_u64: +; AVX512DQ_32_WIN: # %bb.0: +; AVX512DQ_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQ_32_WIN-NEXT: vcvttps2uqq %ymm0, %zmm0 +; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_WIN-NEXT: vzeroupper +; AVX512DQ_32_WIN-NEXT: retl +; +; AVX512DQ_32_LIN-LABEL: f_to_u64: +; AVX512DQ_32_LIN: # %bb.0: +; AVX512DQ_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQ_32_LIN-NEXT: vcvttps2uqq %ymm0, %zmm0 +; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_LIN-NEXT: vzeroupper +; AVX512DQ_32_LIN-NEXT: retl +; +; AVX512F_32_WIN-LABEL: f_to_u64: +; AVX512F_32_WIN: # %bb.0: +; AVX512F_32_WIN-NEXT: pushl %ebp +; AVX512F_32_WIN-NEXT: movl %esp, %ebp +; AVX512F_32_WIN-NEXT: andl $-8, %esp +; AVX512F_32_WIN-NEXT: subl $16, %esp +; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX512F_32_WIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 +; AVX512F_32_WIN-NEXT: vsubss 
%xmm1, %xmm0, %xmm2 +; AVX512F_32_WIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1} +; AVX512F_32_WIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: flds {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fisttpll (%esp) +; AVX512F_32_WIN-NEXT: xorl %edx, %edx +; AVX512F_32_WIN-NEXT: vucomiss %xmm0, %xmm1 +; AVX512F_32_WIN-NEXT: setbe %dl +; AVX512F_32_WIN-NEXT: shll $31, %edx +; AVX512F_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx +; AVX512F_32_WIN-NEXT: movl (%esp), %eax +; AVX512F_32_WIN-NEXT: movl %ebp, %esp +; AVX512F_32_WIN-NEXT: popl %ebp +; AVX512F_32_WIN-NEXT: retl +; +; AVX512F_32_LIN-LABEL: f_to_u64: +; AVX512F_32_LIN: # %bb.0: +; AVX512F_32_LIN-NEXT: subl $20, %esp +; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX512F_32_LIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 +; AVX512F_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 +; AVX512F_32_LIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1} +; AVX512F_32_LIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: flds {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fisttpll (%esp) +; AVX512F_32_LIN-NEXT: xorl %edx, %edx +; AVX512F_32_LIN-NEXT: vucomiss %xmm0, %xmm1 +; AVX512F_32_LIN-NEXT: setbe %dl +; AVX512F_32_LIN-NEXT: shll $31, %edx +; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx +; AVX512F_32_LIN-NEXT: movl (%esp), %eax +; AVX512F_32_LIN-NEXT: addl $20, %esp +; AVX512F_32_LIN-NEXT: retl +; ; SSE3_32_WIN-LABEL: f_to_u64: ; SSE3_32_WIN: # %bb.0: ; SSE3_32_WIN-NEXT: pushl %ebp @@ -304,39 +340,75 @@ } define i64 @f_to_s64(float %a) nounwind { -; AVX512_32_WIN-LABEL: f_to_s64: -; AVX512_32_WIN: # %bb.0: -; AVX512_32_WIN-NEXT: pushl %ebp -; AVX512_32_WIN-NEXT: movl %esp, %ebp -; AVX512_32_WIN-NEXT: andl $-8, %esp -; AVX512_32_WIN-NEXT: subl $16, %esp -; AVX512_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512_32_WIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: flds {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: 
fisttpll (%esp) -; AVX512_32_WIN-NEXT: movl (%esp), %eax -; AVX512_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512_32_WIN-NEXT: movl %ebp, %esp -; AVX512_32_WIN-NEXT: popl %ebp -; AVX512_32_WIN-NEXT: retl -; -; AVX512_32_LIN-LABEL: f_to_s64: -; AVX512_32_LIN: # %bb.0: -; AVX512_32_LIN-NEXT: subl $20, %esp -; AVX512_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512_32_LIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: flds {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fisttpll (%esp) -; AVX512_32_LIN-NEXT: movl (%esp), %eax -; AVX512_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512_32_LIN-NEXT: addl $20, %esp -; AVX512_32_LIN-NEXT: retl +; AVX512DQVL_32_WIN-LABEL: f_to_s64: +; AVX512DQVL_32_WIN: # %bb.0: +; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQVL_32_WIN-NEXT: vcvttps2qq %xmm0, %ymm0 +; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQVL_32_WIN-NEXT: vzeroupper +; AVX512DQVL_32_WIN-NEXT: retl +; +; AVX512DQVL_32_LIN-LABEL: f_to_s64: +; AVX512DQVL_32_LIN: # %bb.0: +; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQVL_32_LIN-NEXT: vcvttps2qq %xmm0, %ymm0 +; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQVL_32_LIN-NEXT: vzeroupper +; AVX512DQVL_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: f_to_s64: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvttss2si %xmm0, %rax ; AVX512_64-NEXT: retq ; +; AVX512DQ_32_WIN-LABEL: f_to_s64: +; AVX512DQ_32_WIN: # %bb.0: +; AVX512DQ_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQ_32_WIN-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_WIN-NEXT: vzeroupper +; AVX512DQ_32_WIN-NEXT: retl +; +; AVX512DQ_32_LIN-LABEL: f_to_s64: +; AVX512DQ_32_LIN: # %bb.0: +; AVX512DQ_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; 
AVX512DQ_32_LIN-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_LIN-NEXT: vzeroupper +; AVX512DQ_32_LIN-NEXT: retl +; +; AVX512F_32_WIN-LABEL: f_to_s64: +; AVX512F_32_WIN: # %bb.0: +; AVX512F_32_WIN-NEXT: pushl %ebp +; AVX512F_32_WIN-NEXT: movl %esp, %ebp +; AVX512F_32_WIN-NEXT: andl $-8, %esp +; AVX512F_32_WIN-NEXT: subl $16, %esp +; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F_32_WIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: flds {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fisttpll (%esp) +; AVX512F_32_WIN-NEXT: movl (%esp), %eax +; AVX512F_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F_32_WIN-NEXT: movl %ebp, %esp +; AVX512F_32_WIN-NEXT: popl %ebp +; AVX512F_32_WIN-NEXT: retl +; +; AVX512F_32_LIN-LABEL: f_to_s64: +; AVX512F_32_LIN: # %bb.0: +; AVX512F_32_LIN-NEXT: subl $20, %esp +; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F_32_LIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: flds {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fisttpll (%esp) +; AVX512F_32_LIN-NEXT: movl (%esp), %eax +; AVX512F_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F_32_LIN-NEXT: addl $20, %esp +; AVX512F_32_LIN-NEXT: retl +; ; SSE3_32_WIN-LABEL: f_to_s64: ; SSE3_32_WIN: # %bb.0: ; SSE3_32_WIN-NEXT: pushl %ebp @@ -455,55 +527,91 @@ } define i64 @d_to_u64(double %a) nounwind { -; AVX512_32_WIN-LABEL: d_to_u64: -; AVX512_32_WIN: # %bb.0: -; AVX512_32_WIN-NEXT: pushl %ebp -; AVX512_32_WIN-NEXT: movl %esp, %ebp -; AVX512_32_WIN-NEXT: andl $-8, %esp -; AVX512_32_WIN-NEXT: subl $16, %esp -; AVX512_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32_WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX512_32_WIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 -; AVX512_32_WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX512_32_WIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1} -; AVX512_32_WIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) 
-; AVX512_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: fisttpll (%esp) -; AVX512_32_WIN-NEXT: xorl %edx, %edx -; AVX512_32_WIN-NEXT: vucomisd %xmm0, %xmm1 -; AVX512_32_WIN-NEXT: setbe %dl -; AVX512_32_WIN-NEXT: shll $31, %edx -; AVX512_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx -; AVX512_32_WIN-NEXT: movl (%esp), %eax -; AVX512_32_WIN-NEXT: movl %ebp, %esp -; AVX512_32_WIN-NEXT: popl %ebp -; AVX512_32_WIN-NEXT: retl -; -; AVX512_32_LIN-LABEL: d_to_u64: -; AVX512_32_LIN: # %bb.0: -; AVX512_32_LIN-NEXT: subl $20, %esp -; AVX512_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32_LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX512_32_LIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 -; AVX512_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX512_32_LIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1} -; AVX512_32_LIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fisttpll (%esp) -; AVX512_32_LIN-NEXT: xorl %edx, %edx -; AVX512_32_LIN-NEXT: vucomisd %xmm0, %xmm1 -; AVX512_32_LIN-NEXT: setbe %dl -; AVX512_32_LIN-NEXT: shll $31, %edx -; AVX512_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx -; AVX512_32_LIN-NEXT: movl (%esp), %eax -; AVX512_32_LIN-NEXT: addl $20, %esp -; AVX512_32_LIN-NEXT: retl +; AVX512DQVL_32_WIN-LABEL: d_to_u64: +; AVX512DQVL_32_WIN: # %bb.0: +; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQVL_32_WIN-NEXT: vcvttpd2uqq %ymm0, %ymm0 +; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQVL_32_WIN-NEXT: vzeroupper +; AVX512DQVL_32_WIN-NEXT: retl +; +; AVX512DQVL_32_LIN-LABEL: d_to_u64: +; AVX512DQVL_32_LIN: # %bb.0: +; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQVL_32_LIN-NEXT: vcvttpd2uqq %ymm0, %ymm0 +; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQVL_32_LIN-NEXT: vzeroupper +; AVX512DQVL_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: d_to_u64: ; 
AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvttsd2usi %xmm0, %rax ; AVX512_64-NEXT: retq ; +; AVX512DQ_32_WIN-LABEL: d_to_u64: +; AVX512DQ_32_WIN: # %bb.0: +; AVX512DQ_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32_WIN-NEXT: vcvttpd2uqq %zmm0, %zmm0 +; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_WIN-NEXT: vzeroupper +; AVX512DQ_32_WIN-NEXT: retl +; +; AVX512DQ_32_LIN-LABEL: d_to_u64: +; AVX512DQ_32_LIN: # %bb.0: +; AVX512DQ_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32_LIN-NEXT: vcvttpd2uqq %zmm0, %zmm0 +; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_LIN-NEXT: vzeroupper +; AVX512DQ_32_LIN-NEXT: retl +; +; AVX512F_32_WIN-LABEL: d_to_u64: +; AVX512F_32_WIN: # %bb.0: +; AVX512F_32_WIN-NEXT: pushl %ebp +; AVX512F_32_WIN-NEXT: movl %esp, %ebp +; AVX512F_32_WIN-NEXT: andl $-8, %esp +; AVX512F_32_WIN-NEXT: subl $16, %esp +; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX512F_32_WIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 +; AVX512F_32_WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 +; AVX512F_32_WIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1} +; AVX512F_32_WIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fisttpll (%esp) +; AVX512F_32_WIN-NEXT: xorl %edx, %edx +; AVX512F_32_WIN-NEXT: vucomisd %xmm0, %xmm1 +; AVX512F_32_WIN-NEXT: setbe %dl +; AVX512F_32_WIN-NEXT: shll $31, %edx +; AVX512F_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx +; AVX512F_32_WIN-NEXT: movl (%esp), %eax +; AVX512F_32_WIN-NEXT: movl %ebp, %esp +; AVX512F_32_WIN-NEXT: popl %ebp +; AVX512F_32_WIN-NEXT: retl +; +; AVX512F_32_LIN-LABEL: d_to_u64: +; AVX512F_32_LIN: # %bb.0: +; AVX512F_32_LIN-NEXT: subl $20, %esp +; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX512F_32_LIN-NEXT: 
vcmpltsd %xmm1, %xmm0, %k1 +; AVX512F_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 +; AVX512F_32_LIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1} +; AVX512F_32_LIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fisttpll (%esp) +; AVX512F_32_LIN-NEXT: xorl %edx, %edx +; AVX512F_32_LIN-NEXT: vucomisd %xmm0, %xmm1 +; AVX512F_32_LIN-NEXT: setbe %dl +; AVX512F_32_LIN-NEXT: shll $31, %edx +; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx +; AVX512F_32_LIN-NEXT: movl (%esp), %eax +; AVX512F_32_LIN-NEXT: addl $20, %esp +; AVX512F_32_LIN-NEXT: retl +; ; SSE3_32_WIN-LABEL: d_to_u64: ; SSE3_32_WIN: # %bb.0: ; SSE3_32_WIN-NEXT: pushl %ebp @@ -720,39 +828,75 @@ } define i64 @d_to_s64(double %a) nounwind { -; AVX512_32_WIN-LABEL: d_to_s64: -; AVX512_32_WIN: # %bb.0: -; AVX512_32_WIN-NEXT: pushl %ebp -; AVX512_32_WIN-NEXT: movl %esp, %ebp -; AVX512_32_WIN-NEXT: andl $-8, %esp -; AVX512_32_WIN-NEXT: subl $16, %esp -; AVX512_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32_WIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: fisttpll (%esp) -; AVX512_32_WIN-NEXT: movl (%esp), %eax -; AVX512_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512_32_WIN-NEXT: movl %ebp, %esp -; AVX512_32_WIN-NEXT: popl %ebp -; AVX512_32_WIN-NEXT: retl -; -; AVX512_32_LIN-LABEL: d_to_s64: -; AVX512_32_LIN: # %bb.0: -; AVX512_32_LIN-NEXT: subl $20, %esp -; AVX512_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32_LIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fisttpll (%esp) -; AVX512_32_LIN-NEXT: movl (%esp), %eax -; AVX512_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512_32_LIN-NEXT: addl $20, %esp -; AVX512_32_LIN-NEXT: retl +; AVX512DQVL_32_WIN-LABEL: d_to_s64: +; AVX512DQVL_32_WIN: # %bb.0: +; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQVL_32_WIN-NEXT: vcvttpd2qq %ymm0, %ymm0 +; 
AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQVL_32_WIN-NEXT: vzeroupper +; AVX512DQVL_32_WIN-NEXT: retl +; +; AVX512DQVL_32_LIN-LABEL: d_to_s64: +; AVX512DQVL_32_LIN: # %bb.0: +; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQVL_32_LIN-NEXT: vcvttpd2qq %ymm0, %ymm0 +; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQVL_32_LIN-NEXT: vzeroupper +; AVX512DQVL_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: d_to_s64: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvttsd2si %xmm0, %rax ; AVX512_64-NEXT: retq ; +; AVX512DQ_32_WIN-LABEL: d_to_s64: +; AVX512DQ_32_WIN: # %bb.0: +; AVX512DQ_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32_WIN-NEXT: vcvttpd2qq %zmm0, %zmm0 +; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_WIN-NEXT: vzeroupper +; AVX512DQ_32_WIN-NEXT: retl +; +; AVX512DQ_32_LIN-LABEL: d_to_s64: +; AVX512DQ_32_LIN: # %bb.0: +; AVX512DQ_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32_LIN-NEXT: vcvttpd2qq %zmm0, %zmm0 +; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_LIN-NEXT: vzeroupper +; AVX512DQ_32_LIN-NEXT: retl +; +; AVX512F_32_WIN-LABEL: d_to_s64: +; AVX512F_32_WIN: # %bb.0: +; AVX512F_32_WIN-NEXT: pushl %ebp +; AVX512F_32_WIN-NEXT: movl %esp, %ebp +; AVX512F_32_WIN-NEXT: andl $-8, %esp +; AVX512F_32_WIN-NEXT: subl $16, %esp +; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32_WIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fisttpll (%esp) +; AVX512F_32_WIN-NEXT: movl (%esp), %eax +; AVX512F_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F_32_WIN-NEXT: movl %ebp, %esp +; AVX512F_32_WIN-NEXT: popl %ebp +; AVX512F_32_WIN-NEXT: retl +; +; AVX512F_32_LIN-LABEL: d_to_s64: +; AVX512F_32_LIN: # %bb.0: +; 
AVX512F_32_LIN-NEXT: subl $20, %esp +; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32_LIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fisttpll (%esp) +; AVX512F_32_LIN-NEXT: movl (%esp), %eax +; AVX512F_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F_32_LIN-NEXT: addl $20, %esp +; AVX512F_32_LIN-NEXT: retl +; ; SSE3_32_WIN-LABEL: d_to_s64: ; SSE3_32_WIN: # %bb.0: ; SSE3_32_WIN-NEXT: pushl %ebp Index: llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll +++ llvm/trunk/test/CodeGen/X86/scalar-int-to-fp.ll @@ -300,32 +300,54 @@ } define float @u64_to_f(i64 %a) nounwind { -; AVX512_32-LABEL: u64_to_f: -; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp -; AVX512_32-NEXT: subl $16, %esp -; AVX512_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; AVX512_32-NEXT: xorl %eax, %eax -; AVX512_32-NEXT: cmpl $0, 12(%ebp) -; AVX512_32-NEXT: setns %al -; AVX512_32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) -; AVX512_32-NEXT: fstps {{[0-9]+}}(%esp) -; AVX512_32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512_32-NEXT: vmovss %xmm0, (%esp) -; AVX512_32-NEXT: flds (%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; AVX512_32-NEXT: popl %ebp -; AVX512_32-NEXT: retl +; AVX512DQVL_32-LABEL: u64_to_f: +; AVX512DQVL_32: # %bb.0: +; AVX512DQVL_32-NEXT: pushl %eax +; AVX512DQVL_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQVL_32-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; AVX512DQVL_32-NEXT: vmovss %xmm0, (%esp) +; AVX512DQVL_32-NEXT: flds (%esp) +; AVX512DQVL_32-NEXT: popl %eax +; AVX512DQVL_32-NEXT: vzeroupper +; AVX512DQVL_32-NEXT: retl ; ; AVX512_64-LABEL: u64_to_f: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvtusi2ssq %rdi, %xmm0, 
%xmm0 ; AVX512_64-NEXT: retq ; +; AVX512DQ_32-LABEL: u64_to_f: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %eax +; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32-NEXT: vcvtuqq2ps %zmm0, %ymm0 +; AVX512DQ_32-NEXT: vmovss %xmm0, (%esp) +; AVX512DQ_32-NEXT: flds (%esp) +; AVX512DQ_32-NEXT: popl %eax +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl +; +; AVX512F_32-LABEL: u64_to_f: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %ebp +; AVX512F_32-NEXT: movl %esp, %ebp +; AVX512F_32-NEXT: andl $-8, %esp +; AVX512F_32-NEXT: subl $16, %esp +; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: xorl %eax, %eax +; AVX512F_32-NEXT: cmpl $0, 12(%ebp) +; AVX512F_32-NEXT: setns %al +; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) +; AVX512F_32-NEXT: fstps {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F_32-NEXT: vmovss %xmm0, (%esp) +; AVX512F_32-NEXT: flds (%esp) +; AVX512F_32-NEXT: movl %ebp, %esp +; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: retl +; ; SSE2_32-LABEL: u64_to_f: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %ebp @@ -425,25 +447,21 @@ } define float @s64_to_f_2(i64 %a) nounwind { -; AVX512_32-LABEL: s64_to_f_2: -; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp -; AVX512_32-NEXT: subl $16, %esp -; AVX512_32-NEXT: movl 8(%ebp), %eax -; AVX512_32-NEXT: movl 12(%ebp), %ecx -; AVX512_32-NEXT: addl $5, %eax -; AVX512_32-NEXT: adcl $0, %ecx -; AVX512_32-NEXT: vmovd %eax, %xmm0 -; AVX512_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX512_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fstps {{[0-9]+}}(%esp) -; AVX512_32-NEXT: flds {{[0-9]+}}(%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; AVX512_32-NEXT: popl %ebp -; AVX512_32-NEXT: retl +; 
AVX512DQVL_32-LABEL: s64_to_f_2: +; AVX512DQVL_32: # %bb.0: +; AVX512DQVL_32-NEXT: pushl %eax +; AVX512DQVL_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512DQVL_32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; AVX512DQVL_32-NEXT: addl $5, %eax +; AVX512DQVL_32-NEXT: adcl $0, %ecx +; AVX512DQVL_32-NEXT: vmovd %eax, %xmm0 +; AVX512DQVL_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; AVX512DQVL_32-NEXT: vcvtqq2ps %ymm0, %xmm0 +; AVX512DQVL_32-NEXT: vmovss %xmm0, (%esp) +; AVX512DQVL_32-NEXT: flds (%esp) +; AVX512DQVL_32-NEXT: popl %eax +; AVX512DQVL_32-NEXT: vzeroupper +; AVX512DQVL_32-NEXT: retl ; ; AVX512_64-LABEL: s64_to_f_2: ; AVX512_64: # %bb.0: @@ -451,6 +469,42 @@ ; AVX512_64-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 ; AVX512_64-NEXT: retq ; +; AVX512DQ_32-LABEL: s64_to_f_2: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %eax +; AVX512DQ_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512DQ_32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; AVX512DQ_32-NEXT: addl $5, %eax +; AVX512DQ_32-NEXT: adcl $0, %ecx +; AVX512DQ_32-NEXT: vmovd %eax, %xmm0 +; AVX512DQ_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; AVX512DQ_32-NEXT: vcvtqq2ps %zmm0, %ymm0 +; AVX512DQ_32-NEXT: vmovss %xmm0, (%esp) +; AVX512DQ_32-NEXT: flds (%esp) +; AVX512DQ_32-NEXT: popl %eax +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl +; +; AVX512F_32-LABEL: s64_to_f_2: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %ebp +; AVX512F_32-NEXT: movl %esp, %ebp +; AVX512F_32-NEXT: andl $-8, %esp +; AVX512F_32-NEXT: subl $16, %esp +; AVX512F_32-NEXT: movl 8(%ebp), %eax +; AVX512F_32-NEXT: movl 12(%ebp), %ecx +; AVX512F_32-NEXT: addl $5, %eax +; AVX512F_32-NEXT: adcl $0, %ecx +; AVX512F_32-NEXT: vmovd %eax, %xmm0 +; AVX512F_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; AVX512F_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fstps {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: flds {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: movl %ebp, %esp +; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: retl +; ; 
SSE2_32-LABEL: s64_to_f_2: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %ebp @@ -500,27 +554,57 @@ } define double @u64_to_d(i64 %a) nounwind { -; AVX512_32-LABEL: u64_to_d: -; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp -; AVX512_32-NEXT: subl $8, %esp -; AVX512_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] -; AVX512_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 -; AVX512_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 -; AVX512_32-NEXT: vmovlpd %xmm0, (%esp) -; AVX512_32-NEXT: fldl (%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; AVX512_32-NEXT: popl %ebp -; AVX512_32-NEXT: retl +; AVX512DQVL_32-LABEL: u64_to_d: +; AVX512DQVL_32: # %bb.0: +; AVX512DQVL_32-NEXT: pushl %ebp +; AVX512DQVL_32-NEXT: movl %esp, %ebp +; AVX512DQVL_32-NEXT: andl $-8, %esp +; AVX512DQVL_32-NEXT: subl $8, %esp +; AVX512DQVL_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQVL_32-NEXT: vcvtuqq2pd %ymm0, %ymm0 +; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp) +; AVX512DQVL_32-NEXT: fldl (%esp) +; AVX512DQVL_32-NEXT: movl %ebp, %esp +; AVX512DQVL_32-NEXT: popl %ebp +; AVX512DQVL_32-NEXT: vzeroupper +; AVX512DQVL_32-NEXT: retl ; ; AVX512_64-LABEL: u64_to_d: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0 ; AVX512_64-NEXT: retq ; +; AVX512DQ_32-LABEL: u64_to_d: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %ebp +; AVX512DQ_32-NEXT: movl %esp, %ebp +; AVX512DQ_32-NEXT: andl $-8, %esp +; AVX512DQ_32-NEXT: subl $8, %esp +; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp) +; AVX512DQ_32-NEXT: fldl (%esp) +; AVX512DQ_32-NEXT: movl %ebp, %esp +; AVX512DQ_32-NEXT: popl %ebp +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl +; +; AVX512F_32-LABEL: u64_to_d: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %ebp +; AVX512F_32-NEXT: movl %esp, %ebp +; 
AVX512F_32-NEXT: andl $-8, %esp +; AVX512F_32-NEXT: subl $8, %esp +; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512F_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX512F_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 +; AVX512F_32-NEXT: vmovlpd %xmm0, (%esp) +; AVX512F_32-NEXT: fldl (%esp) +; AVX512F_32-NEXT: movl %ebp, %esp +; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: retl +; ; SSE2_32-LABEL: u64_to_d: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %ebp @@ -617,25 +701,25 @@ } define double @s64_to_d_2(i64 %a) nounwind { -; AVX512_32-LABEL: s64_to_d_2: -; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp -; AVX512_32-NEXT: subl $16, %esp -; AVX512_32-NEXT: movl 8(%ebp), %eax -; AVX512_32-NEXT: movl 12(%ebp), %ecx -; AVX512_32-NEXT: addl $5, %eax -; AVX512_32-NEXT: adcl $0, %ecx -; AVX512_32-NEXT: vmovd %eax, %xmm0 -; AVX512_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX512_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fstpl (%esp) -; AVX512_32-NEXT: fldl (%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; AVX512_32-NEXT: popl %ebp -; AVX512_32-NEXT: retl +; AVX512DQVL_32-LABEL: s64_to_d_2: +; AVX512DQVL_32: # %bb.0: +; AVX512DQVL_32-NEXT: pushl %ebp +; AVX512DQVL_32-NEXT: movl %esp, %ebp +; AVX512DQVL_32-NEXT: andl $-8, %esp +; AVX512DQVL_32-NEXT: subl $8, %esp +; AVX512DQVL_32-NEXT: movl 8(%ebp), %eax +; AVX512DQVL_32-NEXT: movl 12(%ebp), %ecx +; AVX512DQVL_32-NEXT: addl $5, %eax +; AVX512DQVL_32-NEXT: adcl $0, %ecx +; AVX512DQVL_32-NEXT: vmovd %eax, %xmm0 +; AVX512DQVL_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; AVX512DQVL_32-NEXT: vcvtqq2pd %ymm0, %ymm0 +; AVX512DQVL_32-NEXT: vmovlps %xmm0, (%esp) +; AVX512DQVL_32-NEXT: fldl (%esp) +; AVX512DQVL_32-NEXT: movl %ebp, %esp +; AVX512DQVL_32-NEXT: popl %ebp +; AVX512DQVL_32-NEXT: vzeroupper +; AVX512DQVL_32-NEXT: retl ; ; 
AVX512_64-LABEL: s64_to_d_2: ; AVX512_64: # %bb.0: @@ -643,6 +727,46 @@ ; AVX512_64-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 ; AVX512_64-NEXT: retq ; +; AVX512DQ_32-LABEL: s64_to_d_2: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %ebp +; AVX512DQ_32-NEXT: movl %esp, %ebp +; AVX512DQ_32-NEXT: andl $-8, %esp +; AVX512DQ_32-NEXT: subl $8, %esp +; AVX512DQ_32-NEXT: movl 8(%ebp), %eax +; AVX512DQ_32-NEXT: movl 12(%ebp), %ecx +; AVX512DQ_32-NEXT: addl $5, %eax +; AVX512DQ_32-NEXT: adcl $0, %ecx +; AVX512DQ_32-NEXT: vmovd %eax, %xmm0 +; AVX512DQ_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; AVX512DQ_32-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp) +; AVX512DQ_32-NEXT: fldl (%esp) +; AVX512DQ_32-NEXT: movl %ebp, %esp +; AVX512DQ_32-NEXT: popl %ebp +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl +; +; AVX512F_32-LABEL: s64_to_d_2: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %ebp +; AVX512F_32-NEXT: movl %esp, %ebp +; AVX512F_32-NEXT: andl $-8, %esp +; AVX512F_32-NEXT: subl $16, %esp +; AVX512F_32-NEXT: movl 8(%ebp), %eax +; AVX512F_32-NEXT: movl 12(%ebp), %ecx +; AVX512F_32-NEXT: addl $5, %eax +; AVX512F_32-NEXT: adcl $0, %ecx +; AVX512F_32-NEXT: vmovd %eax, %xmm0 +; AVX512F_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; AVX512F_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fstpl (%esp) +; AVX512F_32-NEXT: fldl (%esp) +; AVX512F_32-NEXT: movl %ebp, %esp +; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: retl +; ; SSE2_32-LABEL: s64_to_d_2: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %ebp