Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -1679,6 +1679,8 @@
   setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::UINT_TO_FP);
+  setTargetDAGCombine(ISD::FP_TO_SINT);
+  setTargetDAGCombine(ISD::FP_TO_UINT);
   setTargetDAGCombine(ISD::SETCC);
   setTargetDAGCombine(ISD::MUL);
   setTargetDAGCombine(ISD::XOR);
@@ -36905,6 +36907,43 @@
   return SDValue();
 }
 
+/// Perform a scalar i64 -> f32/f64 SINT_TO_FP/UINT_TO_FP as a packed
+/// AVX512DQ conversion when i64 is not a legal scalar type.
+///
+/// The i64 operand is inserted into element 0 of an otherwise zero v4i64
+/// (with VLX) or v8i64 vector, the vector is converted using N's own opcode,
+/// and element 0 of the result is extracted.
+///
+/// \returns the replacement value, or an empty SDValue when there is no DQI,
+/// the operand is not i64, the result is not f32/f64, or i64 is a legal type.
+static SDValue combineIntToFP_AVX512(SDNode *N, SelectionDAG &DAG,
+                                     const X86Subtarget &Subtarget) {
+  SDValue Op0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT InVT = Op0.getValueType();
+
+  // AVX512DQ 32-bit targets don't have i64 scalar float/double conversions,
+  // so perform it entirely as a vector.
+  if (!Subtarget.hasDQI() || InVT != MVT::i64 ||
+      (VT != MVT::f32 && VT != MVT::f64) ||
+      DAG.getTargetLoweringInfo().isTypeLegal(InVT))
+    return SDValue();
+
+  SDLoc DL(N);
+  // Use a 256-bit source vector when VLX is available, otherwise 512-bit.
+  MVT VecInVT = Subtarget.hasVLX() ? MVT::v4i64 : MVT::v8i64;
+  EVT VecVT =
+      EVT::getVectorVT(*DAG.getContext(), VT, VecInVT.getVectorNumElements());
+
+  // Place the scalar in element 0; the remaining elements are zero.
+  SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
+  SDValue InVec =
+      DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecInVT,
+                  getZeroVector(VecInVT, Subtarget, DAG, DL), Op0, ZeroIdx);
+  SDValue CvtVec = DAG.getNode(N->getOpcode(), DL, VecVT, InVec);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, CvtVec, ZeroIdx);
+}
+
 static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
   SDValue Op0 = N->getOperand(0);
@@ -36930,6 +36969,11 @@
   if (DAG.SignBitIsZero(Op0))
     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
 
+  // Attempt AVX512-only combines.
+  if (Subtarget.hasAVX512())
+    if (SDValue Cvt = combineIntToFP_AVX512(N, DAG, Subtarget))
+      return Cvt;
+
   return SDValue();
 }
 
@@ -36992,9 +37036,52 @@
       return FILDChain;
     }
   }
+
+  // Attempt AVX512-only combines.
+  if (Subtarget.hasAVX512())
+    if (SDValue Cvt = combineIntToFP_AVX512(N, DAG, Subtarget))
+      return Cvt;
+
   return SDValue();
 }
 
+/// Perform a scalar f32/f64 -> i64 FP_TO_SINT/FP_TO_UINT as a packed
+/// AVX512DQ conversion when i64 is not a legal scalar type.
+///
+/// The FP operand is inserted into element 0 of an otherwise zero vector
+/// with as many elements as the v4i64 (with VLX) or v8i64 result, the vector
+/// is converted using N's own opcode, and element 0 is extracted.
+///
+/// \returns the replacement value, or an empty SDValue when there is no DQI,
+/// the result is not i64, the operand is not f32/f64, or i64 is a legal type.
+static SDValue combineFPToInt(SDNode *N, SelectionDAG &DAG,
+                              const X86Subtarget &Subtarget) {
+  SDValue Op0 = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  EVT InVT = Op0.getValueType();
+
+  // AVX512DQ 32-bit targets don't have float/double scalar i64 conversions,
+  // so perform it entirely as a vector.
+  if (!Subtarget.hasDQI() || VT != MVT::i64 ||
+      (InVT != MVT::f32 && InVT != MVT::f64) ||
+      DAG.getTargetLoweringInfo().isTypeLegal(VT))
+    return SDValue();
+
+  SDLoc DL(N);
+  // Use a 256-bit result vector when VLX is available, otherwise 512-bit.
+  MVT VecVT = Subtarget.hasVLX() ? MVT::v4i64 : MVT::v8i64;
+  MVT VecInVT =
+      MVT::getVectorVT(InVT.getSimpleVT(), VecVT.getVectorNumElements());
+
+  // Place the scalar in element 0; the remaining elements are zero.
+  SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
+  SDValue InVec =
+      DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecInVT,
+                  getZeroVector(VecInVT, Subtarget, DAG, DL), Op0, ZeroIdx);
+  SDValue CvtVec = DAG.getNode(N->getOpcode(), DL, VecVT, InVec);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, CvtVec, ZeroIdx);
+}
+
 static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
   if (SDValue Flags = combineCarryThroughADD(N->getOperand(2))) {
     MVT VT = N->getSimpleValueType(0);
@@ -37956,6 +38043,8 @@
   case ISD::MSTORE:         return combineMaskedStore(N, DAG, Subtarget);
   case ISD::SINT_TO_FP:     return combineSIntToFP(N, DAG, Subtarget);
   case ISD::UINT_TO_FP:     return combineUIntToFP(N, DAG, Subtarget);
+  case ISD::FP_TO_SINT:
+  case ISD::FP_TO_UINT:     return combineFPToInt(N, DAG, Subtarget);
   case ISD::FADD:
   case ISD::FSUB:           return combineFaddFsub(N, DAG, Subtarget);
   case ISD::FNEG:           return combineFneg(N, DAG, Subtarget);
Index: test/CodeGen/X86/avx512-regcall-NoMask.ll
=================================================================== --- test/CodeGen/X86/avx512-regcall-NoMask.ll +++ test/CodeGen/X86/avx512-regcall-NoMask.ll @@ -1149,30 +1149,28 @@ define x86_regcallcc i32 @test_argRetMixTypes(double, float, i8 signext, i32, i64, i16 signext, i32*) #0 { ; X32-LABEL: test_argRetMixTypes: ; X32: # %bb.0: -; X32-NEXT: pushl %ebp -; X32-NEXT: movl %esp, %ebp -; X32-NEXT: andl $-8, %esp -; X32-NEXT: subl $16, %esp -; X32-NEXT: vmovd %edx, %xmm2 -; X32-NEXT: vpinsrd $1, %edi, %xmm2, %xmm2 -; X32-NEXT: movl 8(%ebp), %edx +; X32-NEXT: pushl %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X32-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 ; X32-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; X32-NEXT: vcvtsi2sdl %eax, %xmm3, %xmm1 +; X32-NEXT: vcvtsi2sdl %eax, %xmm2, %xmm1 ; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; X32-NEXT: vcvtsi2sdl %ecx, %xmm3, %xmm1 +; X32-NEXT: vcvtsi2sdl %ecx, %xmm2, %xmm1 +; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; X32-NEXT: vmovd %edi, %xmm1 +; X32-NEXT: vmovd %edx, %xmm2 +; X32-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X32-NEXT: vmovq {{.*#+}} xmm1 = xmm1[0],zero +; X32-NEXT: vmovdqa %xmm1, %xmm1 +; X32-NEXT: vcvtqq2pd %ymm1, %ymm1 ; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; X32-NEXT: vmovq %xmm2, {{[0-9]+}}(%esp) -; X32-NEXT: fildll {{[0-9]+}}(%esp) -; X32-NEXT: fstpl (%esp) -; X32-NEXT: vaddsd (%esp), %xmm0, %xmm0 ; X32-NEXT: vcvtsi2sdl %esi, %xmm3, %xmm1 ; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; X32-NEXT: vcvtsi2sdl (%edx), %xmm3, %xmm1 +; X32-NEXT: vcvtsi2sdl (%ebx), %xmm3, %xmm1 ; X32-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; X32-NEXT: vcvttsd2si %xmm0, %eax -; X32-NEXT: movl %ebp, %esp -; X32-NEXT: popl %ebp +; X32-NEXT: popl %ebx +; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; WIN64-LABEL: test_argRetMixTypes: Index: test/CodeGen/X86/scalar-fp-to-i64.ll =================================================================== --- test/CodeGen/X86/scalar-fp-to-i64.ll +++ test/CodeGen/X86/scalar-fp-to-i64.ll @@ -35,55 
+35,73 @@ ; SSE2 (cvtts[ds]2si) and vanilla X87 (fnstcw+fist, 32-bit only). define i64 @f_to_u64(float %a) nounwind { -; AVX512_32_WIN-LABEL: f_to_u64: -; AVX512_32_WIN: # %bb.0: -; AVX512_32_WIN-NEXT: pushl %ebp -; AVX512_32_WIN-NEXT: movl %esp, %ebp -; AVX512_32_WIN-NEXT: andl $-8, %esp -; AVX512_32_WIN-NEXT: subl $16, %esp -; AVX512_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512_32_WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX512_32_WIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 -; AVX512_32_WIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; AVX512_32_WIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1} -; AVX512_32_WIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: flds {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: fisttpll (%esp) -; AVX512_32_WIN-NEXT: xorl %edx, %edx -; AVX512_32_WIN-NEXT: vucomiss %xmm0, %xmm1 -; AVX512_32_WIN-NEXT: setbe %dl -; AVX512_32_WIN-NEXT: shll $31, %edx -; AVX512_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx -; AVX512_32_WIN-NEXT: movl (%esp), %eax -; AVX512_32_WIN-NEXT: movl %ebp, %esp -; AVX512_32_WIN-NEXT: popl %ebp -; AVX512_32_WIN-NEXT: retl -; -; AVX512_32_LIN-LABEL: f_to_u64: -; AVX512_32_LIN: # %bb.0: -; AVX512_32_LIN-NEXT: subl $20, %esp -; AVX512_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512_32_LIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX512_32_LIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 -; AVX512_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; AVX512_32_LIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1} -; AVX512_32_LIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: flds {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fisttpll (%esp) -; AVX512_32_LIN-NEXT: xorl %edx, %edx -; AVX512_32_LIN-NEXT: vucomiss %xmm0, %xmm1 -; AVX512_32_LIN-NEXT: setbe %dl -; AVX512_32_LIN-NEXT: shll $31, %edx -; AVX512_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx -; AVX512_32_LIN-NEXT: movl (%esp), %eax -; AVX512_32_LIN-NEXT: addl $20, %esp -; AVX512_32_LIN-NEXT: retl +; AVX512DQ_32_WIN-LABEL: f_to_u64: +; 
AVX512DQ_32_WIN: # %bb.0: +; AVX512DQ_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQ_32_WIN-NEXT: vcvttps2uqq %ymm0, %zmm0 +; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_WIN-NEXT: vzeroupper +; AVX512DQ_32_WIN-NEXT: retl +; +; AVX512DQ_32_LIN-LABEL: f_to_u64: +; AVX512DQ_32_LIN: # %bb.0: +; AVX512DQ_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQ_32_LIN-NEXT: vcvttps2uqq %ymm0, %zmm0 +; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_LIN-NEXT: vzeroupper +; AVX512DQ_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: f_to_u64: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvttss2usi %xmm0, %rax ; AVX512_64-NEXT: retq ; +; AVX512F_32_WIN-LABEL: f_to_u64: +; AVX512F_32_WIN: # %bb.0: +; AVX512F_32_WIN-NEXT: pushl %ebp +; AVX512F_32_WIN-NEXT: movl %esp, %ebp +; AVX512F_32_WIN-NEXT: andl $-8, %esp +; AVX512F_32_WIN-NEXT: subl $16, %esp +; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX512F_32_WIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 +; AVX512F_32_WIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 +; AVX512F_32_WIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1} +; AVX512F_32_WIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: flds {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fisttpll (%esp) +; AVX512F_32_WIN-NEXT: xorl %edx, %edx +; AVX512F_32_WIN-NEXT: vucomiss %xmm0, %xmm1 +; AVX512F_32_WIN-NEXT: setbe %dl +; AVX512F_32_WIN-NEXT: shll $31, %edx +; AVX512F_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx +; AVX512F_32_WIN-NEXT: movl (%esp), %eax +; AVX512F_32_WIN-NEXT: movl %ebp, %esp +; AVX512F_32_WIN-NEXT: popl %ebp +; AVX512F_32_WIN-NEXT: retl +; +; AVX512F_32_LIN-LABEL: f_to_u64: +; AVX512F_32_LIN: # %bb.0: +; AVX512F_32_LIN-NEXT: subl $20, %esp +; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm1 
= mem[0],zero,zero,zero +; AVX512F_32_LIN-NEXT: vcmpltss %xmm1, %xmm0, %k1 +; AVX512F_32_LIN-NEXT: vsubss %xmm1, %xmm0, %xmm2 +; AVX512F_32_LIN-NEXT: vmovss %xmm0, %xmm0, %xmm2 {%k1} +; AVX512F_32_LIN-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: flds {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fisttpll (%esp) +; AVX512F_32_LIN-NEXT: xorl %edx, %edx +; AVX512F_32_LIN-NEXT: vucomiss %xmm0, %xmm1 +; AVX512F_32_LIN-NEXT: setbe %dl +; AVX512F_32_LIN-NEXT: shll $31, %edx +; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx +; AVX512F_32_LIN-NEXT: movl (%esp), %eax +; AVX512F_32_LIN-NEXT: addl $20, %esp +; AVX512F_32_LIN-NEXT: retl +; ; SSE3_32_WIN-LABEL: f_to_u64: ; SSE3_32_WIN: # %bb.0: ; SSE3_32_WIN-NEXT: pushl %ebp @@ -300,39 +318,57 @@ } define i64 @f_to_s64(float %a) nounwind { -; AVX512_32_WIN-LABEL: f_to_s64: -; AVX512_32_WIN: # %bb.0: -; AVX512_32_WIN-NEXT: pushl %ebp -; AVX512_32_WIN-NEXT: movl %esp, %ebp -; AVX512_32_WIN-NEXT: andl $-8, %esp -; AVX512_32_WIN-NEXT: subl $16, %esp -; AVX512_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512_32_WIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: flds {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: fisttpll (%esp) -; AVX512_32_WIN-NEXT: movl (%esp), %eax -; AVX512_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512_32_WIN-NEXT: movl %ebp, %esp -; AVX512_32_WIN-NEXT: popl %ebp -; AVX512_32_WIN-NEXT: retl -; -; AVX512_32_LIN-LABEL: f_to_s64: -; AVX512_32_LIN: # %bb.0: -; AVX512_32_LIN-NEXT: subl $20, %esp -; AVX512_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512_32_LIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: flds {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fisttpll (%esp) -; AVX512_32_LIN-NEXT: movl (%esp), %eax -; AVX512_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512_32_LIN-NEXT: addl $20, %esp -; AVX512_32_LIN-NEXT: retl +; AVX512DQ_32_WIN-LABEL: f_to_s64: +; AVX512DQ_32_WIN: # %bb.0: +; AVX512DQ_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero +; AVX512DQ_32_WIN-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_WIN-NEXT: vzeroupper +; AVX512DQ_32_WIN-NEXT: retl +; +; AVX512DQ_32_LIN-LABEL: f_to_s64: +; AVX512DQ_32_LIN: # %bb.0: +; AVX512DQ_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512DQ_32_LIN-NEXT: vcvttps2qq %ymm0, %zmm0 +; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_LIN-NEXT: vzeroupper +; AVX512DQ_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: f_to_s64: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvttss2si %xmm0, %rax ; AVX512_64-NEXT: retq ; +; AVX512F_32_WIN-LABEL: f_to_s64: +; AVX512F_32_WIN: # %bb.0: +; AVX512F_32_WIN-NEXT: pushl %ebp +; AVX512F_32_WIN-NEXT: movl %esp, %ebp +; AVX512F_32_WIN-NEXT: andl $-8, %esp +; AVX512F_32_WIN-NEXT: subl $16, %esp +; AVX512F_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F_32_WIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: flds {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fisttpll (%esp) +; AVX512F_32_WIN-NEXT: movl (%esp), %eax +; AVX512F_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F_32_WIN-NEXT: movl %ebp, %esp +; AVX512F_32_WIN-NEXT: popl %ebp +; AVX512F_32_WIN-NEXT: retl +; +; AVX512F_32_LIN-LABEL: f_to_s64: +; AVX512F_32_LIN: # %bb.0: +; AVX512F_32_LIN-NEXT: subl $20, %esp +; AVX512F_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F_32_LIN-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: flds {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fisttpll (%esp) +; AVX512F_32_LIN-NEXT: movl (%esp), %eax +; AVX512F_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F_32_LIN-NEXT: addl $20, %esp +; AVX512F_32_LIN-NEXT: retl +; ; SSE3_32_WIN-LABEL: f_to_s64: ; SSE3_32_WIN: # %bb.0: ; SSE3_32_WIN-NEXT: pushl %ebp @@ -451,55 +487,73 @@ } define i64 @d_to_u64(double %a) nounwind { -; AVX512_32_WIN-LABEL: d_to_u64: -; 
AVX512_32_WIN: # %bb.0: -; AVX512_32_WIN-NEXT: pushl %ebp -; AVX512_32_WIN-NEXT: movl %esp, %ebp -; AVX512_32_WIN-NEXT: andl $-8, %esp -; AVX512_32_WIN-NEXT: subl $16, %esp -; AVX512_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32_WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX512_32_WIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 -; AVX512_32_WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX512_32_WIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1} -; AVX512_32_WIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: fisttpll (%esp) -; AVX512_32_WIN-NEXT: xorl %edx, %edx -; AVX512_32_WIN-NEXT: vucomisd %xmm0, %xmm1 -; AVX512_32_WIN-NEXT: setbe %dl -; AVX512_32_WIN-NEXT: shll $31, %edx -; AVX512_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx -; AVX512_32_WIN-NEXT: movl (%esp), %eax -; AVX512_32_WIN-NEXT: movl %ebp, %esp -; AVX512_32_WIN-NEXT: popl %ebp -; AVX512_32_WIN-NEXT: retl -; -; AVX512_32_LIN-LABEL: d_to_u64: -; AVX512_32_LIN: # %bb.0: -; AVX512_32_LIN-NEXT: subl $20, %esp -; AVX512_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32_LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX512_32_LIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 -; AVX512_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 -; AVX512_32_LIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1} -; AVX512_32_LIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fisttpll (%esp) -; AVX512_32_LIN-NEXT: xorl %edx, %edx -; AVX512_32_LIN-NEXT: vucomisd %xmm0, %xmm1 -; AVX512_32_LIN-NEXT: setbe %dl -; AVX512_32_LIN-NEXT: shll $31, %edx -; AVX512_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx -; AVX512_32_LIN-NEXT: movl (%esp), %eax -; AVX512_32_LIN-NEXT: addl $20, %esp -; AVX512_32_LIN-NEXT: retl +; AVX512DQ_32_WIN-LABEL: d_to_u64: +; AVX512DQ_32_WIN: # %bb.0: +; AVX512DQ_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32_WIN-NEXT: vcvttpd2uqq %zmm0, %zmm0 +; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_WIN-NEXT: 
vpextrd $1, %xmm0, %edx +; AVX512DQ_32_WIN-NEXT: vzeroupper +; AVX512DQ_32_WIN-NEXT: retl +; +; AVX512DQ_32_LIN-LABEL: d_to_u64: +; AVX512DQ_32_LIN: # %bb.0: +; AVX512DQ_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32_LIN-NEXT: vcvttpd2uqq %zmm0, %zmm0 +; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_LIN-NEXT: vzeroupper +; AVX512DQ_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: d_to_u64: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvttsd2usi %xmm0, %rax ; AVX512_64-NEXT: retq ; +; AVX512F_32_WIN-LABEL: d_to_u64: +; AVX512F_32_WIN: # %bb.0: +; AVX512F_32_WIN-NEXT: pushl %ebp +; AVX512F_32_WIN-NEXT: movl %esp, %ebp +; AVX512F_32_WIN-NEXT: andl $-8, %esp +; AVX512F_32_WIN-NEXT: subl $16, %esp +; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX512F_32_WIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 +; AVX512F_32_WIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 +; AVX512F_32_WIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1} +; AVX512F_32_WIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fisttpll (%esp) +; AVX512F_32_WIN-NEXT: xorl %edx, %edx +; AVX512F_32_WIN-NEXT: vucomisd %xmm0, %xmm1 +; AVX512F_32_WIN-NEXT: setbe %dl +; AVX512F_32_WIN-NEXT: shll $31, %edx +; AVX512F_32_WIN-NEXT: xorl {{[0-9]+}}(%esp), %edx +; AVX512F_32_WIN-NEXT: movl (%esp), %eax +; AVX512F_32_WIN-NEXT: movl %ebp, %esp +; AVX512F_32_WIN-NEXT: popl %ebp +; AVX512F_32_WIN-NEXT: retl +; +; AVX512F_32_LIN-LABEL: d_to_u64: +; AVX512F_32_LIN: # %bb.0: +; AVX512F_32_LIN-NEXT: subl $20, %esp +; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX512F_32_LIN-NEXT: vcmpltsd %xmm1, %xmm0, %k1 +; AVX512F_32_LIN-NEXT: vsubsd %xmm1, %xmm0, %xmm2 +; AVX512F_32_LIN-NEXT: vmovsd %xmm0, %xmm0, %xmm2 {%k1} +; AVX512F_32_LIN-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: 
fldl {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fisttpll (%esp) +; AVX512F_32_LIN-NEXT: xorl %edx, %edx +; AVX512F_32_LIN-NEXT: vucomisd %xmm0, %xmm1 +; AVX512F_32_LIN-NEXT: setbe %dl +; AVX512F_32_LIN-NEXT: shll $31, %edx +; AVX512F_32_LIN-NEXT: xorl {{[0-9]+}}(%esp), %edx +; AVX512F_32_LIN-NEXT: movl (%esp), %eax +; AVX512F_32_LIN-NEXT: addl $20, %esp +; AVX512F_32_LIN-NEXT: retl +; ; SSE3_32_WIN-LABEL: d_to_u64: ; SSE3_32_WIN: # %bb.0: ; SSE3_32_WIN-NEXT: pushl %ebp @@ -716,39 +770,57 @@ } define i64 @d_to_s64(double %a) nounwind { -; AVX512_32_WIN-LABEL: d_to_s64: -; AVX512_32_WIN: # %bb.0: -; AVX512_32_WIN-NEXT: pushl %ebp -; AVX512_32_WIN-NEXT: movl %esp, %ebp -; AVX512_32_WIN-NEXT: andl $-8, %esp -; AVX512_32_WIN-NEXT: subl $16, %esp -; AVX512_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32_WIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) -; AVX512_32_WIN-NEXT: fisttpll (%esp) -; AVX512_32_WIN-NEXT: movl (%esp), %eax -; AVX512_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512_32_WIN-NEXT: movl %ebp, %esp -; AVX512_32_WIN-NEXT: popl %ebp -; AVX512_32_WIN-NEXT: retl -; -; AVX512_32_LIN-LABEL: d_to_s64: -; AVX512_32_LIN: # %bb.0: -; AVX512_32_LIN-NEXT: subl $20, %esp -; AVX512_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32_LIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) -; AVX512_32_LIN-NEXT: fisttpll (%esp) -; AVX512_32_LIN-NEXT: movl (%esp), %eax -; AVX512_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx -; AVX512_32_LIN-NEXT: addl $20, %esp -; AVX512_32_LIN-NEXT: retl +; AVX512DQ_32_WIN-LABEL: d_to_s64: +; AVX512DQ_32_WIN: # %bb.0: +; AVX512DQ_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32_WIN-NEXT: vcvttpd2qq %zmm0, %zmm0 +; AVX512DQ_32_WIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_WIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_WIN-NEXT: vzeroupper +; AVX512DQ_32_WIN-NEXT: retl +; +; AVX512DQ_32_LIN-LABEL: d_to_s64: +; AVX512DQ_32_LIN: # %bb.0: +; 
AVX512DQ_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32_LIN-NEXT: vcvttpd2qq %zmm0, %zmm0 +; AVX512DQ_32_LIN-NEXT: vmovd %xmm0, %eax +; AVX512DQ_32_LIN-NEXT: vpextrd $1, %xmm0, %edx +; AVX512DQ_32_LIN-NEXT: vzeroupper +; AVX512DQ_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: d_to_s64: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvttsd2si %xmm0, %rax ; AVX512_64-NEXT: retq ; +; AVX512F_32_WIN-LABEL: d_to_s64: +; AVX512F_32_WIN: # %bb.0: +; AVX512F_32_WIN-NEXT: pushl %ebp +; AVX512F_32_WIN-NEXT: movl %esp, %ebp +; AVX512F_32_WIN-NEXT: andl $-8, %esp +; AVX512F_32_WIN-NEXT: subl $16, %esp +; AVX512F_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32_WIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fldl {{[0-9]+}}(%esp) +; AVX512F_32_WIN-NEXT: fisttpll (%esp) +; AVX512F_32_WIN-NEXT: movl (%esp), %eax +; AVX512F_32_WIN-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F_32_WIN-NEXT: movl %ebp, %esp +; AVX512F_32_WIN-NEXT: popl %ebp +; AVX512F_32_WIN-NEXT: retl +; +; AVX512F_32_LIN-LABEL: d_to_s64: +; AVX512F_32_LIN: # %bb.0: +; AVX512F_32_LIN-NEXT: subl $20, %esp +; AVX512F_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32_LIN-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fldl {{[0-9]+}}(%esp) +; AVX512F_32_LIN-NEXT: fisttpll (%esp) +; AVX512F_32_LIN-NEXT: movl (%esp), %eax +; AVX512F_32_LIN-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F_32_LIN-NEXT: addl $20, %esp +; AVX512F_32_LIN-NEXT: retl +; ; SSE3_32_WIN-LABEL: d_to_s64: ; SSE3_32_WIN: # %bb.0: ; SSE3_32_WIN-NEXT: pushl %ebp Index: test/CodeGen/X86/scalar-int-to-fp.ll =================================================================== --- test/CodeGen/X86/scalar-int-to-fp.ll +++ test/CodeGen/X86/scalar-int-to-fp.ll @@ -298,32 +298,43 @@ } define float @u64_to_f(i64 %a) nounwind { -; AVX512_32-LABEL: u64_to_f: -; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp -; AVX512_32-NEXT: subl $16, %esp 
-; AVX512_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) -; AVX512_32-NEXT: xorl %eax, %eax -; AVX512_32-NEXT: cmpl $0, 12(%ebp) -; AVX512_32-NEXT: setns %al -; AVX512_32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) -; AVX512_32-NEXT: fstps {{[0-9]+}}(%esp) -; AVX512_32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512_32-NEXT: vmovss %xmm0, (%esp) -; AVX512_32-NEXT: flds (%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; AVX512_32-NEXT: popl %ebp -; AVX512_32-NEXT: retl +; AVX512DQ_32-LABEL: u64_to_f: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %eax +; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32-NEXT: vcvtuqq2ps %zmm0, %ymm0 +; AVX512DQ_32-NEXT: vmovss %xmm0, (%esp) +; AVX512DQ_32-NEXT: flds (%esp) +; AVX512DQ_32-NEXT: popl %eax +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl ; ; AVX512_64-LABEL: u64_to_f: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0 ; AVX512_64-NEXT: retq ; +; AVX512F_32-LABEL: u64_to_f: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %ebp +; AVX512F_32-NEXT: movl %esp, %ebp +; AVX512F_32-NEXT: andl $-8, %esp +; AVX512F_32-NEXT: subl $16, %esp +; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: xorl %eax, %eax +; AVX512F_32-NEXT: cmpl $0, 12(%ebp) +; AVX512F_32-NEXT: setns %al +; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) +; AVX512F_32-NEXT: fstps {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX512F_32-NEXT: vmovss %xmm0, (%esp) +; AVX512F_32-NEXT: flds (%esp) +; AVX512F_32-NEXT: movl %ebp, %esp +; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: retl +; ; SSE2_32-LABEL: u64_to_f: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %ebp @@ -386,20 +397,31 @@ } define float @s64_to_f(i64 %a) nounwind { -; AVX512_32-LABEL: s64_to_f: -; AVX512_32: # 
%bb.0: -; AVX512_32-NEXT: pushl %eax -; AVX512_32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fstps (%esp) -; AVX512_32-NEXT: flds (%esp) -; AVX512_32-NEXT: popl %eax -; AVX512_32-NEXT: retl +; AVX512DQ_32-LABEL: s64_to_f: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %eax +; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32-NEXT: vcvtqq2ps %zmm0, %ymm0 +; AVX512DQ_32-NEXT: vmovss %xmm0, (%esp) +; AVX512DQ_32-NEXT: flds (%esp) +; AVX512DQ_32-NEXT: popl %eax +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl ; ; AVX512_64-LABEL: s64_to_f: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 ; AVX512_64-NEXT: retq ; +; AVX512F_32-LABEL: s64_to_f: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %eax +; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fstps (%esp) +; AVX512F_32-NEXT: flds (%esp) +; AVX512F_32-NEXT: popl %eax +; AVX512F_32-NEXT: retl +; ; SSE2_32-LABEL: s64_to_f: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %eax @@ -423,25 +445,24 @@ } define float @s64_to_f_2(i64 %a) nounwind { -; AVX512_32-LABEL: s64_to_f_2: -; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp -; AVX512_32-NEXT: subl $16, %esp -; AVX512_32-NEXT: movl 8(%ebp), %eax -; AVX512_32-NEXT: movl 12(%ebp), %ecx -; AVX512_32-NEXT: addl $5, %eax -; AVX512_32-NEXT: adcl $0, %ecx -; AVX512_32-NEXT: vmovd %eax, %xmm0 -; AVX512_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX512_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fstps {{[0-9]+}}(%esp) -; AVX512_32-NEXT: flds {{[0-9]+}}(%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; AVX512_32-NEXT: popl %ebp -; AVX512_32-NEXT: retl +; AVX512DQ_32-LABEL: s64_to_f_2: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %eax +; AVX512DQ_32-NEXT: movl {{[0-9]+}}(%esp), %eax +; AVX512DQ_32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; AVX512DQ_32-NEXT: addl $5, %eax +; AVX512DQ_32-NEXT: 
adcl $0, %ecx +; AVX512DQ_32-NEXT: vmovd %ecx, %xmm0 +; AVX512DQ_32-NEXT: vmovd %eax, %xmm1 +; AVX512DQ_32-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX512DQ_32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512DQ_32-NEXT: vmovdqa %xmm0, %xmm0 +; AVX512DQ_32-NEXT: vcvtqq2ps %zmm0, %ymm0 +; AVX512DQ_32-NEXT: vmovss %xmm0, (%esp) +; AVX512DQ_32-NEXT: flds (%esp) +; AVX512DQ_32-NEXT: popl %eax +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl ; ; AVX512_64-LABEL: s64_to_f_2: ; AVX512_64: # %bb.0: @@ -449,6 +470,26 @@ ; AVX512_64-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 ; AVX512_64-NEXT: retq ; +; AVX512F_32-LABEL: s64_to_f_2: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %ebp +; AVX512F_32-NEXT: movl %esp, %ebp +; AVX512F_32-NEXT: andl $-8, %esp +; AVX512F_32-NEXT: subl $16, %esp +; AVX512F_32-NEXT: movl 8(%ebp), %eax +; AVX512F_32-NEXT: movl 12(%ebp), %ecx +; AVX512F_32-NEXT: addl $5, %eax +; AVX512F_32-NEXT: adcl $0, %ecx +; AVX512F_32-NEXT: vmovd %eax, %xmm0 +; AVX512F_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; AVX512F_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fstps {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: flds {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: movl %ebp, %esp +; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: retl +; ; SSE2_32-LABEL: s64_to_f_2: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %ebp @@ -498,27 +539,42 @@ } define double @u64_to_d(i64 %a) nounwind { -; AVX512_32-LABEL: u64_to_d: -; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp -; AVX512_32-NEXT: subl $8, %esp -; AVX512_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] -; AVX512_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 -; AVX512_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 -; AVX512_32-NEXT: vmovlpd %xmm0, (%esp) -; AVX512_32-NEXT: fldl (%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; 
AVX512_32-NEXT: popl %ebp -; AVX512_32-NEXT: retl +; AVX512DQ_32-LABEL: u64_to_d: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %ebp +; AVX512DQ_32-NEXT: movl %esp, %ebp +; AVX512DQ_32-NEXT: andl $-8, %esp +; AVX512DQ_32-NEXT: subl $8, %esp +; AVX512DQ_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp) +; AVX512DQ_32-NEXT: fldl (%esp) +; AVX512DQ_32-NEXT: movl %ebp, %esp +; AVX512DQ_32-NEXT: popl %ebp +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl ; ; AVX512_64-LABEL: u64_to_d: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0 ; AVX512_64-NEXT: retq ; +; AVX512F_32-LABEL: u64_to_d: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %ebp +; AVX512F_32-NEXT: movl %esp, %ebp +; AVX512F_32-NEXT: andl $-8, %esp +; AVX512F_32-NEXT: subl $8, %esp +; AVX512F_32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512F_32-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX512F_32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX512F_32-NEXT: vhaddpd %xmm0, %xmm0, %xmm0 +; AVX512F_32-NEXT: vmovlpd %xmm0, (%esp) +; AVX512F_32-NEXT: fldl (%esp) +; AVX512F_32-NEXT: movl %ebp, %esp +; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: retl +; ; SSE2_32-LABEL: u64_to_d: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %ebp @@ -570,24 +626,39 @@ } define double @s64_to_d(i64 %a) nounwind { -; AVX512_32-LABEL: s64_to_d: -; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp -; AVX512_32-NEXT: subl $8, %esp -; AVX512_32-NEXT: fildll 8(%ebp) -; AVX512_32-NEXT: fstpl (%esp) -; AVX512_32-NEXT: fldl (%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; AVX512_32-NEXT: popl %ebp -; AVX512_32-NEXT: retl +; AVX512DQ_32-LABEL: s64_to_d: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %ebp +; AVX512DQ_32-NEXT: movl %esp, %ebp +; AVX512DQ_32-NEXT: andl $-8, %esp +; AVX512DQ_32-NEXT: subl $8, %esp +; AVX512DQ_32-NEXT: 
vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX512DQ_32-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp) +; AVX512DQ_32-NEXT: fldl (%esp) +; AVX512DQ_32-NEXT: movl %ebp, %esp +; AVX512DQ_32-NEXT: popl %ebp +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl ; ; AVX512_64-LABEL: s64_to_d: ; AVX512_64: # %bb.0: ; AVX512_64-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 ; AVX512_64-NEXT: retq ; +; AVX512F_32-LABEL: s64_to_d: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %ebp +; AVX512F_32-NEXT: movl %esp, %ebp +; AVX512F_32-NEXT: andl $-8, %esp +; AVX512F_32-NEXT: subl $8, %esp +; AVX512F_32-NEXT: fildll 8(%ebp) +; AVX512F_32-NEXT: fstpl (%esp) +; AVX512F_32-NEXT: fldl (%esp) +; AVX512F_32-NEXT: movl %ebp, %esp +; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: retl +; ; SSE2_32-LABEL: s64_to_d: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %ebp @@ -615,25 +686,28 @@ } define double @s64_to_d_2(i64 %a) nounwind { -; AVX512_32-LABEL: s64_to_d_2: -; AVX512_32: # %bb.0: -; AVX512_32-NEXT: pushl %ebp -; AVX512_32-NEXT: movl %esp, %ebp -; AVX512_32-NEXT: andl $-8, %esp -; AVX512_32-NEXT: subl $16, %esp -; AVX512_32-NEXT: movl 8(%ebp), %eax -; AVX512_32-NEXT: movl 12(%ebp), %ecx -; AVX512_32-NEXT: addl $5, %eax -; AVX512_32-NEXT: adcl $0, %ecx -; AVX512_32-NEXT: vmovd %eax, %xmm0 -; AVX512_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 -; AVX512_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fildll {{[0-9]+}}(%esp) -; AVX512_32-NEXT: fstpl (%esp) -; AVX512_32-NEXT: fldl (%esp) -; AVX512_32-NEXT: movl %ebp, %esp -; AVX512_32-NEXT: popl %ebp -; AVX512_32-NEXT: retl +; AVX512DQ_32-LABEL: s64_to_d_2: +; AVX512DQ_32: # %bb.0: +; AVX512DQ_32-NEXT: pushl %ebp +; AVX512DQ_32-NEXT: movl %esp, %ebp +; AVX512DQ_32-NEXT: andl $-8, %esp +; AVX512DQ_32-NEXT: subl $8, %esp +; AVX512DQ_32-NEXT: movl 8(%ebp), %eax +; AVX512DQ_32-NEXT: movl 12(%ebp), %ecx +; AVX512DQ_32-NEXT: addl $5, %eax +; AVX512DQ_32-NEXT: adcl $0, %ecx +; AVX512DQ_32-NEXT: vmovd %ecx, %xmm0 +; 
AVX512DQ_32-NEXT: vmovd %eax, %xmm1 +; AVX512DQ_32-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; AVX512DQ_32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512DQ_32-NEXT: vmovdqa %xmm0, %xmm0 +; AVX512DQ_32-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512DQ_32-NEXT: vmovlps %xmm0, (%esp) +; AVX512DQ_32-NEXT: fldl (%esp) +; AVX512DQ_32-NEXT: movl %ebp, %esp +; AVX512DQ_32-NEXT: popl %ebp +; AVX512DQ_32-NEXT: vzeroupper +; AVX512DQ_32-NEXT: retl ; ; AVX512_64-LABEL: s64_to_d_2: ; AVX512_64: # %bb.0: @@ -641,6 +715,26 @@ ; AVX512_64-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 ; AVX512_64-NEXT: retq ; +; AVX512F_32-LABEL: s64_to_d_2: +; AVX512F_32: # %bb.0: +; AVX512F_32-NEXT: pushl %ebp +; AVX512F_32-NEXT: movl %esp, %ebp +; AVX512F_32-NEXT: andl $-8, %esp +; AVX512F_32-NEXT: subl $16, %esp +; AVX512F_32-NEXT: movl 8(%ebp), %eax +; AVX512F_32-NEXT: movl 12(%ebp), %ecx +; AVX512F_32-NEXT: addl $5, %eax +; AVX512F_32-NEXT: adcl $0, %ecx +; AVX512F_32-NEXT: vmovd %eax, %xmm0 +; AVX512F_32-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 +; AVX512F_32-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX512F_32-NEXT: fstpl (%esp) +; AVX512F_32-NEXT: fldl (%esp) +; AVX512F_32-NEXT: movl %ebp, %esp +; AVX512F_32-NEXT: popl %ebp +; AVX512F_32-NEXT: retl +; ; SSE2_32-LABEL: s64_to_d_2: ; SSE2_32: # %bb.0: ; SSE2_32-NEXT: pushl %ebp