diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1334,6 +1334,7 @@ SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned, SDValue &Chain) const; + SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const; @@ -1357,6 +1358,7 @@ SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -270,6 +270,16 @@ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); + + setOperationAction(ISD::LRINT, MVT::f32, Custom); + setOperationAction(ISD::LRINT, MVT::f64, Custom); + setOperationAction(ISD::LLRINT, MVT::f32, Custom); + setOperationAction(ISD::LLRINT, MVT::f64, Custom); + + if (!Subtarget.is64Bit()) { + setOperationAction(ISD::LRINT, MVT::i64, Custom); + setOperationAction(ISD::LLRINT, MVT::i64, Custom); + } } // Handle address space casts between mixed sized pointers. @@ -663,8 +673,8 @@ setOperationAction(ISD::FMA, MVT::f80, Expand); setOperationAction(ISD::LROUND, MVT::f80, Expand); setOperationAction(ISD::LLROUND, MVT::f80, Expand); - setOperationAction(ISD::LRINT, MVT::f80, Expand); - setOperationAction(ISD::LLRINT, MVT::f80, Expand); + setOperationAction(ISD::LRINT, MVT::f80, Custom); + setOperationAction(ISD::LLRINT, MVT::f80, Custom); // Handle constrained floating-point operations of scalar. setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal); @@ -20306,6 +20316,63 @@ llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases."); } +SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op, + SelectionDAG &DAG) const { + SDValue Src = Op.getOperand(0); + MVT SrcVT = Src.getSimpleValueType(); + + // If the source is in an SSE register, the node is Legal. + if (isScalarFPTypeInSSEReg(SrcVT)) + return Op; + + return LRINT_LLRINTHelper(Op.getNode(), DAG); +} + +SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N, + SelectionDAG &DAG) const { + EVT DstVT = N->getValueType(0); + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + + if (SrcVT != MVT::f32 && SrcVT != MVT::f64 && SrcVT != MVT::f80) { + // f16 must be promoted before using the lowering in this routine. + // fp128 does not use this lowering. + return SDValue(); + } + + SDLoc DL(N); + SDValue Chain = DAG.getEntryNode(); + + bool UseSSE = isScalarFPTypeInSSEReg(SrcVT); + + // If we're converting from SSE, the stack slot needs to hold both types. + // Otherwise it only needs to hold the DstVT. + EVT OtherVT = UseSSE ? 
                               SrcVT : DstVT;
+  SDValue StackPtr = DAG.CreateStackTemporary(DstVT, OtherVT);
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachinePointerInfo MPI =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+
+  if (UseSSE) {
+    assert(DstVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!");
+    Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI);
+    SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+    SDValue Ops[] = { Chain, StackPtr };
+
+    Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI,
+                                  /*Align*/0, MachineMemOperand::MOLoad);
+    Chain = Src.getValue(1);
+  }
+
+  SDValue StoreOps[] = { Chain, Src, StackPtr };
+  Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL,
+                                  DAG.getVTList(MVT::Other), StoreOps,
+                                  DstVT, MPI, /*Align*/0,
+                                  MachineMemOperand::MOStore);
+
+  return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
+}
+
 SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op,
                                           SelectionDAG &DAG) const {
   bool IsStrict = Op->isStrictFPOpcode();
@@ -28637,6 +28704,8 @@
   case ISD::FNEG:               return LowerFABSorFNEG(Op, DAG);
   case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
   case ISD::FGETSIGN:           return LowerFGETSIGN(Op, DAG);
+  case ISD::LRINT:
+  case ISD::LLRINT:             return LowerLRINT_LLRINT(Op, DAG);
   case ISD::SETCC:
   case ISD::STRICT_FSETCC:
   case ISD::STRICT_FSETCCS:     return LowerSETCC(Op, DAG);
@@ -29191,6 +29260,13 @@
     }
     return;
   }
+  case ISD::LRINT:
+  case ISD::LLRINT: {
+    if (SDValue V = LRINT_LLRINTHelper(N, DAG))
+      Results.push_back(V);
+    return;
+  }
+
   case ISD::SINT_TO_FP:
   case ISD::STRICT_SINT_TO_FP:
   case ISD::UINT_TO_FP:
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7233,6 +7233,45 @@
                                            X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi",
                                            "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
 
+multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
+                        X86VectorVTInfo DstVT, SDNode OpNode,
+                        X86FoldableSchedWrite sched,
+                        string aliasStr> {
+  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
+    let isCodeGenOnly = 1 in {
+      def rr : AVX512<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.FRC:$src),
+              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+              [(set DstVT.RC:$dst, (OpNode SrcVT.FRC:$src))]>,
+              EVEX, VEX_LIG, Sched<[sched]>, SIMD_EXC;
+      def rm : AVX512<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src),
+              !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+              [(set DstVT.RC:$dst, (OpNode (SrcVT.ScalarLdFrag addr:$src)))]>,
+              EVEX, VEX_LIG, Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
+    }
+  } // Predicates = [HasAVX512]
+}
+
+defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
+                              lrint, WriteCvtSS2I,
+                              "{l}">, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
+                                llrint, WriteCvtSS2I,
+                                "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
+                              lrint, WriteCvtSD2I,
+                              "{l}">, XD, EVEX_CD8<64, CD8VT1>;
+defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
+                               llrint, WriteCvtSD2I,
+                               "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+
+let Predicates = [HasAVX512] in {
+  def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
+  def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64Zrm addr:$src)>;
+
+  def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64Zrr FR64:$src)>;
+  def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64Zrm addr:$src)>;
+}
+
 // Patterns used for matching vcvtsi2s{s,d} intrinsic sequences from clang
 // which produce unnecessary vmovs{s,d} instructions
 let Predicates = [HasAVX512] in {
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -74,6 +74,11 @@
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
 }]>;
 
+def X86fist32 : PatFrag<(ops node:$val, node:$ptr),
+                        (X86fist node:$val, node:$ptr), [{
+  return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i32;
+}]>;
+
 def X86fist64 : PatFrag<(ops node:$val, node:$ptr),
                         (X86fist node:$val, node:$ptr), [{
   return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64;
@@ -525,14 +530,20 @@
 let mayStore = 1, hasSideEffects = 0 in {
 def IST_Fp16m32 : FpIf32<(outs), (ins i16mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP, []>;
-def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP, []>;
+def IST_Fp32m32 : FpIf32<(outs), (ins i32mem:$op, RFP32:$src), OneArgFP,
+                  [(X86fist32 RFP32:$src, addr:$op)]>;
+def IST_Fp64m32 : FpIf32<(outs), (ins i64mem:$op, RFP32:$src), OneArgFP,
+                  [(X86fist64 RFP32:$src, addr:$op)]>;
 def IST_Fp16m64 : FpIf64<(outs), (ins i16mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP, []>;
-def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP, []>;
+def IST_Fp32m64 : FpIf64<(outs), (ins i32mem:$op, RFP64:$src), OneArgFP,
+                  [(X86fist32 RFP64:$src, addr:$op)]>;
+def IST_Fp64m64 : FpIf64<(outs), (ins i64mem:$op, RFP64:$src), OneArgFP,
+                  [(X86fist64 RFP64:$src, addr:$op)]>;
 def IST_Fp16m80 : FpI_<(outs), (ins i16mem:$op, RFP80:$src), OneArgFP, []>;
-def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP, []>;
-def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP, []>;
+def IST_Fp32m80 : FpI_<(outs), (ins i32mem:$op, RFP80:$src), OneArgFP,
+                  [(X86fist32 RFP80:$src, addr:$op)]>;
+def IST_Fp64m80 : FpI_<(outs), (ins i64mem:$op, RFP80:$src), OneArgFP,
+                  [(X86fist64 RFP80:$src, addr:$op)]>;
 } // mayStore
 } // SchedRW, Uses = [FPCW]
@@ -791,9 +802,6 @@
 def : Pat<(f80 fpimmneg0), (CHS_Fp80 (LD_Fp080))>;
 def : Pat<(f80 fpimmneg1), (CHS_Fp80 (LD_Fp180))>;
 
-// Used to conv. between f80 and i64 for i64 atomic loads.
-def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>;
-
 // FP extensions map onto simple pseudo-value conversions if they are to/from
 // the FP stack.
def : Pat<(f64 (any_fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -884,6 +884,23 @@ "cvttsd2si", "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG; + +defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32, + "cvtss2si", "cvtss2si", + WriteCvtSS2I, SSEPackedSingle>, + XS, VEX, VEX_LIG; +defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32, + "cvtss2si", "cvtss2si", + WriteCvtSS2I, SSEPackedSingle>, + XS, VEX, VEX_W, VEX_LIG; +defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64, + "cvtsd2si", "cvtsd2si", + WriteCvtSD2I, SSEPackedDouble>, + XD, VEX, VEX_LIG; +defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64, + "cvtsd2si", "cvtsd2si", + WriteCvtSD2I, SSEPackedDouble>, + XD, VEX, VEX_W, VEX_LIG; } // The assembler can recognize rr 64-bit instructions by seeing a rxx @@ -923,6 +940,12 @@ (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>; def : Pat<(f64 (any_sint_to_fp GR64:$src)), (VCVTSI642SDrr (f64 (IMPLICIT_DEF)), GR64:$src)>; + + def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64rr FR32:$src)>; + def : Pat<(i64 (lrint (loadf32 addr:$src))), (VCVTSS2SI64rm addr:$src)>; + + def : Pat<(i64 (lrint FR64:$src)), (VCVTSD2SI64rr FR64:$src)>; + def : Pat<(i64 (lrint (loadf64 addr:$src))), (VCVTSD2SI64rm addr:$src)>; } let isCodeGenOnly = 1 in { @@ -938,6 +961,20 @@ defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64, "cvttsd2si", "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC; + +defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32, + "cvtss2si", "cvtss2si", + WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC; +defm CVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32, + "cvtss2si", "cvtss2si", + WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC; +defm CVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64, + "cvtsd2si", "cvtsd2si", + WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC; +defm CVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64, + "cvtsd2si", "cvtsd2si", + WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC; + defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, any_sint_to_fp, i32mem, loadi32, "cvtsi2ss", "cvtsi2ss{l}", WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC; @@ -952,6 +989,16 @@ WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC; } // isCodeGenOnly = 1 +let Predicates = [UseSSE1] in { + def : Pat<(i64 (lrint FR32:$src)), (CVTSS2SI64rr FR32:$src)>; + def : Pat<(i64 (lrint (loadf32 addr:$src))), (CVTSS2SI64rm addr:$src)>; +} + +let Predicates = [UseSSE2] in { + def : Pat<(i64 (lrint FR64:$src)), (CVTSD2SI64rr FR64:$src)>; + def : Pat<(i64 (lrint (loadf64 addr:$src))), (CVTSD2SI64rm addr:$src)>; +} + // Conversion Instructions Intrinsics - Match intrinsics which expect MM // and/or XMM operand(s). 
diff --git a/llvm/test/CodeGen/X86/llrint-conv.ll b/llvm/test/CodeGen/X86/llrint-conv.ll --- a/llvm/test/CodeGen/X86/llrint-conv.ll +++ b/llvm/test/CodeGen/X86/llrint-conv.ll @@ -1,65 +1,153 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefix=SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 +; RUN: llc < %s -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX +; RUN: llc < %s -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-SSE +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX define i64 @testmsxs(float %x) { -; X86-LABEL: testmsxs: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %eax -; X86-NEXT: .cfi_def_cfa_offset 8 -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: fstps (%esp) -; X86-NEXT: calll llrintf -; X86-NEXT: popl %ecx -; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: retl +; X86-NOSSE-LABEL: testmsxs: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 +; X86-NOSSE-NEXT: .cfi_offset %ebp, -8 +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: .cfi_def_cfa_register %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: flds 8(%ebp) +; X86-NOSSE-NEXT: fistpll (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: .cfi_def_cfa %esp, 4 +; X86-NOSSE-NEXT: retl ; -; SSE2-LABEL: testmsxs: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: pushl %eax -; SSE2-NEXT: .cfi_def_cfa_offset 8 -; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; SSE2-NEXT: movss %xmm0, (%esp) -; SSE2-NEXT: calll llrintf -; SSE2-NEXT: popl %ecx -; SSE2-NEXT: .cfi_def_cfa_offset 4 -; SSE2-NEXT: retl +; X86-SSE2-LABEL: testmsxs: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: .cfi_offset %ebp, -8 +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-SSE2-NEXT: movss %xmm0, (%esp) +; X86-SSE2-NEXT: flds (%esp) +; X86-SSE2-NEXT: fistpll (%esp) +; X86-SSE2-NEXT: movl (%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: retl ; -; X64-LABEL: testmsxs: -; X64: # %bb.0: # %entry -; X64-NEXT: jmp llrintf # TAILCALL +; X86-AVX-LABEL: testmsxs: +; X86-AVX: # %bb.0: # %entry +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX-NEXT: .cfi_offset %ebp, -8 +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: .cfi_def_cfa_register %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX-NEXT: vmovss %xmm0, (%esp) +; 
X86-AVX-NEXT: flds (%esp) +; X86-AVX-NEXT: fistpll (%esp) +; X86-AVX-NEXT: movl (%esp), %eax +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: testmsxs: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: cvtss2si %xmm0, %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: testmsxs: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: vcvtss2si %xmm0, %rax +; X64-AVX-NEXT: retq entry: %0 = tail call i64 @llvm.llrint.f32(float %x) ret i64 %0 } define i64 @testmsxd(double %x) { -; X86-LABEL: testmsxd: -; X86: # %bb.0: # %entry -; X86-NEXT: subl $8, %esp -; X86-NEXT: .cfi_def_cfa_offset 12 -; X86-NEXT: fldl {{[0-9]+}}(%esp) -; X86-NEXT: fstpl (%esp) -; X86-NEXT: calll llrint -; X86-NEXT: addl $8, %esp -; X86-NEXT: .cfi_def_cfa_offset 4 -; X86-NEXT: retl +; X86-NOSSE-LABEL: testmsxd: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 +; X86-NOSSE-NEXT: .cfi_offset %ebp, -8 +; X86-NOSSE-NEXT: movl %esp, %ebp +; X86-NOSSE-NEXT: .cfi_def_cfa_register %ebp +; X86-NOSSE-NEXT: andl $-8, %esp +; X86-NOSSE-NEXT: subl $8, %esp +; X86-NOSSE-NEXT: fldl 8(%ebp) +; X86-NOSSE-NEXT: fistpll (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl %ebp, %esp +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: .cfi_def_cfa %esp, 4 +; X86-NOSSE-NEXT: retl ; -; SSE2-LABEL: testmsxd: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: subl $8, %esp -; SSE2-NEXT: .cfi_def_cfa_offset 12 -; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; SSE2-NEXT: movsd %xmm0, (%esp) -; SSE2-NEXT: calll llrint -; SSE2-NEXT: addl $8, %esp -; SSE2-NEXT: .cfi_def_cfa_offset 4 -; SSE2-NEXT: retl +; X86-SSE2-LABEL: testmsxd: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: pushl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 +; X86-SSE2-NEXT: .cfi_offset %ebp, -8 +; X86-SSE2-NEXT: movl %esp, %ebp +; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp +; X86-SSE2-NEXT: andl $-8, %esp +; X86-SSE2-NEXT: subl $8, %esp +; X86-SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-SSE2-NEXT: movsd %xmm0, (%esp) +; X86-SSE2-NEXT: fldl (%esp) +; X86-SSE2-NEXT: fistpll (%esp) +; X86-SSE2-NEXT: movl (%esp), %eax +; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE2-NEXT: movl %ebp, %esp +; X86-SSE2-NEXT: popl %ebp +; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 +; X86-SSE2-NEXT: retl ; -; X64-LABEL: testmsxd: -; X64: # %bb.0: # %entry -; X64-NEXT: jmp llrint # TAILCALL +; X86-AVX-LABEL: testmsxd: +; X86-AVX: # %bb.0: # %entry +; X86-AVX-NEXT: pushl %ebp +; X86-AVX-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX-NEXT: .cfi_offset %ebp, -8 +; X86-AVX-NEXT: movl %esp, %ebp +; X86-AVX-NEXT: .cfi_def_cfa_register %ebp +; X86-AVX-NEXT: andl $-8, %esp +; X86-AVX-NEXT: subl $8, %esp +; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; X86-AVX-NEXT: vmovsd %xmm0, (%esp) +; X86-AVX-NEXT: fldl (%esp) +; X86-AVX-NEXT: fistpll (%esp) +; X86-AVX-NEXT: movl (%esp), %eax +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-AVX-NEXT: movl %ebp, %esp +; X86-AVX-NEXT: popl %ebp +; X86-AVX-NEXT: .cfi_def_cfa %esp, 4 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: testmsxd: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: cvtsd2si %xmm0, %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: testmsxd: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: vcvtsd2si %xmm0, %rax +; X64-AVX-NEXT: retq entry: %0 = tail call i64 @llvm.llrint.f64(double %x) ret i64 %0 @@ -68,29 +156,28 @@ define i64 
@testmsll(x86_fp80 %x) { ; X86-LABEL: testmsll: ; X86: # %bb.0: # %entry -; X86-NEXT: subl $12, %esp -; X86-NEXT: .cfi_def_cfa_offset 16 -; X86-NEXT: fldt {{[0-9]+}}(%esp) -; X86-NEXT: fstpt (%esp) -; X86-NEXT: calll llrintl -; X86-NEXT: addl $12, %esp -; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: fldt 8(%ebp) +; X86-NEXT: fistpll (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 ; X86-NEXT: retl ; -; SSE2-LABEL: testmsll: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: subl $12, %esp -; SSE2-NEXT: .cfi_def_cfa_offset 16 -; SSE2-NEXT: fldt {{[0-9]+}}(%esp) -; SSE2-NEXT: fstpt (%esp) -; SSE2-NEXT: calll llrintl -; SSE2-NEXT: addl $12, %esp -; SSE2-NEXT: .cfi_def_cfa_offset 4 -; SSE2-NEXT: retl -; ; X64-LABEL: testmsll: ; X64: # %bb.0: # %entry -; X64-NEXT: jmp llrintl # TAILCALL +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fistpll -{{[0-9]+}}(%rsp) +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; X64-NEXT: retq entry: %0 = tail call i64 @llvm.llrint.f80(x86_fp80 %x) ret i64 %0 diff --git a/llvm/test/CodeGen/X86/lrint-conv-i32.ll b/llvm/test/CodeGen/X86/lrint-conv-i32.ll --- a/llvm/test/CodeGen/X86/lrint-conv-i32.ll +++ b/llvm/test/CodeGen/X86/lrint-conv-i32.ll @@ -1,30 +1,102 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=CHECK,X86 -; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,X64 +; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE +; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2 +; RUN: llc < %s -mtriple=i686-unknown -mattr=avx | FileCheck %s --check-prefixes=X86,X86-AVX +; RUN: llc < %s -mtriple=i686-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X86,X86-AVX +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-SSE +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=X64,X64-AVX +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=X64,X64-AVX define i32 @testmsws(float %x) { -; CHECK-LABEL: testmsws: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lrintf # TAILCALL +; X86-NOSSE-LABEL: testmsws: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 +; X86-NOSSE-NEXT: flds {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fistpl (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: testmsws: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: cvtss2si {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: testmsws: +; X86-AVX: # %bb.0: # %entry +; X86-AVX-NEXT: vcvtss2si {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: testmsws: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: cvtss2si %xmm0, %eax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: testmsws: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: vcvtss2si %xmm0, %eax +; X64-AVX-NEXT: retq entry: %0 = tail call i32 @llvm.lrint.i32.f32(float %x) ret i32 %0 
} define i32 @testmswd(double %x) { -; CHECK-LABEL: testmswd: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lrint # TAILCALL +; X86-NOSSE-LABEL: testmswd: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %eax +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 +; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fistpl (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: popl %ecx +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 +; X86-NOSSE-NEXT: retl +; +; X86-SSE2-LABEL: testmswd: +; X86-SSE2: # %bb.0: # %entry +; X86-SSE2-NEXT: cvtsd2si {{[0-9]+}}(%esp), %eax +; X86-SSE2-NEXT: retl +; +; X86-AVX-LABEL: testmswd: +; X86-AVX: # %bb.0: # %entry +; X86-AVX-NEXT: vcvtsd2si {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: testmswd: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: cvtsd2si %xmm0, %eax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: testmswd: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: vcvtsd2si %xmm0, %eax +; X64-AVX-NEXT: retq entry: %0 = tail call i32 @llvm.lrint.i32.f64(double %x) ret i32 %0 } define i32 @testmsll(x86_fp80 %x) { -; CHECK-LABEL: testmsll: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lrintl # TAILCALL +; X86-LABEL: testmsll: +; X86: # %bb.0: # %entry +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fistpl (%esp) +; X86-NEXT: movl (%esp), %eax +; X86-NEXT: popl %ecx +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: testmsll: +; X64: # %bb.0: # %entry +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fistpl -{{[0-9]+}}(%rsp) +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: retq entry: %0 = tail call i32 @llvm.lrint.i32.f80(x86_fp80 %x) ret i32 %0 diff --git a/llvm/test/CodeGen/X86/lrint-conv-i64.ll b/llvm/test/CodeGen/X86/lrint-conv-i64.ll --- a/llvm/test/CodeGen/X86/lrint-conv-i64.ll +++ b/llvm/test/CodeGen/X86/lrint-conv-i64.ll @@ -1,19 +1,33 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=avx512f | FileCheck %s --check-prefixes=CHECK,AVX define i64 @testmsxs(float %x) { -; CHECK-LABEL: testmsxs: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lrintf # TAILCALL +; SSE-LABEL: testmsxs: +; SSE: # %bb.0: # %entry +; SSE-NEXT: cvtss2si %xmm0, %rax +; SSE-NEXT: retq +; +; AVX-LABEL: testmsxs: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtss2si %xmm0, %rax +; AVX-NEXT: retq entry: %0 = tail call i64 @llvm.lrint.i64.f32(float %x) ret i64 %0 } define i64 @testmsxd(double %x) { -; CHECK-LABEL: testmsxd: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lrint # TAILCALL +; SSE-LABEL: testmsxd: +; SSE: # %bb.0: # %entry +; SSE-NEXT: cvtsd2si %xmm0, %rax +; SSE-NEXT: retq +; +; AVX-LABEL: testmsxd: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vcvtsd2si %xmm0, %rax +; AVX-NEXT: retq entry: %0 = tail call i64 @llvm.lrint.i64.f64(double %x) ret i64 %0 @@ -22,7 +36,10 @@ define i64 @testmsll(x86_fp80 %x) { ; CHECK-LABEL: testmsll: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: jmp lrintl # TAILCALL +; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) +; CHECK-NEXT: fistpll -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: retq entry: %0 = tail call i64 @llvm.lrint.i64.f80(x86_fp80 %x) ret i64 %0
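
For reference, a minimal standalone IR sketch, not part of the patch (the example_* function names are illustrative), that exercises the new lowering the same way the updated tests do. Per the patterns added in X86InstrSSE.td/X86InstrAVX512.td, running it through llc -mtriple=x86_64-unknown (optionally with -mattr=avx or -mattr=avx512f) should select cvtss2si/cvtsd2si (or their v-prefixed forms) for the SSE-register cases, while the x86_fp80 case takes the FIST path from LRINT_LLRINTHelper: the value is stored with fistpll through an 8-byte stack temporary and reloaded, as the X64 check lines above show.

; Illustrative example only -- mirrors the intrinsics used in the tests above.
; Expected on x86-64: cvtss2si / cvtsd2si for the SSE cases,
; fldt + fistpll + reload for the x86_fp80 case.
declare i64 @llvm.lrint.i64.f32(float)
declare i64 @llvm.llrint.f64(double)
declare i64 @llvm.llrint.f80(x86_fp80)

define i64 @example_lrint_f32(float %x) {
entry:
  %r = tail call i64 @llvm.lrint.i64.f32(float %x)
  ret i64 %r
}

define i64 @example_llrint_f64(double %x) {
entry:
  %r = tail call i64 @llvm.llrint.f64(double %x)
  ret i64 %r
}

define i64 @example_llrint_f80(x86_fp80 %x) {
entry:
  %r = tail call i64 @llvm.llrint.f80(x86_fp80 %x)
  ret i64 %r
}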