diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1561,6 +1561,9 @@
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
+                                    SDValue &Chain) const;
+    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2167,6 +2167,14 @@
     setOperationAction(ISD::UDIV, MVT::i128, Custom);
     setOperationAction(ISD::SREM, MVT::i128, Custom);
     setOperationAction(ISD::UREM, MVT::i128, Custom);
+    setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
+    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
+    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
   }
 
   // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
@@ -20445,6 +20453,9 @@
   MVT VT = Op.getSimpleValueType();
   SDLoc dl(Op);
 
+  if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
+    return LowerWin64_INT128_TO_FP(Op, DAG);
+
   if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
     return Extract;
 
@@ -20944,6 +20955,9 @@
   if (DstVT.isVector())
     return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);
 
+  if (Subtarget.isTargetWin64() && SrcVT == MVT::i128)
+    return LowerWin64_INT128_TO_FP(Op, DAG);
+
   if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
     return Extract;
 
@@ -28686,6 +28700,77 @@
   return DAG.getBitcast(VT, CallInfo.first);
 }
 
+SDValue X86TargetLowering::LowerWin64_FP_TO_INT128(SDValue Op,
+                                                   SelectionDAG &DAG,
+                                                   SDValue &Chain) const {
+  assert(Subtarget.isTargetWin64() && "Unexpected target");
+  EVT VT = Op.getValueType();
+  bool IsStrict = Op->isStrictFPOpcode();
+
+  SDValue Arg = Op.getOperand(IsStrict ? 1 : 0);
+  EVT ArgVT = Arg.getValueType();
+
+  assert(VT.isInteger() && VT.getSizeInBits() == 128 &&
+         "Unexpected return type for lowering");
+
+  RTLIB::Libcall LC;
+  if (Op->getOpcode() == ISD::FP_TO_SINT ||
+      Op->getOpcode() == ISD::STRICT_FP_TO_SINT)
+    LC = RTLIB::getFPTOSINT(ArgVT, VT);
+  else
+    LC = RTLIB::getFPTOUINT(ArgVT, VT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
+
+  SDLoc dl(Op);
+  MakeLibCallOptions CallOptions;
+  Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+
+  SDValue Result;
+  // Expect the i128 return value as a v2i64 in xmm0; bitcast it back to the
+  // expected VT (i128).
+  std::tie(Result, Chain) =
+      makeLibCall(DAG, LC, MVT::v2i64, Arg, CallOptions, dl, Chain);
+  Result = DAG.getBitcast(VT, Result);
+  return Result;
+}
+
+SDValue X86TargetLowering::LowerWin64_INT128_TO_FP(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  assert(Subtarget.isTargetWin64() && "Unexpected target");
+  EVT VT = Op.getValueType();
+  bool IsStrict = Op->isStrictFPOpcode();
+
+  SDValue Arg = Op.getOperand(IsStrict ? 1 : 0);
+  EVT ArgVT = Arg.getValueType();
+
+  assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 &&
+         "Unexpected argument type for lowering");
+
+  RTLIB::Libcall LC;
+  if (Op->getOpcode() == ISD::SINT_TO_FP ||
+      Op->getOpcode() == ISD::STRICT_SINT_TO_FP)
+    LC = RTLIB::getSINTTOFP(ArgVT, VT);
+  else
+    LC = RTLIB::getUINTTOFP(ArgVT, VT);
+  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
+
+  SDLoc dl(Op);
+  MakeLibCallOptions CallOptions;
+  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
+
+  // Pass the i128 argument indirectly, through a 16-byte-aligned stack slot.
+  SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16);
+  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+  MachinePointerInfo MPI =
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+  Chain = DAG.getStore(Chain, dl, Arg, StackPtr, MPI, Align(16));
+
+  SDValue Result;
+  std::tie(Result, Chain) =
+      makeLibCall(DAG, LC, VT, StackPtr, CallOptions, dl, Chain);
+  return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
+}
+
 // Return true if the required (according to Opcode) shift-imm form is natively
 // supported by the Subtarget
 static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
@@ -31665,6 +31750,15 @@
       return;
     }
 
+    if (VT == MVT::i128 && Subtarget.isTargetWin64()) {
+      SDValue Chain;
+      SDValue V = LowerWin64_FP_TO_INT128(SDValue(N, 0), DAG, Chain);
+      Results.push_back(V);
+      if (IsStrict)
+        Results.push_back(Chain);
+      return;
+    }
+
     SDValue Chain;
     if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, Chain)) {
       Results.push_back(V);
diff --git a/llvm/test/CodeGen/X86/i128-fpconv-win64-strict.ll b/llvm/test/CodeGen/X86/i128-fpconv-win64-strict.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/X86/i128-fpconv-win64-strict.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=WIN64
+
+define i64 @double_to_i128(double %d) nounwind strictfp {
+; WIN64-LABEL: double_to_i128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $40, %rsp
+; WIN64-NEXT:    callq __fixdfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $40, %rsp
+; WIN64-NEXT:    retq
+  %1 = tail call i128 @llvm.experimental.constrained.fptosi.i128.f64(double %d, metadata !"fpexcept.strict")
+  %2 = trunc i128 %1 to i64
+  ret i64 %2
+}
+
+define i64 @double_to_ui128(double %d) nounwind strictfp {
+; WIN64-LABEL: double_to_ui128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $40, %rsp
+; WIN64-NEXT:    callq __fixunsdfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $40, %rsp
+; WIN64-NEXT:    retq
+  %1 = tail call i128 @llvm.experimental.constrained.fptoui.i128.f64(double %d, metadata !"fpexcept.strict")
+  %2 = trunc i128 %1 to i64
+  ret i64 %2
+}
+
+define i64 @float_to_i128(float %d) nounwind strictfp {
+; WIN64-LABEL: float_to_i128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $40, %rsp
+; WIN64-NEXT:    callq __fixsfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $40, %rsp
+; WIN64-NEXT:    retq
+  %1 = tail call i128 @llvm.experimental.constrained.fptosi.i128.f32(float %d, metadata !"fpexcept.strict")
+  %2 = trunc i128 %1 to i64
+  ret i64 %2
+}
+
+define i64 @float_to_ui128(float %d) nounwind strictfp {
+; WIN64-LABEL: float_to_ui128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $40, %rsp
+; WIN64-NEXT:    callq __fixunssfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $40, %rsp
+; WIN64-NEXT:    retq
+  %1 = tail call i128 @llvm.experimental.constrained.fptoui.i128.f32(float %d, metadata !"fpexcept.strict")
+  %2 = trunc i128 %1 to i64
+  ret i64 %2
+}
+
+define i64 @longdouble_to_i128(x86_fp80* nocapture readonly %0) nounwind strictfp {
+; WIN64-LABEL: longdouble_to_i128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    fldt (%rcx)
+; WIN64-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __fixxfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load x86_fp80, x86_fp80* %0, align 16
+  %3 = tail call i128 @llvm.experimental.constrained.fptosi.i128.f80(x86_fp80 %2, metadata !"fpexcept.strict")
+  %4 = trunc i128 %3 to i64
+  ret i64 %4
+}
+
+define i64 @longdouble_to_ui128(x86_fp80* nocapture readonly %0) nounwind strictfp {
+; WIN64-LABEL: longdouble_to_ui128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    fldt (%rcx)
+; WIN64-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __fixunsxfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load x86_fp80, x86_fp80* %0, align 16
+  %3 = tail call i128 @llvm.experimental.constrained.fptoui.i128.f80(x86_fp80 %2, metadata !"fpexcept.strict")
+  %4 = trunc i128 %3 to i64
+  ret i64 %4
+}
+
+define double @i128_to_double(i128* nocapture readonly %0) nounwind strictfp {
+; WIN64-LABEL: i128_to_double:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    movaps (%rcx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __floattidf
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = tail call double @llvm.experimental.constrained.sitofp.f64.i128(i128 %2, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret double %3
+}
+
+define double @ui128_to_double(i128* nocapture readonly %0) nounwind strictfp {
+; WIN64-LABEL: ui128_to_double:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    movaps (%rcx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __floatuntidf
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = tail call double @llvm.experimental.constrained.uitofp.f64.i128(i128 %2, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret double %3
+}
+
+define float @i128_to_float(i128* nocapture readonly %0) nounwind strictfp {
+; WIN64-LABEL: i128_to_float:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    movaps (%rcx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __floattisf
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = tail call float @llvm.experimental.constrained.sitofp.f32.i128(i128 %2, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %3
+}
+
+define float @ui128_to_float(i128* nocapture readonly %0) nounwind strictfp {
+; WIN64-LABEL: ui128_to_float:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    movaps (%rcx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __floatuntisf
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = tail call float @llvm.experimental.constrained.uitofp.f32.i128(i128 %2, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %3
+}
+
+define void @i128_to_longdouble(x86_fp80* noalias nocapture sret(x86_fp80) align 16 %agg.result, i128* nocapture readonly %0) nounwind strictfp {
+; WIN64-LABEL: i128_to_longdouble:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rsi
+; WIN64-NEXT:    subq $64, %rsp
+; WIN64-NEXT:    movq %rcx, %rsi
+; WIN64-NEXT:    movaps (%rdx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT:    callq __floattixf
+; WIN64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    fstpt (%rsi)
+; WIN64-NEXT:    movq %rsi, %rax
+; WIN64-NEXT:    addq $64, %rsp
+; WIN64-NEXT:    popq %rsi
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = tail call x86_fp80 @llvm.experimental.constrained.sitofp.f80.i128(i128 %2, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  store x86_fp80 %3, x86_fp80* %agg.result, align 16
+  ret void
+}
+
+define void @ui128_to_longdouble(x86_fp80* noalias nocapture sret(x86_fp80) align 16 %agg.result, i128* nocapture readonly %0) nounwind strictfp {
+; WIN64-LABEL: ui128_to_longdouble:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rsi
+; WIN64-NEXT:    subq $64, %rsp
+; WIN64-NEXT:    movq %rcx, %rsi
+; WIN64-NEXT:    movaps (%rdx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT:    callq __floatuntixf
+; WIN64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    fstpt (%rsi)
+; WIN64-NEXT:    movq %rsi, %rax
+; WIN64-NEXT:    addq $64, %rsp
+; WIN64-NEXT:    popq %rsi
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = tail call x86_fp80 @llvm.experimental.constrained.uitofp.f80.i128(i128 %2, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  store x86_fp80 %3, x86_fp80* %agg.result, align 16
+  ret void
+}
+
+declare i128 @llvm.experimental.constrained.fptosi.i128.f64(double, metadata)
+declare i128 @llvm.experimental.constrained.fptoui.i128.f64(double, metadata)
+declare i128 @llvm.experimental.constrained.fptosi.i128.f32(float, metadata)
+declare i128 @llvm.experimental.constrained.fptoui.i128.f32(float, metadata)
+declare i128 @llvm.experimental.constrained.fptosi.i128.f80(x86_fp80, metadata)
+declare i128 @llvm.experimental.constrained.fptoui.i128.f80(x86_fp80, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i128(i128, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i128(i128, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i128(i128, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i128(i128, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.sitofp.f80.i128(i128, metadata, metadata)
+declare x86_fp80 @llvm.experimental.constrained.uitofp.f80.i128(i128, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/i128-fpconv-win64.ll b/llvm/test/CodeGen/X86/i128-fpconv-win64.ll
new file
--- /dev/null
+++ b/llvm/test/CodeGen/X86/i128-fpconv-win64.ll
@@ -0,0 +1,195 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=WIN64
+
+define i64 @double_to_i128(double %d) nounwind {
+; WIN64-LABEL: double_to_i128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $40, %rsp
+; WIN64-NEXT:    callq __fixdfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $40, %rsp
+; WIN64-NEXT:    retq
+  %1 = fptosi double %d to i128
+  %2 = trunc i128 %1 to i64
+  ret i64 %2
+}
+
+define i64 @double_to_ui128(double %d) nounwind {
+; WIN64-LABEL: double_to_ui128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $40, %rsp
+; WIN64-NEXT:    callq __fixunsdfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $40, %rsp
+; WIN64-NEXT:    retq
+  %1 = fptoui double %d to i128
+  %2 = trunc i128 %1 to i64
+  ret i64 %2
+}
+
+define i64 @float_to_i128(float %d) nounwind {
+; WIN64-LABEL: float_to_i128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $40, %rsp
+; WIN64-NEXT:    callq __fixsfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $40, %rsp
+; WIN64-NEXT:    retq
+  %1 = fptosi float %d to i128
+  %2 = trunc i128 %1 to i64
+  ret i64 %2
+}
+
+define i64 @float_to_ui128(float %d) nounwind {
+; WIN64-LABEL: float_to_ui128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $40, %rsp
+; WIN64-NEXT:    callq __fixunssfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $40, %rsp
+; WIN64-NEXT:    retq
+  %1 = fptoui float %d to i128
+  %2 = trunc i128 %1 to i64
+  ret i64 %2
+}
+
+define i64 @longdouble_to_i128(x86_fp80* nocapture readonly %0) nounwind {
+; WIN64-LABEL: longdouble_to_i128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    fldt (%rcx)
+; WIN64-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __fixxfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load x86_fp80, x86_fp80* %0, align 16
+  %3 = fptosi x86_fp80 %2 to i128
+  %4 = trunc i128 %3 to i64
+  ret i64 %4
+}
+
+define i64 @longdouble_to_ui128(x86_fp80* nocapture readonly %0) nounwind {
+; WIN64-LABEL: longdouble_to_ui128:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    fldt (%rcx)
+; WIN64-NEXT:    fstpt {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __fixunsxfti
+; WIN64-NEXT:    movq %xmm0, %rax
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load x86_fp80, x86_fp80* %0, align 16
+  %3 = fptoui x86_fp80 %2 to i128
+  %4 = trunc i128 %3 to i64
+  ret i64 %4
+}
+
+define double @i128_to_double(i128* nocapture readonly %0) nounwind {
+; WIN64-LABEL: i128_to_double:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    movaps (%rcx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __floattidf
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = sitofp i128 %2 to double
+  ret double %3
+}
+
+define double @ui128_to_double(i128* nocapture readonly %0) nounwind {
+; WIN64-LABEL: ui128_to_double:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    movaps (%rcx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __floatuntidf
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = uitofp i128 %2 to double
+  ret double %3
+}
+
+define float @i128_to_float(i128* nocapture readonly %0) nounwind {
+; WIN64-LABEL: i128_to_float:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    movaps (%rcx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __floattisf
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = sitofp i128 %2 to float
+  ret float %3
+}
+
+define float @ui128_to_float(i128* nocapture readonly %0) nounwind {
+; WIN64-LABEL: ui128_to_float:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    subq $56, %rsp
+; WIN64-NEXT:    movaps (%rcx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    callq __floatuntisf
+; WIN64-NEXT:    addq $56, %rsp
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = uitofp i128 %2 to float
+  ret float %3
+}
+
+define void @i128_to_longdouble(x86_fp80* noalias nocapture sret(x86_fp80) align 16 %agg.result, i128* nocapture readonly %0) nounwind {
+; WIN64-LABEL: i128_to_longdouble:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rsi
+; WIN64-NEXT:    subq $64, %rsp
+; WIN64-NEXT:    movq %rcx, %rsi
+; WIN64-NEXT:    movaps (%rdx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT:    callq __floattixf
+; WIN64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    fstpt (%rsi)
+; WIN64-NEXT:    movq %rsi, %rax
+; WIN64-NEXT:    addq $64, %rsp
+; WIN64-NEXT:    popq %rsi
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = sitofp i128 %2 to x86_fp80
+  store x86_fp80 %3, x86_fp80* %agg.result, align 16
+  ret void
+}
+
+define void @ui128_to_longdouble(x86_fp80* noalias nocapture sret(x86_fp80) align 16 %agg.result, i128* nocapture readonly %0) nounwind {
+; WIN64-LABEL: ui128_to_longdouble:
+; WIN64:       # %bb.0:
+; WIN64-NEXT:    pushq %rsi
+; WIN64-NEXT:    subq $64, %rsp
+; WIN64-NEXT:    movq %rcx, %rsi
+; WIN64-NEXT:    movaps (%rdx), %xmm0
+; WIN64-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rcx
+; WIN64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; WIN64-NEXT:    callq __floatuntixf
+; WIN64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; WIN64-NEXT:    fstpt (%rsi)
+; WIN64-NEXT:    movq %rsi, %rax
+; WIN64-NEXT:    addq $64, %rsp
+; WIN64-NEXT:    popq %rsi
+; WIN64-NEXT:    retq
+  %2 = load i128, i128* %0, align 16
+  %3 = uitofp i128 %2 to x86_fp80
+  store x86_fp80 %3, x86_fp80* %agg.result, align 16
+  ret void
+}
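
Reviewer note (not part of the patch): both lowerings rely on the same Win64 libcall convention. An i128 result comes back from the conversion builtins in xmm0, which LowerWin64_FP_TO_INT128 models as MVT::v2i64 and bitcasts to i128; an i128 argument does not fit the regular integer-register convention, so LowerWin64_INT128_TO_FP spills it to a 16-byte-aligned stack slot and passes its address. A minimal sketch of that convention as C prototypes (the builtin names match the calls checked in the tests above; the vector and pointer shapes below are how this patch models the ABI, not how compiler-rt declares these functions):

    /* xmm0 modeled as two 64-bit lanes */
    typedef long long v2i64 __attribute__((vector_size(16)));

    v2i64 __fixdfti(double a);          /* fp -> i128: result in xmm0 */
    double __floattidf(const v2i64 *a); /* i128 -> fp: argument passed indirectly */

This is also why the fp-to-int tests check "movq %xmm0, %rax" (extracting the low half of the returned i128) and the int-to-fp tests check a movaps spill followed by a leaq of the slot's address into %rcx.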