diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -25249,6 +25249,81 @@
 modes.


+'``llvm.get.fpenv``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <integer_type> @llvm.get.fpenv()
+
+Overview:
+"""""""""
+
+The '``llvm.get.fpenv``' intrinsic returns bits of the current floating-point
+environment. The return value type is platform-specific.
+
+Semantics:
+""""""""""
+
+The '``llvm.get.fpenv``' intrinsic reads the current floating-point environment
+and returns it as an integer value.
+
+
+'``llvm.set.fpenv``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.set.fpenv(<integer_type> <val>)
+
+Overview:
+"""""""""
+
+The '``llvm.set.fpenv``' intrinsic sets the current floating-point environment.
+
+Arguments:
+""""""""""
+
+The argument is an integer representing the new floating-point environment. The
+integer type is platform-specific.
+
+Semantics:
+""""""""""
+
+The '``llvm.set.fpenv``' intrinsic sets the current floating-point environment
+to the state specified by the argument. The state may have been obtained by a
+previous call to '``llvm.get.fpenv``' or synthesised in a platform-dependent way.
+
+
+'``llvm.reset.fpenv``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare void @llvm.reset.fpenv()
+
+Overview:
+"""""""""
+
+The '``llvm.reset.fpenv``' intrinsic restores the default floating-point environment.
+
+Semantics:
+""""""""""
+
+The '``llvm.reset.fpenv``' intrinsic sets the current floating-point environment
+to the default state. It is similar to the call 'fesetenv(FE_DFL_ENV)', except it
+does not return any value.
+ + Floating-Point Test Intrinsics ------------------------------ diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -971,6 +971,30 @@ /// FSINCOS - Compute both fsin and fcos as a single operation. FSINCOS, + /// Gets the current floating-point environment. The first operand is a token + /// chain. The results are FP environment, represented by an integer value, + /// and a token chain. + GET_FPENV, + + /// Sets the current floating-point environment. The first operand is a token + /// chain, the second is FP environment, represented by an integer value. The + /// result is a token chain. + SET_FPENV, + + /// Set floating-point environment to default state. The first operand and the + /// result are token chains. + RESET_FPENV, + + /// Gets the current floating-point environment. The first operand is a token + /// chain, the second is a pointer to memory, where FP environment is stored + /// to. The result is a token chain. + GET_FPENV_MEM, + + /// Sets the current floating point environment. The first operand is a token + /// chain, the second is a pointer to memory, where FP environment is loaded + /// from. The result is a token chain. 
+ SET_FPENV_MEM, + /// LOAD and STORE have token chains as their first operand, then the same /// operands as an LLVM load/store instruction, then an offset node that /// is added / subtracted from the base pointer to form the address (for diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -1582,6 +1582,11 @@ ISD::MemIndexType IndexType, bool IsTruncating = false); + SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO); + SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO); + /// Construct a node to track a Value* through the backend. SDValue getSrcValue(const Value *v); @@ -2344,6 +2349,9 @@ } } + SDValue makeStateFunctionCall(unsigned LibFunc, SDValue Ptr, SDValue InChain, + const SDLoc &DLoc); + private: void InsertNode(SDNode *N); bool RemoveNodeFromCSEMaps(SDNode *N); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1438,6 +1438,8 @@ case ISD::VP_SCATTER: case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + case ISD::GET_FPENV_MEM: + case ISD::SET_FPENV_MEM: return true; default: return N->isMemIntrinsic() || N->isTargetMemoryOpcode(); @@ -2900,6 +2902,23 @@ } }; +class FPStateAccessSDNode : public MemSDNode { +public: + friend class SelectionDAG; + + FPStateAccessSDNode(unsigned NodeTy, unsigned Order, const DebugLoc &dl, + SDVTList VTs, EVT MemVT, MachineMemOperand *MMO) + : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { + assert((NodeTy == ISD::GET_FPENV_MEM || NodeTy == ISD::SET_FPENV_MEM) && + "Expected FP state access node"); + } + + static bool classof(const SDNode *N) { + return N->getOpcode() == ISD::GET_FPENV_MEM || + 
N->getOpcode() == ISD::SET_FPENV_MEM; + } +}; + /// An SDNode that represents everything that will be needed /// to construct a MachineInstr. These nodes are created during the /// instruction selection proper phase. diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1071,6 +1071,9 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in { def int_get_rounding : DefaultAttrsIntrinsic<[llvm_i32_ty], []>; def int_set_rounding : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>; + def int_get_fpenv : DefaultAttrsIntrinsic<[llvm_anyint_ty], []>; + def int_set_fpenv : DefaultAttrsIntrinsic<[], [llvm_anyint_ty]>; + def int_reset_fpenv : DefaultAttrsIntrinsic<[], []>; } //===--------------- Floating Point Properties ----------------------------===// diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def --- a/llvm/include/llvm/IR/RuntimeLibcalls.def +++ b/llvm/include/llvm/IR/RuntimeLibcalls.def @@ -280,6 +280,10 @@ HANDLE_LIBCALL(LLRINT_F128, "llrintl") HANDLE_LIBCALL(LLRINT_PPCF128, "llrintl") +// Floating point environment +HANDLE_LIBCALL(FEGETENV, "fegetenv") +HANDLE_LIBCALL(FESETENV, "fesetenv") + // Conversion HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq") HANDLE_LIBCALL(FPEXT_F64_PPCF128, "__gcc_dtoq") diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -999,6 +999,10 @@ if (Action != TargetLowering::Promote) Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; + case ISD::SET_FPENV: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()); + break; case ISD::FP_TO_FP16: case ISD::FP_TO_BF16: case ISD::SINT_TO_FP: @@ -4460,6 +4464,29 @@ break; } break; + case ISD::RESET_FPENV: { + // It is 
legalized to call 'fesetenv(FE_DFL_ENV)'. On most targets
+    // FE_DFL_ENV is defined as '((const fenv_t *) -1)' in glibc.
+    SDValue Ptr = DAG.getIntPtrConstant(-1LL, dl);
+    SDValue Chain = Node->getOperand(0);
+    Results.push_back(
+        DAG.makeStateFunctionCall(RTLIB::FESETENV, Ptr, Chain, dl));
+    break;
+  }
+  case ISD::GET_FPENV_MEM: {
+    SDValue Chain = Node->getOperand(0);
+    SDValue EnvPtr = Node->getOperand(1);
+    Results.push_back(
+        DAG.makeStateFunctionCall(RTLIB::FEGETENV, EnvPtr, Chain, dl));
+    break;
+  }
+  case ISD::SET_FPENV_MEM: {
+    SDValue Chain = Node->getOperand(0);
+    SDValue EnvPtr = Node->getOperand(1);
+    Results.push_back(
+        DAG.makeStateFunctionCall(RTLIB::FESETENV, EnvPtr, Chain, dl));
+    break;
+  }
   }

   // Replace the original node with the legalized result.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -9222,6 +9222,60 @@
   return V;
 }

+SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
+                                  EVT MemVT, MachineMemOperand *MMO) {
+  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Ops[] = {Chain, Ptr};
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::GET_FPENV_MEM, VTs, Ops);
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
+      ISD::GET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  ID.AddInteger(MMO->getFlags());
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+    return SDValue(E, 0);
+
+  auto *N = newSDNode<FPStateAccessSDNode>(ISD::GET_FPENV_MEM, dl.getIROrder(),
+                                           dl.getDebugLoc(), VTs, MemVT, MMO);
+  createOperands(N, Ops);
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
+SDValue SelectionDAG::getSetFPEnv(SDValue
Chain, const SDLoc &dl, SDValue Ptr,
+                                  EVT MemVT, MachineMemOperand *MMO) {
+  assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+  SDVTList VTs = getVTList(MVT::Other);
+  SDValue Ops[] = {Chain, Ptr};
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::SET_FPENV_MEM, VTs, Ops);
+  ID.AddInteger(MemVT.getRawBits());
+  ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
+      ISD::SET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  ID.AddInteger(MMO->getFlags());
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+    return SDValue(E, 0);
+
+  auto *N = newSDNode<FPStateAccessSDNode>(ISD::SET_FPENV_MEM, dl.getIROrder(),
+                                           dl.getDebugLoc(), VTs, MemVT, MMO);
+  createOperands(N, Ops);
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
 SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
   // select undef, T, F --> T (if T is a constant), otherwise F
   // select, ?, undef, F --> F
@@ -12342,6 +12396,38 @@
   }
 }

+/// Helper used to make a call to a library function that has one argument of
+/// pointer type.
+///
+/// Such functions include 'fegetmode', 'fesetenv' and some others, which are
+/// used to get or set floating-point state. They have one argument of pointer
+/// type, which points to the memory region containing bits of the
+/// floating-point state. The value returned by such function is ignored in the
+/// created call.
+///
+/// \param LibFunc Reference to library function (value of RTLIB::Libcall).
+/// \param Ptr Pointer used to save/load state.
+/// \param InChain Incoming token chain.
+/// \returns Outgoing chain token.
+SDValue SelectionDAG::makeStateFunctionCall(unsigned LibFunc, SDValue Ptr,
+                                            SDValue InChain,
+                                            const SDLoc &DLoc) {
+  assert(InChain.getValueType() == MVT::Other && "Expected token chain");
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+  Entry.Node = Ptr;
+  Entry.Ty = Ptr.getValueType().getTypeForEVT(*getContext());
+  Args.push_back(Entry);
+  RTLIB::Libcall LC = static_cast<RTLIB::Libcall>(LibFunc);
+  SDValue Callee = getExternalSymbol(TLI->getLibcallName(LC),
+                                     TLI->getPointerTy(getDataLayout()));
+  TargetLowering::CallLoweringInfo CLI(*this);
+  CLI.setDebugLoc(DLoc).setChain(InChain).setLibCallee(
+      TLI->getLibcallCallingConv(LC), Type::getVoidTy(*getContext()), Callee,
+      std::move(Args));
+  return TLI->LowerCallTo(CLI).second;
+}
+
 void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
   assert(From && To && "Invalid SDNode; empty source SDValue?");
   auto I = SDEI.find(From);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6571,6 +6571,64 @@
     setValue(&I, V);
     return;
   }
+  case Intrinsic::get_fpenv: {
+    const DataLayout &DLayout = DAG.getDataLayout();
+    EVT EnvVT = TLI.getValueType(DLayout, I.getType());
+    Align TempAlign = DAG.getEVTAlign(EnvVT);
+    SDValue Chain = getRoot();
+    // Use GET_FPENV if it is legal or custom. Otherwise use memory-based node
+    // and temporary storage in stack.
+    if (TLI.isOperationLegalOrCustom(ISD::GET_FPENV, EnvVT)) {
+      Res = DAG.getNode(
+          ISD::GET_FPENV, sdl,
+          DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()),
+                        MVT::Other),
+          Chain);
+    } else {
+      SDValue Temp = DAG.CreateStackTemporary(EnvVT, TempAlign.value());
+      int SPFI = cast<FrameIndexSDNode>(Temp.getNode())->getIndex();
+      auto MPI =
+          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+      MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+          MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize,
+          TempAlign);
+      Chain = DAG.getGetFPEnv(Chain, sdl, Temp, EnvVT, MMO);
+      Res = DAG.getLoad(EnvVT, sdl, Chain, Temp, MPI);
+    }
+    setValue(&I, Res);
+    DAG.setRoot(Res.getValue(1));
+    return;
+  }
+  case Intrinsic::set_fpenv: {
+    const DataLayout DLayout = DAG.getDataLayout();
+    SDValue Env = getValue(I.getArgOperand(0));
+    EVT EnvVT = Env.getValueType();
+    Align TempAlign = DAG.getEVTAlign(EnvVT);
+    SDValue Chain = getRoot();
+    // If SET_FPENV is custom or legal, use it. Otherwise load the
+    // environment from memory.
+    if (TLI.isOperationLegalOrCustom(ISD::SET_FPENV, EnvVT)) {
+      Chain = DAG.getNode(ISD::SET_FPENV, sdl, MVT::Other, Chain, Env);
+    } else {
+      // Allocate space in stack, copy environment bits into it and use this
+      // memory in SET_FPENV_MEM.
+      SDValue Temp = DAG.CreateStackTemporary(EnvVT, TempAlign.value());
+      int SPFI = cast<FrameIndexSDNode>(Temp.getNode())->getIndex();
+      auto MPI =
+          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+      Chain = DAG.getStore(Chain, sdl, Env, Temp, MPI, TempAlign,
+                           MachineMemOperand::MOStore);
+      MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+          MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize,
+          TempAlign);
+      Chain = DAG.getSetFPEnv(Chain, sdl, Temp, EnvVT, MMO);
+    }
+    DAG.setRoot(Chain);
+    return;
+  }
+  case Intrinsic::reset_fpenv:
+    DAG.setRoot(DAG.getNode(ISD::RESET_FPENV, sdl, MVT::Other, getRoot()));
+    return;
   case Intrinsic::pcmarker: {
     SDValue Tmp = getValue(I.getArgOperand(0));
     DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -431,6 +431,11 @@
   // Floating point environment manipulation
   case ISD::GET_ROUNDING:               return "get_rounding";
   case ISD::SET_ROUNDING:               return "set_rounding";
+  case ISD::GET_FPENV:                  return "get_fpenv";
+  case ISD::SET_FPENV:                  return "set_fpenv";
+  case ISD::RESET_FPENV:                return "reset_fpenv";
+  case ISD::GET_FPENV_MEM:              return "get_fpenv_mem";
+  case ISD::SET_FPENV_MEM:              return "set_fpenv_mem";

   // Bit manipulation
   case ISD::ABS:                        return "abs";
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -879,6 +879,11 @@
 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...)                                   \
     setOperationAction(ISD::SDOPC, VT, Expand);
 #include "llvm/IR/VPIntrinsics.def"
+
+    // FP environment operations default to expand.
+ setOperationAction(ISD::GET_FPENV, VT, Expand); + setOperationAction(ISD::SET_FPENV, VT, Expand); + setOperationAction(ISD::RESET_FPENV, VT, Expand); } // Most targets ignore the @llvm.prefetch intrinsic. @@ -909,6 +914,9 @@ setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand); + + setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Expand); + setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Expand); } MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, diff --git a/llvm/test/CodeGen/ARM/fpenv.ll b/llvm/test/CodeGen/ARM/fpenv.ll --- a/llvm/test/CodeGen/ARM/fpenv.ll +++ b/llvm/test/CodeGen/ARM/fpenv.ll @@ -61,5 +61,59 @@ ret void } +define i32 @get_fpenv_01() #0 { +; CHECK-LABEL: get_fpenv_01: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: add r0, sp, #4 +; CHECK-NEXT: bl fegetenv +; CHECK-NEXT: ldr r0, [sp, #4] +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr +entry: + %fpenv = call i32 @llvm.get.fpenv.i32() + ret i32 %fpenv +} + +define void @set_fpenv_01(i32 %fpenv) #0 { +; CHECK-LABEL: set_fpenv_01: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, sp, #8 +; CHECK-NEXT: str r0, [sp, #4] +; CHECK-NEXT: add r0, sp, #4 +; CHECK-NEXT: bl fesetenv +; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr +entry: + call void @llvm.set.fpenv.i32(i32 %fpenv) + ret void +} + +define void @reset_fpenv_01() #0 { +; CHECK-LABEL: reset_fpenv_01: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: mvn r0, #0 +; CHECK-NEXT: bl fesetenv +; CHECK-NEXT: pop {r11, lr} +; CHECK-NEXT: mov pc, lr +entry: + call void @llvm.reset.fpenv() + ret void +} + +attributes #0 = { nounwind "use-soft-float"="true" 
} declare void @llvm.set.rounding(i32) +declare i32 @llvm.get.fpenv.i32() +declare void @llvm.set.fpenv.i32(i32 %fpenv) +declare void @llvm.reset.fpenv() diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll --- a/llvm/test/CodeGen/X86/fpenv.ll +++ b/llvm/test/CodeGen/X86/fpenv.ll @@ -4,6 +4,9 @@ ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s -check-prefix=X64 declare void @llvm.set.rounding(i32 %x) +declare i256 @llvm.get.fpenv.i256() +declare void @llvm.set.fpenv.i256(i256 %fpenv) +declare void @llvm.reset.fpenv() define void @func_01() nounwind { ; X86-NOSSE-LABEL: func_01: @@ -242,3 +245,230 @@ call void @llvm.set.rounding(i32 %x) ; Downward ret void } + +define void @get_fpenv_01(ptr %ptr) #0 { +; X86-NOSSE-LABEL: get_fpenv_01: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: pushl %ebx +; X86-NOSSE-NEXT: pushl %edi +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: subl $60, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NOSSE-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll fegetenv +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, 24(%esi) +; X86-NOSSE-NEXT: movl %eax, 28(%esi) +; X86-NOSSE-NEXT: movl %ebp, 16(%esi) +; X86-NOSSE-NEXT: movl %ebx, 20(%esi) +; X86-NOSSE-NEXT: movl %edi, 8(%esi) +; X86-NOSSE-NEXT: movl %edx, 12(%esi) +; X86-NOSSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOSSE-NEXT: 
movl %eax, (%esi) +; X86-NOSSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOSSE-NEXT: movl %eax, 4(%esi) +; X86-NOSSE-NEXT: addl $60, %esp +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: popl %edi +; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: get_fpenv_01: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl %esi +; X86-SSE-NEXT: subl $60, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, (%esp) +; X86-SSE-NEXT: calll fegetenv +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: movl %ecx, 24(%esi) +; X86-SSE-NEXT: movl %eax, 28(%esi) +; X86-SSE-NEXT: movl %ebp, 16(%esi) +; X86-SSE-NEXT: movl %ebx, 20(%esi) +; X86-SSE-NEXT: movl %edi, 8(%esi) +; X86-SSE-NEXT: movl %edx, 12(%esi) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl %eax, (%esi) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl %eax, 4(%esi) +; X86-SSE-NEXT: addl $60, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl +; +; X64-LABEL: get_fpenv_01: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $32, %rsp +; X64-NEXT: movq %rdi, %rbx +; X64-NEXT: movq %rsp, %rdi +; X64-NEXT: callq fegetenv@PLT +; X64-NEXT: movq (%rsp), %rax +; X64-NEXT: 
movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: movq %rsi, 16(%rbx) +; X64-NEXT: movq %rdx, 24(%rbx) +; X64-NEXT: movq %rax, (%rbx) +; X64-NEXT: movq %rcx, 8(%rbx) +; X64-NEXT: addq $32, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: retq +entry: + %env = call i256 @llvm.get.fpenv.i256() + store i256 %env, ptr %ptr + ret void +} + +define void @set_fpenv_01(ptr %ptr) #0 { +; X86-NOSSE-LABEL: set_fpenv_01: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: pushl %ebp +; X86-NOSSE-NEXT: pushl %ebx +; X86-NOSSE-NEXT: pushl %edi +; X86-NOSSE-NEXT: pushl %esi +; X86-NOSSE-NEXT: subl $44, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl (%eax), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOSSE-NEXT: movl 4(%eax), %edx +; X86-NOSSE-NEXT: movl 12(%eax), %esi +; X86-NOSSE-NEXT: movl 8(%eax), %edi +; X86-NOSSE-NEXT: movl 20(%eax), %ebx +; X86-NOSSE-NEXT: movl 16(%eax), %ebp +; X86-NOSSE-NEXT: movl 28(%eax), %ecx +; X86-NOSSE-NEXT: movl 24(%eax), %eax +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl %eax, (%esp) +; X86-NOSSE-NEXT: calll fesetenv +; X86-NOSSE-NEXT: addl $44, %esp +; X86-NOSSE-NEXT: popl %esi +; X86-NOSSE-NEXT: popl %edi +; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: set_fpenv_01: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: pushl %ebp +; X86-SSE-NEXT: pushl %ebx +; X86-SSE-NEXT: pushl %edi +; X86-SSE-NEXT: pushl 
%esi +; X86-SSE-NEXT: subl $44, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl (%eax), %ecx +; X86-SSE-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-SSE-NEXT: movl 4(%eax), %edx +; X86-SSE-NEXT: movl 12(%eax), %esi +; X86-SSE-NEXT: movl 8(%eax), %edi +; X86-SSE-NEXT: movl 20(%eax), %ebx +; X86-SSE-NEXT: movl 16(%eax), %ebp +; X86-SSE-NEXT: movl 28(%eax), %ecx +; X86-SSE-NEXT: movl 24(%eax), %eax +; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl %eax, (%esp) +; X86-SSE-NEXT: calll fesetenv +; X86-SSE-NEXT: addl $44, %esp +; X86-SSE-NEXT: popl %esi +; X86-SSE-NEXT: popl %edi +; X86-SSE-NEXT: popl %ebx +; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: retl +; +; X64-LABEL: set_fpenv_01: +; X64: # %bb.0: # %entry +; X64-NEXT: subq $40, %rsp +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 24(%rdi), %rdx +; X64-NEXT: movq 16(%rdi), %rsi +; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq fesetenv@PLT +; X64-NEXT: addq $40, %rsp +; X64-NEXT: retq +entry: + %env = load i256, ptr %ptr + call void @llvm.set.fpenv.i256(i256 %env) + ret void +} + + +define void @reset_fpenv_01() #0 { +; X86-NOSSE-LABEL: reset_fpenv_01: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: subl $12, %esp +; X86-NOSSE-NEXT: movl $-1, (%esp) +; X86-NOSSE-NEXT: calll fesetenv +; X86-NOSSE-NEXT: addl $12, %esp +; 
X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: reset_fpenv_01: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: movl $-1, (%esp) +; X86-SSE-NEXT: calll fesetenv +; X86-SSE-NEXT: addl $12, %esp +; X86-SSE-NEXT: retl +; +; X64-LABEL: reset_fpenv_01: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rax +; X64-NEXT: movq $-1, %rdi +; X64-NEXT: callq fesetenv@PLT +; X64-NEXT: popq %rax +; X64-NEXT: retq +entry: + call void @llvm.reset.fpenv() + ret void +} + +attributes #0 = { nounwind "use-soft-float"="true" }