diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -779,6 +779,12 @@ // Store FP control world into i16 memory. FNSTCW16m, + // Store x87 FPU environment into memory. + FNSTENV, + + // Load x87 FPU environment from memory. + FLDENV, + /// This instruction implements FP_TO_SINT with the /// integer destination in memory and a FP reg source. This corresponds /// to the X86::FIST*m instructions and the rounding mode change stuff. It @@ -1511,6 +1517,9 @@ SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const; SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -332,6 +332,9 @@ setOperationAction(ISD::FREM , MVT::f80 , Expand); setOperationAction(ISD::FREM , MVT::f128 , Expand); setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); + setOperationAction(ISD::GET_FPENV , MVT::Other, Custom); + setOperationAction(ISD::SET_FPENV , MVT::Other, Custom); + setOperationAction(ISD::RESET_FPENV , MVT::Other, Custom); // Promote the i8 variants and force them on up to i32 which has a shorter // encoding. 
@@ -2007,6 +2010,7 @@
   setTargetDAGCombine(ISD::FP_EXTEND);
   setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
   setTargetDAGCombine(ISD::FP_ROUND);
+  setTargetDAGCombine(ISD::SET_FPENV);
 
   computeRegisterProperties(Subtarget.getRegisterInfo());
 
@@ -26479,6 +26483,253 @@
   return DAG.getMergeValues({RetVal, Chain}, DL);
 }
 
+/// Size in bytes of the x87 environment image accessed by FNSTENV/FLDENV.
+static constexpr unsigned X87StateSize = 28;
+/// Size in bytes of the whole FP environment: the x87 image followed by the
+/// MXCSR value stored at offset X87StateSize.
+static constexpr unsigned FPStateSize = 32;
+static constexpr unsigned FPStateSizeInBits = FPStateSize * 8;
+
+/// Lowers GET_FPENV node.
+///
+/// \param[in] Op GET_FPENV node to lower.
+/// \param[in] DAG Selection DAG in which the new nodes are created.
+/// \param[in] Store If not nullptr points to the STORE node that saves the
+/// environment read by the GET_FPENV node into memory.
+/// \returns The loaded environment value if \p Store is null, otherwise the
+/// chain that replaces the STORE node.
+///
+/// There are two main cases when this function is used: when GET_FPENV is
+/// lowered in selector, and when DAGCombiner works. They are distinguished by
+/// the value of Store. If lowering is made by DAGCombiner, Store is not null.
+static SDValue lowerGetFPEnv(SDValue Op, SelectionDAG &DAG,
+                             StoreSDNode *Store) {
+  assert(Op->getOpcode() == ISD::GET_FPENV && "Expected GET_FPENV node");
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+  EVT EnvTy = Op.getValueType();
+  assert(EnvTy.isInteger() && "FP environment must be an integer");
+  assert(EnvTy.getSizeInBits() == FPStateSizeInBits &&
+         "Unexpected FP environment size");
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+
+  SDValue Memory;
+  MachineMemOperand *MMO;
+  if (Store) {
+    // Save FP environment to store location.
+    assert(Store->getMemoryVT() == EnvTy &&
+           "STORE must write the whole environment");
+    assert(Store->getValue() == Op.getValue(0) &&
+           "STORE must save the result of GET_FPENV");
+    Memory = Store->getBasePtr();
+    MMO = Store->getMemOperand();
+  } else {
+    // Save FP environment to stack slot.
+    Align StkAlign = DAG.getDataLayout().getStackAlignment();
+    int SSFI =
+        MF.getFrameInfo().CreateStackObject(FPStateSize, StkAlign, false);
+    Memory = DAG.getFrameIndex(SSFI, PtrVT);
+    MMO = MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, SSFI),
+                                  MachineMemOperand::MOStore, X87StateSize,
+                                  StkAlign);
+  }
+
+  // Get x87 state.
+  Chain =
+      DAG.getMemIntrinsicNode(X86ISD::FNSTENV, DL, DAG.getVTList(MVT::Other),
+                              {Chain, Memory}, MVT::i32, MMO);
+  // FNSTENV changes the exception mask, so load back the stored environment.
+  MachineMemOperand::Flags NewFlags =
+      MachineMemOperand::MOLoad |
+      (MMO->getFlags() & ~MachineMemOperand::MOStore);
+  MMO = MF.getMachineMemOperand(MMO, NewFlags);
+  Chain = DAG.getMemIntrinsicNode(X86ISD::FLDENV, DL, DAG.getVTList(MVT::Other),
+                                  {Chain, Memory}, MVT::i32, MMO);
+
+  // If target supports SSE, get MXCSR as well.
+  if (DAG.getSubtarget().hasFeature(X86::FeatureSSE1)) {
+    SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Memory,
+                                    DAG.getConstant(X87StateSize, DL, PtrVT));
+    // Store MXCSR into memory.
+    Chain = DAG.getNode(
+        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+        DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32),
+        MXCSRAddr);
+  }
+
+  // The loaded environment may be used in operation other than the store, so
+  // load the environment into a value.
+  MMO = MF.getMachineMemOperand(MMO, 0, FPStateSize);
+  SDValue LoadedValue = DAG.getLoad(EnvTy, DL, Chain, Memory, MMO);
+
+  // If this is lowering of GET_FPENV, the loaded value replaces the original
+  // node.
+  if (!Store)
+    return LoadedValue;
+
+  // If this is DAG optimization, the returned value replaces STORE node. In
+  // this case all uses of GET_FPENV must be replaced here.
+  DAG.ReplaceAllUsesOfValueWith(Op, LoadedValue);
+  // Return the chain of the load rather than its input chain, so that memory
+  // operations chained after the replaced STORE stay ordered after the load.
+  return LoadedValue.getValue(1);
+}
+
+/// Emits the nodes that set the FP environment from its image in memory.
+///
+/// \param[in] Ptr Address of the memory that holds the environment.
+/// \param[in] Chain Input chain.
+/// \param[in] DL Location assigned to the created nodes.
+/// \param[in] MMO Memory operand attached to the FLDENV node.
+/// \param[in] DAG Selection DAG in which the new nodes are created.
+/// \returns The chain produced by the last created node.
+static SDValue lowerSetFPEnv(SDValue Ptr, SDValue Chain, const SDLoc &DL,
+                             MachineMemOperand *MMO, SelectionDAG &DAG) {
+  EVT PtrVT = Ptr.getValueType();
+  assert(PtrVT.isInteger() && PtrVT.getSizeInBits() < FPStateSizeInBits &&
+         "Expected the address of the environment, not its value");
+
+  // Load x87 FP environment.
+  Chain = DAG.getMemIntrinsicNode(X86ISD::FLDENV, DL, DAG.getVTList(MVT::Other),
+                                  {Chain, Ptr}, MVT::i32, MMO);
+
+  // If target supports SSE, set MXCSR as well.
+  if (DAG.getSubtarget().hasFeature(X86::FeatureSSE1)) {
+    // Get pointer to the MXCSR location in memory.
+    SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
+                                    DAG.getConstant(X87StateSize, DL, PtrVT));
+    // Load MXCSR from memory.
+    Chain = DAG.getNode(
+        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+        DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32),
+        MXCSRAddr);
+  }
+
+  return Chain;
+}
+
+/// Spills the FP environment value \p Env to a new stack slot and emits the
+/// nodes that set the FP environment from that slot.
+///
+/// \returns The chain produced by the last created node.
+static SDValue lowerSetFPEnvFromValue(SDValue Env, SDValue Chain,
+                                      const SDLoc &DL, SelectionDAG &DAG) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  EVT EnvTy = Env.getValueType();
+  assert(EnvTy.isInteger() && "FP environment must be an integer");
+  assert(EnvTy.getSizeInBits() == FPStateSizeInBits &&
+         "Unexpected FP environment size");
+
+  // Save the specified FP environment to stack slot.
+  Align StkAlign = DAG.getDataLayout().getStackAlignment();
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+  int SSFI = MF.getFrameInfo().CreateStackObject(FPStateSize, StkAlign, false);
+  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
+  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);
+  Chain = DAG.getStore(Chain, DL, Env, StackSlot, MPI);
+
+  // FLDENV only reads the slot, so its memory operand describes a load.
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MPI, MachineMemOperand::MOLoad, X87StateSize, StkAlign);
+  return lowerSetFPEnv(StackSlot, Chain, DL, MMO, DAG);
+}
+
+// Check if the GET_FPENV node can be merged with the given STORE that writes
+// the result of GET_FPENV to memory.
+// This is possible if the chain between GET_FPENV and STORE does not contain
+// load/store operations accessing the same memory that the STORE uses.
+static bool canStoreBeMergedWithGetFPEnv(StoreSDNode *Store, SDNode *FPEnv) {
+  // FIXME: This is the simplest case. More general implementation would scan
+  // the chain.
+  return Store->getChain() == SDValue(FPEnv, 1);
+}
+
+// Checks if the LOAD that reads FP state can be merged with SET_FPENV that uses
+// it.
+// This is possible if the chain between LOAD and SET_FPENV does not contain
+// load/store operations accessing the same memory that the load uses.
+static bool canLoadBeMergedWithSetFPEnv(LoadSDNode *Load, SDNode *SetEnv) {
+  // FIXME: This is the simplest case. More general implementation would scan
+  // the chain.
+  // Operand 0 of SET_FPENV is its input chain (operand 1 is the environment
+  // value), so the node must be chained directly to the output chain of LOAD.
+  SDValue Chain = SetEnv->getOperand(0);
+  assert(Chain.getValueType() == MVT::Other && "Expected chain operand");
+  return Chain == SDValue(Load, 1);
+}
+
+SDValue X86TargetLowering::LowerGET_FPENV(SDValue Op, SelectionDAG &DAG) const {
+  return lowerGetFPEnv(Op, DAG, nullptr);
+}
+
+SDValue X86TargetLowering::LowerSET_FPENV(SDValue Op, SelectionDAG &DAG) const {
+  // Operand 0 is the chain, operand 1 is the environment value. The value is
+  // spilled to memory because FLDENV and LDMXCSR are emitted with an address.
+  return lowerSetFPEnvFromValue(Op.getOperand(1), Op.getOperand(0), SDLoc(Op),
+                                DAG);
+}
+
+SDValue X86TargetLowering::LowerRESET_FPENV(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDLoc DL(Op);
+  SDValue Chain = Op.getOperand(0);
+
+  IntegerType *ItemTy = Type::getInt32Ty(*DAG.getContext());
+  ArrayType *FPEnvTy = ArrayType::get(ItemTy, 8);
+  SmallVector<Constant *, 8> FPEnvVals;
+
+  // x87 FPU Control Word: mask all floating-point exceptions, sets rounding to
+  // nearest. FPU precision is set to 53 bits on Windows and 64 bits otherwise
+  // for compatibility with glibc.
+  unsigned X87CW = Subtarget.isTargetWindowsMSVC() ? 0x27F : 0x37F;
+  FPEnvVals.push_back(ConstantInt::get(ItemTy, X87CW));
+  // NOTE(review): the zero words below presumably include the x87 tag word,
+  // and a zero tag marks a register as valid, not empty - confirm that this
+  // is intended.
+  Constant *Zero = ConstantInt::get(ItemTy, 0);
+  for (unsigned I = 0; I < 6; ++I)
+    FPEnvVals.push_back(Zero);
+
+  // MXCSR: mask all floating-point exceptions, sets rounding to nearest, clear
+  // all exceptions, sets DAZ and FTZ to 0.
+  FPEnvVals.push_back(ConstantInt::get(ItemTy, 0x1F80));
+  Constant *FPEnvBits = ConstantArray::get(FPEnvTy, FPEnvVals);
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+  SDValue Env = DAG.getConstantPool(FPEnvBits, PtrVT);
+  // The constant pool entry is only read, so the memory operand is a load.
+  MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MPI, MachineMemOperand::MOLoad, X87StateSize, Align(4));
+
+  return lowerSetFPEnv(Env, Chain, DL, MMO, DAG);
+}
+
+/// Combines SET_FPENV node.
+///
+/// If the environment is the result of a plain load to which the node is
+/// chained directly, it is set from the memory read by that load. Otherwise
+/// the environment value is spilled to a stack slot first.
+static SDValue combineSET_FPENV(SDNode *N, SelectionDAG &DAG) {
+  SDLoc DL(N);
+  SDValue Chain = N->getOperand(0);
+  SDValue FPEnv = N->getOperand(1);
+
+  // If the FP state is loaded from memory, that memory could be used in
+  // load state instructions directly. Extending, indexed, volatile and atomic
+  // loads are not merged because the emitted instructions would not
+  // reproduce their semantics.
+  if (ISD::isNormalLoad(FPEnv.getNode())) {
+    LoadSDNode *Ld = cast<LoadSDNode>(FPEnv);
+    if (Ld->isSimple() && canLoadBeMergedWithSetFPEnv(Ld, N))
+      return lowerSetFPEnv(Ld->getBasePtr(), Chain, DL, Ld->getMemOperand(),
+                           DAG);
+  }
+
+  return lowerSetFPEnvFromValue(FPEnv, Chain, DL, DAG);
+}
+
 /// Lower a vector CTLZ using native supported vector CTLZ instruction.
// // i8/i16 vector implemented using dword LZCNT vector instruction @@ -29622,6 +29843,9 @@ case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); + case ISD::GET_FPENV: return LowerGET_FPENV(Op, DAG); + case ISD::SET_FPENV: return LowerSET_FPENV(Op, DAG); + case ISD::RESET_FPENV: return LowerRESET_FPENV(Op, DAG); case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG); case ISD::CTTZ: @@ -30588,6 +30812,11 @@ Results.push_back(V); return; } + case ISD::GET_FPENV: { + SDValue Res = LowerGET_FPENV(SDValue(N, 0), DAG); + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + } } } @@ -30675,6 +30904,8 @@ NODE_NAME_CASE(EH_RETURN) NODE_NAME_CASE(TC_RETURN) NODE_NAME_CASE(FNSTCW16m) + NODE_NAME_CASE(FNSTENV) + NODE_NAME_CASE(FLDENV) NODE_NAME_CASE(LCMPXCHG_DAG) NODE_NAME_CASE(LCMPXCHG8_DAG) NODE_NAME_CASE(LCMPXCHG16_DAG) @@ -44855,6 +45086,32 @@ } } + if (StoredVal->getOpcode() == ISD::GET_FPENV && + canStoreBeMergedWithGetFPEnv(St, StoredVal.getNode())) { + // Scan users of the FP environment. Stores are replaced with memcpy. All + // other users should reference the loaded value. 
+ for (SDNode::use_iterator UI = StoredVal->use_begin(), + UE = StoredVal->use_end(); + UI != UE;) { + SDNode *User = *UI++; + if (User == N) + continue; + if (User->getOpcode() == ISD::STORE) { + StoreSDNode *StUser = cast(User); + SDValue SizeNode = + DAG.getConstant(StUser->getMemoryVT().getStoreSize(), dl, MVT::i32); + SDValue Copy = + DAG.getMemcpy(StUser->getChain(), dl, StUser->getBasePtr(), + St->getBasePtr(), SizeNode, StUser->getAlign(), + /*isVolatile*/ false, /*AlwaysInline=*/true, + /*isTailCall*/ false, StUser->getPointerInfo(), + St->getPointerInfo()); + DAG.ReplaceAllUsesWith(StUser, Copy.getNode()); + } + } + return lowerGetFPEnv(StoredVal, DAG, St); + } + // Turn load->store of MMX types into GPR load/stores. This avoids clobbering // the FP state in cases where an emms may be missing. // A preferable solution to the general problem is to figure out the right @@ -49633,6 +49890,7 @@ case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget); case X86ISD::VBROADCAST_LOAD: return combineVBROADCAST_LOAD(N, DAG, DCI); case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG); + case ISD::SET_FPENV: return combineSET_FPENV(N, DAG); } return SDValue(); diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -23,7 +23,7 @@ def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>; def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; -def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; @@ -35,9 +35,15 @@ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; -def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore, +def X86fp_cwd_get16 : 
SDNode<"X86ISD::FNSTCW16m", SDTX86FPEnv, [SDNPHasChain, SDNPMayStore, SDNPSideEffect, SDNPMemOperand]>; +def X86fpenv_get : SDNode<"X86ISD::FNSTENV", SDTX86FPEnv, + [SDNPHasChain, SDNPMayStore, SDNPSideEffect, + SDNPMemOperand]>; +def X86fpenv_set : SDNode<"X86ISD::FLDENV", SDTX86FPEnv, + [SDNPHasChain, SDNPMayLoad, SDNPSideEffect, + SDNPMemOperand]>; def X86fstf32 : PatFrag<(ops node:$val, node:$ptr), (X86fst node:$val, node:$ptr), [{ @@ -392,12 +398,14 @@ let SchedRW = [WriteMicrocoded] in { let Defs = [FPSW, FPCW], mayLoad = 1 in { -def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src">; +def FLDENVm : I<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src", + [(X86fpenv_set addr:$src)]>; def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins anymem:$src), "frstor\t$src">; } let Defs = [FPSW, FPCW], Uses = [FPSW, FPCW], mayStore = 1 in { -def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst">; +def FSTENVm : I<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst", + [(X86fpenv_get addr:$dst)]>; def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins anymem:$dst), "fnsave\t$dst">; } diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fpenv.ll @@ -0,0 +1,214 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,X86-NOSSE +; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,X86-SSE +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,X64 + + +declare i256 @llvm.get.fpenv.i256() +declare void @llvm.set.fpenv.i256(i256 %fpenv) +declare void @llvm.reset.fpenv() + + +; FPEnv is written to the specified location in memory. There must be no +; uses of temporary memory. 
+ +define void @func_01(i8* %fpenv) { +; X86-NOSSE-LABEL: func_01: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: fnstenv (%eax) +; X86-NOSSE-NEXT: fldenv (%eax) +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: func_01: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: fnstenv (%eax) +; X86-SSE-NEXT: fldenv (%eax) +; X86-SSE-NEXT: stmxcsr 28(%eax) +; X86-SSE-NEXT: retl +; +; X64-LABEL: func_01: +; X64: # %bb.0: # %entry +; X64-NEXT: fnstenv (%rdi) +; X64-NEXT: fldenv (%rdi) +; X64-NEXT: stmxcsr 28(%rdi) +; X64-NEXT: retq +entry: + %ptr = bitcast i8* %fpenv to i256* + %env = call i256 @llvm.get.fpenv.i256() + store i256 %env, i256* %ptr + ret void +} + +; FPEnv is written to two location in memory. Hardware must be accessed only +; once, the second location is set by memcpy. + +define void @func_02(i8* %fpenv, i8* %fpenv2) { +; X86-NOSSE-LABEL: func_02: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: fnstenv (%ecx) +; X86-NOSSE-NEXT: fldenv (%ecx) +; X86-NOSSE-NEXT: movl 28(%ecx), %edx +; X86-NOSSE-NEXT: movl %edx, 28(%eax) +; X86-NOSSE-NEXT: movl 24(%ecx), %edx +; X86-NOSSE-NEXT: movl %edx, 24(%eax) +; X86-NOSSE-NEXT: movl 20(%ecx), %edx +; X86-NOSSE-NEXT: movl %edx, 20(%eax) +; X86-NOSSE-NEXT: movl 16(%ecx), %edx +; X86-NOSSE-NEXT: movl %edx, 16(%eax) +; X86-NOSSE-NEXT: movl 12(%ecx), %edx +; X86-NOSSE-NEXT: movl %edx, 12(%eax) +; X86-NOSSE-NEXT: movl 8(%ecx), %edx +; X86-NOSSE-NEXT: movl %edx, 8(%eax) +; X86-NOSSE-NEXT: movl (%ecx), %edx +; X86-NOSSE-NEXT: movl 4(%ecx), %ecx +; X86-NOSSE-NEXT: movl %ecx, 4(%eax) +; X86-NOSSE-NEXT: movl %edx, (%eax) +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: func_02: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-SSE-NEXT: fnstenv (%ecx) +; X86-SSE-NEXT: fldenv (%ecx) 
+; X86-SSE-NEXT: stmxcsr 28(%ecx) +; X86-SSE-NEXT: movl 28(%ecx), %edx +; X86-SSE-NEXT: movl %edx, 28(%eax) +; X86-SSE-NEXT: movl 24(%ecx), %edx +; X86-SSE-NEXT: movl %edx, 24(%eax) +; X86-SSE-NEXT: movl 20(%ecx), %edx +; X86-SSE-NEXT: movl %edx, 20(%eax) +; X86-SSE-NEXT: movl 16(%ecx), %edx +; X86-SSE-NEXT: movl %edx, 16(%eax) +; X86-SSE-NEXT: movl 12(%ecx), %edx +; X86-SSE-NEXT: movl %edx, 12(%eax) +; X86-SSE-NEXT: movl 8(%ecx), %edx +; X86-SSE-NEXT: movl %edx, 8(%eax) +; X86-SSE-NEXT: movl (%ecx), %edx +; X86-SSE-NEXT: movl 4(%ecx), %ecx +; X86-SSE-NEXT: movl %ecx, 4(%eax) +; X86-SSE-NEXT: movl %edx, (%eax) +; X86-SSE-NEXT: retl +; +; X64-LABEL: func_02: +; X64: # %bb.0: # %entry +; X64-NEXT: fnstenv (%rdi) +; X64-NEXT: fldenv (%rdi) +; X64-NEXT: stmxcsr 28(%rdi) +; X64-NEXT: movq 24(%rdi), %rax +; X64-NEXT: movq %rax, 24(%rsi) +; X64-NEXT: movq 16(%rdi), %rax +; X64-NEXT: movq %rax, 16(%rsi) +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq %rcx, 8(%rsi) +; X64-NEXT: movq %rax, (%rsi) +; X64-NEXT: retq +entry: + %ptr = bitcast i8* %fpenv to i256* + %env = call i256 @llvm.get.fpenv.i256() + store i256 %env, i256* %ptr + %ptr2 = bitcast i8* %fpenv2 to i256* + store i256 %env, i256* %ptr2 + ret void +} + +define i32 @func_03() { +; X86-NOSSE-LABEL: func_03: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: subl $44, %esp +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 48 +; X86-NOSSE-NEXT: fnstenv (%esp) +; X86-NOSSE-NEXT: fldenv (%esp) +; X86-NOSSE-NEXT: movl (%esp), %eax +; X86-NOSSE-NEXT: addl $44, %esp +; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: func_03: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: subl $44, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 48 +; X86-SSE-NEXT: fnstenv (%esp) +; X86-SSE-NEXT: fldenv (%esp) +; X86-SSE-NEXT: stmxcsr {{[0-9]+}}(%esp) +; X86-SSE-NEXT: movl (%esp), %eax +; X86-SSE-NEXT: addl $44, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 4 +; X86-SSE-NEXT: retl +; +; 
X64-LABEL: func_03: +; X64: # %bb.0: # %entry +; X64-NEXT: fnstenv -{{[0-9]+}}(%rsp) +; X64-NEXT: fldenv -{{[0-9]+}}(%rsp) +; X64-NEXT: stmxcsr -{{[0-9]+}}(%rsp) +; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax +; X64-NEXT: retq +entry: + %env = call i256 @llvm.get.fpenv.i256() + %t = trunc i256 %env to i32 + ret i32 %t +} + +define void @func_04(i8* %fpenv) { +; X86-NOSSE-LABEL: func_04: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: fldenv (%eax) +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: func_04: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: fldenv (%eax) +; X86-SSE-NEXT: ldmxcsr 28(%eax) +; X86-SSE-NEXT: retl +; +; X64-LABEL: func_04: +; X64: # %bb.0: # %entry +; X64-NEXT: fldenv (%rdi) +; X64-NEXT: ldmxcsr 28(%rdi) +; X64-NEXT: retq +entry: + %ptr = bitcast i8* %fpenv to i256* + %f = load i256, i256* %ptr + call void @llvm.set.fpenv.i256(i256 %f) + ret void +} + +; CHECK: .LCPI{{.*}}: +; CHECK: .long 895 # 0x37f +; CHECK: .long 0 # 0x0 +; CHECK: .long 0 # 0x0 +; CHECK: .long 0 # 0x0 +; CHECK: .long 0 # 0x0 +; CHECK: .long 0 # 0x0 +; CHECK: .long 0 # 0x0 +; CHECK: .long 8064 # 0x1f80 + +define void @func_05() { +; X86-NOSSE-LABEL: func_05: +; X86-NOSSE: # %bb.0: # %entry +; X86-NOSSE-NEXT: fldenv {{\.LCPI.*}} +; X86-NOSSE-NEXT: retl +; +; X86-SSE-LABEL: func_05: +; X86-SSE: # %bb.0: # %entry +; X86-SSE-NEXT: fldenv {{\.LCPI.*}} +; X86-SSE-NEXT: ldmxcsr {{\.LCPI.*}}+28 +; X86-SSE-NEXT: retl +; +; X64-LABEL: func_05: +; X64: # %bb.0: # %entry +; X64-NEXT: fldenv {{.*}}(%rip) +; X64-NEXT: ldmxcsr {{\.LCPI.*}}+{{.*}}(%rip) +; X64-NEXT: retq +entry: + call void @llvm.reset.fpenv() + ret void +} +