diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -774,6 +774,12 @@
     // Store FP control world into i16 memory.
     FNSTCW16m,
 
+    // Store x87 FPU environment into memory.
+    FNSTENV,
+
+    // Load x87 FPU environment from memory.
+    FLDENV,
+
     /// This instruction implements FP_TO_SINT with the
     /// integer destination in memory and a FP reg source.  This corresponds
     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -30240,6 +30240,8 @@
   NODE_NAME_CASE(EH_RETURN)
   NODE_NAME_CASE(TC_RETURN)
   NODE_NAME_CASE(FNSTCW16m)
+  NODE_NAME_CASE(FNSTENV)
+  NODE_NAME_CASE(FLDENV)
   NODE_NAME_CASE(LCMPXCHG_DAG)
   NODE_NAME_CASE(LCMPXCHG8_DAG)
   NODE_NAME_CASE(LCMPXCHG16_DAG)
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -23,7 +23,7 @@
 def SDTX86Fild      : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
 def SDTX86Fist      : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
 
-def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86FPEnv     : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
 def X86fld          : SDNode<"X86ISD::FLD", SDTX86Fld,
                              [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -35,9 +35,15 @@
                              [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst,
                           [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
-def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
+def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86FPEnv,
                              [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
                               SDNPMemOperand]>;
+def X86fpenv_get    : SDNode<"X86ISD::FNSTENV", SDTX86FPEnv,
+                             [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+                              SDNPMemOperand]>;
+def X86fpenv_set    : SDNode<"X86ISD::FLDENV", SDTX86FPEnv,
+                             [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+                              SDNPMemOperand]>;
 
 def X86fstf32 : PatFrag<(ops node:$val, node:$ptr),
                         (X86fst node:$val, node:$ptr), [{
@@ -392,13 +398,15 @@
 
 let SchedRW = [WriteMicrocoded] in {
 let Defs = [FPSW, FPCW], mayLoad = 1 in {
-def FLDENVm  : FPI<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src">;
-def FRSTORm  : FPI<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst">;
+def FLDENVm  : I<0xD9, MRM4m, (outs), (ins f32mem:$src), "fldenv\t$src",
+                 [(X86fpenv_set addr:$src)]>;
+def FRSTORm  : I<0xDD, MRM4m, (outs), (ins f32mem:$dst), "frstor\t$dst", []>;
 }
 
 let Defs = [FPSW, FPCW], Uses = [FPSW, FPCW], mayStore = 1 in {
-def FSTENVm  : FPI<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst">;
-def FSAVEm   : FPI<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst">;
+def FSTENVm  : I<0xD9, MRM6m, (outs), (ins f32mem:$dst), "fnstenv\t$dst",
+                 [(X86fpenv_get addr:$dst)]>;
+def FSAVEm   : I<0xDD, MRM6m, (outs), (ins f32mem:$dst), "fnsave\t$dst", []>;
 }
 
 let Uses = [FPSW], mayStore = 1 in
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.h b/llvm/lib/Target/X86/X86SelectionDAGInfo.h
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.h
@@ -37,6 +37,17 @@
                                   bool AlwaysInline,
                                   MachinePointerInfo DstPtrInfo,
                                   MachinePointerInfo SrcPtrInfo) const override;
+
+  SDValue emitTargetCodeForGetFPEnv(SelectionDAG &DAG, const SDLoc &DL,
+                                    SDValue Chain, SDValue Addr,
+                                    MachinePointerInfo PtrInfo) const override;
+
+  SDValue emitTargetCodeForSetFPEnv(SelectionDAG &DAG, const SDLoc &DL,
+                                    SDValue Chain, SDValue Addr,
+                                    MachinePointerInfo PtrInfo) const override;
+
+  SDValue emitTargetCodeForResetFPEnv(SelectionDAG &DAG, const SDLoc &DL,
+                                      SDValue Chain) const override;
 };
 
 }
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -19,6 +19,8 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IntrinsicsX86.h"
+#include "llvm/IR/Module.h"
 
 using namespace llvm;
 
@@ -316,3 +318,111 @@
 
   return SDValue();
 }
+
+// Size in bytes of the x87 environment image accessed by FNSTENV and FLDENV.
+// MXCSR is kept immediately after it.
+const unsigned X87StateSize = 28;
+
+/// Emit nodes that store the current floating-point environment to \p Addr:
+/// the x87 environment in the first X87StateSize bytes and, if SSE is
+/// available, MXCSR right after it. Returns the updated chain.
+SDValue X86SelectionDAGInfo::emitTargetCodeForGetFPEnv(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Addr,
+    MachinePointerInfo PtrInfo) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Get X87 state.
+  MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOStore, X87StateSize, Align(4));
+  Chain =
+      DAG.getMemIntrinsicNode(X86ISD::FNSTENV, DL, DAG.getVTList(MVT::Other),
+                              {Chain, Addr}, MVT::i32, StoreMMO);
+
+  // FNSTENV changes the exception mask, so load back the stored environment.
+  // FLDENV reads memory, so it gets its own memory operand flagged as a load
+  // rather than reusing the store operand of FNSTENV.
+  MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOLoad, X87StateSize, Align(4));
+  Chain = DAG.getMemIntrinsicNode(X86ISD::FLDENV, DL, DAG.getVTList(MVT::Other),
+                                  {Chain, Addr}, MVT::i32, LoadMMO);
+
+  // If target supports SSE, get MXCSR as well.
+  if (DAG.getSubtarget().hasFeature(X86::FeatureSSE1)) {
+    EVT PtrVT = Addr.getValueType();
+    SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Addr,
+                                    DAG.getConstant(X87StateSize, DL, PtrVT));
+    // Store MXCSR into memory.
+    Chain = DAG.getNode(
+        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+        DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32),
+        MXCSRAddr);
+  }
+
+  return Chain;
+}
+
+/// Emit nodes that load the floating-point environment from \p Addr: the x87
+/// environment from the first X87StateSize bytes and, if SSE is available,
+/// MXCSR from the location right after it. Returns the updated chain.
+SDValue X86SelectionDAGInfo::emitTargetCodeForSetFPEnv(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Addr,
+    MachinePointerInfo PtrInfo) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+
+  // Set X87 state.
+  SDValue Ops[] = {Chain, Addr};
+  MachineMemOperand *X87EnvMMO = MF.getMachineMemOperand(
+      PtrInfo, MachineMemOperand::MOLoad, X87StateSize, Align(4));
+  Chain = DAG.getMemIntrinsicNode(X86ISD::FLDENV, DL, DAG.getVTList(MVT::Other),
+                                  Ops, MVT::i32, X87EnvMMO);
+
+  // If target supports SSE, set MXCSR as well.
+  if (DAG.getSubtarget().hasFeature(X86::FeatureSSE1)) {
+    // Get pointer to the MXCSR location in memory. Use the type of Addr
+    // itself so that both operands of the ADD are guaranteed to agree.
+    EVT PtrVT = Addr.getValueType();
+    SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Addr,
+                                    DAG.getConstant(X87StateSize, DL, PtrVT));
+    // Load MXCSR from memory.
+    Chain = DAG.getNode(
+        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+        DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32),
+        MXCSRAddr);
+  }
+
+  return Chain;
+}
+
+/// Emit nodes that load a default floating-point environment from a
+/// constant-pool image. Returns the updated chain.
+SDValue X86SelectionDAGInfo::emitTargetCodeForResetFPEnv(SelectionDAG &DAG,
+                                                         const SDLoc &DL,
+                                                         SDValue Chain) const {
+  const auto &Subtarget = static_cast<const X86Subtarget &>(DAG.getSubtarget());
+  IntegerType *ItemTy = Type::getInt32Ty(*DAG.getContext());
+  ArrayType *FPEnvTy = ArrayType::get(ItemTy, 8);
+  SmallVector<Constant *, 8> FPEnvVals;
+
+  // x87 FPU Control Word: mask all floating-point exceptions, sets rounding to
+  // nearest. FPU precision is set to 53 bits on Windows and 64 bits otherwise
+  // for compatibility with glibc.
+  unsigned X87CW = Subtarget.isTargetWindowsMSVC() ? 0x27F : 0x37F;
+  FPEnvVals.push_back(ConstantInt::get(ItemTy, X87CW));
+  // NOTE(review): the remaining six words of the x87 image are all zero;
+  // confirm that zero is the intended default for each of them (this
+  // presumably includes the tag word).
+  Constant *Zero = ConstantInt::get(ItemTy, 0);
+  for (unsigned I = 0; I < 6; ++I)
+    FPEnvVals.push_back(Zero);
+
+  // MXCSR: mask all floating-point exceptions, sets rounding to nearest, clear
+  // all exceptions, sets DAZ and FTZ to 0.
+  FPEnvVals.push_back(ConstantInt::get(ItemTy, 0x1F80));
+  Constant *FPEnvBits = ConstantArray::get(FPEnvTy, FPEnvVals);
+  MVT PVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+  SDValue CP = DAG.getConstantPool(FPEnvBits, PVT);
+  MachinePointerInfo MPI =
+      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+
+  return emitTargetCodeForSetFPEnv(DAG, DL, Chain, CP, MPI);
+}
diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fpenv.ll
@@ -0,0 +1,78 @@
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=-sse -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,X86-NOSSE
+; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+sse -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,X86-SSE
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,X64
+
+
+define void @func_01(i8* %fpenv) {
+entry:
+  call void @llvm.get.fpenv(i8* %fpenv)
+  ret void
+}
+; CHECK-LABEL: func_01:
+
+; X86-NOSSE: fnstenv (%eax)
+; X86-NOSSE-NEXT: fldenv (%eax)
+; X86-NOSSE-NEXT: retl
+
+; X86-SSE: fnstenv (%eax)
+; X86-SSE-NEXT: fldenv (%eax)
+; X86-SSE-NEXT: stmxcsr 28(%eax)
+; X86-SSE-NEXT: retl
+
+; X64: fnstenv (%rdi)
+; X64-NEXT: fldenv (%rdi)
+; X64-NEXT: stmxcsr 28(%rdi)
+; X64-NEXT: retq
+
+
+define void @func_02(i8* %fpenv) {
+entry:
+  call void @llvm.set.fpenv(i8* %fpenv)
+  ret void
+}
+; CHECK-LABEL: func_02:
+
+; X86-NOSSE: fldenv (%eax)
+; X86-NOSSE-NEXT: retl
+
+; X86-SSE: fldenv (%eax)
+; X86-SSE-NEXT: ldmxcsr 28(%eax)
+; X86-SSE-NEXT: retl
+
+; X64: fldenv (%rdi)
+; X64-NEXT: ldmxcsr 28(%rdi)
+; X64-NEXT: retq
+
+
+define void @func_03() {
+entry:
+  call void @llvm.reset.fpenv()
+  ret void
+}
+; CHECK: .LCPI{{.*}}:
+; CHECK: .long 895 # 0x37f
+; CHECK: .long 0 # 0x0
+; CHECK: .long 0 # 0x0
+; CHECK: .long 0 # 0x0
+; CHECK: .long 0 # 0x0
+; CHECK: .long 0 # 0x0
+; CHECK: .long 0 # 0x0
+; CHECK: .long 8064 # 0x1f80
+
+; CHECK-LABEL: func_03:
+
+; X86-NOSSE: fldenv .LCPI
+; X86-NOSSE-NEXT: retl
+
+; X86-SSE: fldenv .LCPI
+; X86-SSE-NEXT: ldmxcsr .LCPI{{.*}}+28
+; X86-SSE-NEXT: retl
+
+; X64: fldenv .LCPI{{.*}}
+; X64-NEXT: ldmxcsr .LCPI{{.*}}+28
+; X64-NEXT: retq
+
+
+declare void @llvm.get.fpenv(i8* %fpenv)
+declare void @llvm.set.fpenv(i8* %fpenv)
+declare void @llvm.reset.fpenv()