diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -833,6 +833,12 @@
     // Load FP control word from i16 memory.
     FLDCW16m,
 
+    // Store x87 FPU environment into memory.
+    FNSTENVm,
+
+    // Load x87 FPU environment from memory.
+    FLDENVm,
+
     /// This instruction implements FP_TO_SINT with the
     /// integer destination in memory and a FP reg source. This corresponds
     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
@@ -1663,6 +1669,9 @@
     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                     SDValue &Chain) const;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -396,6 +396,9 @@
   if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
     setOperationAction(ISD::GET_ROUNDING   , MVT::i32  , Custom);
     setOperationAction(ISD::SET_ROUNDING   , MVT::Other, Custom);
+    setOperationAction(ISD::GET_FPENV_MEM  , MVT::Other, Custom);
+    setOperationAction(ISD::SET_FPENV_MEM  , MVT::Other, Custom);
+    setOperationAction(ISD::RESET_FPENV    , MVT::Other, Custom);
   }
 
   // Promote the i8 variants and force them on up to i32 which has a shorter
@@ -30068,6 +30071,134 @@
   return Chain;
 }
 
+// Layout of the in-memory FP environment handled below: the x87 environment
+// takes the first X87StateSize bytes and MXCSR is kept right after it, for a
+// total of FPStateSize bytes.
+const unsigned X87StateSize = 28;
+const unsigned FPStateSize = 32;
+const unsigned FPStateSizeInBits = FPStateSize * 8;
+
+/// Lower GET_FPENV_MEM: store the x87 environment and, if SSE is available,
+/// MXCSR into the buffer addressed by operand 1. Returns the new chain.
+SDValue X86TargetLowering::LowerGET_FPENV_MEM(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+  SDValue Ptr = Op->getOperand(1);
+  auto *Node = cast<FPStateAccessSDNode>(Op);
+  EVT MemVT = Node->getMemoryVT();
+  assert(MemVT.getSizeInBits() == FPStateSizeInBits);
+  MachineMemOperand *MMO = Node->getMemOperand();
+
+  // Get x87 state, if it is present.
+  if (Subtarget.hasX87()) {
+    Chain =
+        DAG.getMemIntrinsicNode(X86ISD::FNSTENVm, DL, DAG.getVTList(MVT::Other),
+                                {Chain, Ptr}, MemVT, MMO);
+
+    // FNSTENV changes the exception mask, so load back the stored environment.
+    MachineMemOperand::Flags NewFlags =
+        MachineMemOperand::MOLoad |
+        (MMO->getFlags() & ~MachineMemOperand::MOStore);
+    MMO = MF.getMachineMemOperand(MMO, NewFlags);
+    Chain =
+        DAG.getMemIntrinsicNode(X86ISD::FLDENVm, DL, DAG.getVTList(MVT::Other),
+                                {Chain, Ptr}, MemVT, MMO);
+  }
+
+  // If target supports SSE, get MXCSR as well.
+  if (Subtarget.hasSSE1()) {
+    // Get pointer to the MXCSR location in memory.
+    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+    SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
+                                    DAG.getConstant(X87StateSize, DL, PtrVT));
+    // Store MXCSR into memory.
+    Chain = DAG.getNode(
+        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+        DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32),
+        MXCSRAddr);
+  }
+
+  return Chain;
+}
+
+/// Emit the nodes that load the FP environment located at \p Ptr: FLDENV for
+/// the x87 part and, if SSE is available, LDMXCSR for the word following it.
+/// Returns the new chain.
+static SDValue createSetFPEnvNodes(SDValue Ptr, SDValue Chain, const SDLoc &DL,
+                                   EVT MemVT, MachineMemOperand *MMO,
+                                   SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
+  // Set x87 state, if it is present.
+  if (Subtarget.hasX87())
+    Chain =
+        DAG.getMemIntrinsicNode(X86ISD::FLDENVm, DL, DAG.getVTList(MVT::Other),
+                                {Chain, Ptr}, MemVT, MMO);
+  // If target supports SSE, set MXCSR as well.
+  if (Subtarget.hasSSE1()) {
+    // Get pointer to the MXCSR location in memory.
+    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+    SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
+                                    DAG.getConstant(X87StateSize, DL, PtrVT));
+    // Load MXCSR from memory.
+    Chain = DAG.getNode(
+        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+        DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32),
+        MXCSRAddr);
+  }
+  return Chain;
+}
+
+/// Lower SET_FPENV_MEM: load the FP environment from the buffer at operand 1.
+SDValue X86TargetLowering::LowerSET_FPENV_MEM(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Chain = Op->getOperand(0);
+  SDValue Ptr = Op->getOperand(1);
+  auto *Node = cast<FPStateAccessSDNode>(Op);
+  EVT MemVT = Node->getMemoryVT();
+  assert(MemVT.getSizeInBits() == FPStateSizeInBits);
+  MachineMemOperand *MMO = Node->getMemOperand();
+  return createSetFPEnvNodes(Ptr, Chain, DL, MemVT, MMO, DAG, Subtarget);
+}
+
+/// Lower RESET_FPENV: load a default FP environment image that is placed in
+/// the constant pool. Returns the new chain.
+SDValue X86TargetLowering::LowerRESET_FPENV(SDValue Op,
+                                            SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  SDLoc DL(Op);
+  SDValue Chain = Op.getNode()->getOperand(0);
+
+  IntegerType *ItemTy = Type::getInt32Ty(*DAG.getContext());
+  ArrayType *FPEnvTy = ArrayType::get(ItemTy, 8);
+  SmallVector<Constant *, 8> FPEnvVals;
+
+  // x87 FPU Control Word: mask all floating-point exceptions, sets rounding to
+  // nearest. FPU precision is set to 53 bits on Windows and 64 bits otherwise
+  // for compatibility with glibc.
+  unsigned X87CW = Subtarget.isTargetWindowsMSVC() ? 0x27F : 0x37F;
+  FPEnvVals.push_back(ConstantInt::get(ItemTy, X87CW));
+  Constant *Zero = ConstantInt::get(ItemTy, 0);
+  for (unsigned I = 0; I < 6; ++I)
+    FPEnvVals.push_back(Zero);
+
+  // MXCSR: mask all floating-point exceptions, sets rounding to nearest, clear
+  // all exceptions, sets DAZ and FTZ to 0.
+  FPEnvVals.push_back(ConstantInt::get(ItemTy, 0x1F80));
+  Constant *FPEnvBits = ConstantArray::get(FPEnvTy, FPEnvVals);
+  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+  SDValue Env = DAG.getConstantPool(FPEnvBits, PtrVT);
+  MachinePointerInfo MPI =
+      MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+  // FLDENV only reads the constant-pool image, so describe a load access.
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MPI, MachineMemOperand::MOLoad, X87StateSize, Align(4));
+
+  return createSetFPEnvNodes(Env, Chain, DL, MVT::i32, MMO, DAG, Subtarget);
+}
+
 /// Lower a vector CTLZ using native supported vector CTLZ instruction.
 //
 // i8/i16 vector implemented using dword LZCNT vector instruction
@@ -34320,6 +34451,9 @@
   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
   case ISD::GET_ROUNDING:       return LowerGET_ROUNDING(Op, DAG);
   case ISD::SET_ROUNDING:       return LowerSET_ROUNDING(Op, DAG);
+  case ISD::GET_FPENV_MEM:      return LowerGET_FPENV_MEM(Op, DAG);
+  case ISD::SET_FPENV_MEM:      return LowerSET_FPENV_MEM(Op, DAG);
+  case ISD::RESET_FPENV:        return LowerRESET_FPENV(Op, DAG);
   case ISD::CTLZ:
   case ISD::CTLZ_ZERO_UNDEF:    return LowerCTLZ(Op, Subtarget, DAG);
   case ISD::CTTZ:
@@ -35562,6 +35696,8 @@
   NODE_NAME_CASE(TC_RETURN)
   NODE_NAME_CASE(FNSTCW16m)
   NODE_NAME_CASE(FLDCW16m)
+  NODE_NAME_CASE(FNSTENVm)
+  NODE_NAME_CASE(FLDENVm)
   NODE_NAME_CASE(LCMPXCHG_DAG)
   NODE_NAME_CASE(LCMPXCHG8_DAG)
   NODE_NAME_CASE(LCMPXCHG16_DAG)
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -25,6 +25,7 @@
 
 def SDTX86CwdStore  : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 def SDTX86CwdLoad   : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86FPEnv     : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
 def X86fp80_add     : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>;
 def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp,
@@ -49,6 +50,12 @@
 def X86fp_cwd_set16 : SDNode<"X86ISD::FLDCW16m", SDTX86CwdLoad,
                              [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
                               SDNPMemOperand]>;
+def X86fpenv_get    : SDNode<"X86ISD::FNSTENVm", SDTX86FPEnv,
+                             [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+                              SDNPMemOperand]>;
+def X86fpenv_set    : SDNode<"X86ISD::FLDENVm", SDTX86FPEnv,
+                             [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+                              SDNPMemOperand]>;
 
 def X86fstf32 : PatFrag<(ops node:$val, node:$ptr),
                         (X86fst node:$val, node:$ptr), [{
@@ -418,13 +425,17 @@
 
 let SchedRW = [WriteMicrocoded] in {
 let Defs = [FPSW, FPCW], mayLoad = 1 in {
-def FLDENVm  : FPI<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src">;
 def FRSTORm  : FPI<0xDD, MRM4m, (outs), (ins anymem:$src), "frstor\t$src">;
+let Predicates = [HasX87] in
+def FLDENVm  : I<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src",
+                 [(X86fpenv_set addr:$src)]>;
 }
 
 let Defs = [FPSW, FPCW], Uses = [FPSW, FPCW], mayStore = 1 in {
-def FSTENVm  : FPI<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst">;
 def FSAVEm   : FPI<0xDD, MRM6m, (outs), (ins anymem:$dst), "fnsave\t$dst">;
+let Predicates = [HasX87] in
+def FSTENVm  : I<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst",
+                 [(X86fpenv_get addr:$dst)]>;
 }
 
 let Uses = [FPSW], mayStore = 1 in
diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll
--- a/llvm/test/CodeGen/X86/fpenv.ll
+++ b/llvm/test/CodeGen/X86/fpenv.ll
@@ -280,27 +280,28 @@
 define void @get_fpenv_01_native(ptr %ptr) nounwind {
 ; X86-NOSSE-LABEL: get_fpenv_01_native:
 ; X86-NOSSE:       # %bb.0: # %entry
-; X86-NOSSE-NEXT:    subl $44, %esp
+; X86-NOSSE-NEXT:    subl $36, %esp
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl %eax, (%esp)
-; X86-NOSSE-NEXT:    calll fegetenv
-; X86-NOSSE-NEXT:    addl $44, %esp
+; X86-NOSSE-NEXT:    fnstenv (%eax)
+; X86-NOSSE-NEXT:    fldenv (%eax)
+; X86-NOSSE-NEXT:    addl $36, %esp
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE-LABEL: get_fpenv_01_native:
 ; X86-SSE:       # %bb.0: # %entry
-; X86-SSE-NEXT:    subl $44, %esp
+; X86-SSE-NEXT:    subl $36, %esp
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    movl %eax, (%esp)
-; X86-SSE-NEXT:    calll fegetenv
-; X86-SSE-NEXT:    addl $44, %esp
+; X86-SSE-NEXT:    fnstenv (%eax)
+; X86-SSE-NEXT:    fldenv (%eax)
+; X86-SSE-NEXT:    stmxcsr 28(%eax)
+; X86-SSE-NEXT:    addl $36, %esp
 ; X86-SSE-NEXT:    retl
 ;
 ; X64-LABEL: get_fpenv_01_native:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    subq $40, %rsp
-; X64-NEXT:    callq fegetenv@PLT
-; X64-NEXT:    addq $40, %rsp
+; X64-NEXT:    fnstenv (%rdi)
+; X64-NEXT:    fldenv (%rdi)
+; X64-NEXT:    stmxcsr 28(%rdi)
 ; X64-NEXT:    retq
 entry:
   %env = call i256 @llvm.get.fpenv.i256()
@@ -342,27 +343,25 @@
 define void @set_fpenv_01_native(ptr %ptr) nounwind {
 ; X86-NOSSE-LABEL: set_fpenv_01_native:
 ; X86-NOSSE:       # %bb.0: # %entry
-; X86-NOSSE-NEXT:    subl $44, %esp
+; X86-NOSSE-NEXT:    subl $36, %esp
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl %eax, (%esp)
-; X86-NOSSE-NEXT:    calll fesetenv
-; X86-NOSSE-NEXT:    addl $44, %esp
+; X86-NOSSE-NEXT:    fldenv (%eax)
+; X86-NOSSE-NEXT:    addl $36, %esp
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE-LABEL: set_fpenv_01_native:
 ; X86-SSE:       # %bb.0: # %entry
-; X86-SSE-NEXT:    subl $44, %esp
+; X86-SSE-NEXT:    subl $36, %esp
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    movl %eax, (%esp)
-; X86-SSE-NEXT:    calll fesetenv
-; X86-SSE-NEXT:    addl $44, %esp
+; X86-SSE-NEXT:    fldenv (%eax)
+; X86-SSE-NEXT:    ldmxcsr 28(%eax)
+; X86-SSE-NEXT:    addl $36, %esp
 ; X86-SSE-NEXT:    retl
 ;
 ; X64-LABEL: set_fpenv_01_native:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    subq $40, %rsp
-; X64-NEXT:    callq fesetenv@PLT
-; X64-NEXT:    addq $40, %rsp
+; X64-NEXT:    fldenv (%rdi)
+; X64-NEXT:    ldmxcsr 28(%rdi)
 ; X64-NEXT:    retq
 entry:
   %env = load i256, ptr %ptr
@@ -402,26 +401,19 @@
 define void @reset_fpenv_01_native() nounwind {
 ; X86-NOSSE-LABEL: reset_fpenv_01_native:
 ; X86-NOSSE:       # %bb.0: # %entry
-; X86-NOSSE-NEXT:    subl $12, %esp
-; X86-NOSSE-NEXT:    movl $-1, (%esp)
-; X86-NOSSE-NEXT:    calll fesetenv
-; X86-NOSSE-NEXT:    addl $12, %esp
+; X86-NOSSE-NEXT:    fldenv {{\.?LCPI[0-9]+_[0-9]+}}
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE-LABEL: reset_fpenv_01_native:
 ; X86-SSE:       # %bb.0: # %entry
-; X86-SSE-NEXT:    subl $12, %esp
-; X86-SSE-NEXT:    movl $-1, (%esp)
-; X86-SSE-NEXT:    calll fesetenv
-; X86-SSE-NEXT:    addl $12, %esp
+; X86-SSE-NEXT:    fldenv {{\.?LCPI[0-9]+_[0-9]+}}
+; X86-SSE-NEXT:    ldmxcsr {{\.?LCPI[0-9]+_[0-9]+}}+28
 ; X86-SSE-NEXT:    retl
 ;
 ; X64-LABEL: reset_fpenv_01_native:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rax
-; X64-NEXT:    movq $-1, %rdi
-; X64-NEXT:    callq fesetenv@PLT
-; X64-NEXT:    popq %rax
+; X64-NEXT:    fldenv {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; X64-NEXT:    ldmxcsr {{\.?LCPI[0-9]+_[0-9]+}}+28(%rip)
 ; X64-NEXT:    retq
 entry:
   call void @llvm.reset.fpenv()