diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -537,6 +537,8 @@
     SDValue visitFP_TO_BF16(SDNode *N);
     SDValue visitVECREDUCE(SDNode *N);
     SDValue visitVPOp(SDNode *N);
+    SDValue visitGET_FPENV_MEM(SDNode *N);
+    SDValue visitSET_FPENV_MEM(SDNode *N);
 
     template <class MatchContextClass>
     SDValue visitFADDForFMACombine(SDNode *N);
@@ -1993,6 +1995,8 @@
   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
   case ISD::FP_TO_BF16:         return visitFP_TO_BF16(N);
   case ISD::FREEZE:             return visitFREEZE(N);
+  case ISD::GET_FPENV_MEM:      return visitGET_FPENV_MEM(N);
+  case ISD::SET_FPENV_MEM:      return visitSET_FPENV_MEM(N);
   case ISD::VECREDUCE_FADD:
   case ISD::VECREDUCE_FMUL:
   case ISD::VECREDUCE_ADD:
@@ -25565,6 +25569,111 @@
   return SDValue();
 }
 
+/// Fold "GET_FPENV_MEM tmp; X = load tmp; store X, dst" into a single
+/// GET_FPENV_MEM that writes the FP environment directly to dst. Returns the
+/// new node, or an empty SDValue when the pattern does not match.
+SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  SDValue Ptr = N->getOperand(1);
+
+  // Check if the memory, where FP state is written to, is used only in a single
+  // load operation.
+  LoadSDNode *LdNode = nullptr;
+  for (auto *U : Ptr->uses()) {
+    if (U == N)
+      continue;
+    if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
+      if (LdNode && LdNode != Ld)
+        return SDValue();
+      LdNode = Ld;
+      continue;
+    }
+    return SDValue();
+  }
+  // The load must be a plain, unindexed read through Ptr itself, with nothing
+  // that has side effects between it and N on the chain.
+  if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
+      !LdNode->getOffset().isUndef() || LdNode->getBasePtr() != Ptr ||
+      !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
+    return SDValue();
+
+  // Check if the loaded value is used only in a store operation.
+  StoreSDNode *StNode = nullptr;
+  for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
+    SDUse &U = I.getUse();
+    // Only uses of the loaded value matter; uses of the chain are ignored.
+    if (U.getResNo() != 0)
+      continue;
+    auto *St = dyn_cast<StoreSDNode>(U.getUser());
+    if (!St || StNode)
+      return SDValue();
+    StNode = St;
+  }
+  // The loaded value must be the value being stored (not the store address),
+  // otherwise the new node below would take its address from a load that is
+  // chained after N.
+  if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
+      !StNode->getOffset().isUndef() ||
+      StNode->getValue() != SDValue(LdNode, 0) ||
+      StNode->getMemoryVT() != LdNode->getMemoryVT() ||
+      !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
+    return SDValue();
+
+  // Create new node GET_FPENV_MEM, which uses the store address to write FP
+  // environment.
+  SDValue Res = DAG.getNode(ISD::GET_FPENV_MEM, SDLoc(N), MVT::Other, Chain,
+                            StNode->getBasePtr());
+  CombineTo(StNode, Res, false);
+  return Res;
+}
+
+/// Fold "X = load src; store X, tmp; SET_FPENV_MEM tmp" into a single
+/// SET_FPENV_MEM that reads the FP environment directly from src. Returns the
+/// new node, or an empty SDValue when the pattern does not match.
+SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
+  SDValue Chain = N->getOperand(0);
+  SDValue Ptr = N->getOperand(1);
+
+  // Check if the address of FP state is used only in a store operation.
+  StoreSDNode *StNode = nullptr;
+  for (auto *U : Ptr->uses()) {
+    if (U == N)
+      continue;
+    if (auto *St = dyn_cast<StoreSDNode>(U)) {
+      if (StNode && StNode != St)
+        return SDValue();
+      StNode = St;
+      continue;
+    }
+    return SDValue();
+  }
+  // The store must be a plain, unindexed write through Ptr itself (Ptr must
+  // not be the value being stored), with nothing that has side effects
+  // between it and N on the chain.
+  if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
+      !StNode->getOffset().isUndef() || StNode->getBasePtr() != Ptr ||
+      !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
+    return SDValue();
+
+  // Check if the stored value is produced by a plain, unindexed load of the
+  // same memory type as the store. Other uses of the loaded value are not
+  // checked here.
+  SDValue StValue = StNode->getValue();
+  auto *LdNode = dyn_cast<LoadSDNode>(StValue);
+  if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
+      !LdNode->getOffset().isUndef() ||
+      StNode->getMemoryVT() != LdNode->getMemoryVT() ||
+      !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
+    return SDValue();
+
+  // Create new node SET_FPENV_MEM, which uses the load address to read FP
+  // environment.
+  SDValue Res = DAG.getNode(ISD::SET_FPENV_MEM, SDLoc(N), MVT::Other,
+                            LdNode->getChain(), LdNode->getBasePtr());
+  CombineTo(StNode, Res, false);
+  return Res;
+}
+
 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
 /// with the destination vector and a zero vector.
 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll
--- a/llvm/test/CodeGen/X86/fpenv.ll
+++ b/llvm/test/CodeGen/X86/fpenv.ll
@@ -249,97 +249,27 @@
 define void @fpenv_01(ptr %ptr) #0 {
 ; X86-NOSSE-LABEL: fpenv_01:
 ; X86-NOSSE:       # %bb.0: # %entry
-; X86-NOSSE-NEXT:    pushl %ebp
-; X86-NOSSE-NEXT:    pushl %ebx
-; X86-NOSSE-NEXT:    pushl %edi
-; X86-NOSSE-NEXT:    pushl %esi
-; X86-NOSSE-NEXT:    subl $60, %esp
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT:    subl $44, %esp
+; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl %eax, (%esp)
 ; X86-NOSSE-NEXT:    calll fegetenv
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NOSSE-NEXT:    movl %ecx, 24(%esi)
-; X86-NOSSE-NEXT:    movl %eax, 28(%esi)
-; X86-NOSSE-NEXT:    movl %ebp, 16(%esi)
-; X86-NOSSE-NEXT:    movl %ebx, 20(%esi)
-; X86-NOSSE-NEXT:    movl %edi, 8(%esi)
-; X86-NOSSE-NEXT:    movl %edx, 12(%esi)
-; X86-NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOSSE-NEXT:    movl %eax, (%esi)
-; X86-NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOSSE-NEXT:    movl %eax, 4(%esi)
-; X86-NOSSE-NEXT:    addl $60, %esp
-; X86-NOSSE-NEXT:    popl %esi
-; X86-NOSSE-NEXT:    popl %edi
-; X86-NOSSE-NEXT:    popl %ebx
-; X86-NOSSE-NEXT:    popl %ebp
+; X86-NOSSE-NEXT:    addl $44, %esp
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE-LABEL: fpenv_01:
 ; X86-SSE:       # %bb.0: # %entry
-; X86-SSE-NEXT:    pushl %ebp
-; X86-SSE-NEXT:    pushl %ebx
-; X86-SSE-NEXT:    pushl %edi
-; X86-SSE-NEXT:    pushl %esi
-; X86-SSE-NEXT:    subl $60, %esp
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-SSE-NEXT:    subl $44, %esp
+; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE-NEXT:    movl %eax, (%esp)
 ; X86-SSE-NEXT:    calll fegetenv
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-SSE-NEXT:    movl %ecx, 24(%esi)
-; X86-SSE-NEXT:    movl %eax, 28(%esi)
-; X86-SSE-NEXT:    movl %ebp, 16(%esi)
-; X86-SSE-NEXT:    movl %ebx, 20(%esi)
-; X86-SSE-NEXT:    movl %edi, 8(%esi)
-; X86-SSE-NEXT:    movl %edx, 12(%esi)
-; X86-SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SSE-NEXT:    movl %eax, (%esi)
-; X86-SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SSE-NEXT:    movl %eax, 4(%esi)
-; X86-SSE-NEXT:    addl $60, %esp
-; X86-SSE-NEXT:    popl %esi
-; X86-SSE-NEXT:    popl %edi
-; X86-SSE-NEXT:    popl %ebx
-; X86-SSE-NEXT:    popl %ebp
+; X86-SSE-NEXT:    addl $44, %esp
 ; X86-SSE-NEXT:    retl
 ;
 ; X64-LABEL: fpenv_01:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    pushq %rbx
-; X64-NEXT:    subq $32, %rsp
-; X64-NEXT:    movq %rdi, %rbx
-; X64-NEXT:    movq %rsp, %rdi
+; X64-NEXT:    subq $40, %rsp
 ; X64-NEXT:    callq fegetenv@PLT
-; X64-NEXT:    movq (%rsp), %rax
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
-; X64-NEXT:    movq %rsi, 16(%rbx)
-; X64-NEXT:    movq %rdx, 24(%rbx)
-; X64-NEXT:    movq %rax, (%rbx)
-; X64-NEXT:    movq %rcx, 8(%rbx)
-; X64-NEXT:    addq $32, %rsp
-; X64-NEXT:    popq %rbx
+; X64-NEXT:    addq $40, %rsp
 ; X64-NEXT:    retq
 entry:
   %env = call i256 @llvm.get.fpenv.i256()
@@ -350,88 +280,25 @@
 define void @fpenv_02(ptr %ptr) #0 {
 ; X86-NOSSE-LABEL: fpenv_02:
 ; X86-NOSSE:       # %bb.0: # %entry
-; X86-NOSSE-NEXT:    pushl %ebp
-; X86-NOSSE-NEXT:    pushl %ebx
-; X86-NOSSE-NEXT:    pushl %edi
-; X86-NOSSE-NEXT:    pushl %esi
 ; X86-NOSSE-NEXT:    subl $76, %esp
 ; X86-NOSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOSSE-NEXT:    movl (%eax), %ecx
-; X86-NOSSE-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOSSE-NEXT:    movl 4(%eax), %edx
-; X86-NOSSE-NEXT:    movl 12(%eax), %esi
-; X86-NOSSE-NEXT:    movl 8(%eax), %edi
-; X86-NOSSE-NEXT:    movl 20(%eax), %ebx
-; X86-NOSSE-NEXT:    movl 16(%eax), %ebp
-; X86-NOSSE-NEXT:    movl 28(%eax), %ecx
-; X86-NOSSE-NEXT:    movl 24(%eax), %eax
-; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %ebp, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %edi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NOSSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X86-NOSSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-NOSSE-NEXT:    movl %eax, (%esp)
 ; X86-NOSSE-NEXT:    calll fesetenv
 ; X86-NOSSE-NEXT:    addl $76, %esp
-; X86-NOSSE-NEXT:    popl %esi
-; X86-NOSSE-NEXT:    popl %edi
-; X86-NOSSE-NEXT:    popl %ebx
-; X86-NOSSE-NEXT:    popl %ebp
 ; X86-NOSSE-NEXT:    retl
 ;
 ; X86-SSE-LABEL: fpenv_02:
 ; X86-SSE:       # %bb.0: # %entry
-; X86-SSE-NEXT:    pushl %ebp
-; X86-SSE-NEXT:    pushl %ebx
-; X86-SSE-NEXT:    pushl %edi
-; X86-SSE-NEXT:    pushl %esi
 ; X86-SSE-NEXT:    subl $76, %esp
 ; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT:    movl (%eax), %ecx
-; X86-SSE-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-SSE-NEXT:    movl 4(%eax), %edx
-; X86-SSE-NEXT:    movl 12(%eax), %esi
-; X86-SSE-NEXT:    movl 8(%eax), %edi
-; X86-SSE-NEXT:    movl 20(%eax), %ebx
-; X86-SSE-NEXT:    movl 16(%eax), %ebp
-; X86-SSE-NEXT:    movl 28(%eax), %ecx
-; X86-SSE-NEXT:    movl 24(%eax), %eax
-; X86-SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    movl %ebp, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    movl %edi, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-SSE-NEXT:    movl %eax, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    movl %edx, {{[0-9]+}}(%esp)
-; X86-SSE-NEXT:    leal {{[0-9]+}}(%esp), %eax
 ; X86-SSE-NEXT:    movl %eax, (%esp)
 ; X86-SSE-NEXT:    calll fesetenv
 ; X86-SSE-NEXT:    addl $76, %esp
-; X86-SSE-NEXT:    popl %esi
-; X86-SSE-NEXT:    popl %edi
-; X86-SSE-NEXT:    popl %ebx
-; X86-SSE-NEXT:    popl %ebp
 ; X86-SSE-NEXT:    retl
 ;
 ; X64-LABEL: fpenv_02:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    subq $72, %rsp
-; X64-NEXT:    movq (%rdi), %rax
-; X64-NEXT:    movq 8(%rdi), %rcx
-; X64-NEXT:    movq 24(%rdi), %rdx
-; X64-NEXT:    movq 16(%rdi), %rsi
-; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
-; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
-; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
-; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
-; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
 ; X64-NEXT:    callq fesetenv@PLT
 ; X64-NEXT:    addq $72, %rsp
 ; X64-NEXT:    retq