diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -539,6 +539,8 @@ SDValue visitFP_TO_BF16(SDNode *N); SDValue visitVECREDUCE(SDNode *N); SDValue visitVPOp(SDNode *N); + SDValue visitGET_FPENV_MEM(SDNode *N); + SDValue visitSET_FPENV_MEM(SDNode *N); template SDValue visitFADDForFMACombine(SDNode *N); @@ -2001,6 +2003,8 @@ case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); case ISD::FP_TO_BF16: return visitFP_TO_BF16(N); case ISD::FREEZE: return visitFREEZE(N); + case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N); + case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N); case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -25714,6 +25718,97 @@ return SDValue(); } +SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + EVT MemVT = cast(N)->getMemoryVT(); + + // Check if the memory, where FP state is written to, is used only in a single + // load operation. + LoadSDNode *LdNode = nullptr; + for (auto *U : Ptr->uses()) { + if (U == N) + continue; + if (auto *Ld = dyn_cast(U)) { + if (LdNode && LdNode != Ld) + return SDValue(); + LdNode = Ld; + continue; + } + return SDValue(); + } + if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() || + !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT || + !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0))) + return SDValue(); + + // Check if the loaded value is used only in a store operation. 
+ StoreSDNode *StNode = nullptr; + for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) { + SDUse &U = I.getUse(); + if (U.getResNo() == 0) { + if (auto *St = dyn_cast(U.getUser())) { + if (StNode) + return SDValue(); + StNode = St; + } else { + return SDValue(); + } + } + } + if (!StNode || !StNode->isSimple() || StNode->isIndexed() || + !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT || + !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1))) + return SDValue(); + + // Create new node GET_FPENV_MEM, which uses the store address to write FP + // environment. + SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT, + StNode->getMemOperand()); + CombineTo(StNode, Res, false); + return Res; +} + +SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + EVT MemVT = cast(N)->getMemoryVT(); + + // Check if the address of FP state is otherwise used only in a single store. + StoreSDNode *StNode = nullptr; + for (auto *U : Ptr->uses()) { + if (U == N) + continue; + if (auto *St = dyn_cast(U)) { + if (StNode && StNode != St) + return SDValue(); + StNode = St; + continue; + } + return SDValue(); + } + if (!StNode || !StNode->isSimple() || StNode->isIndexed() || + !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT || + !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0))) + return SDValue(); + + // Check if the stored value is a simple, non-indexed load of the same memory + // type, reachable from the store's chain without side effects. + SDValue StValue = StNode->getValue(); + auto *LdNode = dyn_cast(StValue); + if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() || + !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT || + !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1))) + return SDValue(); + + // Create new node SET_FPENV_MEM, which uses the load address to read FP + // environment. 
+ SDValue Res = + DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT, + LdNode->getMemOperand()); + return Res; +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> diff --git a/llvm/test/CodeGen/X86/fpenv-combine.ll b/llvm/test/CodeGen/X86/fpenv-combine.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fpenv-combine.ll @@ -0,0 +1,200 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s -check-prefix=X64 + +declare i256 @llvm.get.fpenv.i256() +declare void @llvm.set.fpenv.i256(i256 %fpenv) +declare void @llvm.reset.fpenv() + +; Cannot fold get_fpenv+load+store because loaded value is used in +; more than one instruction. +define void @get_fpenv_02(ptr %ptr1, ptr %ptr2) #0 { +; X64-LABEL: get_fpenv_02: +; X64: # %bb.0: +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $40, %rsp +; X64-NEXT: movq %rsi, %rbx +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq fegetenv@PLT +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: movq %rsi, 24(%r14) +; X64-NEXT: movq %rcx, (%r14) +; X64-NEXT: movq %rdx, 8(%r14) +; X64-NEXT: movq %rax, 16(%r14) +; X64-NEXT: movq %rax, 16(%rbx) +; X64-NEXT: movq %rsi, 24(%rbx) +; X64-NEXT: movq %rcx, (%rbx) +; X64-NEXT: movq %rdx, 8(%rbx) +; X64-NEXT: addq $40, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r14 +; X64-NEXT: retq + %fpenv = call i256 @llvm.get.fpenv.i256() + store i256 %fpenv, ptr %ptr1 + store i256 %fpenv, ptr %ptr2 + ret void +} + +; Cannot fold get_fpenv+load+store because load and store have different type. 
+define void @get_fpenv_03(ptr %ptr) #0 { +; X64-LABEL: get_fpenv_03: +; X64: # %bb.0: +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $32, %rsp +; X64-NEXT: movq %rdi, %rbx +; X64-NEXT: movq %rsp, %rdi +; X64-NEXT: callq fegetenv@PLT +; X64-NEXT: movl (%rsp), %eax +; X64-NEXT: movl %eax, (%rbx) +; X64-NEXT: addq $32, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: retq + %fpenv = call i256 @llvm.get.fpenv.i256() + %part = trunc i256 %fpenv to i32 + store i32 %part, ptr %ptr + ret void +} + +; Cannot fold get_fpenv+load+store because loaded value is not +; immediately stored. +define void @get_fpenv_04(ptr %ptr) #0 { +; X64-LABEL: get_fpenv_04: +; X64: # %bb.0: +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $32, %rsp +; X64-NEXT: movq %rdi, %rbx +; X64-NEXT: movq %rsp, %rdi +; X64-NEXT: callq fegetenv@PLT +; X64-NEXT: movq (%rsp), %rax +; X64-NEXT: andl $1, %eax +; X64-NEXT: movq %rax, (%rbx) +; X64-NEXT: movq $0, 16(%rbx) +; X64-NEXT: movq $0, 24(%rbx) +; X64-NEXT: movq $0, 8(%rbx) +; X64-NEXT: addq $32, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: retq + %fpenv = call i256 @llvm.get.fpenv.i256() + %masked = and i256 %fpenv, 1 + store i256 %masked, ptr %ptr + ret void +} + +; Cannot fold get_fpenv+load+store because there is a memory operation +; between load and store. 
+define void @get_fpenv_05(ptr %ptr1, ptr %ptr2) #0 { +; X64-LABEL: get_fpenv_05: +; X64: # %bb.0: +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $40, %rsp +; X64-NEXT: movq %rsi, %rbx +; X64-NEXT: movq %rdi, %r14 +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq fegetenv@PLT +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx +; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi +; X64-NEXT: movl $0, (%r14) +; X64-NEXT: movq %rsi, 24(%rbx) +; X64-NEXT: movq %rdx, 16(%rbx) +; X64-NEXT: movq %rcx, 8(%rbx) +; X64-NEXT: movq %rax, (%rbx) +; X64-NEXT: addq $40, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r14 +; X64-NEXT: retq + %fpenv = call i256 @llvm.get.fpenv.i256() + store i32 0, ptr %ptr1 + store i256 %fpenv, ptr %ptr2 + ret void +} + +; Cannot fold load+save+set_fpenv because there is a memory operation +; between load and store. +define void @set_fpenv_02(ptr %ptr1, ptr %ptr2) #0 { +; X64-LABEL: set_fpenv_02: +; X64: # %bb.0: +; X64-NEXT: subq $40, %rsp +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: movq 8(%rdi), %rcx +; X64-NEXT: movq 16(%rdi), %rdx +; X64-NEXT: movq 24(%rdi), %rdi +; X64-NEXT: movl $0, (%rsi) +; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq fesetenv@PLT +; X64-NEXT: addq $40, %rsp +; X64-NEXT: retq + %fpenv = load i256, ptr %ptr1 + store i32 0, ptr %ptr2 + call void @llvm.set.fpenv.i256(i256 %fpenv) + ret void +} + +; Cannot fold load+save+set_fpenv because loaded value is used in +; more than one store. 
+define void @set_fpenv_03(ptr %ptr1, ptr %ptr2) #0 { +; X64-LABEL: set_fpenv_03: +; X64: # %bb.0: +; X64-NEXT: pushq %r15 +; X64-NEXT: pushq %r14 +; X64-NEXT: pushq %r13 +; X64-NEXT: pushq %r12 +; X64-NEXT: pushq %rbx +; X64-NEXT: subq $32, %rsp +; X64-NEXT: movq %rsi, %rbx +; X64-NEXT: movq (%rdi), %r14 +; X64-NEXT: movq 8(%rdi), %r15 +; X64-NEXT: movq 16(%rdi), %r12 +; X64-NEXT: movq 24(%rdi), %r13 +; X64-NEXT: callq fesetenv@PLT +; X64-NEXT: movq %r13, 24(%rbx) +; X64-NEXT: movq %r12, 16(%rbx) +; X64-NEXT: movq %r15, 8(%rbx) +; X64-NEXT: movq %r14, (%rbx) +; X64-NEXT: addq $32, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %r12 +; X64-NEXT: popq %r13 +; X64-NEXT: popq %r14 +; X64-NEXT: popq %r15 +; X64-NEXT: retq + %fpenv = load i256, ptr %ptr1 + call void @llvm.set.fpenv.i256(i256 %fpenv) + store i256 %fpenv, ptr %ptr2 + ret void +} + +; Cannot fold load+save+set_fpenv because loaded value is not +; immediately stored. +define void @set_fpenv_04(ptr %ptr) #0 { +; X64-LABEL: set_fpenv_04: +; X64: # %bb.0: +; X64-NEXT: subq $40, %rsp +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: andl $1, %eax +; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: movq $0, {{[0-9]+}}(%rsp) +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq fesetenv@PLT +; X64-NEXT: addq $40, %rsp +; X64-NEXT: retq + %fpenv = load i256, ptr %ptr + %masked = and i256 %fpenv, 1 + call void @llvm.set.fpenv.i256(i256 %masked) + ret void +} + + +attributes #0 = { nounwind "use-soft-float"="true" } diff --git a/llvm/test/CodeGen/X86/fpenv.ll b/llvm/test/CodeGen/X86/fpenv.ll --- a/llvm/test/CodeGen/X86/fpenv.ll +++ b/llvm/test/CodeGen/X86/fpenv.ll @@ -249,97 +249,27 @@ define void @get_fpenv_01(ptr %ptr) #0 { ; X86-NOSSE-LABEL: get_fpenv_01: ; X86-NOSSE: # %bb.0: # %entry -; X86-NOSSE-NEXT: pushl %ebp -; X86-NOSSE-NEXT: pushl %ebx -; X86-NOSSE-NEXT: pushl %edi -; X86-NOSSE-NEXT: pushl %esi -; X86-NOSSE-NEXT: subl $60, 
%esp -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: subl $44, %esp +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: calll fegetenv -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NOSSE-NEXT: movl %ecx, 24(%esi) -; X86-NOSSE-NEXT: movl %eax, 28(%esi) -; X86-NOSSE-NEXT: movl %ebp, 16(%esi) -; X86-NOSSE-NEXT: movl %ebx, 20(%esi) -; X86-NOSSE-NEXT: movl %edi, 8(%esi) -; X86-NOSSE-NEXT: movl %edx, 12(%esi) -; X86-NOSSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NOSSE-NEXT: movl %eax, (%esi) -; X86-NOSSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NOSSE-NEXT: movl %eax, 4(%esi) -; X86-NOSSE-NEXT: addl $60, %esp -; X86-NOSSE-NEXT: popl %esi -; X86-NOSSE-NEXT: popl %edi -; X86-NOSSE-NEXT: popl %ebx -; X86-NOSSE-NEXT: popl %ebp +; X86-NOSSE-NEXT: addl $44, %esp ; X86-NOSSE-NEXT: retl ; ; X86-SSE-LABEL: get_fpenv_01: ; X86-SSE: # %bb.0: # %entry -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: pushl %edi -; X86-SSE-NEXT: pushl %esi -; X86-SSE-NEXT: subl $60, %esp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: subl $44, %esp +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movl %eax, (%esp) ; X86-SSE-NEXT: calll fegetenv -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SSE-NEXT: movl 
{{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SSE-NEXT: movl %ecx, 24(%esi) -; X86-SSE-NEXT: movl %eax, 28(%esi) -; X86-SSE-NEXT: movl %ebp, 16(%esi) -; X86-SSE-NEXT: movl %ebx, 20(%esi) -; X86-SSE-NEXT: movl %edi, 8(%esi) -; X86-SSE-NEXT: movl %edx, 12(%esi) -; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-SSE-NEXT: movl %eax, (%esi) -; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-SSE-NEXT: movl %eax, 4(%esi) -; X86-SSE-NEXT: addl $60, %esp -; X86-SSE-NEXT: popl %esi -; X86-SSE-NEXT: popl %edi -; X86-SSE-NEXT: popl %ebx -; X86-SSE-NEXT: popl %ebp +; X86-SSE-NEXT: addl $44, %esp ; X86-SSE-NEXT: retl ; ; X64-LABEL: get_fpenv_01: ; X64: # %bb.0: # %entry -; X64-NEXT: pushq %rbx -; X64-NEXT: subq $32, %rsp -; X64-NEXT: movq %rdi, %rbx -; X64-NEXT: movq %rsp, %rdi +; X64-NEXT: subq $40, %rsp ; X64-NEXT: callq fegetenv@PLT -; X64-NEXT: movq (%rsp), %rax -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdx -; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; X64-NEXT: movq %rsi, 16(%rbx) -; X64-NEXT: movq %rdx, 24(%rbx) -; X64-NEXT: movq %rax, (%rbx) -; X64-NEXT: movq %rcx, 8(%rbx) -; X64-NEXT: addq $32, %rsp -; X64-NEXT: popq %rbx +; X64-NEXT: addq $40, %rsp ; X64-NEXT: retq entry: %env = call i256 @llvm.get.fpenv.i256() @@ -350,88 +280,25 @@ define void @set_fpenv_01(ptr %ptr) #0 { ; X86-NOSSE-LABEL: set_fpenv_01: ; X86-NOSSE: # %bb.0: # %entry -; X86-NOSSE-NEXT: pushl %ebp -; X86-NOSSE-NEXT: pushl %ebx -; X86-NOSSE-NEXT: pushl %edi -; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: subl $44, %esp ; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOSSE-NEXT: movl (%eax), %ecx -; 
X86-NOSSE-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOSSE-NEXT: movl 4(%eax), %edx -; X86-NOSSE-NEXT: movl 12(%eax), %esi -; X86-NOSSE-NEXT: movl 8(%eax), %edi -; X86-NOSSE-NEXT: movl 20(%eax), %ebx -; X86-NOSSE-NEXT: movl 16(%eax), %ebp -; X86-NOSSE-NEXT: movl 28(%eax), %ecx -; X86-NOSSE-NEXT: movl 24(%eax), %eax -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-NOSSE-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: calll fesetenv ; X86-NOSSE-NEXT: addl $44, %esp -; X86-NOSSE-NEXT: popl %esi -; X86-NOSSE-NEXT: popl %edi -; X86-NOSSE-NEXT: popl %ebx -; X86-NOSSE-NEXT: popl %ebp ; X86-NOSSE-NEXT: retl ; ; X86-SSE-LABEL: set_fpenv_01: ; X86-SSE: # %bb.0: # %entry -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: pushl %ebx -; X86-SSE-NEXT: pushl %edi -; X86-SSE-NEXT: pushl %esi ; X86-SSE-NEXT: subl $44, %esp ; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movl (%eax), %ecx -; X86-SSE-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-SSE-NEXT: movl 4(%eax), %edx -; X86-SSE-NEXT: movl 12(%eax), %esi -; X86-SSE-NEXT: movl 8(%eax), %edi -; X86-SSE-NEXT: movl 20(%eax), %ebx -; X86-SSE-NEXT: movl 16(%eax), %ebp -; X86-SSE-NEXT: movl 28(%eax), %ecx -; X86-SSE-NEXT: movl 24(%eax), %eax -; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movl %ebp, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movl %ebx, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movl %edi, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movl %esi, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%eax # 4-byte Reload -; X86-SSE-NEXT: movl %eax, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: movl %edx, {{[0-9]+}}(%esp) -; X86-SSE-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: movl %eax, (%esp) ; X86-SSE-NEXT: calll fesetenv ; X86-SSE-NEXT: addl $44, %esp -; X86-SSE-NEXT: popl %esi -; X86-SSE-NEXT: popl %edi -; X86-SSE-NEXT: popl %ebx -; X86-SSE-NEXT: popl %ebp ; X86-SSE-NEXT: retl ; ; X64-LABEL: set_fpenv_01: ; X64: # %bb.0: # %entry ; X64-NEXT: subq $40, %rsp -; X64-NEXT: movq (%rdi), %rax -; X64-NEXT: movq 8(%rdi), %rcx -; X64-NEXT: movq 24(%rdi), %rdx -; X64-NEXT: movq 16(%rdi), %rsi -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) -; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) -; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi ; X64-NEXT: callq fesetenv@PLT ; X64-NEXT: addq $40, %rsp ; X64-NEXT: retq