Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -608,16 +608,22 @@ FILD, FILD_FLAG, + /// This instruction implements a fp->int store from FP stack + /// slots. This corresponds to the fist instruction. It takes a + /// chain operand, value to store, address, and glue. The memory VT + /// specifies the type to store as. + FIST, + /// This instruction implements an extending load to FP stack slots. /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain /// operand, and ptr to load from. The memory VT specifies the type to /// load from. FLD, - /// This instruction implements a truncating store to FP stack + /// This instruction implements a truncating store from FP stack /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a - /// chain operand, value to store, and address. The memory VT specifies - /// the type to store as. + /// chain operand, value to store, address, and glue. The memory VT + /// specifies the type to store as. FST, /// This instruction grabs the address of the next argument Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -25584,17 +25584,18 @@ // Note: this turns large loads into lock cmpxchg8b/16b. // TODO: In 32-bit mode, use MOVLPS when SSE1 is available? -// TODO: In 32-bit mode, use FILD/FISTP when X87 is available? TargetLowering::AtomicExpansionKind X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { Type *MemType = LI->getType(); // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we - // can use movq to do the load. + // can use movq to do the load. 
If we have X87 we can load into an 80-bit + // X87 register and store it to a stack temporary. bool NoImplicitFloatOps = LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && - !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2()) + !Subtarget.useSoftFloat() && !NoImplicitFloatOps && + (Subtarget.hasSSE2() || Subtarget.hasX87())) return AtomicExpansionKind::None; return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg @@ -27440,23 +27441,57 @@ bool NoImplicitFloatOps = DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat); - if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps && - Subtarget.hasSSE2()) { + if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) { auto *Node = cast<AtomicSDNode>(N); - // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the lower - // 64-bits. - SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); - SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; - SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, - MVT::i64, Node->getMemOperand()); - SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld, - DAG.getIntPtrConstant(0, dl)); - Results.push_back(Res); - Results.push_back(Ld.getValue(1)); - return; + if (Subtarget.hasSSE2()) { + // Use a VZEXT_LOAD which will be selected as MOVQ. Then extract the + // lower 64-bits. + SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); + SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; + SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, + MVT::i64, Node->getMemOperand()); + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld, + DAG.getIntPtrConstant(0, dl)); + Results.push_back(Res); + Results.push_back(Ld.getValue(1)); + return; + } + if (Subtarget.hasX87()) { + // First load this into an 80-bit X87 register. This will put the whole + // integer into the significand. 
+ // FIXME: Do we need to glue? See FIXME comment in BuildFILD. + SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other, MVT::Glue); + SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; + SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD_FLAG, + dl, Tys, Ops, MVT::i64, + Node->getMemOperand()); + SDValue Chain = Result.getValue(1); + SDValue InFlag = Result.getValue(2); + + // Now store the X87 register to a stack temporary and convert to i64. + // This store is not atomic and doesn't need to be. + // FIXME: We don't need a stack temporary if the result of the load + // is already being stored. We could just directly store there. + SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo MPI = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); + SDValue StoreOps[] = { Chain, Result, StackPtr, InFlag }; + Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, dl, + DAG.getVTList(MVT::Other), StoreOps, + MVT::i64, MPI, 0 /*Align*/, + MachineMemOperand::MOStore); + + // Finally load the value back from the stack temporary and return it. + // This load is not atomic and doesn't need to be. + // This load will be further type legalized. + Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI); + Results.push_back(Result); + Results.push_back(Result.getValue(1)); + return; + } } // TODO: Use MOVLPS when SSE1 is available? - // TODO: Use FILD/FISTP when X87 is available? // Delegate to generic TypeLegalization. Situations we can really handle // should have already been dealt with by AtomicExpandPass.cpp. 
break; @@ -27649,6 +27684,7 @@ case X86ISD::FXOR: return "X86ISD::FXOR"; case X86ISD::FILD: return "X86ISD::FILD"; case X86ISD::FILD_FLAG: return "X86ISD::FILD_FLAG"; + case X86ISD::FIST: return "X86ISD::FIST"; case X86ISD::FP_TO_INT_IN_MEM: return "X86ISD::FP_TO_INT_IN_MEM"; case X86ISD::FLD: return "X86ISD::FLD"; case X86ISD::FST: return "X86ISD::FST"; Index: llvm/trunk/lib/Target/X86/X86InstrFPStack.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFPStack.td +++ llvm/trunk/lib/Target/X86/X86InstrFPStack.td @@ -21,6 +21,7 @@ def SDTX86Fst : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; def SDTX86Fild : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisPtrTy<1>]>; +def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>; def SDTX86Fnstsw : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>; def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; @@ -35,6 +36,9 @@ def X86fildflag : SDNode<"X86ISD::FILD_FLAG", SDTX86Fild, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad, SDNPMemOperand]>; +def X86fist : SDNode<"X86ISD::FIST", SDTX86Fist, + [SDNPHasChain, SDNPInGlue, SDNPMayStore, + SDNPMemOperand]>; def X86fp_stsw : SDNode<"X86ISD::FNSTSW16r", SDTX86Fnstsw>; def X86fp_to_mem : SDNode<"X86ISD::FP_TO_INT_IN_MEM", SDTX86Fst, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; @@ -79,6 +83,11 @@ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; }]>; +def X86fist64 : PatFrag<(ops node:$val, node:$ptr), + (X86fist node:$val, node:$ptr), [{ + return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i64; +}]>; + def X86fp_to_i16mem : PatFrag<(ops node:$val, node:$ptr), (X86fp_to_mem node:$val, node:$ptr), [{ return cast<MemIntrinsicSDNode>(N)->getMemoryVT() == MVT::i16; @@ -760,6 +769,10 @@ // Used to conv. i64 to f64 since there isn't a SSE version. def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m64 addr:$src)>; +// Used to conv. between f80 and i64 for i64 atomic loads. 
+def : Pat<(X86fildflag64 addr:$src), (ILD_Fp64m80 addr:$src)>; +def : Pat<(X86fist64 RFP80:$src, addr:$op), (IST_Fp64m80 addr:$op, RFP80:$src)>; + // FP extensions map onto simple pseudo-value conversions if they are to/from // the FP stack. def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>, Index: llvm/trunk/test/CodeGen/X86/atomic-fp.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/atomic-fp.ll +++ llvm/trunk/test/CodeGen/X86/atomic-fp.ll @@ -77,14 +77,13 @@ ; X86-NOSSE-NEXT: pushl %ebx ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $16, %esp +; X86-NOSSE-NEXT: subl $24, %esp ; X86-NOSSE-NEXT: movl 8(%ebp), %esi -; X86-NOSSE-NEXT: xorl %eax, %eax -; X86-NOSSE-NEXT: xorl %edx, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: xorl %ebx, %ebx -; X86-NOSSE-NEXT: lock cmpxchg8b (%esi) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%esi) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: faddl 12(%ebp) @@ -283,13 +282,12 @@ ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %ebx ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $24, %esp -; X86-NOSSE-NEXT: xorl %eax, %eax -; X86-NOSSE-NEXT: xorl %edx, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: xorl %ebx, %ebx -; X86-NOSSE-NEXT: lock cmpxchg8b glob64 -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: fildll glob64 +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; 
X86-NOSSE-NEXT: fld1 ; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) @@ -484,13 +482,12 @@ ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %ebx ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $24, %esp -; X86-NOSSE-NEXT: xorl %eax, %eax -; X86-NOSSE-NEXT: xorl %edx, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: xorl %ebx, %ebx -; X86-NOSSE-NEXT: lock cmpxchg8b -559038737 -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: subl $32, %esp +; X86-NOSSE-NEXT: fildll -559038737 +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fld1 ; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) @@ -691,13 +688,12 @@ ; X86-NOSSE-NEXT: movl %esp, %ebp ; X86-NOSSE-NEXT: pushl %ebx ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $32, %esp -; X86-NOSSE-NEXT: xorl %eax, %eax -; X86-NOSSE-NEXT: xorl %edx, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: xorl %ebx, %ebx -; X86-NOSSE-NEXT: lock cmpxchg8b (%esp) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: subl $40, %esp +; X86-NOSSE-NEXT: fildll (%esp) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fld1 ; X86-NOSSE-NEXT: faddl {{[0-9]+}}(%esp) @@ -831,15 +827,14 @@ ; X86-NOSSE-NEXT: pushl %edi ; X86-NOSSE-NEXT: pushl %esi ; X86-NOSSE-NEXT: andl $-8, %esp -; X86-NOSSE-NEXT: subl $24, %esp +; X86-NOSSE-NEXT: subl $32, %esp ; X86-NOSSE-NEXT: movl 20(%ebp), %esi ; X86-NOSSE-NEXT: movl 8(%ebp), %edi -; X86-NOSSE-NEXT: xorl %eax, %eax -; X86-NOSSE-NEXT: xorl %edx, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: xorl %ebx, %ebx -; X86-NOSSE-NEXT: lock cmpxchg8b (%edi,%esi,8) 
-; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: fildll (%edi,%esi,8) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: fldl {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: faddl 12(%ebp) Index: llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll +++ llvm/trunk/test/CodeGen/X86/atomic-load-store-wide.ll @@ -45,22 +45,21 @@ ; ; NOSSE-LABEL: test2: ; NOSSE: # %bb.0: -; NOSSE-NEXT: pushl %ebx +; NOSSE-NEXT: pushl %ebp ; NOSSE-NEXT: .cfi_def_cfa_offset 8 -; NOSSE-NEXT: pushl %esi -; NOSSE-NEXT: .cfi_def_cfa_offset 12 -; NOSSE-NEXT: .cfi_offset %esi, -12 -; NOSSE-NEXT: .cfi_offset %ebx, -8 -; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; NOSSE-NEXT: xorl %eax, %eax -; NOSSE-NEXT: xorl %edx, %edx -; NOSSE-NEXT: xorl %ecx, %ecx -; NOSSE-NEXT: xorl %ebx, %ebx -; NOSSE-NEXT: lock cmpxchg8b (%esi) -; NOSSE-NEXT: popl %esi -; NOSSE-NEXT: .cfi_def_cfa_offset 8 -; NOSSE-NEXT: popl %ebx -; NOSSE-NEXT: .cfi_def_cfa_offset 4 +; NOSSE-NEXT: .cfi_offset %ebp, -8 +; NOSSE-NEXT: movl %esp, %ebp +; NOSSE-NEXT: .cfi_def_cfa_register %ebp +; NOSSE-NEXT: andl $-8, %esp +; NOSSE-NEXT: subl $8, %esp +; NOSSE-NEXT: movl 8(%ebp), %eax +; NOSSE-NEXT: fildll (%eax) +; NOSSE-NEXT: fistpll (%esp) +; NOSSE-NEXT: movl (%esp), %eax +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; NOSSE-NEXT: movl %ebp, %esp +; NOSSE-NEXT: popl %ebp +; NOSSE-NEXT: .cfi_def_cfa %esp, 4 ; NOSSE-NEXT: retl %val = load atomic i64, i64* %ptr seq_cst, align 8 ret i64 %val @@ -102,22 +101,21 @@ ; ; NOSSE-LABEL: test4: ; NOSSE: # %bb.0: -; NOSSE-NEXT: pushl %ebx -; NOSSE-NEXT: .cfi_def_cfa_offset 8 -; NOSSE-NEXT: pushl %esi -; NOSSE-NEXT: .cfi_def_cfa_offset 12 -; NOSSE-NEXT: .cfi_offset %esi, -12 -; 
NOSSE-NEXT: .cfi_offset %ebx, -8 -; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; NOSSE-NEXT: xorl %eax, %eax -; NOSSE-NEXT: xorl %edx, %edx -; NOSSE-NEXT: xorl %ecx, %ecx -; NOSSE-NEXT: xorl %ebx, %ebx -; NOSSE-NEXT: lock cmpxchg8b (%esi) -; NOSSE-NEXT: popl %esi +; NOSSE-NEXT: pushl %ebp ; NOSSE-NEXT: .cfi_def_cfa_offset 8 -; NOSSE-NEXT: popl %ebx -; NOSSE-NEXT: .cfi_def_cfa_offset 4 +; NOSSE-NEXT: .cfi_offset %ebp, -8 +; NOSSE-NEXT: movl %esp, %ebp +; NOSSE-NEXT: .cfi_def_cfa_register %ebp +; NOSSE-NEXT: andl $-8, %esp +; NOSSE-NEXT: subl $8, %esp +; NOSSE-NEXT: movl 8(%ebp), %eax +; NOSSE-NEXT: fildll (%eax) +; NOSSE-NEXT: fistpll (%esp) +; NOSSE-NEXT: movl (%esp), %eax +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; NOSSE-NEXT: movl %ebp, %esp +; NOSSE-NEXT: popl %ebp +; NOSSE-NEXT: .cfi_def_cfa %esp, 4 ; NOSSE-NEXT: retl %val = load atomic volatile i64, i64* %ptr seq_cst, align 8 ret i64 %val Index: llvm/trunk/test/CodeGen/X86/atomic-mi.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/atomic-mi.ll +++ llvm/trunk/test/CodeGen/X86/atomic-mi.ll @@ -331,20 +331,22 @@ ; ; X32-LABEL: add_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), 
%ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: addl $2, %ebx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl (%esi), %eax @@ -355,10 +357,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB14_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'addq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -375,22 +378,24 @@ ; ; X32-LABEL: add_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: addl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: adcl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: addl 12(%ebp), %ebx +; X32-NEXT: adcl 16(%ebp), %ecx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 @@ -399,10 +404,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB15_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 
4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'addq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -565,22 +571,24 @@ ; ; X32-LABEL: sub_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: subl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: subl 12(%ebp), %ebx +; X32-NEXT: sbbl 16(%ebp), %ecx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 @@ -589,10 +597,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB23_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'subq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -737,19 +746,21 @@ ; ; X32-LABEL: and_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx ; X32-NEXT: andl $2, %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx @@ -760,10 +771,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB31_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'andq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -780,22 +792,24 @@ ; ; X32-LABEL: and_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: andl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: andl 16(%ebp), %ecx +; X32-NEXT: andl 12(%ebp), %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 @@ -804,10 +818,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB32_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'andq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -973,20 +988,22 @@ ; ; X32-LABEL: or_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: orl $2, %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx @@ -996,10 +1013,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB41_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'orq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -1016,22 +1034,24 @@ ; ; X32-LABEL: or_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: orl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: orl 16(%ebp), %ecx +; X32-NEXT: orl 12(%ebp), %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 @@ -1040,10 +1060,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB42_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'orq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -1209,20 +1230,22 @@ ; ; X32-LABEL: xor_64i: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: xorl $2, %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx @@ -1232,10 +1255,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB51_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'xorq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -1252,22 +1276,24 @@ ; ; X32-LABEL: xor_64r: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: xorl {{[0-9]+}}(%esp), %ebx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: xorl 16(%ebp), %ecx +; X32-NEXT: xorl 12(%ebp), %ebx ; X32-NEXT: movl (%esi), %eax ; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 @@ -1276,10 +1302,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB52_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'xorq'. 
%1 = load atomic i64, i64* %p acquire, align 8 @@ -1406,20 +1433,22 @@ ; ; X32-LABEL: inc_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: addl $1, %ebx ; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl (%esi), %eax @@ -1430,10 +1459,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB58_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; ; SLOW_INC-LABEL: inc_64: @@ -1551,20 +1581,22 @@ ; ; X32-LABEL: dec_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; 
X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: addl $-1, %ebx ; X32-NEXT: adcl $-1, %ecx ; X32-NEXT: movl (%esi), %eax @@ -1575,10 +1607,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB63_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; ; SLOW_INC-LABEL: dec_64: @@ -1681,20 +1714,22 @@ ; ; X32-LABEL: not_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: .cfi_offset %esi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx -; X32-NEXT: xorl %ecx, %ecx -; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%esi) -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp +; X32-NEXT: .cfi_offset %esi, -16 +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) +; X32-NEXT: movl (%esp), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X32-NEXT: notl %ecx ; X32-NEXT: notl %ebx ; X32-NEXT: movl (%esi), %eax @@ -1705,10 +1740,11 @@ ; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB68_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; 
X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; X32-NEXT: retl ; We do not check X86-32 as it cannot do 'notq'. %1 = load atomic i64, i64* %p acquire, align 8 @@ -1803,40 +1839,37 @@ ; ; X32-LABEL: neg_64: ; X32: # %bb.0: -; X32-NEXT: pushl %ebx +; X32-NEXT: pushl %ebp ; X32-NEXT: .cfi_def_cfa_offset 8 -; X32-NEXT: pushl %edi -; X32-NEXT: .cfi_def_cfa_offset 12 +; X32-NEXT: .cfi_offset %ebp, -8 +; X32-NEXT: movl %esp, %ebp +; X32-NEXT: .cfi_def_cfa_register %ebp +; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %esi -; X32-NEXT: .cfi_def_cfa_offset 16 +; X32-NEXT: andl $-8, %esp +; X32-NEXT: subl $8, %esp ; X32-NEXT: .cfi_offset %esi, -16 -; X32-NEXT: .cfi_offset %edi, -12 -; X32-NEXT: .cfi_offset %ebx, -8 -; X32-NEXT: movl {{[0-9]+}}(%esp), %edi -; X32-NEXT: xorl %esi, %esi -; X32-NEXT: xorl %eax, %eax -; X32-NEXT: xorl %edx, %edx +; X32-NEXT: .cfi_offset %ebx, -12 +; X32-NEXT: movl 8(%ebp), %esi +; X32-NEXT: fildll (%esi) +; X32-NEXT: fistpll (%esp) ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: xorl %ebx, %ebx -; X32-NEXT: lock cmpxchg8b (%edi) -; X32-NEXT: movl %eax, %ebx -; X32-NEXT: negl %ebx -; X32-NEXT: sbbl %edx, %esi -; X32-NEXT: movl (%edi), %eax -; X32-NEXT: movl 4(%edi), %edx +; X32-NEXT: subl (%esp), %ebx +; X32-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl (%esi), %eax +; X32-NEXT: movl 4(%esi), %edx ; X32-NEXT: .p2align 4, 0x90 ; X32-NEXT: .LBB73_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: lock cmpxchg8b (%edi) +; X32-NEXT: lock cmpxchg8b (%esi) ; X32-NEXT: jne .LBB73_1 ; X32-NEXT: # %bb.2: # %atomicrmw.end +; X32-NEXT: leal -8(%ebp), %esp ; X32-NEXT: popl %esi -; X32-NEXT: .cfi_def_cfa_offset 12 -; X32-NEXT: popl %edi -; X32-NEXT: .cfi_def_cfa_offset 8 ; X32-NEXT: popl %ebx -; X32-NEXT: .cfi_def_cfa_offset 4 +; X32-NEXT: popl %ebp +; X32-NEXT: .cfi_def_cfa %esp, 4 ; 
X32-NEXT: retl ; We do neg check X86-32 as it canneg do 'negq'. %1 = load atomic i64, i64* %p acquire, align 8 Index: llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll +++ llvm/trunk/test/CodeGen/X86/atomic-non-integer.ll @@ -448,28 +448,17 @@ ; ; X86-NOSSE-LABEL: load_double: ; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %ebx -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 -; X86-NOSSE-NEXT: pushl %esi -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12 -; X86-NOSSE-NEXT: subl $12, %esp +; X86-NOSSE-NEXT: subl $20, %esp ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24 -; X86-NOSSE-NEXT: .cfi_offset %esi, -12 -; X86-NOSSE-NEXT: .cfi_offset %ebx, -8 -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: xorl %eax, %eax -; X86-NOSSE-NEXT: xorl %edx, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: xorl %ebx, %ebx -; X86-NOSSE-NEXT: lock cmpxchg8b (%esi) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: fildll (%eax) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fldl (%esp) -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12 -; X86-NOSSE-NEXT: popl %esi -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 -; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: addl $20, %esp ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 ; X86-NOSSE-NEXT: retl ; @@ -827,28 +816,17 @@ ; ; X86-NOSSE-LABEL: load_double_seq_cst: ; X86-NOSSE: # %bb.0: -; X86-NOSSE-NEXT: pushl %ebx -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 -; X86-NOSSE-NEXT: pushl %esi -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12 -; X86-NOSSE-NEXT: subl $12, %esp +; X86-NOSSE-NEXT: subl $20, %esp ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 24 -; X86-NOSSE-NEXT: 
.cfi_offset %esi, -12 -; X86-NOSSE-NEXT: .cfi_offset %ebx, -8 -; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NOSSE-NEXT: xorl %eax, %eax -; X86-NOSSE-NEXT: xorl %edx, %edx -; X86-NOSSE-NEXT: xorl %ecx, %ecx -; X86-NOSSE-NEXT: xorl %ebx, %ebx -; X86-NOSSE-NEXT: lock cmpxchg8b (%esi) -; X86-NOSSE-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: fildll (%eax) +; X86-NOSSE-NEXT: fistpll {{[0-9]+}}(%esp) +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOSSE-NEXT: movl %ecx, {{[0-9]+}}(%esp) ; X86-NOSSE-NEXT: movl %eax, (%esp) ; X86-NOSSE-NEXT: fldl (%esp) -; X86-NOSSE-NEXT: addl $12, %esp -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 12 -; X86-NOSSE-NEXT: popl %esi -; X86-NOSSE-NEXT: .cfi_def_cfa_offset 8 -; X86-NOSSE-NEXT: popl %ebx +; X86-NOSSE-NEXT: addl $20, %esp ; X86-NOSSE-NEXT: .cfi_def_cfa_offset 4 ; X86-NOSSE-NEXT: retl ; Index: llvm/trunk/test/CodeGen/X86/misched_phys_reg_assign_order.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/misched_phys_reg_assign_order.ll +++ llvm/trunk/test/CodeGen/X86/misched_phys_reg_assign_order.ll @@ -49,4 +49,4 @@ declare i32 @m() -attributes #0 = { "no-frame-pointer-elim-non-leaf" } +attributes #0 = { noimplicitfloat "no-frame-pointer-elim-non-leaf" }