Index: llvm/trunk/lib/Target/X86/X86CallingConv.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.td
+++ llvm/trunk/lib/Target/X86/X86CallingConv.td
@@ -226,6 +226,7 @@
   CCIfType<[i8, i16], CCPromoteToType<i32>>,
 
   // The 'nest' parameter, if any, is passed in R10.
+  CCIfNest<CCIfSubtarget<"isTarget64BitILP32()", CCAssignToReg<[R10D]>>>,
   CCIfNest<CCAssignToReg<[R10]>>,
 
   // The first 6 integer arguments are passed in integer registers.
Index: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
@@ -1315,7 +1315,7 @@
 /// and the properties of the function either one or two registers will be
 /// needed. Set primary to true for the first register, false for the second.
 static unsigned
-GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
   CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
 
   // Erlang stuff.
@@ -1326,8 +1326,12 @@
       return Primary ? X86::EBX : X86::EDI;
   }
 
-  if (Is64Bit)
-    return Primary ? X86::R11 : X86::R12;
+  if (Is64Bit) {
+    if (IsLP64)
+      return Primary ? X86::R11 : X86::R12;
+    else
+      return Primary ? X86::R11D : X86::R12D;
+  }
 
   bool IsNested = HasNestArgument(&MF);
 
@@ -1355,10 +1359,11 @@
   uint64_t StackSize;
   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
   bool Is64Bit = STI.is64Bit();
+  const bool IsLP64 = STI.isTarget64BitLP64();
   unsigned TlsReg, TlsOffset;
   DebugLoc DL;
 
-  unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+  unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
   assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
          "Scratch register is live-in");
 
@@ -1396,7 +1401,7 @@
   }
 
   if (IsNested)
-    allocMBB->addLiveIn(X86::R10);
+    allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
 
   MF.push_front(allocMBB);
   MF.push_front(checkMBB);
@@ -1409,7 +1414,7 @@
   if (Is64Bit) {
     if (STI.isTargetLinux()) {
       TlsReg = X86::FS;
-      TlsOffset = 0x70;
+      TlsOffset = IsLP64 ? 0x70 : 0x40;
     } else if (STI.isTargetDarwin()) {
       TlsReg = X86::GS;
       TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
@@ -1424,12 +1429,12 @@
     }
 
     if (CompareStackPointer)
-      ScratchReg = X86::RSP;
+      ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
     else
-      BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
 
-    BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
+    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
       .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
   } else {
     if (STI.isTargetLinux()) {
@@ -1463,11 +1468,11 @@
     bool SaveScratch2;
     if (CompareStackPointer) {
       // The primary scratch register is available for holding the TLS offset.
-      ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+      ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
       SaveScratch2 = false;
     } else {
       // Need to use a second register to hold the TLS offset
-      ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+      ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
 
       // Unfortunately, with fastcc the second scratch register may hold an
       // argument.
@@ -1505,15 +1510,21 @@
 
     // Functions with nested arguments use R10, so it needs to be saved across
     // the call to _morestack
+    const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
+    const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
+    const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
+    const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
+    const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
+
     if (IsNested)
-      BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
+      BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
 
-    BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
+    BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
       .addImm(StackSize);
-    BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
+    BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
       .addImm(X86FI->getArgumentStackSize());
-    MF.getRegInfo().setPhysRegUsed(X86::R10);
-    MF.getRegInfo().setPhysRegUsed(X86::R11);
+    MF.getRegInfo().setPhysRegUsed(Reg10);
+    MF.getRegInfo().setPhysRegUsed(Reg11);
   } else {
     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
       .addImm(X86FI->getArgumentStackSize());
@@ -1567,6 +1578,7 @@
           ->getSlotSize();
   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
   const bool Is64Bit = STI.is64Bit();
+  const bool IsLP64 = STI.isTarget64BitLP64();
   DebugLoc DL;
   // HiPE-specific values
   const unsigned HipeLeafWords = 24;
@@ -1660,7 +1672,7 @@
       SPLimitOffset = 0x4c;
     }
 
-    ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+    ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
     assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
            "HiPE prologue scratch register is live-in");
 
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h
@@ -998,8 +998,7 @@
                                            MachineBasicBlock *BB) const;
 
     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
-                                            MachineBasicBlock *BB,
-                                            bool Is64Bit) const;
+                                            MachineBasicBlock *BB) const;
 
     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                           MachineBasicBlock *BB) const;
Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
@@ -14933,7 +14933,7 @@
 
   EVT VT = Op.getNode()->getValueType(0);
   bool Is64Bit = Subtarget->is64Bit();
-  EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
+  EVT SPTy = getPointerTy();
 
   if (SplitStack) {
     MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -14951,7 +14951,7 @@
     }
 
     const TargetRegisterClass *AddrRegClass =
-      getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+      getRegClassFor(getPointerTy());
     unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
     Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
     SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
@@ -14960,7 +14960,7 @@
     return DAG.getMergeValues(Ops1, dl);
   } else {
     SDValue Flag;
-    unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
+    const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX);
 
     Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
     Flag = Chain.getValue(1);
@@ -18966,8 +18966,8 @@
 }
 
 MachineBasicBlock *
-X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
-                                        bool Is64Bit) const {
+X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
+                                        MachineBasicBlock *BB) const {
   MachineFunction *MF = BB->getParent();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
@@ -18975,8 +18975,11 @@
 
   assert(MF->shouldSplitStack());
 
-  unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
-  unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
+  const bool Is64Bit = Subtarget->is64Bit();
+  const bool IsLP64 = Subtarget->isTarget64BitLP64();
+
+  const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
+  const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
 
   // BB:
   // ... [Till the alloca]
@@ -19000,14 +19003,14 @@
 
   MachineRegisterInfo &MRI = MF->getRegInfo();
   const TargetRegisterClass *AddrRegClass =
-    getRegClassFor(Is64Bit ? MVT::i64:MVT::i32);
+    getRegClassFor(getPointerTy());
 
   unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
     bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
     tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
     SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
     sizeVReg = MI->getOperand(1).getReg(),
-    physSPReg = Is64Bit ? X86::RSP : X86::ESP;
+    physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
 
   MachineFunction::iterator MBBIter = BB;
   ++MBBIter;
@@ -19023,9 +19026,9 @@
   // Add code to the main basic block to check if the stack limit has been hit,
   // and if so, jump to mallocMBB otherwise to bumpMBB.
   BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
-  BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
+  BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
    .addReg(tmpSPVReg).addReg(sizeVReg);
-  BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
+  BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
     .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
     .addReg(SPLimitVReg);
   BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
@@ -19043,7 +19046,7 @@
                                 .getSubtargetImpl()
                                 ->getRegisterInfo()
                                 ->getCallPreservedMask(CallingConv::C);
-  if (Is64Bit) {
+  if (IsLP64) {
     BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
       .addReg(sizeVReg);
     BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
@@ -19051,6 +19054,14 @@
       .addRegMask(RegMask)
       .addReg(X86::RDI, RegState::Implicit)
       .addReg(X86::RAX, RegState::ImplicitDefine);
+  } else if (Is64Bit) {
+    BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
+      .addReg(sizeVReg);
+    BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+      .addExternalSymbol("__morestack_allocate_stack_space")
+      .addRegMask(RegMask)
+      .addReg(X86::EDI, RegState::Implicit)
+      .addReg(X86::EAX, RegState::ImplicitDefine);
   } else {
     BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
       .addImm(12);
@@ -19066,7 +19077,7 @@
       .addImm(16);
 
   BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
-    .addReg(Is64Bit ? X86::RAX : X86::EAX);
+    .addReg(IsLP64 ? X86::RAX : X86::EAX);
   BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
 
   // Set up the CFG correctly.
@@ -19500,9 +19511,8 @@
   case X86::WIN_ALLOCA:
     return EmitLoweredWinAlloca(MI, BB);
   case X86::SEG_ALLOCA_32:
-    return EmitLoweredSegAlloca(MI, BB, false);
   case X86::SEG_ALLOCA_64:
-    return EmitLoweredSegAlloca(MI, BB, true);
+    return EmitLoweredSegAlloca(MI, BB);
   case X86::TLSCall_32:
   case X86::TLSCall_64:
     return EmitLoweredTLSCall(MI, BB);
Index: llvm/trunk/lib/Target/X86/X86InstrCompiler.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrCompiler.td
+++ llvm/trunk/lib/Target/X86/X86InstrCompiler.td
@@ -46,11 +46,11 @@
 def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
                           "#ADJCALLSTACKDOWN",
                           [(X86callseq_start timm:$amt)]>,
-                          Requires<[Not64BitMode]>;
+                          Requires<[NotLP64]>;
 def ADJCALLSTACKUP32   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
                           "#ADJCALLSTACKUP",
                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
-                          Requires<[Not64BitMode]>;
+                          Requires<[NotLP64]>;
 }
 
 // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
@@ -62,11 +62,11 @@
 def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
                           "#ADJCALLSTACKDOWN",
                           [(X86callseq_start timm:$amt)]>,
-                          Requires<[In64BitMode]>;
+                          Requires<[IsLP64]>;
 def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
                           "#ADJCALLSTACKUP",
                           [(X86callseq_end timm:$amt1, timm:$amt2)]>,
-                          Requires<[In64BitMode]>;
+                          Requires<[IsLP64]>;
 }
 
@@ -118,7 +118,7 @@
                       "# variable sized alloca for segmented stacks",
                       [(set GR32:$dst,
                          (X86SegAlloca GR32:$size))]>,
-                    Requires<[Not64BitMode]>;
+                    Requires<[NotLP64]>;
 
 let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
 def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
Index: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
@@ -101,8 +101,8 @@
 
 X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     : X86GenInstrInfo(
-          (STI.is64Bit() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
-          (STI.is64Bit() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
+          (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
+          (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
       Subtarget(STI), RI(STI) {
 
   static const X86OpTblEntry OpTbl2Addr[] = {
Index: llvm/trunk/lib/Target/X86/X86InstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td
@@ -746,6 +746,8 @@
                    AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
 def In64BitMode  : Predicate<"Subtarget->is64Bit()">,
                    AssemblerPredicate<"Mode64Bit", "64-bit mode">;
+def IsLP64       : Predicate<"Subtarget->isTarget64BitLP64()">;
+def NotLP64      : Predicate<"!Subtarget->isTarget64BitLP64()">;
 def In16BitMode  : Predicate<"Subtarget->is16Bit()">,
                    AssemblerPredicate<"Mode16Bit", "16-bit mode">;
 def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,
Index: llvm/trunk/test/CodeGen/X86/segmented-stacks-dynamic.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/segmented-stacks-dynamic.ll
+++ llvm/trunk/test/CodeGen/X86/segmented-stacks-dynamic.ll
@@ -1,7 +1,9 @@
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -verify-machineinstrs | FileCheck %s -check-prefix=X32ABI
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
@@ -61,6 +63,26 @@
 ; X64-NEXT: callq __morestack_allocate_stack_space
 ; X64: movq %rax, %rdi
 
+; X32ABI-LABEL: test_basic:
+
+; X32ABI: cmpl %fs:64, %esp
+; X32ABI-NEXT: ja .LBB0_2
+
+; X32ABI: movl $24, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
+; X32ABI: movl %esp, %[[EDI:edi|eax]]
+; X32ABI: subl %{{.*}}, %[[EDI]]
+; X32ABI-NEXT: cmpl %[[EDI]], %fs:64
+
+; X32ABI: movl %[[EDI]], %esp
+
+; X32ABI: movl %{{.*}}, %edi
+; X32ABI-NEXT: callq __morestack_allocate_stack_space
+; X32ABI: movl %eax, %edi
+
 }
 
 attributes #0 = { "split-stack" }
Index: llvm/trunk/test/CodeGen/X86/segmented-stacks.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/segmented-stacks.ll
+++ llvm/trunk/test/CodeGen/X86/segmented-stacks.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -verify-machineinstrs | FileCheck %s -check-prefix=X32ABI
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
@@ -9,6 +10,7 @@
 ; We used to crash with filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -filetype=obj
@@ -51,6 +53,16 @@
 ; X64-Linux-NEXT: callq __morestack
 ; X64-Linux-NEXT: ret
 
+; X32ABI-LABEL: test_basic:
+
+; X32ABI: cmpl %fs:64, %esp
+; X32ABI-NEXT: ja .LBB0_2
+
+; X32ABI: movl $40, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
 ; X32-Darwin-LABEL: test_basic:
 
 ; X32-Darwin: movl $432, %ecx
@@ -129,6 +141,16 @@
 ; X64-Linux-NEXT: ret
 ; X64-Linux-NEXT: movq %rax, %r10
 
+; X32ABI: cmpl %fs:64, %esp
+; X32ABI-NEXT: ja .LBB1_2
+
+; X32ABI: movl %r10d, %eax
+; X32ABI-NEXT: movl $56, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+; X32ABI-NEXT: movq %rax, %r10
+
 ; X32-Darwin: movl $432, %edx
 ; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
 ; X32-Darwin-NEXT: ja LBB1_2
@@ -202,6 +224,15 @@
 ; X64-Linux-NEXT: callq __morestack
 ; X64-Linux-NEXT: ret
 
+; X32ABI: leal -40008(%rsp), %r11d
+; X32ABI-NEXT: cmpl %fs:64, %r11d
+; X32ABI-NEXT: ja .LBB2_2
+
+; X32ABI: movl $40008, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
 ; X32-Darwin: leal -40012(%esp), %ecx
 ; X32-Darwin-NEXT: movl $432, %eax
 ; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
@@ -276,6 +307,16 @@
 ; X64-Linux-NEXT: callq __morestack
 ; X64-Linux-NEXT: ret
 
+; X32ABI-LABEL: test_fastcc:
+
+; X32ABI: cmpl %fs:64, %esp
+; X32ABI-NEXT: ja .LBB3_2
+
+; X32ABI: movl $40, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
 ; X32-Darwin-LABEL: test_fastcc:
 
 ; X32-Darwin: movl $432, %eax
@@ -356,6 +397,17 @@
 ; X64-Linux-NEXT: callq __morestack
 ; X64-Linux-NEXT: ret
 
+; X32ABI-LABEL: test_fastcc_large:
+
+; X32ABI: leal -40008(%rsp), %r11d
+; X32ABI-NEXT: cmpl %fs:64, %r11d
+; X32ABI-NEXT: ja .LBB4_2
+
+; X32ABI: movl $40008, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
 ; X32-Darwin-LABEL: test_fastcc_large:
 
 ; X32-Darwin: leal -40012(%esp), %eax
@@ -446,6 +498,9 @@
 ; X64-Linux-LABEL: test_nostack:
 ; X32-Linux-NOT: callq __morestack
 
+; X32ABI-LABEL: test_nostack:
+; X32ABI-NOT: callq __morestack
+
 ; X32-Darwin-LABEL: test_nostack:
 ; X32-Darwin-NOT: calll __morestack