Index: llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h +++ llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h @@ -245,6 +245,11 @@ bool hasDebugInfo() const { return DbgInfoAvailable; } void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; } + // Returns true if we need to generate precise CFI. Currently + // this is equivalent to hasDebugInfo(), but if we ever implement + // async EH, it will require precise CFI as well. + bool usePreciseUnwindInfo() const { return hasDebugInfo(); } + bool callsEHReturn() const { return CallsEHReturn; } void setCallsEHReturn(bool b) { CallsEHReturn = b; } Index: llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp =================================================================== --- llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ llvm/trunk/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -216,6 +216,9 @@ case MCCFIInstruction::OpDefCfaOffset: OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); break; + case MCCFIInstruction::OpAdjustCfaOffset: + OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset()); + break; case MCCFIInstruction::OpDefCfa: OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); break; Index: llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp +++ llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp @@ -103,7 +103,8 @@ const char *getPassName() const override { return "X86 Optimize Call Frame"; } const TargetInstrInfo *TII; - const TargetFrameLowering *TFL; + const X86FrameLowering *TFL; + const X86Subtarget *STI; const MachineRegisterInfo *MRI; static char ID; }; @@ -127,13 +128,15 @@ // No point in running this in 64-bit mode, since some arguments are // passed in-register in all common calling conventions, so the 
pattern // we're looking for will never match. - const X86Subtarget &STI = MF.getSubtarget(); - if (STI.is64Bit()) + if (STI->is64Bit()) return false; - // We can't encode multiple DW_CFA_GNU_args_size in the compact - // unwind encoding that Darwin uses. - if (STI.isTargetDarwin() && !MF.getMMI().getLandingPads().empty()) + // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset + // in the compact unwind encoding that Darwin uses. So, bail if there + // is a danger of that being generated. + if (STI->isTargetDarwin() && + (!MF.getMMI().getLandingPads().empty() || + (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF)))) return false; // You would expect straight-line code between call-frame setup and @@ -216,8 +219,9 @@ } bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) { - TII = MF.getSubtarget().getInstrInfo(); - TFL = MF.getSubtarget().getFrameLowering(); + STI = &MF.getSubtarget(); + TII = STI->getInstrInfo(); + TFL = STI->getFrameLowering(); MRI = &MF.getRegInfo(); if (!isLegal(MF)) @@ -312,7 +316,7 @@ // Check that this particular call sequence is amenable to the // transformation. 
const X86RegisterInfo &RegInfo = *static_cast( - MF.getSubtarget().getRegisterInfo()); + STI->getRegisterInfo()); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); // We expect to enter this at the beginning of a call sequence @@ -455,6 +459,7 @@ for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) { MachineBasicBlock::iterator MOV = *Context.MovVector[Idx]; MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); + MachineBasicBlock::iterator Push = nullptr; if (MOV->getOpcode() == X86::MOV32mi) { unsigned PushOpcode = X86::PUSHi32; // If the operand is a small (8-bit) immediate, we can use a @@ -466,21 +471,20 @@ if (isInt<8>(Val)) PushOpcode = X86::PUSH32i8; } - BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp); + Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)) + .addOperand(PushOp); } else { unsigned int Reg = PushOp.getReg(); // If PUSHrmm is not slow on this target, try to fold the source of the // push into the instruction. - const X86Subtarget &ST = MF.getSubtarget(); - bool SlowPUSHrmm = ST.isAtom() || ST.isSLM(); + bool SlowPUSHrmm = STI->isAtom() || STI->isSLM(); // Check that this is legal to fold. Right now, we're extremely // conservative about that. MachineInstr *DefMov = nullptr; if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) { - MachineInstr *Push = - BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm)); + Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm)); unsigned NumOps = DefMov->getDesc().getNumOperands(); for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) @@ -488,12 +492,18 @@ DefMov->eraseFromParent(); } else { - BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r)) + Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r)) .addReg(Reg) .getInstr(); } } + // For debugging, when using SP-based CFA, we need to adjust the CFA + // offset after each push. 
+ if (!TFL->hasFP(MF) && MF.getMMI().usePreciseUnwindInfo()) + TFL->BuildCFI(MBB, std::next(Push), DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, 4)); + MBB.erase(MOV); } Index: llvm/trunk/lib/Target/X86/X86FrameLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86FrameLowering.h +++ llvm/trunk/lib/Target/X86/X86FrameLowering.h @@ -125,13 +125,13 @@ /// \p MBB will be correctly handled by the target. bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; -private: - uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; - /// Wraps up getting a CFI index and building a MachineInstr for it. void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, MCCFIInstruction CFIInst) const; +private: + uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; + /// Aligns the stack pointer by ANDing it with -MaxAlign. void BuildStackAlignAND(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, Index: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp +++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp @@ -2105,18 +2105,23 @@ unsigned StackAlign = getStackAlignment(); Amount = RoundUpToAlignment(Amount, StackAlign); + MachineModuleInfo &MMI = MF.getMMI(); + const Function *Fn = MF.getFunction(); + bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool DwarfCFI = !WindowsCFI && + (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); + // If we have any exception handlers in this function, and we adjust - // the SP before calls, we may need to indicate this to the unwinder, - // using GNU_ARGS_SIZE. Note that this may be necessary - // even when Amount == 0, because the preceding function may have - // set a non-0 GNU_ARGS_SIZE. 
+ // the SP before calls, we may need to indicate this to the unwinder + // using GNU_ARGS_SIZE. Note that this may be necessary even when + // Amount == 0, because the preceding function may have set a non-0 + // GNU_ARGS_SIZE. // TODO: We don't need to reset this between subsequent functions, // if it didn't change. - bool HasDwarfEHHandlers = - !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && - !MF.getMMI().getLandingPads().empty(); + bool HasDwarfEHHandlers = !WindowsCFI && + !MF.getMMI().getLandingPads().empty(); - if (HasDwarfEHHandlers && !isDestroy && + if (HasDwarfEHHandlers && !isDestroy && MF.getInfo()->getHasPushSequences()) BuildCFI(MBB, I, DL, MCCFIInstruction::createGnuArgsSize(nullptr, Amount)); @@ -2128,15 +2133,37 @@ // (Pushes of argument for frame setup, callee pops for frame destroy) Amount -= InternalAmt; + // If this is a callee-pop calling convention, and we're emitting precise + // SP-based CFI, emit a CFA adjust for the amount the callee popped. + if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF) && + MMI.usePreciseUnwindInfo()) + BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt)); + if (Amount) { // Add Amount to SP to destroy a frame, and subtract to setup. int Offset = isDestroy ? Amount : -Amount; - if (!(MF.getFunction()->optForMinSize() && + if (!(Fn->optForMinSize() && adjustStackWithPops(MBB, I, DL, Offset))) BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false); } + if (DwarfCFI && !hasFP(MF)) { + // If we don't have FP, but need to generate unwind information, + // we need to set the correct CFA offset after the stack adjustment. + // How much we adjust the CFA offset depends on whether we're emitting + // CFI only for EH purposes or for debugging. EH only requires the CFA + // offset to be correct at each call site, while for debugging we want + // it to be more precise. 
+ int CFAOffset = Amount; + if (!MMI.usePreciseUnwindInfo()) + CFAOffset += InternalAmt; + CFAOffset = isDestroy ? -CFAOffset : CFAOffset; + BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset)); + } + return; } Index: llvm/trunk/test/CodeGen/X86/debugloc-argsize.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/debugloc-argsize.ll +++ llvm/trunk/test/CodeGen/X86/debugloc-argsize.ll @@ -30,7 +30,7 @@ declare void @__cxa_end_catch() -attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #0 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { optsize } attributes #2 = { nounwind } Index: llvm/trunk/test/CodeGen/X86/fold-push.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/fold-push.ll +++ llvm/trunk/test/CodeGen/X86/fold-push.ll @@ -3,7 +3,7 @@ declare void @foo(i32 %r) -define void @test(i32 %a, i32 %b) optsize { +define void @test(i32 %a, i32 %b) optsize nounwind { ; CHECK-LABEL: test: ; CHECK: movl [[EAX:%e..]], (%esp) ; CHECK-NEXT: pushl [[EAX]] @@ -22,7 +22,7 @@ ret void } -define void @test_min(i32 %a, i32 %b) minsize { +define void @test_min(i32 %a, i32 %b) minsize nounwind { ; CHECK-LABEL: test_min: ; CHECK: movl [[EAX:%e..]], (%esp) ; CHECK-NEXT: pushl [[EAX]] Index: llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll =================================================================== --- 
llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll +++ llvm/trunk/test/CodeGen/X86/pop-stack-cleanup.ll @@ -9,7 +9,7 @@ declare void @param8(i64, i64, i64, i64, i64, i64, i64, i64) -define void @test() minsize { +define void @test() minsize nounwind { ; CHECK-LABEL: test: ; CHECK: calll _param1 ; CHECK-NEXT: popl %eax @@ -48,7 +48,7 @@ ret void } -define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize { +define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize nounwind { ; CHECK-LABEL: spill: ; CHECK-DAG: movl %ecx, ; CHECK-DAG: movl %edx, @@ -63,7 +63,7 @@ ret void } -define void @test_linux64(i32 %size) minsize { +define void @test_linux64(i32 %size) minsize nounwind { ; LINUX64-LABEL: test_linux64: ; LINUX64: pushq %rbp ; LINUX64: callq param8 Index: llvm/trunk/test/CodeGen/X86/push-cfi-debug.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/push-cfi-debug.ll +++ llvm/trunk/test/CodeGen/X86/push-cfi-debug.ll @@ -0,0 +1,53 @@ +; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s + + +; Function Attrs: optsize +declare void @foo(i32, i32) #0 +declare x86_stdcallcc void @stdfoo(i32, i32) #0 + +; CHECK-LABEL: test1: +; CHECK: subl $8, %esp +; CHECK: .cfi_adjust_cfa_offset 8 +; CHECK: pushl $2 +; CHECK: .cfi_adjust_cfa_offset 4 +; CHECK: pushl $1 +; CHECK: .cfi_adjust_cfa_offset 4 +; CHECK: calll foo +; CHECK: addl $16, %esp +; CHECK: .cfi_adjust_cfa_offset -16 +; CHECK: subl $8, %esp +; CHECK: .cfi_adjust_cfa_offset 8 +; CHECK: pushl $4 +; CHECK: .cfi_adjust_cfa_offset 4 +; CHECK: pushl $3 +; CHECK: .cfi_adjust_cfa_offset 4 +; CHECK: calll stdfoo +; CHECK: .cfi_adjust_cfa_offset -8 +; CHECK: addl $8, %esp +; CHECK: .cfi_adjust_cfa_offset -8 +define void @test1() #0 { +entry: + tail call void @foo(i32 1, i32 2) #1, !dbg !10 + tail call x86_stdcallcc void @stdfoo(i32 3, i32 4) #1, !dbg !11 + ret void, !dbg !12 +} + +attributes #0 = { nounwind optsize } + +!llvm.dbg.cu = !{!0} 
+!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250289)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3) +!1 = !DIFile(filename: "foo.c", directory: "foo") +!2 = !{} +!3 = !{!4} +!4 = distinct !DISubprogram(name: "test1", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, function: void ()* @test1, variables: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{!"clang version 3.8.0 (trunk 250289)"} +!10 = !DILocation(line: 4, column: 3, scope: !4) +!11 = !DILocation(line: 5, column: 3, scope: !4) +!12 = !DILocation(line: 6, column: 1, scope: !4) Index: llvm/trunk/test/CodeGen/X86/push-cfi-obj.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/push-cfi-obj.ll +++ llvm/trunk/test/CodeGen/X86/push-cfi-obj.ll @@ -1,36 +1,36 @@ -; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s +; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=LINUX ; RUN: llc < %s -mtriple=i686-darwin-macosx10.7 -filetype=obj | llvm-readobj -sections | FileCheck -check-prefix=DARWIN %s ; On darwin, check that we manage to generate the compact unwind section ; DARWIN: Name: __compact_unwind ; DARWIN: Segment: __LD -; CHECK: Index: 8 -; CHECK-NEXT: Name: .eh_frame (41) -; CHECK-NEXT: Type: SHT_PROGBITS (0x1) -; CHECK-NEXT: Flags [ (0x2) -; CHECK-NEXT: SHF_ALLOC (0x2) -; CHECK-NEXT: ] -; CHECK-NEXT: Address: 0x0 -; CHECK-NEXT: Offset: 0x64 -; CHECK-NEXT: Size: 60 -; CHECK-NEXT: Link: 0 -; CHECK-NEXT: Info: 0 -; CHECK-NEXT: AddressAlignment: 4 -; CHECK-NEXT: EntrySize: 0 -; CHECK-NEXT: Relocations [ -; CHECK-NEXT: ] -; CHECK-NEXT: SectionData ( -; CHECK-NEXT: 
0000: 1C000000 00000000 017A504C 5200017C |.........zPLR..|| -; CHECK-NEXT: 0010: 08070000 00000000 1B0C0404 88010000 |................| -; CHECK-NEXT: 0020: 18000000 24000000 00000000 19000000 |....$...........| -; CHECK-NEXT: 0030: 04000000 00430E10 2E100000 |.....C......| -; CHECK-NEXT: ) +; LINUX: Index: 8 +; LINUX-NEXT: Name: .eh_frame (41) +; LINUX-NEXT: Type: SHT_PROGBITS (0x1) +; LINUX-NEXT: Flags [ (0x2) +; LINUX-NEXT: SHF_ALLOC (0x2) +; LINUX-NEXT: ] +; LINUX-NEXT: Address: 0x0 +; LINUX-NEXT: Offset: 0x68 +; LINUX-NEXT: Size: 64 +; LINUX-NEXT: Link: 0 +; LINUX-NEXT: Info: 0 +; LINUX-NEXT: AddressAlignment: 4 +; LINUX-NEXT: EntrySize: 0 +; LINUX-NEXT: Relocations [ +; LINUX-NEXT: ] +; LINUX-NEXT: SectionData ( +; LINUX-NEXT: 0000: 1C000000 00000000 017A504C 5200017C |.........zPLR..|| +; LINUX-NEXT: 0010: 08070000 00000000 1B0C0404 88010000 |................| +; LINUX-NEXT: 0020: 1C000000 24000000 00000000 1D000000 |....$...........| +; LINUX-NEXT: 0030: 04000000 00410E08 8502420D 05432E10 |.....A....B..C..| +; LINUX-NEXT: ) declare i32 @__gxx_personality_v0(...) 
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d) -define void @test() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define void @test() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @good(i32 1, i32 2, i32 3, i32 4) to label %continue unwind label %cleanup @@ -41,3 +41,5 @@ cleanup ret void } + +attributes #0 = { optsize "no-frame-pointer-elim"="true" } Index: llvm/trunk/test/CodeGen/X86/push-cfi.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/push-cfi.ll +++ llvm/trunk/test/CodeGen/X86/push-cfi.ll @@ -1,21 +1,51 @@ -; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s +; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX -check-prefix=CHECK +; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=DARWIN -check-prefix=CHECK declare i32 @__gxx_personality_v0(...) declare void @good(i32 %a, i32 %b, i32 %c, i32 %d) declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) declare void @empty() -; We use an invoke, and expect a .cfi_escape GNU_ARGS_SIZE with size 16 -; before the invocation -; CHECK-LABEL: test1: -; CHECK: .cfi_escape 0x2e, 0x10 -; CHECK-NEXT: pushl $4 -; CHECK-NEXT: pushl $3 -; CHECK-NEXT: pushl $2 -; CHECK-NEXT: pushl $1 -; CHECK-NEXT: call -; CHECK-NEXT: addl $16, %esp -define void @test1() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; When we use an invoke, and have FP, we expect a .cfi_escape GNU_ARGS_SIZE +; with size 16 before the invocation. Without FP, we expect .cfi_adjust_cfa_offset +; before and after. +; Darwin should not generate pushes in either circumstance.
+; CHECK-LABEL: test1_nofp: +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX: .cfi_adjust_cfa_offset 16 +; LINUX-NEXT: pushl $4 +; LINUX-NEXT: pushl $3 +; LINUX-NEXT: pushl $2 +; LINUX-NEXT: pushl $1 +; LINUX-NEXT: call +; LINUX-NEXT: addl $16, %esp +; LINUX: .cfi_adjust_cfa_offset -16 +; DARWIN-NOT: .cfi_escape +; DARWIN-NOT: pushl +define void @test1_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + invoke void @good(i32 1, i32 2, i32 3, i32 4) + to label %continue unwind label %cleanup +continue: + ret void +cleanup: + landingpad { i8*, i32 } + cleanup + ret void +} + +; CHECK-LABEL: test1_fp: +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX-NEXT: pushl $4 +; LINUX-NEXT: pushl $3 +; LINUX-NEXT: pushl $2 +; LINUX-NEXT: pushl $1 +; LINUX-NEXT: call +; LINUX-NEXT: addl $16, %esp +; DARWIN: pushl %ebp +; DARWIN-NOT: .cfi_escape +; DARWIN-NOT: pushl +define void @test1_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @good(i32 1, i32 2, i32 3, i32 4) to label %continue unwind label %cleanup @@ -28,27 +58,69 @@ } ; If the function has no handlers, we don't need to generate GNU_ARGS_SIZE, -; even if it has an unwind table. -; CHECK-LABEL: test2: +; even if it has an unwind table. Without FP, we still need cfi_adjust_cfa_offset, +; so darwin should not generate pushes. 
+; CHECK-LABEL: test2_nofp: +; LINUX-NOT: .cfi_escape +; LINUX: .cfi_adjust_cfa_offset 16 +; LINUX-NEXT: pushl $4 +; LINUX-NEXT: pushl $3 +; LINUX-NEXT: pushl $2 +; LINUX-NEXT: pushl $1 +; LINUX-NEXT: call +; LINUX-NEXT: addl $16, %esp +; LINUX: .cfi_adjust_cfa_offset -16 +; DARWIN-NOT: .cfi_escape +; DARWIN-NOT: pushl +define void @test2_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + call void @good(i32 1, i32 2, i32 3, i32 4) + ret void +} + +; CHECK-LABEL: test2_fp: ; CHECK-NOT: .cfi_escape +; CHECK-NOT: .cfi_adjust_cfa_offset ; CHECK: pushl $4 ; CHECK-NEXT: pushl $3 ; CHECK-NEXT: pushl $2 ; CHECK-NEXT: pushl $1 ; CHECK-NEXT: call -; CHECK-NEXT: addl $16, %esp -define void @test2() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; CHECK-NEXT: addl $24, %esp +define void @test2_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: call void @good(i32 1, i32 2, i32 3, i32 4) ret void } -; If we did not end up using any pushes, no need for GNU_ARGS_SIZE anywhere -; CHECK-LABEL: test3: -; CHECK-NOT: .cfi_escape -; CHECK-NOT: pushl -; CHECK: retl -define void @test3() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or +; cfi_adjust_cfa_offset. +; CHECK-LABEL: test3_nofp: +; LINUX-NOT: .cfi_escape +; LINUX-NOT: .cfi_adjust_cfa_offset +; LINUX-NOT: pushl +; LINUX: retl +define void @test3_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + invoke void @empty() + to label %continue unwind label %cleanup +continue: + ret void +cleanup: + landingpad { i8*, i32 } + cleanup + ret void +} + +; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or +; cfi_adjust_cfa_offset. 
+; CHECK-LABEL: test3_fp: +; LINUX: pushl %ebp +; LINUX-NOT: .cfi_escape +; LINUX-NOT: .cfi_adjust_cfa_offset +; LINUX-NOT: pushl +; LINUX: retl +define void @test3_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @empty() to label %continue unwind label %cleanup @@ -62,24 +134,24 @@ ; Different sized stacks need different GNU_ARGS_SIZEs ; CHECK-LABEL: test4: -; CHECK: .cfi_escape 0x2e, 0x10 -; CHECK-NEXT: pushl $4 -; CHECK-NEXT: pushl $3 -; CHECK-NEXT: pushl $2 -; CHECK-NEXT: pushl $1 -; CHECK-NEXT: call -; CHECK-NEXT: addl $16, %esp -; CHECK: .cfi_escape 0x2e, 0x20 -; CHECK-NEXT: subl $8, %esp -; CHECK-NEXT: pushl $11 -; CHECK-NEXT: pushl $10 -; CHECK-NEXT: pushl $9 -; CHECK-NEXT: pushl $8 -; CHECK-NEXT: pushl $7 -; CHECK-NEXT: pushl $6 -; CHECK-NEXT: calll large -; CHECK-NEXT: addl $32, %esp -define void @test4() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX-NEXT: pushl $4 +; LINUX-NEXT: pushl $3 +; LINUX-NEXT: pushl $2 +; LINUX-NEXT: pushl $1 +; LINUX-NEXT: call +; LINUX-NEXT: addl $16, %esp +; LINUX: .cfi_escape 0x2e, 0x20 +; LINUX: subl $8, %esp +; LINUX-NEXT: pushl $11 +; LINUX-NEXT: pushl $10 +; LINUX-NEXT: pushl $9 +; LINUX-NEXT: pushl $8 +; LINUX-NEXT: pushl $7 +; LINUX-NEXT: pushl $6 +; LINUX-NEXT: calll large +; LINUX-NEXT: addl $32, %esp +define void @test4() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @good(i32 1, i32 2, i32 3, i32 4) to label %continue1 unwind label %cleanup @@ -95,18 +167,48 @@ } ; If we did use pushes, we need to reset GNU_ARGS_SIZE before a call -; without parameters -; CHECK-LABEL: test5: -; CHECK: .cfi_escape 0x2e, 0x10 -; CHECK-NEXT: pushl $4 -; CHECK-NEXT: pushl $3 -; CHECK-NEXT: pushl $2 -; CHECK-NEXT: pushl $1 -; CHECK-NEXT: call -; CHECK-NEXT: addl $16, %esp -; CHECK: .cfi_escape 0x2e, 0x00 -; CHECK-NEXT: call -define void @test5() optsize personality i8* bitcast 
(i32 (...)* @__gxx_personality_v0 to i8*) { +; without parameters, but don't need to adjust the cfa offset +; CHECK-LABEL: test5_nofp: +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX: .cfi_adjust_cfa_offset 16 +; LINUX-NEXT: pushl $4 +; LINUX-NEXT: pushl $3 +; LINUX-NEXT: pushl $2 +; LINUX-NEXT: pushl $1 +; LINUX-NEXT: call +; LINUX-NEXT: addl $16, %esp +; LINUX: .cfi_adjust_cfa_offset -16 +; LINUX-NOT: .cfi_adjust_cfa_offset +; LINUX: .cfi_escape 0x2e, 0x00 +; LINUX-NOT: .cfi_adjust_cfa_offset +; LINUX: call +define void @test5_nofp() #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + invoke void @good(i32 1, i32 2, i32 3, i32 4) + to label %continue1 unwind label %cleanup +continue1: + invoke void @empty() + to label %continue2 unwind label %cleanup +continue2: + ret void +cleanup: + landingpad { i8*, i32 } + cleanup + ret void +} + +; CHECK-LABEL: test5_fp: +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX-NEXT: pushl $4 +; LINUX-NEXT: pushl $3 +; LINUX-NEXT: pushl $2 +; LINUX-NEXT: pushl $1 +; LINUX-NEXT: call +; LINUX-NEXT: addl $16, %esp +; LINUX: .cfi_escape 0x2e, 0x00 +; LINUX-NOT: .cfi_adjust_cfa_offset +; LINUX: call +define void @test5_fp() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @good(i32 1, i32 2, i32 3, i32 4) to label %continue1 unwind label %cleanup @@ -121,13 +223,13 @@ ret void } -; This is actually inefficient - we don't need to repeat the .cfi_escape twice. +; FIXME: This is actually inefficient - we don't need to repeat the .cfi_escape twice. 
; CHECK-LABEL: test6: -; CHECK: .cfi_escape 0x2e, 0x10 -; CHECK: call -; CHECK: .cfi_escape 0x2e, 0x10 -; CHECK: call -define void @test6() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX: call +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX: call +define void @test6() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @good(i32 1, i32 2, i32 3, i32 4) to label %continue1 unwind label %cleanup @@ -141,3 +243,41 @@ cleanup ret void } + +; Darwin should generate pushes in the presence of FP and an unwind table, +; but not FP and invoke. +; CHECK-LABEL: test7: +; DARWIN: pushl %ebp +; DARWIN: movl %esp, %ebp +; DARWIN: .cfi_def_cfa_register %ebp +; DARWIN-NOT: .cfi_adjust_cfa_offset +; DARWIN: pushl $4 +; DARWIN-NEXT: pushl $3 +; DARWIN-NEXT: pushl $2 +; DARWIN-NEXT: pushl $1 +; DARWIN-NEXT: call +define void @test7() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + call void @good(i32 1, i32 2, i32 3, i32 4) + ret void +} + +; CHECK-LABEL: test8: +; DARWIN: pushl %ebp +; DARWIN: movl %esp, %ebp +; DARWIN-NOT: .cfi_adjust_cfa_offset +; DARWIN-NOT: pushl +define void @test8() #1 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + invoke void @good(i32 1, i32 2, i32 3, i32 4) + to label %continue unwind label %cleanup +continue: + ret void +cleanup: + landingpad { i8*, i32 } + cleanup + ret void +} + +attributes #0 = { optsize } +attributes #1 = { optsize "no-frame-pointer-elim"="true" }