Index: lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp =================================================================== --- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -228,6 +228,9 @@ case MCCFIInstruction::OpDefCfaOffset: OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); break; + case MCCFIInstruction::OpAdjustCfaOffset: + OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset()); + break; case MCCFIInstruction::OpDefCfa: OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); break; Index: lib/Target/X86/X86CallFrameOptimization.cpp =================================================================== --- lib/Target/X86/X86CallFrameOptimization.cpp +++ lib/Target/X86/X86CallFrameOptimization.cpp @@ -131,9 +131,12 @@ if (STI.is64Bit()) return false; - // We can't encode multiple DW_CFA_GNU_args_size in the compact - // unwind encoding that Darwin uses. - if (STI.isTargetDarwin() && !MF.getMMI().getLandingPads().empty()) + // We can't encode multiple DW_CFA_GNU_args_size or DW_CFA_def_cfa_offset + // in the compact unwind encoding that Darwin uses. So, bail if there + // is a danger of that being generated. 
+ if (STI.isTargetDarwin() && + (!MF.getMMI().getLandingPads().empty() || + (MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF)))) return false; // You would expect straight-line code between call-frame setup and @@ -455,6 +458,7 @@ for (int Idx = (Context.ExpectedDist / 4) - 1; Idx >= 0; --Idx) { MachineBasicBlock::iterator MOV = *Context.MovVector[Idx]; MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands); + MachineBasicBlock::iterator Push = nullptr; if (MOV->getOpcode() == X86::MOV32mi) { unsigned PushOpcode = X86::PUSHi32; // If the operand is a small (8-bit) immediate, we can use a @@ -466,7 +470,8 @@ if (isInt<8>(Val)) PushOpcode = X86::PUSH32i8; } - BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).addOperand(PushOp); + Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)) + .addOperand(PushOp); } else { unsigned int Reg = PushOp.getReg(); @@ -479,8 +484,7 @@ // conservative about that. MachineInstr *DefMov = nullptr; if (!SlowPUSHrmm && (DefMov = canFoldIntoRegPush(FrameSetup, Reg))) { - MachineInstr *Push = - BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm)); + Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32rmm)); unsigned NumOps = DefMov->getDesc().getNumOperands(); for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) @@ -488,12 +492,17 @@ DefMov->eraseFromParent(); } else { - BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r)) + Push = BuildMI(MBB, Context.Call, DL, TII->get(X86::PUSH32r)) .addReg(Reg) .getInstr(); } } + // For debugging, we need to adjust the CFA offset after each push. 
+ if (MF.getMMI().hasDebugInfo()) + static_cast(TFL)->BuildCFI(MBB, std::next(Push), DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, 4)); + MBB.erase(MOV); } Index: lib/Target/X86/X86FrameLowering.h =================================================================== --- lib/Target/X86/X86FrameLowering.h +++ lib/Target/X86/X86FrameLowering.h @@ -123,6 +123,10 @@ /// \p MBB will be correctly handled by the target. bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; + /// Wraps up getting a CFI index and building a MachineInstr for it. + void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + DebugLoc DL, MCCFIInstruction CFIInst) const; + private: /// convertArgMovsToPushes - This method tries to convert a call sequence /// that uses sub and mov instructions to put the argument onto the stack @@ -135,10 +139,6 @@ uint64_t calculateMaxStackAlign(const MachineFunction &MF) const; - /// Wraps up getting a CFI index and building a MachineInstr for it. - void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - DebugLoc DL, MCCFIInstruction CFIInst) const; - /// Aligns the stack pointer by ANDing it with -MaxAlign. void BuildStackAlignAND(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, Index: lib/Target/X86/X86FrameLowering.cpp =================================================================== --- lib/Target/X86/X86FrameLowering.cpp +++ lib/Target/X86/X86FrameLowering.cpp @@ -2086,6 +2086,12 @@ unsigned StackAlign = getStackAlignment(); Amount = RoundUpToAlignment(Amount, StackAlign); + MachineModuleInfo &MMI = MF.getMMI(); + const Function *Fn = MF.getFunction(); + bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); + bool DwarfCFI = !WindowsCFI && + (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); + // If we have any exception handlers in this function, and we adjust // the SP before calls, we may need to indicate this to the unwinder, // using GNU_ARGS_SIZE. 
Note that this may be necessary @@ -2093,9 +2099,8 @@ // set a non-0 GNU_ARGS_SIZE. // TODO: We don't need to reset this between subsequent functions, // if it didn't change. - bool HasDwarfEHHandlers = - !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && - !MF.getMMI().getLandingPads().empty(); + bool HasDwarfEHHandlers = !WindowsCFI && + !MF.getMMI().getLandingPads().empty(); if (HasDwarfEHHandlers && !isDestroy && MF.getInfo()->getHasPushSequences()) @@ -2113,11 +2118,26 @@ // Add Amount to SP to destroy a frame, and subtract to setup. int Offset = isDestroy ? Amount : -Amount; - if (!(MF.getFunction()->optForMinSize() && + if (!(Fn->optForMinSize() && adjustStackWithPops(MBB, I, DL, Offset))) BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false); } + if (DwarfCFI && !hasFP(MF)) { + // If we don't have FP, but need to generate unwind information, + // we need to set the correct CFA offset after the stack adjustment. + // How much we adjust the CFA offset depends on whether we're emitting + // CFI only for EH purposes or for debugging. EH only requires the CFA + // offset to be correct at each call site, while for debugging we want + // it to be more precise. + int CFAOffset = Amount; + if (!MMI.hasDebugInfo()) + CFAOffset += InternalAmt; + CFAOffset = isDestroy ? 
-CFAOffset : CFAOffset; + BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset)); + } + return; } Index: test/CodeGen/X86/fold-push.ll =================================================================== --- test/CodeGen/X86/fold-push.ll +++ test/CodeGen/X86/fold-push.ll @@ -3,7 +3,7 @@ declare void @foo(i32 %r) -define void @test(i32 %a, i32 %b) optsize { +define void @test(i32 %a, i32 %b) optsize nounwind { ; CHECK-LABEL: test: ; CHECK: movl [[EAX:%e..]], (%esp) ; CHECK-NEXT: pushl [[EAX]] @@ -22,7 +22,7 @@ ret void } -define void @test_min(i32 %a, i32 %b) minsize { +define void @test_min(i32 %a, i32 %b) minsize nounwind { ; CHECK-LABEL: test_min: ; CHECK: movl [[EAX:%e..]], (%esp) ; CHECK-NEXT: pushl [[EAX]] Index: test/CodeGen/X86/pop-stack-cleanup.ll =================================================================== --- test/CodeGen/X86/pop-stack-cleanup.ll +++ test/CodeGen/X86/pop-stack-cleanup.ll @@ -9,7 +9,7 @@ declare void @param8(i64, i64, i64, i64, i64, i64, i64, i64) -define void @test() minsize { +define void @test() minsize nounwind { ; CHECK-LABEL: test: ; CHECK: calll _param1 ; CHECK-NEXT: popl %eax @@ -48,7 +48,7 @@ ret void } -define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize { +define void @spill(i32 inreg %a, i32 inreg %b, i32 inreg %c) minsize nounwind { ; CHECK-LABEL: spill: ; CHECK-DAG: movl %ecx, ; CHECK-DAG: movl %edx, @@ -63,7 +63,7 @@ ret void } -define void @test_linux64(i32 %size) minsize { +define void @test_linux64(i32 %size) minsize nounwind { ; LINUX64-LABEL: test_linux64: ; LINUX64: pushq %rbp ; LINUX64: callq param8 Index: test/CodeGen/X86/push-cfi-debug.ll =================================================================== --- test/CodeGen/X86/push-cfi-debug.ll +++ test/CodeGen/X86/push-cfi-debug.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s + + +; Function Attrs: optsize +declare void @foo(i32, i32) #0 + +; CHECK-LABEL: test1: +; CHECK: subl $8, %esp +; 
CHECK: .cfi_adjust_cfa_offset 8 +; CHECK: pushl $2 +; CHECK: .cfi_adjust_cfa_offset 4 +; CHECK: pushl $1 +; CHECK: .cfi_adjust_cfa_offset 4 +; CHECK: call +; CHECK: addl $16, %esp +; CHECK: .cfi_adjust_cfa_offset -16 +define void @test1() #0 { +entry: + tail call void @foo(i32 1, i32 2) #1, !dbg !10 + ret void, !dbg !11 +} + +attributes #0 = { nounwind optsize } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250289)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3) +!1 = !DIFile(filename: "foo.c", directory: "foo") +!2 = !{} +!3 = !{!4} +!4 = distinct !DISubprogram(name: "test1", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, function: void ()* @test1, variables: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = !{i32 2, !"Dwarf Version", i32 4} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{!"clang version 3.8.0 (trunk 250289)"} +!10 = !DILocation(line: 4, column: 3, scope: !4) +!11 = !DILocation(line: 5, column: 1, scope: !4) Index: test/CodeGen/X86/push-cfi-obj.ll =================================================================== --- test/CodeGen/X86/push-cfi-obj.ll +++ test/CodeGen/X86/push-cfi-obj.ll @@ -1,31 +1,31 @@ -; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s +; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s -check-prefix=LINUX ; RUN: llc < %s -mtriple=i686-darwin-macosx10.7 -filetype=obj | llvm-readobj -sections | FileCheck -check-prefix=DARWIN %s ; On darwin, check that we manage to generate the compact unwind section ; DARWIN: Name: __compact_unwind ; DARWIN: Segment: __LD -; CHECK: Index: 8 -; CHECK-NEXT: Name: .eh_frame (41) -; CHECK-NEXT: Type: SHT_PROGBITS (0x1) -; CHECK-NEXT: Flags [ (0x2) -; CHECK-NEXT: SHF_ALLOC 
(0x2) -; CHECK-NEXT: ] -; CHECK-NEXT: Address: 0x0 -; CHECK-NEXT: Offset: 0x64 -; CHECK-NEXT: Size: 60 -; CHECK-NEXT: Link: 0 -; CHECK-NEXT: Info: 0 -; CHECK-NEXT: AddressAlignment: 4 -; CHECK-NEXT: EntrySize: 0 -; CHECK-NEXT: Relocations [ -; CHECK-NEXT: ] -; CHECK-NEXT: SectionData ( -; CHECK-NEXT: 0000: 1C000000 00000000 017A504C 5200017C |.........zPLR..|| -; CHECK-NEXT: 0010: 08070000 00000000 1B0C0404 88010000 |................| -; CHECK-NEXT: 0020: 18000000 24000000 00000000 19000000 |....$...........| -; CHECK-NEXT: 0030: 04000000 00430E10 2E100000 |.....C......| -; CHECK-NEXT: ) +; LINUX: Index: 8 +; LINUX-NEXT: Name: .eh_frame (41) +; LINUX-NEXT: Type: SHT_PROGBITS (0x1) +; LINUX-NEXT: Flags [ (0x2) +; LINUX-NEXT: SHF_ALLOC (0x2) +; LINUX-NEXT: ] +; LINUX-NEXT: Address: 0x0 +; LINUX-NEXT: Offset: 0x64 +; LINUX-NEXT: Size: 64 +; LINUX-NEXT: Link: 0 +; LINUX-NEXT: Info: 0 +; LINUX-NEXT: AddressAlignment: 4 +; LINUX-NEXT: EntrySize: 0 +; LINUX-NEXT: Relocations [ +; LINUX-NEXT: ] +; LINUX-NEXT: SectionData ( +; LINUX-NEXT: 0000: 1C000000 00000000 017A504C 5200017C |.........zPLR..|| +; LINUX-NEXT: 0010: 08070000 00000000 1B0C0404 88010000 |................| +; LINUX-NEXT: 0020: 1C000000 24000000 00000000 19000000 |....$...........| +; LINUX-NEXT: 0030: 04000000 00430E10 2E100E20 500E1000 |.....C..... P...| +; LINUX-NEXT: ) declare i32 @__gxx_personality_v0(...) declare void @good(i32 %a, i32 %b, i32 %c, i32 %d) Index: test/CodeGen/X86/push-cfi.ll =================================================================== --- test/CodeGen/X86/push-cfi.ll +++ test/CodeGen/X86/push-cfi.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s +; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX -check-prefix=CHECK +; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=DARWIN -check-prefix=CHECK declare i32 @__gxx_personality_v0(...) 
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d) @@ -6,15 +7,20 @@ declare void @empty() ; We use an invoke, and expect a .cfi_escape GNU_ARGS_SIZE with size 16 -; before the invocation +; before the invocation, as well as .cfi_adjust_cfa_offset before and after. +; Darwin should not generate pushes. ; CHECK-LABEL: test1: -; CHECK: .cfi_escape 0x2e, 0x10 -; CHECK-NEXT: pushl $4 -; CHECK-NEXT: pushl $3 -; CHECK-NEXT: pushl $2 -; CHECK-NEXT: pushl $1 -; CHECK-NEXT: call -; CHECK-NEXT: addl $16, %esp +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX: .cfi_adjust_cfa_offset 16 +; LINUX-NEXT: pushl $4 +; LINUX-NEXT: pushl $3 +; LINUX-NEXT: pushl $2 +; LINUX-NEXT: pushl $1 +; LINUX-NEXT: call +; LINUX-NEXT: addl $16, %esp +; LINUX: .cfi_adjust_cfa_offset -16 +; DARWIN-NOT: .cfi_escape +; DARWIN-NOT: pushl define void @test1() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @good(i32 1, i32 2, i32 3, i32 4) @@ -28,26 +34,33 @@ } ; If the function has no handlers, we don't need to generate GNU_ARGS_SIZE, -; even if it has an unwind table. +; even if it has an unwind table. We still need cfi_adjust_cfa_offset, though. +; Darwin should not generate pushes. 
; CHECK-LABEL: test2: -; CHECK-NOT: .cfi_escape -; CHECK: pushl $4 -; CHECK-NEXT: pushl $3 -; CHECK-NEXT: pushl $2 -; CHECK-NEXT: pushl $1 -; CHECK-NEXT: call -; CHECK-NEXT: addl $16, %esp +; LINUX-NOT: .cfi_escape +; LINUX: .cfi_adjust_cfa_offset 16 +; LINUX-NEXT: pushl $4 +; LINUX-NEXT: pushl $3 +; LINUX-NEXT: pushl $2 +; LINUX-NEXT: pushl $1 +; LINUX-NEXT: call +; LINUX-NEXT: addl $16, %esp +; LINUX: .cfi_adjust_cfa_offset -16 +; DARWIN-NOT: .cfi_escape +; DARWIN-NOT: pushl define void @test2() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: call void @good(i32 1, i32 2, i32 3, i32 4) ret void } -; If we did not end up using any pushes, no need for GNU_ARGS_SIZE anywhere +; If we did not end up using any pushes, no need for GNU_ARGS_SIZE or +; cfi_adjust_cfa_offset. ; CHECK-LABEL: test3: -; CHECK-NOT: .cfi_escape -; CHECK-NOT: pushl -; CHECK: retl +; LINUX-NOT: .cfi_escape +; LINUX-NOT: .cfi_adjust_cfa_offset +; LINUX-NOT: pushl +; LINUX: retl define void @test3() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @empty() @@ -62,23 +75,27 @@ ; Different sized stacks need different GNU_ARGS_SIZEs ; CHECK-LABEL: test4: -; CHECK: .cfi_escape 0x2e, 0x10 -; CHECK-NEXT: pushl $4 -; CHECK-NEXT: pushl $3 -; CHECK-NEXT: pushl $2 -; CHECK-NEXT: pushl $1 -; CHECK-NEXT: call -; CHECK-NEXT: addl $16, %esp -; CHECK: .cfi_escape 0x2e, 0x20 -; CHECK-NEXT: subl $8, %esp -; CHECK-NEXT: pushl $11 -; CHECK-NEXT: pushl $10 -; CHECK-NEXT: pushl $9 -; CHECK-NEXT: pushl $8 -; CHECK-NEXT: pushl $7 -; CHECK-NEXT: pushl $6 -; CHECK-NEXT: calll large -; CHECK-NEXT: addl $32, %esp +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX: .cfi_adjust_cfa_offset 16 +; LINUX-NEXT: pushl $4 +; LINUX-NEXT: pushl $3 +; LINUX-NEXT: pushl $2 +; LINUX-NEXT: pushl $1 +; LINUX-NEXT: call +; LINUX-NEXT: addl $16, %esp +; LINUX: .cfi_adjust_cfa_offset -16 +; LINUX: .cfi_escape 0x2e, 0x20 +; LINUX: subl $8, %esp +; LINUX: 
.cfi_adjust_cfa_offset 32 +; LINUX-NEXT: pushl $11 +; LINUX-NEXT: pushl $10 +; LINUX-NEXT: pushl $9 +; LINUX-NEXT: pushl $8 +; LINUX-NEXT: pushl $7 +; LINUX-NEXT: pushl $6 +; LINUX-NEXT: calll large +; LINUX-NEXT: addl $32, %esp +; LINUX: .cfi_adjust_cfa_offset -32 define void @test4() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @good(i32 1, i32 2, i32 3, i32 4) @@ -95,17 +112,20 @@ } ; If we did use pushes, we need to reset GNU_ARGS_SIZE before a call -; without parameters +; without parameters, but don't need to adjust the cfa offset ; CHECK-LABEL: test5: -; CHECK: .cfi_escape 0x2e, 0x10 -; CHECK-NEXT: pushl $4 -; CHECK-NEXT: pushl $3 -; CHECK-NEXT: pushl $2 -; CHECK-NEXT: pushl $1 -; CHECK-NEXT: call -; CHECK-NEXT: addl $16, %esp -; CHECK: .cfi_escape 0x2e, 0x00 -; CHECK-NEXT: call +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX: .cfi_adjust_cfa_offset 16 +; LINUX-NEXT: pushl $4 +; LINUX-NEXT: pushl $3 +; LINUX-NEXT: pushl $2 +; LINUX-NEXT: pushl $1 +; LINUX-NEXT: call +; LINUX-NEXT: addl $16, %esp +; LINUX: .cfi_adjust_cfa_offset -16 +; LINUX: .cfi_escape 0x2e, 0x00 +; LINUX-NOT: .cfi_adjust_cfa_offset +; LINUX: call define void @test5() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @good(i32 1, i32 2, i32 3, i32 4) @@ -123,10 +143,10 @@ ; This is actually inefficient - we don't need to repeat the .cfi_escape twice. ; CHECK-LABEL: test6: -; CHECK: .cfi_escape 0x2e, 0x10 -; CHECK: call -; CHECK: .cfi_escape 0x2e, 0x10 -; CHECK: call +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX: call +; LINUX: .cfi_escape 0x2e, 0x10 +; LINUX: call define void @test6() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: invoke void @good(i32 1, i32 2, i32 3, i32 4) @@ -141,3 +161,40 @@ cleanup ret void } + +; Darwin should generate pushes in the presence of FP and an unwind table, +; but not FP and invoke. 
+; CHECK-LABEL: test7: +; DARWIN: pushl %ebp +; DARWIN: movl %esp, %ebp +; DARWIN: .cfi_def_cfa_register %ebp +; DARWIN-NOT: .cfi_adjust_cfa_offset +; DARWIN: pushl $4 +; DARWIN-NEXT: pushl $3 +; DARWIN-NEXT: pushl $2 +; DARWIN-NEXT: pushl $1 +; DARWIN-NEXT: call +define void @test7() optsize #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + call void @good(i32 1, i32 2, i32 3, i32 4) + ret void +} + +; CHECK-LABEL: test8: +; DARWIN: pushl %ebp +; DARWIN: movl %esp, %ebp +; DARWIN-NOT: .cfi_adjust_cfa_offset +; DARWIN-NOT: pushl +define void @test8() optsize #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + invoke void @good(i32 1, i32 2, i32 3, i32 4) + to label %continue unwind label %cleanup +continue: + ret void +cleanup: + landingpad { i8*, i32 } + cleanup + ret void +} + +attributes #0 = { optsize "no-frame-pointer-elim"="true" }