Index: include/llvm/MC/MCDwarf.h
===================================================================
--- include/llvm/MC/MCDwarf.h
+++ include/llvm/MC/MCDwarf.h
@@ -340,7 +340,8 @@
     OpRestore,
     OpUndefined,
     OpRegister,
-    OpWindowSave
+    OpWindowSave,
+    OpGnuArgsSize
   };
 
 private:
@@ -454,6 +455,11 @@
     return MCCFIInstruction(OpEscape, L, 0, 0, Vals);
   }
 
+  /// \brief A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE
+  static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int Size) {
+    return MCCFIInstruction(OpGnuArgsSize, L, 0, Size, "");
+  }
+
   OpType getOperation() const { return Operation; }
   MCSymbol *getLabel() const { return Label; }
 
@@ -473,7 +479,7 @@
   int getOffset() const {
     assert(Operation == OpDefCfa || Operation == OpOffset ||
            Operation == OpRelOffset || Operation == OpDefCfaOffset ||
-           Operation == OpAdjustCfaOffset);
+           Operation == OpAdjustCfaOffset || Operation == OpGnuArgsSize);
     return Offset;
   }
 
Index: include/llvm/MC/MCStreamer.h
===================================================================
--- include/llvm/MC/MCStreamer.h
+++ include/llvm/MC/MCStreamer.h
@@ -662,6 +662,7 @@
   virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset);
   virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment);
   virtual void EmitCFIEscape(StringRef Values);
+  virtual void EmitCFIGnuArgsSize(int64_t Size);
   virtual void EmitCFISignalFrame();
   virtual void EmitCFIUndefined(int64_t Register);
   virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
Index: lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -246,6 +246,12 @@
   case MCCFIInstruction::OpSameValue:
     OutStreamer->EmitCFISameValue(Inst.getRegister());
     break;
+  case MCCFIInstruction::OpGnuArgsSize:
+    OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset());
+    break;
+  case MCCFIInstruction::OpEscape:
+    OutStreamer->EmitCFIEscape(Inst.getValues());
+    break;
   }
 }
 
Index: lib/MC/MCAsmStreamer.cpp
===================================================================
--- lib/MC/MCAsmStreamer.cpp
+++ lib/MC/MCAsmStreamer.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
 #include <cctype>
@@ -210,6 +211,8 @@
   void EmitCFISameValue(int64_t Register) override;
   void EmitCFIRelOffset(int64_t Register, int64_t Offset) override;
   void EmitCFIAdjustCfaOffset(int64_t Adjustment) override;
+  void EmitCFIEscape(StringRef Values) override;
+  void EmitCFIGnuArgsSize(int64_t Size) override;
   void EmitCFISignalFrame() override;
   void EmitCFIUndefined(int64_t Register) override;
   void EmitCFIRegister(int64_t Register1, int64_t Register2) override;
@@ -1016,6 +1019,32 @@
   EmitEOL();
 }
 
+static void PrintCFIEscape(llvm::formatted_raw_ostream &OS, StringRef Values) {
+  OS << "\t.cfi_escape ";
+  if (!Values.empty()) {
+    size_t e = Values.size() - 1;
+    for (size_t i = 0; i < e; ++i)
+      OS << format("0x%02x", uint8_t(Values[i])) << ", ";
+    OS << format("0x%02x", uint8_t(Values[e]));
+  }
+}
+
+void MCAsmStreamer::EmitCFIEscape(StringRef Values) {
+  MCStreamer::EmitCFIEscape(Values);
+  PrintCFIEscape(OS, Values);
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitCFIGnuArgsSize(int64_t Size) {
+  MCStreamer::EmitCFIGnuArgsSize(Size);
+  
+  uint8_t Buffer[16] = { dwarf::DW_CFA_GNU_args_size };
+  unsigned Len = encodeULEB128(Size, Buffer + 1) + 1;
+  
+  PrintCFIEscape(OS, StringRef((const char *)&Buffer[0], Len));
+  EmitEOL();
+}
+
 void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
   MCStreamer::EmitCFIDefCfaRegister(Register);
   OS << "\t.cfi_def_cfa_register ";
Index: lib/MC/MCDwarf.cpp
===================================================================
--- lib/MC/MCDwarf.cpp
+++ lib/MC/MCDwarf.cpp
@@ -1147,6 +1147,11 @@
     Streamer.EmitIntValue(dwarf::DW_CFA_restore | Reg, 1);
     return;
   }
+  case MCCFIInstruction::OpGnuArgsSize: {
+    Streamer.EmitIntValue(dwarf::DW_CFA_GNU_args_size, 1);
+    Streamer.EmitULEB128IntValue(Instr.getOffset());
+    return;
+  }
   case MCCFIInstruction::OpEscape:
     Streamer.EmitBytes(Instr.getValues());
     return;
Index: lib/MC/MCStreamer.cpp
===================================================================
--- lib/MC/MCStreamer.cpp
+++ lib/MC/MCStreamer.cpp
@@ -361,6 +361,14 @@
   CurFrame->Instructions.push_back(Instruction);
 }
 
+void MCStreamer::EmitCFIGnuArgsSize(int64_t Size) {
+  MCSymbol *Label = EmitCFICommon();
+  MCCFIInstruction Instruction = 
+    MCCFIInstruction::createGnuArgsSize(Label, Size);
+  MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
+  CurFrame->Instructions.push_back(Instruction);
+}
+
 void MCStreamer::EmitCFISignalFrame() {
   EnsureValidDwarfFrame();
   MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -2051,8 +2051,6 @@
     // If the stack pointer can be changed after prologue, turn the
     // adjcallstackup instruction into a 'sub ESP, <amt>' and the
     // adjcallstackdown instruction into 'add ESP, <amt>'
-    if (Amount == 0)
-      return;
 
     // We need to keep the stack aligned properly.  To do this, we round the
     // amount of space needed for the outgoing arguments up to the next
@@ -2060,6 +2058,25 @@
     unsigned StackAlign = getStackAlignment();
     Amount = RoundUpToAlignment(Amount, StackAlign);
 
+    // If we have any exception handlers in this function, and we adjust
+    // the SP before calls, we may need to indicate this to the unwinder,
+    // using GNU_ARGS_SIZE. Note that this may be necessary
+    // even when Amount == 0, because the preceding function may have
+    // set a non-0 GNU_ARGS_SIZE.
+    // TODO: We don't need to reset this between subsequent functions,
+    // if it didn't change.
+    bool HasDwarfEHHandlers =
+      !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && 
+      !MF.getMMI().getLandingPads().empty();
+
+    if (HasDwarfEHHandlers && !isDestroy && 
+        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
+      BuildCFI(MBB, I, DL,
+               MCCFIInstruction::createGnuArgsSize(nullptr, Amount));
+
+    if (Amount == 0)
+      return;
+
     // Factor out the amount that gets handled inside the sequence
     // (Pushes of argument for frame setup, callee pops for frame destroy)
     Amount -= InternalAmt;
Index: test/CodeGen/X86/push-cfi-obj.ll
===================================================================
--- test/CodeGen/X86/push-cfi-obj.ll
+++ test/CodeGen/X86/push-cfi-obj.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=i686-pc-linux -filetype=obj | llvm-readobj -s -sr -sd | FileCheck %s
+
+; CHECK:         Index: 8
+; CHECK-NEXT:    Name: .eh_frame (41)
+; CHECK-NEXT:    Type: SHT_PROGBITS (0x1)
+; CHECK-NEXT:    Flags [ (0x2)
+; CHECK-NEXT:      SHF_ALLOC (0x2)
+; CHECK-NEXT:    ]
+; CHECK-NEXT:    Address: 0x0
+; CHECK-NEXT:    Offset: 0x64
+; CHECK-NEXT:    Size: 60
+; CHECK-NEXT:    Link: 0
+; CHECK-NEXT:    Info: 0
+; CHECK-NEXT:    AddressAlignment: 4
+; CHECK-NEXT:    EntrySize: 0
+; CHECK-NEXT:    Relocations [
+; CHECK-NEXT:    ]
+; CHECK-NEXT:    SectionData (
+; CHECK-NEXT:      0000: 1C000000 00000000 017A504C 5200017C  |.........zPLR..||
+; CHECK-NEXT:      0010: 08070000 00000000 1B0C0404 88010000  |................|
+; CHECK-NEXT:      0020: 18000000 24000000 00000000 19000000  |....$...........|
+; CHECK-NEXT:      0030: 04000000 00430E10 2E100000           |.....C......|
+; CHECK-NEXT:    )
+
+declare i32 @__gxx_personality_v0(...)
+declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
+
+define void @test() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue unwind label %cleanup
+continue:
+  ret void
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
Index: test/CodeGen/X86/push-cfi.ll
===================================================================
--- test/CodeGen/X86/push-cfi.ll
+++ test/CodeGen/X86/push-cfi.ll
@@ -0,0 +1,143 @@
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
+declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f)
+declare void @empty()
+
+; We use an invoke, and expect a .cfi_escape GNU_ARGS_SIZE with size 16
+; before the invocation
+; CHECK-LABEL: test1:
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK-NEXT: pushl   $4
+; CHECK-NEXT: pushl   $3
+; CHECK-NEXT: pushl   $2
+; CHECK-NEXT: pushl   $1
+; CHECK-NEXT: call
+; CHECK-NEXT: addl $16, %esp
+define void @test1() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue unwind label %cleanup
+continue:
+  ret void
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; If the function has no handlers, we don't need to generate GNU_ARGS_SIZE,
+; even if it has an unwind table.
+; CHECK-LABEL: test2:
+; CHECK-NOT: .cfi_escape
+; CHECK: pushl   $4
+; CHECK-NEXT: pushl   $3
+; CHECK-NEXT: pushl   $2
+; CHECK-NEXT: pushl   $1
+; CHECK-NEXT: call
+; CHECK-NEXT: addl $16, %esp
+define void @test2() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; If we did not end up using any pushes, no need for GNU_ARGS_SIZE anywhere
+; CHECK-LABEL: test3:
+; CHECK-NOT: .cfi_escape
+; CHECK-NOT: pushl
+; CHECK: retl
+define void @test3() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @empty()
+          to label %continue unwind label %cleanup
+continue:
+  ret void
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; Different sized stacks need different GNU_ARGS_SIZEs
+; CHECK-LABEL: test4:
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK-NEXT: pushl   $4
+; CHECK-NEXT: pushl   $3
+; CHECK-NEXT: pushl   $2
+; CHECK-NEXT: pushl   $1
+; CHECK-NEXT: call
+; CHECK-NEXT: addl $16, %esp
+; CHECK: .cfi_escape 0x2e, 0x20
+; CHECK-NEXT: subl    $8, %esp
+; CHECK-NEXT: pushl   $11
+; CHECK-NEXT: pushl   $10
+; CHECK-NEXT: pushl   $9
+; CHECK-NEXT: pushl   $8
+; CHECK-NEXT: pushl   $7
+; CHECK-NEXT: pushl   $6
+; CHECK-NEXT: calll   large
+; CHECK-NEXT: addl $32, %esp
+define void @test4() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue1 unwind label %cleanup
+continue1:
+  invoke void @large(i32 6, i32 7, i32 8, i32 9, i32 10, i32 11)
+          to label %continue2 unwind label %cleanup
+continue2:
+  ret void          
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; If we did use pushes, we need to reset GNU_ARGS_SIZE before a call
+; without parameters
+; CHECK-LABEL: test5:
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK-NEXT: pushl   $4
+; CHECK-NEXT: pushl   $3
+; CHECK-NEXT: pushl   $2
+; CHECK-NEXT: pushl   $1
+; CHECK-NEXT: call
+; CHECK-NEXT: addl $16, %esp
+; CHECK: .cfi_escape 0x2e, 0x00
+; CHECK-NEXT: call
+define void @test5() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue1 unwind label %cleanup
+continue1:
+  invoke void @empty()
+          to label %continue2 unwind label %cleanup
+continue2:
+  ret void          
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; This is actually inefficient - we don't need to repeat the .cfi_escape twice.
+; CHECK-LABEL: test6:
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK: call
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK: call
+define void @test6() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue1 unwind label %cleanup
+continue1:
+  invoke void @good(i32 5, i32 6, i32 7, i32 8)
+          to label %continue2 unwind label %cleanup
+continue2:
+  ret void          
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}