Index: lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -246,6 +246,9 @@
   case MCCFIInstruction::OpSameValue:
     OutStreamer->EmitCFISameValue(Inst.getRegister());
     break;
+  case MCCFIInstruction::OpEscape:
+    OutStreamer->EmitCFIEscape(Inst.getValues());
+    break;
   }
 }
 
Index: lib/MC/MCAsmStreamer.cpp
===================================================================
--- lib/MC/MCAsmStreamer.cpp
+++ lib/MC/MCAsmStreamer.cpp
@@ -210,6 +210,7 @@
   void EmitCFISameValue(int64_t Register) override;
   void EmitCFIRelOffset(int64_t Register, int64_t Offset) override;
   void EmitCFIAdjustCfaOffset(int64_t Adjustment) override;
+  void EmitCFIEscape(StringRef Values) override;
   void EmitCFISignalFrame() override;
   void EmitCFIUndefined(int64_t Register) override;
   void EmitCFIRegister(int64_t Register1, int64_t Register2) override;
@@ -1016,6 +1017,19 @@
   EmitEOL();
 }
 
+/// Call the base-class MCStreamer::EmitCFIEscape, then print a .cfi_escape
+/// directive whose operands are the bytes of Values in "0x%02x" form.
+void MCAsmStreamer::EmitCFIEscape(StringRef Values) {
+  MCStreamer::EmitCFIEscape(Values);
+  OS << "\t.cfi_escape ";
+  const char *Separator = ""; // Nothing precedes the first byte.
+  for (char Byte : Values) {
+    OS << Separator << format("0x%02x", uint8_t(Byte));
+    Separator = ", ";
+  }
+  EmitEOL();
+}
+
 void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) {
   MCStreamer::EmitCFIDefCfaRegister(Register);
   OS << "\t.cfi_def_cfa_register ";
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -30,6 +30,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
 #include <cstdlib>
 
 using namespace llvm;
@@ -2047,12 +2048,14 @@
   uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
   I = MBB.erase(I);
 
+  bool NeedsDwarfCFI =
+      !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() && 
+      (MF.getMMI().hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry());
+
   if (!reserveCallFrame) {
     // If the stack pointer can be changed after prologue, turn the
     // adjcallstackup instruction into a 'sub ESP, <amt>' and the
     // adjcallstackdown instruction into 'add ESP, <amt>'
-    if (Amount == 0)
-      return;
 
     // We need to keep the stack aligned properly.  To do this, we round the
     // amount of space needed for the outgoing arguments up to the next
@@ -2060,6 +2063,25 @@
     unsigned StackAlign = getStackAlignment();
     Amount = RoundUpToAlignment(Amount, StackAlign);
 
+    // If we adjust the SP before calls, we may need to indicate this to
+    // the unwinder, using GNU_ARGS_SIZE. Note that this may be necessary
+    // even when Amount == 0, because the preceding function may have
+    // set a non-0 GNU_ARGS_SIZE.
+    // TODO: We don't need to reset this between subsequent functions,
+    // if it didn't change.
+    if (NeedsDwarfCFI && !isDestroy && 
+        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences()) {
+      const uint8_t GNU_ARGS_SIZE = 0x2e;
+      uint8_t Buffer[9] = { GNU_ARGS_SIZE };
+      unsigned Len = encodeULEB128(Amount, Buffer + 1) + 1;
+      BuildCFI(MBB, I, DL, MCCFIInstruction::createEscape(
+                               nullptr, StringRef((const char *)&Buffer[0], 
+                                                  Len)));
+    }
+
+    if (Amount == 0)
+      return;
+
     // Factor out the amount that gets handled inside the sequence
     // (Pushes of argument for frame setup, callee pops for frame destroy)
     Amount -= InternalAmt;
Index: test/CodeGen/X86/push-cfi.ll
===================================================================
--- test/CodeGen/X86/push-cfi.ll
+++ test/CodeGen/X86/push-cfi.ll
@@ -0,0 +1,127 @@
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+
+declare i32 @__gxx_personality_v0(...)
+declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
+declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f)
+declare void @empty()
+
+; We use an invoke, and expect a .cfi_escape GNU_ARGS_SIZE with size 16
+; before the invocation
+; CHECK-LABEL: test1:
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK-NEXT: pushl   $4
+; CHECK-NEXT: pushl   $3
+; CHECK-NEXT: pushl   $2
+; CHECK-NEXT: pushl   $1
+; CHECK-NEXT: call
+; CHECK-NEXT: addl $16, %esp
+define void @test1() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  invoke void @good(i32 1, i32 2, i32 3, i32 4)
+          to label %continue unwind label %cleanup
+continue:
+  ret void
+cleanup:  
+  landingpad { i8*, i32 }
+     cleanup
+  ret void
+}
+
+; Same for a call
+; CHECK-LABEL: test2:
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK-NEXT: pushl   $4
+; CHECK-NEXT: pushl   $3
+; CHECK-NEXT: pushl   $2
+; CHECK-NEXT: pushl   $1
+; CHECK-NEXT: call
+; CHECK-NEXT: addl $16, %esp
+define void @test2() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; But not if the function is nounwind
+; CHECK-LABEL: test3:
+; CHECK-NOT: .cfi_escape
+; CHECK: pushl   $4
+; CHECK-NEXT: pushl   $3
+; CHECK-NEXT: pushl   $2
+; CHECK-NEXT: pushl   $1
+; CHECK-NEXT: call
+; CHECK-NEXT: addl $16, %esp
+define void @test3() nounwind optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; If we did not end up using any pushes, no need for GNU_ARGS_SIZE anywhere
+; CHECK-LABEL: test4:
+; CHECK-NOT: .cfi_escape
+; CHECK-NOT: pushl
+; CHECK: retl
+define void @test4() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @empty()
+  ret void
+}
+
+; If we did use pushes, we need to update GNU_ARGS_SIZE before a call
+; whose argument area has a different size
+; CHECK-LABEL: test5:
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK-NEXT: pushl   $4
+; CHECK-NEXT: pushl   $3
+; CHECK-NEXT: pushl   $2
+; CHECK-NEXT: pushl   $1
+; CHECK-NEXT: call
+; CHECK-NEXT: addl $16, %esp
+; CHECK: .cfi_escape 0x2e, 0x20
+; CHECK-NEXT: subl    $8, %esp
+; CHECK-NEXT: pushl   $11
+; CHECK-NEXT: pushl   $10
+; CHECK-NEXT: pushl   $9
+; CHECK-NEXT: pushl   $8
+; CHECK-NEXT: pushl   $7
+; CHECK-NEXT: pushl   $6
+; CHECK-NEXT: calll   large
+; CHECK-NEXT: addl $32, %esp
+define void @test5() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  call void @large(i32 6, i32 7, i32 8, i32 9, i32 10, i32 11)
+  ret void
+}
+
+; If we did use pushes, we need to reset GNU_ARGS_SIZE before a call
+; without parameters
+; CHECK-LABEL: test6:
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK-NEXT: pushl   $4
+; CHECK-NEXT: pushl   $3
+; CHECK-NEXT: pushl   $2
+; CHECK-NEXT: pushl   $1
+; CHECK-NEXT: call
+; CHECK-NEXT: addl $16, %esp
+; CHECK: .cfi_escape 0x2e, 0x00
+; CHECK-NEXT: call
+define void @test6() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  call void @empty()
+  ret void
+}
+; This is actually inefficient - we don't need to repeat the .cfi_escape twice.
+; CHECK-LABEL: test7:
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK: call
+; CHECK: .cfi_escape 0x2e, 0x10
+; CHECK: call
+define void @test7() optsize personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+  call void @good(i32 1, i32 2, i32 3, i32 4)
+  call void @good(i32 5, i32 6, i32 7, i32 8)
+  ret void
+}
\ No newline at end of file