Index: llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h
+++ llvm/trunk/include/llvm/CodeGen/MachineModuleInfo.h
@@ -161,6 +161,12 @@
   bool CallsUnwindInit;
   bool HasEHFunclets;
 
+  // TODO: Ideally, what we'd like is to have a switch that allows emitting 
+  // synchronous (precise at call-sites only) CFA into .eh_frame. However,
+  // even under this switch, we'd like .debug_frame to be precise when using.
+  // -g. At this moment, there's no way to specify that some CFI directives
+  // go into .eh_frame only, while others go into .debug_frame only.
+
   /// DbgInfoAvailable - True if debugging information is available
   /// in this module.
   bool DbgInfoAvailable;
@@ -235,11 +241,6 @@
   bool hasDebugInfo() const { return DbgInfoAvailable; }
   void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
 
-  // Returns true if we need to generate precise CFI. Currently
-  // this is equivalent to hasDebugInfo(), but if we ever implement
-  // async EH, it will require precise CFI as well.
-  bool usePreciseUnwindInfo() const { return hasDebugInfo(); }
-
   bool callsEHReturn() const { return CallsEHReturn; }
   void setCallsEHReturn(bool b) { CallsEHReturn = b; }
 
Index: llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp
+++ llvm/trunk/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -500,7 +500,8 @@
 
     // For debugging, when using SP-based CFA, we need to adjust the CFA
     // offset after each push.
-    if (!TFL->hasFP(MF) && MF.getMMI().usePreciseUnwindInfo())
+    // TODO: This is needed only if we require precise CFA.
+    if (!TFL->hasFP(MF))
       TFL->BuildCFI(MBB, std::next(Push), DL, 
                     MCCFIInstruction::createAdjustCfaOffset(nullptr, 4));
 
Index: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
@@ -2524,10 +2524,10 @@
     // (Pushes of argument for frame setup, callee pops for frame destroy)
     Amount -= InternalAmt;
 
-    // If this is a callee-pop calling convention, and we're emitting precise
-    // SP-based CFI, emit a CFA adjust for the amount the callee popped.
-    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF) && 
-        MMI.usePreciseUnwindInfo())
+    // TODO: This is needed only if we require precise CFA.
+    // If this is a callee-pop calling convention, emit a CFA adjust for
+    // the amount the callee popped.
+    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
       BuildCFI(MBB, I, DL, 
                MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
 
@@ -2548,11 +2548,14 @@
       // offset to be correct at each call site, while for debugging we want
       // it to be more precise.
       int CFAOffset = Amount;
-      if (!MMI.usePreciseUnwindInfo())
-        CFAOffset += InternalAmt;
-      CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
-      BuildCFI(MBB, I, DL, 
-               MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
+      // TODO: When not using precise CFA, we also need to adjust for the
+      // InternalAmt here.
+
+      if (CFAOffset) {
+        CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
+        BuildCFI(MBB, I, DL, 
+                 MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
+      }
     }
 
     return;
Index: llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
+++ llvm/trunk/lib/Target/X86/X86MCInstLower.cpp
@@ -1143,8 +1143,10 @@
     const X86FrameLowering* FrameLowering =
         MF->getSubtarget<X86Subtarget>().getFrameLowering();
     bool hasFP = FrameLowering->hasFP(*MF);
-
-    bool NeedsDwarfCFI = MMI->usePreciseUnwindInfo();
+    
+    // TODO: This is needed only if we require precise CFA.
+    bool NeedsDwarfCFI = 
+         (MMI->hasDebugInfo() || MF->getFunction()->needsUnwindTableEntry());
     int stackGrowth = -RI->getSlotSize();
 
     if (NeedsDwarfCFI && !hasFP) {
Index: llvm/trunk/test/CodeGen/X86/push-cfi.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/push-cfi.ll
+++ llvm/trunk/test/CodeGen/X86/push-cfi.ll
@@ -6,17 +6,24 @@
 declare void @large(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f)
 declare void @empty()
 
-; When we use an invoke, and have FP, we expect a .cfi_escape GNU_ARGS_SIZE
-; with size 16 before the invocation. Without FP, we expect.cfi_adjust_cfa_offset
-; before and after.
-; Darwin should not generate pushes in neither circumstance.
+; When we use an invoke, we expect a .cfi_escape GNU_ARGS_SIZE
+; with size 16 before the invocation. Without FP, we also expect
+; .cfi_adjust_cfa_offset after each push.
+; Darwin should not generate pushes in either circumstance.
 ; CHECK-LABEL: test1_nofp:
 ; LINUX: .cfi_escape 0x2e, 0x10
-; LINUX: .cfi_adjust_cfa_offset 16
 ; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: call
 ; LINUX-NEXT: addl $16, %esp
 ; LINUX: .cfi_adjust_cfa_offset -16
@@ -62,11 +69,18 @@
 ; so darwin should not generate pushes.
 ; CHECK-LABEL: test2_nofp:
 ; LINUX-NOT: .cfi_escape
-; LINUX: .cfi_adjust_cfa_offset 16
-; LINUX-NEXT: pushl   $4
+; LINUX: pushl   $4
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: call
 ; LINUX-NEXT: addl $16, %esp
 ; LINUX: .cfi_adjust_cfa_offset -16
@@ -170,11 +184,18 @@
 ; without parameters, but don't need to adjust the cfa offset
 ; CHECK-LABEL: test5_nofp:
 ; LINUX: .cfi_escape 0x2e, 0x10
-; LINUX: .cfi_adjust_cfa_offset 16
 ; LINUX-NEXT: pushl   $4
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: pushl   $3
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: pushl   $2
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: pushl   $1
+; LINUX-NEXT: Ltmp{{[0-9]+}}:
+; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: call
 ; LINUX-NEXT: addl $16, %esp
 ; LINUX: .cfi_adjust_cfa_offset -16
Index: llvm/trunk/test/CodeGen/X86/tls-pie.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/tls-pie.ll
+++ llvm/trunk/test/CodeGen/X86/tls-pie.ll
@@ -36,9 +36,13 @@
 define i32 @f3() {
 ; X32-LABEL: f3:
 ; X32:      calll .L{{[0-9]+}}$pb
+; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: .cfi_adjust_cfa_offset 4
 ; X32-NEXT: .L{{[0-9]+}}$pb:
 ; X32-NEXT: popl %eax
 ; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: .cfi_adjust_cfa_offset -4
+; X32-NEXT: .Ltmp{{[0-9]+}}:
 ; X32-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp{{[0-9]+}}-.L{{[0-9]+}}$pb), %eax
 ; X32-NEXT: movl i2@GOTNTPOFF(%eax), %eax
 ; X32-NEXT: movl %gs:(%eax), %eax
@@ -56,9 +60,13 @@
 define i32* @f4() {
 ; X32-LABEL: f4:
 ; X32:      calll .L{{[0-9]+}}$pb
+; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: .cfi_adjust_cfa_offset 4
 ; X32-NEXT: .L{{[0-9]+}}$pb:
 ; X32-NEXT: popl %ecx
 ; X32-NEXT: .Ltmp{{[0-9]+}}:
+; X32-NEXT: .cfi_adjust_cfa_offset -4
+; X32-NEXT: .Ltmp{{[0-9]+}}:
 ; X32-NEXT: addl $_GLOBAL_OFFSET_TABLE_+(.Ltmp{{[0-9]+}}-.L{{[0-9]+}}$pb), %ecx
 ; X32-NEXT: movl %gs:0, %eax
 ; X32-NEXT: addl i2@GOTNTPOFF(%ecx), %eax
Index: llvm/trunk/test/CodeGen/X86/win32-pic-jumptable.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/win32-pic-jumptable.ll
+++ llvm/trunk/test/CodeGen/X86/win32-pic-jumptable.ll
@@ -1,8 +1,12 @@
 ; RUN: llc < %s -relocation-model=pic | FileCheck %s
 
 ; CHECK:        calll L0$pb
+; CHECK-NEXT: Ltmp{{[0-9]+}}:
+; CHECK-NEXT: .cfi_adjust_cfa_offset 4
 ; CHECK-NEXT: L0$pb:
 ; CHECK-NEXT:   popl %eax
+; CHECK-NEXT: Ltmp{{[0-9]+}}:
+; CHECK-NEXT: .cfi_adjust_cfa_offset -4
 ; CHECK-NEXT:   addl LJTI0_0(,%ecx,4), %eax
 ; CHECK-NEXT:   jmpl *%eax