Index: include/llvm/CodeGen/MachineFunction.h
===================================================================
--- include/llvm/CodeGen/MachineFunction.h
+++ include/llvm/CodeGen/MachineFunction.h
@@ -20,6 +20,7 @@
 
 #include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/ilist.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/IR/DebugLoc.h"
@@ -234,6 +235,9 @@
   /// True if the function includes any inline assembly.
   bool HasInlineAsm = false;
 
+  /// True if any WinCFI instructions have been emitted in this function.
+  Optional<bool> HasWinCFI;
+
   /// Current high-level properties of the IR of the function (e.g. is in SSA
   /// form or whether registers have been allocated)
   MachineFunctionProperties Properties;
@@ -372,6 +376,19 @@
     HasInlineAsm = B;
   }
 
+  /// Returns true if any WinCFI instruction has been emitted in this function.
+  ///
+  /// The flag must have been recorded with setHasWinCFI() beforehand; this is
+  /// asserted. Without assertions an unset flag reads as false rather than
+  /// dereferencing an empty Optional.
+  bool hasWinCFI() const {
+    assert(HasWinCFI.hasValue() && "HasWinCFI not set yet!");
+    return HasWinCFI.hasValue() && *HasWinCFI;
+  }
+
+  /// Records whether any WinCFI instruction was emitted in this function.
+  void setHasWinCFI(bool v) { HasWinCFI = v; }
+
   /// Get the function properties
   const MachineFunctionProperties &getProperties() const { return Properties; }
   MachineFunctionProperties &getProperties() { return Properties; }
Index: lib/CodeGen/AsmPrinter/WinException.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/WinException.cpp
+++ lib/CodeGen/AsmPrinter/WinException.cpp
@@ -90,7 +90,7 @@
 
   // If we're not using CFI, we don't want the CFI or the personality, but we
   // might want EH tables if we had EH pads.
-  if (!Asm->MAI->usesWindowsCFI()) {
+  if (!Asm->MAI->usesWindowsCFI() || (!MF->hasWinCFI() && !Per)) {
     shouldEmitLSDA = hasEHFunclets;
     shouldEmitPersonality = false;
     return;
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -935,6 +935,7 @@
       STI.isTarget64BitILP32()
           ? getX86SubSuperRegister(FramePtr, 64) : FramePtr;
   unsigned BasePtr = TRI->getBaseRegister();
+  bool HasWinCFI = false;
   
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
@@ -1063,6 +1064,7 @@
     }
 
     if (NeedsWinCFI) {
+      HasWinCFI = true;
       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
           .addImm(FramePtr)
           .setMIFlag(MachineInstr::FrameSetup);
@@ -1124,6 +1126,7 @@
     }
 
     if (NeedsWinCFI) {
+      HasWinCFI = true;
       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
           MachineInstr::FrameSetup);
     }
@@ -1209,10 +1212,12 @@
     emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, /*InEpilogue=*/false);
   }
 
-  if (NeedsWinCFI && NumBytes)
+  if (NeedsWinCFI && NumBytes) {
+    HasWinCFI = true;
     BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
         .addImm(NumBytes)
         .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   int SEHFrameOffset = 0;
   unsigned SPOrEstablisher;
@@ -1259,6 +1264,7 @@
 
     // If this is not a funclet, emit the CFI describing our frame pointer.
     if (NeedsWinCFI && !IsFunclet) {
+      HasWinCFI = true;
       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
           .addImm(FramePtr)
           .addImm(SEHFrameOffset)
@@ -1295,6 +1301,7 @@
           int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
           Offset += SEHFrameOffset;
 
+          HasWinCFI = true;
           BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
               .addImm(Reg)
               .addImm(Offset)
@@ -1304,7 +1311,7 @@
     }
   }
 
-  if (NeedsWinCFI)
+  if (NeedsWinCFI && HasWinCFI)
     BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
         .setMIFlag(MachineInstr::FrameSetup);
 
@@ -1396,6 +1403,9 @@
   if (Fn->getCallingConv() == CallingConv::X86_INTR)
     BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
         .setMIFlag(MachineInstr::FrameSetup);
+
+  // At this point we know if the function has WinCFI or not.
+  MF.setHasWinCFI(HasWinCFI);
 }
 
 bool X86FrameLowering::canUseLEAForSPInEpilogue(
@@ -1630,7 +1640,7 @@
   // into the epilogue.  To cope with that, we insert an epilogue marker here,
   // then replace it with a 'nop' if it ends up immediately after a CALL in the
   // final emitted code.
-  if (NeedsWinCFI)
+  if (NeedsWinCFI && MF.hasWinCFI())
     BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
 
   if (!RetOpcode || !isTailCallOpcode(*RetOpcode)) {
Index: lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- lib/Target/X86/X86MCInstLower.cpp
+++ lib/Target/X86/X86MCInstLower.cpp
@@ -1420,37 +1420,45 @@
     return;
 
   case X86::SEH_PushReg:
+    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
     OutStreamer->EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm()));
     return;
 
   case X86::SEH_SaveReg:
+    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
     OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
                                    MI->getOperand(1).getImm());
     return;
 
   case X86::SEH_SaveXMM:
+    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
     OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
                                    MI->getOperand(1).getImm());
     return;
 
   case X86::SEH_StackAlloc:
+    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
     OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
     return;
 
   case X86::SEH_SetFrame:
+    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
     OutStreamer->EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()),
                                     MI->getOperand(1).getImm());
     return;
 
   case X86::SEH_PushFrame:
+    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
     OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
     return;
 
   case X86::SEH_EndPrologue:
+    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
     OutStreamer->EmitWinCFIEndProlog();
     return;
 
   case X86::SEH_Epilogue: {
+    assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
     MachineBasicBlock::const_iterator MBBI(MI);
     // Check if preceded by a call and emit nop if so.
     for (MBBI = PrevCrossBBInst(MBBI);
Index: test/CodeGen/X86/coalescer-win64.ll
===================================================================
--- test/CodeGen/X86/coalescer-win64.ll
+++ test/CodeGen/X86/coalescer-win64.ll
@@ -11,6 +11,6 @@
 }
 
 ; CHECK-LABEL: test1{{$}}
-; CHECK: .seh_proc test1{{$}}
+; CHECK-NOT: .seh_proc test1
 ; CHECK: rex64 jmpq *fnptr(%rip)
-; CHECK: .seh_endproc
+; CHECK-NOT: .seh_endproc
Index: test/CodeGen/X86/pr24374.ll
===================================================================
--- test/CodeGen/X86/pr24374.ll
+++ test/CodeGen/X86/pr24374.ll
@@ -31,7 +31,6 @@
   unreachable
 }
 ; CHECK-LABEL: g:
-; CHECK:       .seh_proc g
-; CHECK:       .seh_endproc
+; CHECK: ud2
 
 attributes #0 = { nounwind }
Index: test/CodeGen/X86/seh-catchpad.ll
===================================================================
--- test/CodeGen/X86/seh-catchpad.ll
+++ test/CodeGen/X86/seh-catchpad.ll
@@ -171,10 +171,7 @@
 }
 
 ; CHECK: "?filt$0@0@main@@":                     # @"\01?filt$0@0@main@@"
-; CHECK: .seh_proc "?filt$0@0@main@@"
-; CHECK:         .seh_endprologue
 ; CHECK:         jmp       filt  # TAILCALL
-; CHECK:         .seh_handlerdata
 
 declare i32 @filt() #1
 
Index: test/CodeGen/X86/win64_eh.ll
===================================================================
--- test/CodeGen/X86/win64_eh.ll
+++ test/CodeGen/X86/win64_eh.ll
@@ -8,10 +8,7 @@
   ret void
 }
 ; WIN64-LABEL: foo0:
-; WIN64: .seh_proc foo0
-; WIN64: .seh_endprologue
 ; WIN64: ret
-; WIN64: .seh_endproc
 
 ; Checks a small stack allocation
 define void @foo1() uwtable {
Index: test/CodeGen/X86/win64_eh_leaf.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/win64_eh_leaf.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -O1 -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=ASM
+; RUN: llc < %s -O1 -mtriple=x86_64-pc-win32 -filetype=obj -o %t
+; RUN: llvm-readobj -unwind %t | FileCheck %s -check-prefix=READOBJ
+
+declare void @g(i32)
+
+define i32 @not_leaf(i32) uwtable {
+entry:
+  call void @g(i32 42)
+  ret i32 42
+
+; ASM-LABEL: not_leaf:
+; ASM: .seh
+
+; READOBJ: RuntimeFunction {
+; READOBJ-NEXT: StartAddress: not_leaf
+; READOBJ-NEXT: EndAddress: not_leaf
+}
+
+define void @leaf_func(i32) uwtable {
+entry:
+  tail call void @g(i32 42)
+  ret void
+
+; A Win64 "leaf" function (here: only a tail call, no frame setup) gets no .seh directives.
+; ASM-LABEL: leaf_func:
+; ASM-NOT: .seh
+
+; and no unwind info in the object file.
+; READOBJ-NOT: leaf_func
+}