Index: include/llvm/CodeGen/MachineFrameInfo.h
===================================================================
--- include/llvm/CodeGen/MachineFrameInfo.h
+++ include/llvm/CodeGen/MachineFrameInfo.h
@@ -482,7 +482,8 @@
   /// efficiency. By default, fixed objects are immutable. This returns an
   /// index with a negative value.
   ///
-  int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable);
+  int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable,
+                        bool isSpillSlot = false);
 
   /// isFixedObjectIndex - Returns true if the specified index corresponds to a
Index: lib/CodeGen/AsmPrinter/Win64Exception.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/Win64Exception.cpp
+++ lib/CodeGen/AsmPrinter/Win64Exception.cpp
@@ -78,9 +78,9 @@
   if (!shouldEmitPersonality)
     return;
 
-  MCSymbol *GCCHandlerSym =
-    Asm->GetExternalSymbolSymbol("_GCC_specific_handler");
-  Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true);
+  const MCSymbol *PersHandlerSym = TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang,
+                                                                Asm->TM, MMI);
+  Asm->OutStreamer.EmitWin64EHHandler(PersHandlerSym, true, true);
 
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
                                                 Asm->getFunctionNumber()));
@@ -99,15 +99,8 @@
   MMI->TidyLandingPads();
 
   if (shouldEmitPersonality) {
-    const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-    const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()];
-    const MCSymbol *Sym =
-      TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
-
     Asm->OutStreamer.PushSection();
     Asm->OutStreamer.EmitWin64EHHandlerData();
-    Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext),
-                               4);
     EmitExceptionTable();
     Asm->OutStreamer.PopSection();
   }
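
The Win64Exception.cpp change is the visible half of the new scheme: instead of hard-coding `_GCC_specific_handler`, the personality function itself is named in the `.seh_handler` directive, and the LSDA follows `.seh_handlerdata` with no hand-emitted personality pointer. Reduced to the two streamer calls involved (a fragment, not standalone; `TLOF`, `Per`, and the boolean flags are exactly as in the patch, and the `/*Unwind,Except*/` labels are my gloss on the two `true` arguments):

    const MCSymbol *PersHandlerSym =
        TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI);
    Asm->OutStreamer.EmitWin64EHHandler(PersHandlerSym, /*Unwind=*/true,
                                        /*Except=*/true);
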
Index: lib/CodeGen/MachineFunction.cpp
===================================================================
--- lib/CodeGen/MachineFunction.cpp
+++ lib/CodeGen/MachineFunction.cpp
@@ -457,7 +457,7 @@
 /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table.
 /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a
 /// normal 'L' label is returned.
-MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, 
+MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
                                         bool isLinkerPrivate) const {
   const DataLayout *DL = getTarget().getDataLayout();
   assert(JumpTableInfo && "No jump tables");
@@ -533,7 +533,7 @@
   Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
                                   !RealignOption,
-                                  Alignment, getFrameLowering()->getStackAlignment()); 
+                                  Alignment, getFrameLowering()->getStackAlignment());
   CreateStackObject(Size, Alignment, true);
   int Index = (int)Objects.size() - NumFixedObjects - 1;
   ensureMaxAlignment(Alignment);
@@ -551,7 +551,7 @@
   Alignment = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
                                   !RealignOption,
-                                  Alignment, getFrameLowering()->getStackAlignment()); 
+                                  Alignment, getFrameLowering()->getStackAlignment());
   Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca));
   ensureMaxAlignment(Alignment);
   return (int)Objects.size()-NumFixedObjects-1;
 }
 
 /// CreateFixedObject - Create a new object at a fixed location on the stack.
 /// All fixed objects should be created before other objects are created for
 /// efficiency. By default, fixed objects are immutable. This returns an
 /// index with a negative value.
 ///
 int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
-                                        bool Immutable) {
+                                        bool Immutable, bool isSpillSlot) {
   assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
   // The alignment of the frame index can be determined from its offset from
   // the incoming frame position.  If the frame object is at offset 32 and
@@ -574,10 +574,9 @@
   Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() ||
                               !RealignOption,
-                              Align, getFrameLowering()->getStackAlignment()); 
+                              Align, getFrameLowering()->getStackAlignment());
   Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
-                                              /*isSS*/   false,
-                                              /*Alloca*/ nullptr));
+                                              isSpillSlot, /*Alloca*/ nullptr));
   return -++NumFixedObjects;
 }
@@ -849,10 +848,10 @@
   if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) ||
       isa<StructType>(B->getType()) || isa<ArrayType>(B->getType()))
     return false;
-  
+
   // For now, only support constants with the same size.
   uint64_t StoreSize = TD->getTypeStoreSize(A->getType());
-  if (StoreSize != TD->getTypeStoreSize(B->getType()) || 
+  if (StoreSize != TD->getTypeStoreSize(B->getType()) ||
       StoreSize > 128)
     return false;
@@ -882,7 +881,7 @@
 /// an existing one.  User must specify the log2 of the minimum required
 /// alignment for the object.
 ///
-unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, 
+unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C,
                                                    unsigned Alignment) {
   assert(Alignment && "Alignment must be specified!");
   if (Alignment > PoolAlignment) PoolAlignment = Alignment;
Index: lib/CodeGen/PrologEpilogInserter.cpp
===================================================================
--- lib/CodeGen/PrologEpilogInserter.cpp
+++ lib/CodeGen/PrologEpilogInserter.cpp
@@ -309,7 +309,7 @@
       if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
     } else {
       // Spill it to the stack where we must.
-      FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
+      FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true, true);
     }
 
     I->setFrameIdx(FrameIdx);
@@ -483,6 +483,8 @@
   // callee saved registers.
   if (StackGrowsDown) {
     for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+      if (MFI->isDeadObjectIndex(i))
+        continue;
       // If the stack grows down, we need to add the size to find the lowest
       // address of the object.
       Offset += MFI->getObjectSize(i);
@@ -496,6 +498,8 @@
   } else {
     int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex;
     for (int i = MaxCSFI; i >= MinCSFI ; --i) {
+      if (MFI->isDeadObjectIndex(i))
+        continue;
       unsigned Align = MFI->getObjectAlignment(i);
       // Adjust to alignment boundary
       Offset = (Offset+Align-1)/Align*Align;
Index: lib/MC/MCObjectFileInfo.cpp
===================================================================
--- lib/MC/MCObjectFileInfo.cpp
+++ lib/MC/MCObjectFileInfo.cpp
@@ -632,11 +632,16 @@
   // though it contains relocatable pointers.  In PIC mode, this is probably a
   // big runtime hit for C++ apps.  Either the contents of the LSDA need to be
   // adjusted or this should be a data section.
-  LSDASection =
-    Ctx->getCOFFSection(".gcc_except_table",
-                        COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
-                        COFF::IMAGE_SCN_MEM_READ,
-                        SectionKind::getReadOnly());
+  if (T.isOSWindows() && T.getArch() == Triple::x86_64) {
+    // On Windows 64 with SEH, the LSDA is emitted into the .xdata section.
+    LSDASection = 0;
+  } else {
+    LSDASection =
+      Ctx->getCOFFSection(".gcc_except_table",
+                          COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+                          COFF::IMAGE_SCN_MEM_READ,
+                          SectionKind::getReadOnly());
+  }
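
A null `LSDASection` is the signal consumers use to keep the exception table out of `.gcc_except_table`; on Win64 it rides in `.xdata` behind `.seh_handlerdata` instead. A minimal standalone sketch of the gating predicate (hypothetical helper name; the two `Triple` calls are the same ones the patch uses):

    #include "llvm/ADT/Triple.h"

    // True when the LSDA will be emitted into .xdata via SEH directives
    // rather than into a .gcc_except_table section.
    static bool lsdaLivesInXData(const llvm::Triple &T) {
      return T.isOSWindows() && T.getArch() == llvm::Triple::x86_64;
    }
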
 
   // Debug info.
   COFFDebugSymbolsSection =
Index: lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
===================================================================
--- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -142,8 +142,11 @@
 void X86MCAsmInfoMicrosoft::anchor() { }
 
 X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) {
-  if (Triple.getArch() == Triple::x86_64)
+  if (Triple.getArch() == Triple::x86_64) {
     PrivateGlobalPrefix = ".L";
+    PointerSize = 8;
+    ExceptionsType = ExceptionHandling::Win64;
+  }
 
   AssemblerDialect = AsmWriterFlavor;
@@ -162,12 +165,14 @@
     PointerSize = 8;
   }
 
+  if (Triple.isOSWindows() && Triple.getArch() == Triple::x86_64)
+    ExceptionsType = ExceptionHandling::Win64;
+  else
+    ExceptionsType = ExceptionHandling::DwarfCFI;
+
   AssemblerDialect = AsmWriterFlavor;
 
   TextAlignFillValue = 0x90;
 
-  // Exceptions handling
-  ExceptionsType = ExceptionHandling::DwarfCFI;
-
   UseIntegratedAssembler = true;
 }
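
Both constructors now agree on one rule: x86-64 Windows targets, MSVC and GNU environments alike, get `ExceptionHandling::Win64`, and everything else keeps DWARF CFI. A sketch of that policy as a free function (hypothetical helper; the patch inlines the test in each constructor):

    #include "llvm/ADT/Triple.h"
    #include "llvm/MC/MCAsmInfo.h"

    static llvm::ExceptionHandling::ExceptionsType
    pickExceptionsType(const llvm::Triple &T) {
      if (T.isOSWindows() && T.getArch() == llvm::Triple::x86_64)
        return llvm::ExceptionHandling::Win64;
      return llvm::ExceptionHandling::DwarfCFI;
    }
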
Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -29,6 +29,7 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
 
 using namespace llvm;
@@ -313,63 +314,24 @@
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+  bool HasFP = hasFP(MF);
 
   // Add callee saved registers to move list.
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   if (CSI.empty()) return;
 
-  const X86RegisterInfo *RegInfo =
-    static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
-  bool HasFP = hasFP(MF);
-
-  // Calculate amount of bytes used for return address storing.
-  int stackGrowth = -RegInfo->getSlotSize();
-
-  // FIXME: This is dirty hack. The code itself is pretty mess right now.
-  // It should be rewritten from scratch and generalized sometimes.
-
-  // Determine maximum offset (minimum due to stack growth).
-  int64_t MaxOffset = 0;
-  for (std::vector<CalleeSavedInfo>::const_iterator
-         I = CSI.begin(), E = CSI.end(); I != E; ++I)
-    MaxOffset = std::min(MaxOffset,
-                         MFI->getObjectOffset(I->getFrameIdx()));
-
   // Calculate offsets.
-  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
   for (std::vector<CalleeSavedInfo>::const_iterator
          I = CSI.begin(), E = CSI.end(); I != E; ++I) {
     int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
     unsigned Reg = I->getReg();
-    Offset = MaxOffset - Offset + saveAreaOffset;
-
-    // Don't output a new machine move if we're re-saving the frame
-    // pointer. This happens when the PrologEpilogInserter has inserted an extra
-    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
-    // generates one when frame pointers are used. If we generate a "machine
-    // move" for this extra "PUSH", the linker will lose track of the fact that
-    // the frame pointer should have the value of the first "PUSH" when it's
-    // trying to unwind.
-    //
-    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
-    // another bug. I.e., one where we generate a prolog like this:
-    //
-    //   pushl  %ebp
-    //   movl   %esp, %ebp
-    //   pushl  %ebp
-    //   pushl  %esi
-    //   ...
-    //
-    // The immediate re-push of EBP is unnecessary. At the least, it's an
-    // optimization bug. EBP can be used as a scratch register in certain
-    // cases, but probably not when we have a frame pointer.
+    if (HasFP && FramePtr == Reg)
       continue;
 
     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
-    unsigned CFIIndex =
-        MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
-                                                        Offset));
+    unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+        nullptr, DwarfReg, Offset));
     BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
         .addCFIIndex(CFIIndex);
   }
@@ -396,6 +358,75 @@
 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
 /// space for local variables. Also emit labels used by the exception handler to
 /// generate the exception handling frames.
+
+// Here's a gist of what gets emitted:
+//
+// ; Establish frame pointer, if needed
+// [if needs FP]
+//     push %rbp
+//     .cfi_def_cfa_offset 16
+//     .cfi_offset %rbp, -16
+//     .seh_pushreg %rbp
+//     mov  %rsp, %rbp
+//     .cfi_def_cfa_register %rbp
+//
+// ; Spill general-purpose registers
+// [for all callee-saved GPRs]
+//     pushq %rXX
+//     [if not needs FP]
+//       .cfi_def_cfa_offset NNN
+//     .seh_pushreg %rXX
+//
+// ; If the required stack alignment > default stack alignment
+// ; rsp needs to be re-aligned. This creates a "re-alignment gap"
+// ; of unknown size in the stack frame.
+// [if stack needs re-alignment]
+//     and $MASK, %rsp
+//
+// ; Allocate space for locals
+// [if target is Windows and allocated space > 4096 bytes]
+//     ; Windows needs special care for allocations larger
+//     ; than one page.
+//     mov $NNN, %rax
+//     call ___chkstk_ms/___chkstk
+//     sub %rax, %rsp
+// [else]
+//     sub $NNN, %rsp
+//
+// .cfi_def_cfa_offset MMM
+// .seh_stackalloc NNN
+//
+// [if needs FP]
+//     .seh_setframe %rbp, KKK
+//
+// ; Currently only Win64 spills non-GPRs
+// [for all callee-saved XMM registers]
+//     [if needs FP]
+//       movaps %xmmXX, -NNN(%rbp)
+//     [else]
+//       movaps %xmmXX, NNN(%rsp)
+//
+// [for all callee-saved XMM registers]
+//     .seh_savexmm %xmmXX, NNN
+//
+// .seh_endprologue
+//
+// [if needs base pointer]
+//     mov %rsp, %rbx
+//
+// ; Emit CFI info for GPRs
+// [if needs FP]
+//   [for all callee-saved registers]
+//     .cfi_offset %rXX, NNN
+//
+// Notes:
+// - .seh directives are emitted only for Windows 64 ABI
+// - .cfi directives are emitted for all other ABIs
+// - for 32-bit code, substitute %eXX registers for %rXX
+
 void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
   MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
   MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -406,8 +437,6 @@
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  bool needsFrameMoves = MMI.hasDebugInfo() ||
-    Fn->needsUnwindTableEntry();
   uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
   uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
   bool HasFP = hasFP(MF);
@@ -415,6 +444,8 @@
   bool Is64Bit = STI.is64Bit();
   bool IsLP64 = STI.isTarget64BitLP64();
   bool IsWin64 = STI.isTargetWin64();
+  bool NeedsWin64SEH = IsWin64 && Fn->needsUnwindTableEntry();
+  bool NeedsDwarfCFI = !IsWin64 && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
   bool UseLEA = STI.useLeaForSP();
   unsigned StackAlign = getStackAlignment();
   unsigned SlotSize = RegInfo->getSlotSize();
@@ -512,7 +543,7 @@
           .addReg(FramePtr, RegState::Kill)
           .setMIFlag(MachineInstr::FrameSetup);
 
-    if (needsFrameMoves) {
+    if (NeedsDwarfCFI) {
       // Mark the place where EBP/RBP was saved.
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
@@ -530,13 +561,19 @@
           .addCFIIndex(CFIIndex);
     }
 
+    if (NeedsWin64SEH) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
+          .addImm(FramePtr)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+
     // Update EBP with the new base value.
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
             FramePtr)
         .addReg(StackPtr)
         .setMIFlag(MachineInstr::FrameSetup);
 
-    if (needsFrameMoves) {
+    if (NeedsDwarfCFI) {
       // Mark effective beginning of when frame pointer becomes valid.
      // Define the current CFA to use the EBP/RBP register.
       unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
@@ -562,10 +599,10 @@
          (MBBI->getOpcode() == X86::PUSH32r ||
           MBBI->getOpcode() == X86::PUSH64r)) {
     PushedRegs = true;
-    MBBI->setFlag(MachineInstr::FrameSetup);
+    unsigned Reg = MBBI->getOperand(0).getReg();
     ++MBBI;
 
-    if (!HasFP && needsFrameMoves) {
+    if (!HasFP && NeedsDwarfCFI) {
       // Mark callee-saved push instruction.
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
@@ -575,6 +612,12 @@
           .addCFIIndex(CFIIndex);
       StackOffset += stackGrowth;
     }
+
+    if (NeedsWin64SEH) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
+          .addImm(Reg)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
   }
 
   // Realign stack after we pushed callee-saved registers (so that we'll be
@@ -683,23 +726,90 @@
       MI->setFlag(MachineInstr::FrameSetup);
       MBB.insert(MBBI, MI);
     }
-  } else if (NumBytes)
+  } else if (NumBytes) {
     emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
                  UseLEA, TII, *RegInfo);
+  }
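
Note how each callee-save push gets its `SEH_PushReg` planted immediately after the push itself, so the directive is attached to the right instruction when printed; the pseudo is flagged `FrameSetup` so the skip loop further down can step over the whole prologue run. The recurring pattern (fragment repeated from the code above):

    if (NeedsWin64SEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(Reg) // the register id travels as an immediate operand
          .setMIFlag(MachineInstr::FrameSetup);
    }
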
+
+  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+  int SEHFrameOffset = 0;
+  if (NeedsWin64SEH) {
+    if (HasFP) {
+      // We need to set frame base offset low enough such that all saved
+      // register offsets would be positive relative to it, but we can't
+      // just use NumBytes, because .seh_setframe offset must be <= 240.
+      // So we pretend to have only allocated enough space to spill the
+      // non-volatile registers.
+
+      for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+        int offset = MFI->getObjectOffset(CSI[i].getFrameIdx());
+        SEHFrameOffset = std::max(SEHFrameOffset, abs(offset));
+      }
+      SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment
+
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+          .addImm(SEHFrameOffset - X86FI->getCalleeSavedFrameSize())
+          .setMIFlag(MachineInstr::FrameSetup);
+
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
+          .addImm(FramePtr)
+          .addImm(SEHFrameOffset)
+          .setMIFlag(MachineInstr::FrameSetup);
+
+      // Don't care about the rest of stack allocation, because unwinder
+      // will restore SP to (BP - LastSpillSlotOffset)
+    } else {
+      // SP will be the base register for restoring XMMs
+      if (NumBytes) {
+        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+            .addImm(NumBytes)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
+    }
+  }
+
+  // Skip the rest of the register spilling code
+  while (MBBI != MBB.end() &&
+         MBBI->getFlag(MachineInstr::FrameSetup)) {
+    ++MBBI;
+  }
+
+  // Emit SEH info for non-GPRs
+  if (NeedsWin64SEH) {
+    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+      unsigned Reg = CSI[i].getReg();
+      if (X86::GR64RegClass.contains(Reg) ||
+          X86::GR32RegClass.contains(Reg))
+        continue;
+      // Win64 should spill only GPRs and XMMs
+      assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");
+
+      int Offset = getFrameIndexOffset(MF, CSI[i].getFrameIdx());
+      Offset += SEHFrameOffset;
+
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
+          .addImm(Reg)
+          .addImm(Offset)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
+
+    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   // If we need a base pointer, set it up here. It's whatever the value
   // of the stack pointer is at this point. Any variable size objects
   // will be allocated after this, so we can still use the base pointer
   // to reference locals.
   if (RegInfo->hasBasePointer(MF)) {
-    // Update the frame pointer with the current stack pointer.
+    // Update the base pointer with the current stack pointer.
     unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
     BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
         .addReg(StackPtr)
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
+  if (( (!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
     // Mark end of stack pointer adjustment.
     if (!HasFP && NumBytes) {
       // Define the current CFA rule to use the provided offset.
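
The `SEHFrameOffset` arithmetic above deserves a worked example. Spill-slot offsets are negative multiples of 8, so `x += x % 16` is a correct round-up to 16 here (the remainder is only ever 0 or 8). Using the numbers the foo5 test at the end of this patch ends up with, GPR slots at -24 and -32, 16-byte XMM slots at -48 and -64, and a CalleeSavedFrameSize of 16, a standalone sketch:

    #include <algorithm>
    #include <cstdio>
    #include <cstdlib>

    int main() {
      const int Offsets[] = {-24, -32, -48, -64}; // CSR slot offsets (cf. foo5)
      const int CalleeSavedFrameSize = 16;        // two pushed GPRs

      int SEHFrameOffset = 0;
      for (int O : Offsets)
        SEHFrameOffset = std::max(SEHFrameOffset, std::abs(O)); // 64
      SEHFrameOffset += SEHFrameOffset % 16; // still 64; 40 would become 48

      // Matches the test checks: .seh_stackalloc 48 and .seh_setframe 5, 64
      std::printf(".seh_stackalloc %d\n", SEHFrameOffset - CalleeSavedFrameSize);
      std::printf(".seh_setframe 5, %d\n", SEHFrameOffset); // 5 = SEH rbp
      return 0;
    }

Only the spill area is reported to `.seh_stackalloc` in the frame-pointer case; the unwinder recovers SP from the frame register, so the remaining allocation does not need to be described.
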
@@ -797,8 +907,8 @@
     MachineBasicBlock::iterator PI = std::prev(MBBI);
     unsigned Opc = PI->getOpcode();
 
-    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
-        !PI->isTerminator())
+    if (Opc != X86::POP32r && Opc != X86::POP64r &&
+        Opc != X86::DBG_VALUE && !PI->isTerminator())
       break;
 
     --MBBI;
@@ -975,43 +1085,57 @@
 }
 
 bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                          MachineBasicBlock::iterator MI,
-                                          const std::vector<CalleeSavedInfo> &CSI,
+                                                 MachineBasicBlock::iterator MI,
+                                                 const std::vector<CalleeSavedInfo> &CSI_,
                                           const TargetRegisterInfo *TRI) const {
-  if (CSI.empty())
-    return false;
+  std::vector<CalleeSavedInfo> &CSI = const_cast<std::vector<CalleeSavedInfo> &>(CSI_);
 
   DebugLoc DL = MBB.findDebugLoc(MI);
 
   MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
 
   const X86RegisterInfo *RegInfo =
     static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
   unsigned SlotSize = RegInfo->getSlotSize();
   unsigned FPReg = TRI->getFrameRegister(MF);
-  unsigned CalleeFrameSize = 0;
-
+  bool HasFP = hasFP(MF);
 
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
 
+  unsigned CalleeSavedFrameSize = 0;
+  int SpillSlotOffset = getOffsetOfLocalArea() +
+                        X86FI->getTCReturnAddrDelta()
+                        - (HasFP ? 1 : 0) * SlotSize;
+
   // Push GPRs. It increases frame size.
   unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
+
     if (!X86::GR64RegClass.contains(Reg) &&
         !X86::GR32RegClass.contains(Reg))
       continue;
+
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
-    if (Reg == FPReg)
+    if (Reg == FPReg) {
       // X86RegisterInfo::emitPrologue will handle spilling of frame register.
       continue;
-    CalleeFrameSize += SlotSize;
+    }
+
+    SpillSlotOffset -= SlotSize;
+    CalleeSavedFrameSize += SlotSize;
+
+    int SlotIndex = MFI->CreateFixedObject(SlotSize, SpillSlotOffset, true, true);
+    MFI->RemoveStackObject(CSI[i-1].getFrameIdx());
+    CSI[i-1].setFrameIdx(SlotIndex);
+
     BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
       .setMIFlag(MachineInstr::FrameSetup);
   }
 
-  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
 
   // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
   // It can be done by spilling XMMs to stack frame.
@@ -1024,8 +1148,19 @@
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
-                            RC, TRI);
+
+    // ensure alignment
+    SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
+    // spill into slot
+    SpillSlotOffset -= RC->getSize();
+
+    int SlotIndex = MFI->CreateFixedObject(RC->getSize(), SpillSlotOffset, true, true);
+    MFI->RemoveStackObject(CSI[i-1].getFrameIdx());
+    CSI[i-1].setFrameIdx(SlotIndex);
+
+    TII.storeRegToStackSlot(MBB, MI, Reg, true, SlotIndex, RC, TRI);
+    --MI;
+    MI->setFlag(MachineInstr::FrameSetup);
+    ++MI;
   }
 
   return true;
 }
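
This is the heart of the spill rewrite: rather than letting PEI assign slots later, each callee-saved register now claims a fixed object at an offset already known here, which is what allows the prologue code above to emit exact `.seh_setframe`/`.seh_savexmm` numbers. Per register, the bookkeeping reduces to (fragment; all names as in the patch):

    // Reserve an immutable fixed spill slot at a known SP-relative offset,
    // retire the slot PEI had created, and point the CalleeSavedInfo at the
    // replacement so the restore path finds it.
    SpillSlotOffset -= SlotSize;
    int SlotIndex = MFI->CreateFixedObject(SlotSize, SpillSlotOffset,
                                           /*Immutable=*/true,
                                           /*isSpillSlot=*/true);
    MFI->RemoveStackObject(CSI[i - 1].getFrameIdx());
    CSI[i - 1].setFrameIdx(SlotIndex);

The dead objects left behind by `RemoveStackObject` are what the new `isDeadObjectIndex` guards in PrologEpilogInserter.cpp skip.
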
@@ -1043,20 +1178,21 @@
   MachineFunction &MF = *MBB.getParent();
   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
+  unsigned FPReg = TRI->getFrameRegister(MF);
 
-  // Reload XMMs from stack frame.
+  // Reload non-GPRs
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
     if (X86::GR64RegClass.contains(Reg) ||
         X86::GR32RegClass.contains(Reg))
       continue;
-    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
-                             RC, TRI);
+
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+                             RC, TRI);
   }
 
   // POP GPRs.
-  unsigned FPReg = TRI->getFrameRegister(MF);
   unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
@@ -1106,7 +1242,7 @@
                   -(int)SlotSize +
                   TFI.getOffsetOfLocalArea() +
                   TailCallReturnAddrDelta,
-                  true);
+                  true, true);
     assert(FrameIdx == MFI->getObjectIndexBegin() &&
            "Slot for EBP register must be last in order to be found!");
     (void)FrameIdx;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -599,7 +599,8 @@
   // FIXME - use subtarget debug flags
   if (!Subtarget->isTargetDarwin() &&
       !Subtarget->isTargetELF() &&
-      !Subtarget->isTargetCygMing()) {
+      !Subtarget->isTargetCygMing() &&
+      !Subtarget->isTargetWin64()) {
     setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
   }
Index: lib/Target/X86/X86InstrCompiler.td
===================================================================
--- lib/Target/X86/X86InstrCompiler.td
+++ lib/Target/X86/X86InstrCompiler.td
@@ -110,7 +110,7 @@
 
 // When using segmented stacks these are lowered into instructions which first
 // check if the current stacklet has enough free memory. If it does, memory is
-// allocated by bumping the stack pointer. Otherwise memory is allocated from 
+// allocated by bumping the stack pointer. Otherwise memory is allocated from
 // the heap.
 
 let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
@@ -197,6 +197,26 @@
 }
 
 //===----------------------------------------------------------------------===//
+// Pseudo instructions used by unwind info.
+//
+let isPseudo = 1 in {
+  def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg),
+                      "#SEH_PushReg $reg", []>;
+  def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
+                      "#SEH_SaveReg $reg, $dst", []>;
+  def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst),
+                      "#SEH_SaveXMM $reg, $dst", []>;
+  def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size),
+                         "#SEH_StackAlloc $size", []>;
+  def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset),
+                       "#SEH_SetFrame $reg, $offset", []>;
+  def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode),
+                        "#SEH_PushFrame $mode", []>;
+  def SEH_EndPrologue : I<0, Pseudo, (outs), (ins),
+                          "#SEH_EndPrologue", []>;
+}
+
+//===----------------------------------------------------------------------===//
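
On the printing side, each of these pseudos maps one-to-one onto a Win64 EH streamer call, with the register operand translated from LLVM's numbering into the SEH ordinal space first. A hypothetical free-function wrapper around the simplest case, mirroring what X86MCInstLower.cpp does below inside its opcode switch:

    // Sketch: lower a SEH_PushReg pseudo to its .seh_pushreg directive.
    static void lowerSEHPushReg(llvm::MCStreamer &OutStreamer,
                                const llvm::X86RegisterInfo &RI,
                                const llvm::MachineInstr &MI) {
      OutStreamer.EmitWin64EHPushReg(
          RI.getSEHRegNum(MI.getOperand(0).getImm()));
    }
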
// Pseudo instructions used by segmented stacks.
//
@@ -371,7 +391,7 @@
 def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
                      [(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32,
                      Requires<[In64BitMode]>;
- 
+
 let Uses = [RAX,RCX,RDI] in
 def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
                       [(X86rep_stos i64)], IIC_REP_STOS>, REP,
Index: lib/Target/X86/X86MCInstLower.cpp
===================================================================
--- lib/Target/X86/X86MCInstLower.cpp
+++ lib/Target/X86/X86MCInstLower.cpp
@@ -13,6 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "X86AsmPrinter.h"
+#include "X86RegisterInfo.h"
 #include "InstPrinter/X86ATTInstPrinter.h"
 #include "MCTargetDesc/X86BaseInfo.h"
 #include "llvm/ADT/SmallString.h"
@@ -20,6 +21,7 @@
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/Mangler.h"
 #include "llvm/MC/MCAsmInfo.h"
@@ -779,6 +781,9 @@
 void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
   X86MCInstLower MCInstLowering(*MF, *this);
 
+  const X86RegisterInfo *RI =
+      static_cast<const X86RegisterInfo *>(TM.getRegisterInfo());
+
   switch (MI->getOpcode()) {
   case TargetOpcode::DBG_VALUE:
     llvm_unreachable("Should be handled target independently");
@@ -883,6 +888,43 @@
                                 .addReg(X86::R10)
                                 .addReg(X86::RAX));
     return;
+
+  case X86::SEH_PushReg:
+    OutStreamer.EmitWin64EHPushReg(
+        RI->getSEHRegNum(MI->getOperand(0).getImm()));
+    return;
+
+  case X86::SEH_SaveReg:
+    OutStreamer.EmitWin64EHSaveReg(
+        RI->getSEHRegNum(MI->getOperand(0).getImm()),
+        MI->getOperand(1).getImm());
+    return;
+
+  case X86::SEH_SaveXMM:
+    OutStreamer.EmitWin64EHSaveXMM(
+        RI->getSEHRegNum(MI->getOperand(0).getImm()),
+        MI->getOperand(1).getImm());
+    return;
+
+  case X86::SEH_StackAlloc:
+    OutStreamer.EmitWin64EHAllocStack(
+        MI->getOperand(0).getImm());
+    return;
+
+  case X86::SEH_SetFrame:
+    OutStreamer.EmitWin64EHSetFrame(
+        RI->getSEHRegNum(MI->getOperand(0).getImm()),
+        MI->getOperand(1).getImm());
+    return;
+
+  case X86::SEH_PushFrame:
+    OutStreamer.EmitWin64EHPushFrame(
+        MI->getOperand(0).getImm());
+    return;
+
+  case X86::SEH_EndPrologue:
+    OutStreamer.EmitWin64EHEndProlog();
+    return;
   }
 
   MCInst TmpInst;
Index: test/CodeGen/X86/2007-05-05-Personality.ll
===================================================================
--- test/CodeGen/X86/2007-05-05-Personality.ll
+++ test/CodeGen/X86/2007-05-05-Personality.ll
@@ -1,12 +1,14 @@
 ; RUN: llc < %s -mtriple=i686-pc-linux-gnu -o - | FileCheck %s --check-prefix=LIN
-; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=LIN
 ; RUN: llc < %s -mtriple=i386-pc-mingw32 -o - | FileCheck %s --check-prefix=WIN
 ; RUN: llc < %s -mtriple=i686-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN
+; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu -o - | FileCheck %s --check-prefix=WIN64
 
 ; LIN: .cfi_personality 0, __gnat_eh_personality
 ; LIN: .cfi_lsda 0, .Lexception0
 ; WIN: .cfi_personality 0, ___gnat_eh_personality
 ; WIN: .cfi_lsda 0, Lexception0
+; WIN64: .seh_handler __gnat_eh_personality
+; WIN64: .seh_handlerdata
 
 @error = external global i8
 
@@ -15,7 +17,7 @@
   invoke void @raise()
           to label %eh_then unwind label %unwind
 
-unwind:                                           ; preds = %entry
+unwind:                                          ; preds = %entry
   %eh_ptr = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*)
             catch i8* @error
   %eh_select = extractvalue { i8*, i32 } %eh_ptr, 1
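
For reading the tests that follow: the small integers in `.seh_pushreg` and `.seh_setframe` lines are Win64 unwind ordinals, not DWARF numbers. The authoritative mapping is `getSEHRegNum`, but the low eight GPRs follow the architectural register encoding, which a standalone table makes easy to eyeball (R8 through R15 continue as 8 through 15):

    #include <cstdio>

    // Win64 SEH ordinals for the classic GPRs; e.g. rbx -> 3, rbp -> 5,
    // rsi -> 6, rdi -> 7, matching the .seh_pushreg checks below.
    static const char *const SEHGPRs[8] = {"rax", "rcx", "rdx", "rbx",
                                           "rsp", "rbp", "rsi", "rdi"};

    int main() {
      for (int i = 0; i < 8; ++i)
        std::printf(".seh_pushreg %%%s = %d\n", SEHGPRs[i], i);
      return 0;
    }
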
Index: test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
===================================================================
--- test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
+++ test/CodeGen/X86/2009-06-03-Win64SpillXMM.ll
@@ -1,7 +1,7 @@
 ; RUN: llc -mcpu=generic -mtriple=x86_64-mingw32 < %s | FileCheck %s
 ; CHECK: subq $40, %rsp
-; CHECK: movaps %xmm8, (%rsp)
-; CHECK: movaps %xmm7, 16(%rsp)
+; CHECK: movaps %xmm8, 16(%rsp)
+; CHECK: movaps %xmm7, (%rsp)
 
 define i32 @a() nounwind {
 entry:
Index: test/CodeGen/X86/avx-intel-ocl.ll
===================================================================
--- test/CodeGen/X86/avx-intel-ocl.ll
+++ test/CodeGen/X86/avx-intel-ocl.ll
@@ -7,21 +7,21 @@
 declare <16 x float> @func_float16(<16 x float>, <16 x float>)
 declare i32 @func_int(i32, i32)
 
-; WIN64: testf16_inp
+; WIN64-LABEL: testf16_inp
 ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
 ; WIN64: vaddps {{.*}}, {{%ymm[0-1]}}
 ; WIN64: leaq {{.*}}(%rsp), %rcx
 ; WIN64: call
 ; WIN64: ret
 
-; X32: testf16_inp
+; X32-LABEL: testf16_inp
 ; X32: movl %eax, (%esp)
 ; X32: vaddps {{.*}}, {{%ymm[0-1]}}
 ; X32: vaddps {{.*}}, {{%ymm[0-1]}}
 ; X32: call
 ; X32: ret
 
-; X64: testf16_inp
+; X64-LABEL: testf16_inp
 ; X64: vaddps {{.*}}, {{%ymm[0-1]}}
 ; X64: vaddps {{.*}}, {{%ymm[0-1]}}
 ; X64: leaq {{.*}}(%rsp), %rdi
@@ -41,14 +41,14 @@
 ;test calling conventions - preserved registers
 ; preserved ymm6-ymm15
-; WIN64: testf16_regs
+; WIN64-LABEL: testf16_regs
 ; WIN64: call
 ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; WIN64: ret
 
 ; preserved ymm8-ymm15
-; X64: testf16_regs
+; X64-LABEL: testf16_regs
 ; X64: call
 ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
 ; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
@@ -65,28 +65,30 @@
 }
 
 ; test calling conventions - prolog and epilog
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
-; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rsp).*}}     # 32-byte Spill
+; WIN64-LABEL: test_prolog_epilog
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
+; WIN64: vmovaps {{%ymm([6-9]|1[0-5])}}, {{.*(%rbp).*}}     # 32-byte Spill
 ; WIN64: call
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-; WIN64: vmovaps {{.*(%rsp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
-
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+; WIN64: vmovaps {{.*(%rbp).*}}, {{%ymm([6-9]|1[0-5])}}     # 32-byte Reload
+
+; X64-LABEL: test_prolog_epilog
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
 ; X64: vmovups {{%ymm([8-9]|1[0-5])}}, {{.*}}(%rsp)  ## 32-byte Folded Spill
@@ -111,12 +113,14 @@
 ; test functions with integer parameters
 ; pass parameters on stack for 32-bit platform
+; X32-LABEL: test_int
 ; X32: movl {{.*}}, 4(%esp)
 ; X32: movl {{.*}}, (%esp)
 ; X32: call
 ; X32: addl {{.*}}, %eax
 
 ; pass parameters in registers for 64-bit platform
+; X64-LABEL: test_int
 ; X64: leal {{.*}}, %edi
 ; X64: movl {{.*}}, %esi
 ; X64: call
@@ -128,21 +132,21 @@
   ret i32 %c
 }
 
-; WIN64: test_float4
+; WIN64-LABEL: test_float4
 ; WIN64-NOT: vzeroupper
 ; WIN64: call
 ; WIN64-NOT: vzeroupper
 ; WIN64: call
 ; WIN64: ret
 
-; X64: test_float4
+; X64-LABEL: test_float4
 ; X64-NOT: vzeroupper
 ; X64: call
 ; X64-NOT: vzeroupper
 ; X64: call
 ; X64: ret
 
-; X32: test_float4
+; X32-LABEL: test_float4
 ; X32: vzeroupper
 ; X32: call
 ; X32: vzeroupper
Index: test/CodeGen/X86/gcc_except_table.ll
===================================================================
--- test/CodeGen/X86/gcc_except_table.ll
+++ test/CodeGen/X86/gcc_except_table.ll
@@ -13,14 +13,14 @@
 ; APPLE: GCC_except_table0:
 ; APPLE: Lexception0:
 
-; MINGW64: .cfi_startproc
-; MINGW64: .cfi_personality 0, __gxx_personality_v0
-; MINGW64: .cfi_lsda 0, .Lexception0
-; MINGW64: .cfi_def_cfa_offset 16
+; MINGW64: .seh_proc
+; MINGW64: .seh_handler __gxx_personality_v0
+; MINGW64: .seh_setframe 5, 0
 ; MINGW64: callq _Unwind_Resume
-; MINGW64: .cfi_endproc
+; MINGW64: .seh_handlerdata
 ; MINGW64: GCC_except_table0:
 ; MINGW64: Lexception0:
+; MINGW64: .seh_endproc
 
 ; MINGW32: .cfi_startproc
 ; MINGW32: .cfi_personality 0, ___gxx_personality_v0
Index: test/CodeGen/X86/win64_eh.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/win64_eh.ll
@@ -0,0 +1,170 @@
+; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-win32 | FileCheck %s -check-prefix=WIN64
+; RUN: llc < %s -O0 -mcpu=corei7 -mtriple=x86_64-pc-mingw32 | FileCheck %s -check-prefix=WIN64
+
+; Check function without prolog
+define void @foo0() uwtable {
+entry:
+  ret void
+}
+; WIN64-LABEL: foo0:
+; WIN64: .seh_proc foo0
+; WIN64: .seh_endprologue
+; WIN64: ret
+; WIN64: .seh_endproc
+
+; Checks a small stack allocation
+define void @foo1() uwtable {
+entry:
+  %baz = alloca [2000 x i16], align 2
+  ret void
+}
+; WIN64-LABEL: foo1:
+; WIN64: .seh_proc foo1
+; WIN64: subq $4000, %rsp
+; WIN64: .seh_stackalloc 4000
+; WIN64: .seh_endprologue
+; WIN64: addq $4000, %rsp
+; WIN64: ret
+; WIN64: .seh_endproc
+
+; Checks a stack allocation requiring call to __chkstk/___chkstk_ms
+define void @foo2() uwtable {
+entry:
+  %baz = alloca [4000 x i16], align 2
+  ret void
+}
+; WIN64-LABEL: foo2:
+; WIN64: .seh_proc foo2
+; WIN64: movabsq $8000, %rax
+; WIN64: callq {{__chkstk|___chkstk_ms}}
+; WIN64: subq %rax, %rsp
+; WIN64: .seh_stackalloc 8000
+; WIN64: .seh_endprologue
+; WIN64: addq $8000, %rsp
+; WIN64: ret
+; WIN64: .seh_endproc
+
+
+; Checks stack push
+define i32 @foo3(i32 %f_arg, i32 %e_arg, i32 %d_arg, i32 %c_arg, i32 %b_arg, i32 %a_arg) uwtable {
+entry:
+  %a = alloca i32
+  %b = alloca i32
+  %c = alloca i32
+  %d = alloca i32
+  %e = alloca i32
+  %f = alloca i32
+  store i32 %a_arg, i32* %a
+  store i32 %b_arg, i32* %b
+  store i32 %c_arg, i32* %c
+  store i32 %d_arg, i32* %d
+  store i32 %e_arg, i32* %e
+  store i32 %f_arg, i32* %f
+  %tmp = load i32* %a
+  %tmp1 = mul i32 %tmp, 2
+  %tmp2 = load i32* %b
+  %tmp3 = mul i32 %tmp2, 3
+  %tmp4 = add i32 %tmp1, %tmp3
+  %tmp5 = load i32* %c
+  %tmp6 = mul i32 %tmp5, 5
+  %tmp7 = add i32 %tmp4, %tmp6
+  %tmp8 = load i32* %d
+  %tmp9 = mul i32 %tmp8, 7
+  %tmp10 = add i32 %tmp7, %tmp9
+  %tmp11 = load i32* %e
+  %tmp12 = mul i32 %tmp11, 11
+  %tmp13 = add i32 %tmp10, %tmp12
+  %tmp14 = load i32* %f
+  %tmp15 = mul i32 %tmp14, 13
+  %tmp16 = add i32 %tmp13, %tmp15
+  ret i32 %tmp16
+}
+; WIN64-LABEL: foo3:
+; WIN64: .seh_proc foo3
+; WIN64: pushq %rsi
+; WIN64: .seh_pushreg 6
+; WIN64: subq $24, %rsp
+; WIN64: .seh_stackalloc 24
+; WIN64: .seh_endprologue
+; WIN64: addq $24, %rsp
+; WIN64: popq %rsi
+; WIN64: ret
+; WIN64: .seh_endproc
+
+
+; Check emission of eh handler and handler data
+declare i32 @_d_eh_personality(i32, i32, i64, i8*, i8*)
+declare void @_d_eh_resume_unwind(i8*)
+
+declare i32 @bar()
+
+define i32 @foo4() #0 {
+entry:
+  %step = alloca i32, align 4
+  store i32 0, i32* %step
+  %tmp = load i32* %step
+
+  %tmp1 = invoke i32 @bar()
+          to label %finally unwind label %landingpad
+
+finally:
+  store i32 1, i32* %step
+  br label %endtryfinally
+
+landingpad:
+  %landing_pad = landingpad { i8*, i32 } personality i32 (i32, i32, i64, i8*, i8*)* @_d_eh_personality
+          cleanup
+  %tmp3 = extractvalue { i8*, i32 } %landing_pad, 0
+  store i32 2, i32* %step
+  call void @_d_eh_resume_unwind(i8* %tmp3)
+  unreachable
+
+endtryfinally:
+  %tmp10 = load i32* %step
+  ret i32 %tmp10
+}
+; WIN64-LABEL: foo4:
+; WIN64: .seh_proc foo4
+; WIN64: .seh_handler _d_eh_personality, @unwind, @except
+; WIN64: subq $56, %rsp
+; WIN64: .seh_stackalloc 56
+; WIN64: .seh_endprologue
+; WIN64: addq $56, %rsp
+; WIN64: ret
+; WIN64: .seh_handlerdata
+; WIN64: .seh_endproc
+
+
+; Check stack re-alignment and xmm spilling
+define void @foo5() uwtable {
+entry:
+  %s = alloca i32, align 64
+  call void asm sideeffect "", "~{rbx},~{rdi},~{xmm6},~{xmm7}"()
+  ret void
+}
+; WIN64-LABEL: foo5:
+; WIN64: .seh_proc foo5
+; WIN64: pushq %rbp
+; WIN64: .seh_pushreg 5
+; WIN64: movq %rsp, %rbp
+; WIN64: pushq %rdi
+; WIN64: .seh_pushreg 7
+; WIN64: pushq %rbx
+; WIN64: .seh_pushreg 3
+; WIN64: andq $-64, %rsp
+; WIN64: subq $128, %rsp
+; WIN64: .seh_stackalloc 48
+; WIN64: .seh_setframe 5, 64
+; WIN64: movaps %xmm7, -32(%rbp)  # 16-byte Spill
+; WIN64: movaps %xmm6, -48(%rbp)  # 16-byte Spill
+; WIN64: .seh_savexmm 6, 16
+; WIN64: .seh_savexmm 7, 32
+; WIN64: .seh_endprologue
+; WIN64: movaps -48(%rbp), %xmm6  # 16-byte Reload
+; WIN64: movaps -32(%rbp), %xmm7  # 16-byte Reload
+; WIN64: leaq -16(%rbp), %rsp
+; WIN64: popq %rbx
+; WIN64: popq %rdi
+; WIN64: popq %rbp
+; WIN64: retq
+; WIN64: .seh_endproc