Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -484,6 +484,35 @@
   }
 }
 
+static unsigned calculateSetFPREG(uint64_t SPAdjust) {
+  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
+  // and might require smaller successive adjustments.
+  const uint64_t Win64MaxSEHOffset = 128;
+  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
+  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
+  return static_cast<unsigned>(RoundUpToAlignment(SEHFrameOffset, 16));
+}
+
+// If we're forcing a stack realignment we can't rely on just the frame
+// info, we need to know the ABI stack alignment as well in case we
+// have a call out.  Otherwise just make sure we have some alignment - we'll
+// go with the minimum SlotSize.
+static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
+  const MachineFrameInfo *MFI = MF.getFrameInfo();
+  uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
+  unsigned SlotSize = RegInfo->getSlotSize();
+  unsigned StackAlign = STI.getFrameLowering()->getStackAlignment();
+  if (ForceStackAlign) {
+    if (MFI->hasCalls())
+      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
+    else if (MaxAlign < SlotSize)
+      MaxAlign = SlotSize;
+  }
+  return MaxAlign;
+}
+
 /// emitPrologue - Push callee-saved registers onto the stack, which
 /// automatically adjust the stack pointer. Adjust the stack pointer to allocate
 /// space for local variables. Also emit labels used by the exception handler to
@@ -578,7 +607,7 @@
   const TargetInstrInfo &TII = *STI.getInstrInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
+  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
   uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
   bool HasFP = hasFP(MF);
   bool Is64Bit = STI.is64Bit();
@@ -591,7 +620,6 @@
   bool NeedsDwarfCFI =
       !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
   bool UseLEA = STI.useLeaForSP();
-  unsigned StackAlign = getStackAlignment();
   unsigned SlotSize = RegInfo->getSlotSize();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
   const unsigned MachineFramePtr =
@@ -602,19 +630,11 @@
   unsigned BasePtr = RegInfo->getBaseRegister();
   DebugLoc DL;
 
-  // If we're forcing a stack realignment we can't rely on just the frame
-  // info, we need to know the ABI stack alignment as well in case we
-  // have a call out.  Otherwise just make sure we have some alignment - we'll
-  // go with the minimum SlotSize.
-  if (ForceStackAlign) {
-    if (MFI->hasCalls())
-      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
-    else if (MaxAlign < SlotSize)
-      MaxAlign = SlotSize;
-  }
-
   // Add RETADDR move area to callee saved frame size.
   int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
+  if (TailCallReturnAddrDelta && IsWinEH)
+    report_fatal_error("Can't handle guaranteed tail call under win64 yet");
+
   if (TailCallReturnAddrDelta < 0)
     X86FI->setCalleeSavedFrameSize(
       X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
@@ -689,7 +709,7 @@
       // Callee-saved registers are pushed on stack before the stack
       // is realigned.
       FrameSize -= X86FI->getCalleeSavedFrameSize();
-      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
+      NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);
     } else {
       NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
     }
@@ -728,11 +748,14 @@
           .setMIFlag(MachineInstr::FrameSetup);
     }
 
-    // Update EBP with the new base value.
-    BuildMI(MBB, MBBI, DL,
-            TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), FramePtr)
-        .addReg(StackPtr)
-        .setMIFlag(MachineInstr::FrameSetup);
+    if (!IsWinEH) {
+      // Update EBP with the new base value.
+      BuildMI(MBB, MBBI, DL,
+              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
+              FramePtr)
+          .addReg(StackPtr)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
 
     if (NeedsDwarfCFI) {
       // Mark effective beginning of when frame pointer becomes valid.
@@ -781,15 +804,16 @@
 
   // Realign stack after we pushed callee-saved registers (so that we'll be
   // able to calculate their offsets from the frame pointer).
-  if (RegInfo->needsStackRealignment(MF)) {
+  // Don't do this for Win64, it needs to realign the stack after the prologue.
+  if (!IsWinEH && RegInfo->needsStackRealignment(MF)) {
     assert(HasFP && "There should be a frame pointer if stack is realigned.");
     uint64_t Val = -MaxAlign;
     MachineInstr *MI =
-      BuildMI(MBB, MBBI, DL,
-              TII.get(getANDriOpcode(Uses64BitFramePtr, Val)), StackPtr)
-      .addReg(StackPtr)
-      .addImm(Val)
-      .setMIFlag(MachineInstr::FrameSetup);
+        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
+                StackPtr)
+            .addReg(StackPtr)
+            .addImm(Val)
+            .setMIFlag(MachineInstr::FrameSetup);
 
     // The EFLAGS implicit def is dead.
     MI->getOperand(3).setIsDead();
@@ -867,50 +891,28 @@
                  UseLEA, TII, *RegInfo);
   }
 
+  if (NeedsWinEH && NumBytes)
+    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
+        .addImm(NumBytes)
+        .setMIFlag(MachineInstr::FrameSetup);
+
   int SEHFrameOffset = 0;
-  if (NeedsWinEH) {
-    if (HasFP) {
-      // We need to set frame base offset low enough such that all saved
-      // register offsets would be positive relative to it, but we can't
-      // just use NumBytes, because .seh_setframe offset must be <=240.
-      // So we pretend to have only allocated enough space to spill the
-      // non-volatile registers.
-      // We don't care about the rest of stack allocation, because unwinder
-      // will restore SP to (BP - SEHFrameOffset)
-      for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
-        int offset = MFI->getObjectOffset(Info.getFrameIdx());
-        SEHFrameOffset = std::max(SEHFrameOffset, std::abs(offset));
-      }
-      SEHFrameOffset += SEHFrameOffset % 16; // ensure alignmant
-
-      // This only needs to account for XMM spill slots, GPR slots
-      // are covered by the .seh_pushreg's emitted above.
-      unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize();
-      if (Size) {
-        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
-            .addImm(Size)
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
+  if (IsWinEH && HasFP) {
+    SEHFrameOffset = calculateSetFPREG(NumBytes);
+    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
+                 StackPtr, false, SEHFrameOffset);
 
+    if (NeedsWinEH)
       BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
           .addImm(FramePtr)
           .addImm(SEHFrameOffset)
           .setMIFlag(MachineInstr::FrameSetup);
-    } else {
-      // SP will be the base register for restoring XMMs
-      if (NumBytes) {
-        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
-            .addImm(NumBytes)
-            .setMIFlag(MachineInstr::FrameSetup);
-      }
-    }
   }
 
   // Skip the rest of register spilling code
   while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
     ++MBBI;
 
-  // Emit SEH info for non-GPRs
   if (NeedsWinEH) {
     for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
       unsigned Reg = Info.getReg();
@@ -931,6 +933,23 @@
         .setMIFlag(MachineInstr::FrameSetup);
   }
 
+  // Realign stack after we spilled callee-saved registers (so that we'll be
+  // able to calculate their offsets from the frame pointer).
+  // Win64 requires aligning the stack after the prologue.
+  if (IsWinEH && RegInfo->needsStackRealignment(MF)) {
+    assert(HasFP && "There should be a frame pointer if stack is realigned.");
+    uint64_t Val = -MaxAlign;
+    MachineInstr *MI =
+        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
+                StackPtr)
+            .addReg(StackPtr)
+            .addImm(Val)
+            .setMIFlag(MachineInstr::FrameSetup);
+
+    // The EFLAGS implicit def is dead.
+    MI->getOperand(3).setIsDead();
+  }
+
   // If we need a base pointer, set it up here. It's whatever the value
   // of the stack pointer is at this point. Any variable size objects
   // will be allocated after this, so we can still use the base pointer
@@ -986,7 +1005,6 @@
   const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
   const bool Is64BitILP32 = STI.isTarget64BitILP32();
   bool UseLEA = STI.useLeaForSP();
-  unsigned StackAlign = getStackAlignment();
   unsigned SlotSize = RegInfo->getSlotSize();
   unsigned FramePtr = RegInfo->getFrameRegister(MF);
   unsigned MachineFramePtr =
@@ -1017,21 +1035,10 @@
 
   // Get the number of bytes to allocate from the FrameInfo.
   uint64_t StackSize = MFI->getStackSize();
-  uint64_t MaxAlign  = MFI->getMaxAlignment();
+  uint64_t MaxAlign = calculateMaxStackAlign(MF);
   unsigned CSSize = X86FI->getCalleeSavedFrameSize();
   uint64_t NumBytes = 0;
 
-  // If we're forcing a stack realignment we can't rely on just the frame
-  // info, we need to know the ABI stack alignment as well in case we
-  // have a call out.  Otherwise just make sure we have some alignment - we'll
-  // go with the minimum.
-  if (ForceStackAlign) {
-    if (MFI->hasCalls())
-      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
-    else
-      MaxAlign = MaxAlign ? MaxAlign : 4;
-  }
-
   if (hasFP(MF)) {
     // Calculate required stack adjustment.
     uint64_t FrameSize = StackSize - SlotSize;
@@ -1050,6 +1057,7 @@
   } else {
     NumBytes = StackSize - CSSize;
   }
+  uint64_t SEHStackAllocAmt = NumBytes;
 
   // Skip the callee-saved pop instructions.
   while (MBBI != MBB.begin()) {
@@ -1077,7 +1085,12 @@
   if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
     if (RegInfo->needsStackRealignment(MF))
       MBBI = FirstCSPop;
-    if (CSSize != 0) {
+    if (IsWinEH) {
+      unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
+      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), StackPtr),
+                   FramePtr, false, SEHStackAllocAmt - SEHFrameOffset);
+      --MBBI;
+    } else if (CSSize != 0) {
       unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
       addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                    FramePtr, false, -CSSize);
@@ -1195,14 +1208,53 @@
   const X86RegisterInfo *RegInfo =
       MF.getSubtarget<X86Subtarget>().getRegisterInfo();
   const MachineFrameInfo *MFI = MF.getFrameInfo();
+  // Offset will hold the offset from the stack pointer at function entry to the
+  // object.
+  // We need to factor in additional offsets applied during the prologue to the
+  // frame, base, and stack pointer depending on which is used.
   int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
   uint64_t StackSize = MFI->getStackSize();
+  unsigned SlotSize = RegInfo->getSlotSize();
+  bool HasFP = hasFP(MF);
+  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
+  int64_t FPDelta = 0;
+
+  if (IsWinEH) {
+    uint64_t NumBytes = 0;
+    // Calculate required stack adjustment.
+    uint64_t FrameSize = StackSize - SlotSize;
+    // If required, include space for extra hidden slot for stashing base pointer.
+    if (X86FI->getRestoreBasePointer())
+      FrameSize += SlotSize;
+    uint64_t SEHStackAllocAmt = StackSize;
+    if (RegInfo->needsStackRealignment(MF)) {
+      // Callee-saved registers are pushed on stack before the stack
+      // is realigned.
+      FrameSize -= CSSize;
+
+      uint64_t MaxAlign =
+          calculateMaxStackAlign(MF); // Desired stack alignment.
+      NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);
+      SEHStackAllocAmt = RoundUpToAlignment(SEHStackAllocAmt, 16);
+    } else {
+      NumBytes = FrameSize - CSSize;
+    }
+    uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
+    // FPDelta is the offset from the "traditional" FP location of the old base
+    // pointer followed by return address and the location required by the
+    // restricted Win64 prologue.
+    // Add FPDelta to all offsets below that go through the frame pointer.
+    FPDelta = SEHStackAllocAmt - SEHFrameOffset;
+  }
+
 
   if (RegInfo->hasBasePointer(MF)) {
-    assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
+    assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
     if (FI < 0) {
       // Skip the saved EBP.
-      return Offset + RegInfo->getSlotSize();
+      return Offset + SlotSize + FPDelta;
     } else {
       assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
       return Offset + StackSize;
@@ -1210,21 +1262,22 @@
   } else if (RegInfo->needsStackRealignment(MF)) {
     if (FI < 0) {
       // Skip the saved EBP.
-      return Offset + RegInfo->getSlotSize();
+      return Offset + SlotSize + FPDelta;
     } else {
       assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
       return Offset + StackSize;
     }
     // FIXME: Support tail calls
   } else {
-    if (!hasFP(MF))
+    if (!HasFP)
       return Offset + StackSize;
+    if (IsWinEH)
+      return Offset + FPDelta;
 
     // Skip the saved EBP.
-    Offset += RegInfo->getSlotSize();
+    Offset += SlotSize;
 
     // Skip the RETADDR move area
-    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
     int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
     if (TailCallReturnAddrDelta < 0)
       Offset -= TailCallReturnAddrDelta;
@@ -1959,8 +2012,8 @@
     // We need to keep the stack aligned properly.  To do this, we round the
     // amount of space needed for the outgoing arguments up to the next
     // alignment boundary.
-    unsigned StackAlign = STI.getFrameLowering()->getStackAlignment();
-    Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
+    unsigned StackAlign = getStackAlignment();
+    Amount = RoundUpToAlignment(Amount, StackAlign);
 
     MachineInstr *New = nullptr;
 
Index: test/CodeGen/X86/frameaddr.ll
===================================================================
--- test/CodeGen/X86/frameaddr.ll
+++ test/CodeGen/X86/frameaddr.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=x86                                | FileCheck %s --check-prefix=CHECK-32
 ; RUN: llc < %s -march=x86    -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-32
-; RUN: llc < %s -march=x86-64                             | FileCheck %s --check-prefix=CHECK-64
-; RUN: llc < %s -march=x86-64 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=x86_64-unknown                             | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=x86_64-unknown -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-64
 ; RUN: llc < %s -mtriple=x86_64-gnux32                    | FileCheck %s --check-prefix=CHECK-X32ABI
 ; RUN: llc < %s -mtriple=x86_64-gnux32 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=CHECK-X32ABI
 ; RUN: llc < %s -mtriple=x86_64-nacl                    | FileCheck %s --check-prefix=CHECK-NACL64
Index: test/CodeGen/X86/gcc_except_table.ll
===================================================================
--- test/CodeGen/X86/gcc_except_table.ll
+++ test/CodeGen/X86/gcc_except_table.ll
@@ -15,7 +15,7 @@
 
 ; MINGW64: .seh_proc
 ; MINGW64: .seh_handler __gxx_personality_v0
-; MINGW64: .seh_setframe 5, 0
+; MINGW64: .seh_setframe 5, 32
 ; MINGW64: callq _Unwind_Resume
 ; MINGW64: .seh_handlerdata
 ; MINGW64: GCC_except_table0:
Index: test/CodeGen/X86/win64_alloca_dynalloca.ll
===================================================================
--- test/CodeGen/X86/win64_alloca_dynalloca.ll
+++ test/CodeGen/X86/win64_alloca_dynalloca.ll
@@ -14,26 +14,24 @@
   %buf0 = alloca i8, i64 4096, align 1
 
 ; ___chkstk_ms does not adjust %rsp.
-; M64: movq  %rsp, %rbp
 ; M64:       $4096, %rax
 ; M64: callq ___chkstk_ms
 ; M64: subq  %rax, %rsp
+; M64: leaq 128(%rsp), %rbp
 
 ; __chkstk does not adjust %rsp.
-; W64: movq  %rsp, %rbp
 ; W64:       $4096, %rax
 ; W64: callq __chkstk
 ; W64: subq  %rax, %rsp
+; W64: leaq 128(%rsp), %rbp
 
 ; Use %r11 for the large model.
-; L64: movq  %rsp, %rbp
 ; L64:       $4096, %rax
 ; L64: movabsq $__chkstk, %r11
 ; L64: callq *%r11
 ; L64: subq  %rax, %rsp
 
 ; Freestanding
-; EFI: movq  %rsp, %rbp
 ; EFI:       $[[B0OFS:4096|4104]], %rsp
 ; EFI-NOT:   call
 
@@ -68,12 +66,12 @@
 
 ; M64: subq  $48, %rsp
 ; M64: movq  %rax, 32(%rsp)
-; M64: leaq  -4096(%rbp), %r9
+; M64: leaq  -128(%rbp), %r9
 ; M64: callq bar
 
 ; W64: subq  $48, %rsp
 ; W64: movq  %rax, 32(%rsp)
-; W64: leaq  -4096(%rbp), %r9
+; W64: leaq  -128(%rbp), %r9
 ; W64: callq bar
 
 ; EFI: subq  $48, %rsp
@@ -83,9 +81,9 @@
 
   ret i64 %r
 
-; M64: movq    %rbp, %rsp
+; M64: leaq    3968(%rbp), %rsp
 
-; W64: movq    %rbp, %rsp
+; W64: leaq    3968(%rbp), %rsp
 
 }
 
Index: test/CodeGen/X86/win64_alloca_dynalloca.s
===================================================================
--- /dev/null
+++ test/CodeGen/X86/win64_alloca_dynalloca.s
@@ -0,0 +1,29 @@
+	.text
+	.def	 unaligned;
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	unaligned
+	.align	16, 0x90
+unaligned:                              # @unaligned
+# BB#0:                                 # %entry
+	pushq	%rbp
+	movabsq	$4096, %rax             # imm = 0x1000
+	callq	__chkstk
+	subq	%rax, %rsp
+	leaq	128(%rsp), %rbp
+	leaq	15(%rcx), %rax
+	andq	$-16, %rax
+	callq	__chkstk
+	subq	%rax, %rsp
+	movq	%rsp, %rax
+	subq	$48, %rsp
+	movq	%rax, 32(%rsp)
+	leaq	-128(%rbp), %r9
+	movq	%rcx, %r8
+	callq	bar
+	leaq	4016(%rbp), %rsp
+	popq	%rbp
+	retq
+
+
Index: test/CodeGen/X86/win64_eh.ll
===================================================================
--- test/CodeGen/X86/win64_eh.ll
+++ test/CodeGen/X86/win64_eh.ll
@@ -146,23 +146,23 @@
 ; WIN64: .seh_proc foo5
 ; WIN64: pushq %rbp
 ; WIN64: .seh_pushreg 5
-; WIN64: movq  %rsp, %rbp
 ; WIN64: pushq %rdi
 ; WIN64: .seh_pushreg 7
 ; WIN64: pushq %rbx
 ; WIN64: .seh_pushreg 3
-; WIN64: andq  $-64, %rsp
 ; WIN64: subq  $128, %rsp
-; WIN64: .seh_stackalloc 48
-; WIN64: .seh_setframe 5, 64
+; WIN64: .seh_stackalloc 128
+; WIN64: leaq  128(%rsp), %rbp
+; WIN64: .seh_setframe 5, 128
 ; WIN64: movaps  %xmm7, -32(%rbp)        # 16-byte Spill
 ; WIN64: movaps  %xmm6, -48(%rbp)        # 16-byte Spill
-; WIN64: .seh_savexmm 6, 16
-; WIN64: .seh_savexmm 7, 32
+; WIN64: .seh_savexmm 6, 80
+; WIN64: .seh_savexmm 7, 96
 ; WIN64: .seh_endprologue
+; WIN64: andq  $-64, %rsp
 ; WIN64: movaps  -48(%rbp), %xmm6        # 16-byte Reload
 ; WIN64: movaps  -32(%rbp), %xmm7        # 16-byte Reload
-; WIN64: leaq  -16(%rbp), %rsp
+; WIN64: leaq  (%rbp), %rsp
 ; WIN64: popq  %rbx
 ; WIN64: popq  %rdi
 ; WIN64: popq  %rbp
Index: test/CodeGen/X86/win64_eh.s
===================================================================
--- /dev/null
+++ test/CodeGen/X86/win64_eh.s
@@ -0,0 +1,50 @@
+	.text
+	.def	 foo5;
+	.scl	2;
+	.type	32;
+	.endef
+	.globl	foo5
+	.align	16, 0x90
+foo5:                                   # @foo5
+.Ltmp0:
+.seh_proc foo5
+# BB#0:                                 # %entry
+	pushq	%rbp
+.Ltmp1:
+	.seh_pushreg 5
+	pushq	%rdi
+.Ltmp2:
+	.seh_pushreg 7
+	pushq	%rbx
+.Ltmp3:
+	.seh_pushreg 3
+	subq	$384, %rsp              # imm = 0x180
+.Ltmp4:
+	.seh_stackalloc 384
+	leaq	128(%rsp), %rbp
+.Ltmp5:
+	.seh_setframe 5, 128
+	movaps	%xmm7, -32(%rbp)        # 16-byte Spill
+	movaps	%xmm6, -48(%rbp)        # 16-byte Spill
+.Ltmp6:
+	.seh_savexmm 6, 80
+.Ltmp7:
+	.seh_savexmm 7, 96
+.Ltmp8:
+	.seh_endprologue
+	andq	$-64, %rsp
+	#APP
+	#NO_APP
+	movl	$42, (%rsp)
+	movaps	-48(%rbp), %xmm6        # 16-byte Reload
+	movaps	-32(%rbp), %xmm7        # 16-byte Reload
+	leaq	256(%rbp), %rsp
+	popq	%rbx
+	popq	%rdi
+	popq	%rbp
+	retq
+.Leh_func_end0:
+.Ltmp9:
+	.seh_endproc
+
+
Index: test/CodeGen/X86/win64_frame.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/win64_frame.ll
@@ -0,0 +1,121 @@
+; RUN: llc < %s -mtriple=x86_64-pc-win32 | FileCheck %s
+
+define i32 @f1(i32 %p1, i32 %p2, i32 %p3, i32 %p4, i32 %p5) "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f1:
+  ; CHECK:       movl    48(%rbp), %eax
+  ret i32 %p5
+}
+
+define void @f2(i32 %p, ...) "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f2:
+  ; CHECK:      .seh_stackalloc 8
+  ; CHECK:      leaq    16(%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 16
+  ; CHECK:      movq    %rdx, 16(%rbp)
+  ; CHECK:      leaq    16(%rbp), %rax
+  %ap = alloca i8, align 8
+  call void @llvm.va_start(i8* %ap)
+  ret void
+}
+
+define i8* @f3() "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f3:
+  ; CHECK:      leaq    (%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 0
+  ; CHECK:      movq    8(%rbp), %rax
+  %ra = call i8* @llvm.returnaddress(i32 0)
+  ret i8* %ra
+}
+
+define i8* @f4() "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f4:
+  ; CHECK:      pushq   %rbp
+  ; CHECK:      .seh_pushreg 5
+  ; CHECK:      subq    $304, %rsp
+  ; CHECK:      .seh_stackalloc 304
+  ; CHECK:      leaq    128(%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 128
+  ; CHECK:      .seh_endprologue
+  ; CHECK:      movq    184(%rbp), %rax
+  alloca [300 x i8]
+  %ra = call i8* @llvm.returnaddress(i32 0)
+  ret i8* %ra
+}
+
+declare void @external(i8*)
+
+define void @f5() "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f5:
+  ; CHECK:      subq    $336, %rsp
+  ; CHECK:      .seh_stackalloc 336
+  ; CHECK:      leaq    128(%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 128
+  ; CHECK:      leaq    -92(%rbp), %rcx
+  ; CHECK:      callq   external
+  %a = alloca [300 x i8]
+  %gep = getelementptr [300 x i8]* %a, i32 0, i32 0
+  call void @external(i8* %gep)
+  ret void
+}
+
+define void @f6(i32 %p, ...) "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f6:
+  ; CHECK:      subq    $336, %rsp
+  ; CHECK:      .seh_stackalloc 336
+  ; CHECK:      leaq    128(%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 128
+  ; CHECK:      leaq    -92(%rbp), %rcx
+  ; CHECK:      callq   external
+  %a = alloca [300 x i8]
+  %gep = getelementptr [300 x i8]* %a, i32 0, i32 0
+  call void @external(i8* %gep)
+  ret void
+}
+
+define i32 @f7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f7:
+  ; CHECK:      pushq   %rbp
+  ; CHECK:      .seh_pushreg 5
+  ; CHECK:      subq    $320, %rsp
+  ; CHECK:      .seh_stackalloc 320
+  ; CHECK:      leaq    128(%rsp), %rbp
+  ; CHECK:      .seh_setframe 5, 128
+  ; CHECK:      movl    240(%rbp), %eax
+  ; CHECK:      leaq    192(%rbp), %rsp
+  alloca [300 x i8], align 64
+  ret i32 %e
+}
+
+define i32 @f8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) "no-frame-pointer-elim"="true" {
+  ; CHECK-LABEL: f8:
+  ; CHECK:        subq    $384, %rsp
+  ; CHECK:        .seh_stackalloc 384
+  ; CHECK:        leaq    128(%rsp), %rbp
+  ; CHECK:        .seh_setframe 5, 128
+
+  %alloca = alloca [300 x i8], align 64
+  ; CHECK:        andq    $-64, %rsp
+  ; CHECK:        movq    %rsp, %rbx
+
+  alloca i32, i32 %a
+  ; CHECK:        movl    %ecx, %eax
+  ; CHECK:        leaq    15(,%rax,4), %rax
+  ; CHECK:        andq    $-16, %rax
+  ; CHECK:        callq   __chkstk
+  ; CHECK:        subq    %rax, %rsp
+
+  %gep = getelementptr [300 x i8]* %alloca, i32 0, i32 0
+  call void @external(i8* %gep)
+  ; CHECK:        subq    $32, %rsp
+  ; CHECK:        leaq    (%rbx), %rcx
+  ; CHECK:        callq   external
+  ; CHECK:        addq    $32, %rsp
+
+  ret i32 %e
+  ; CHECK:        movl    %esi, %eax
+  ; CHECK:        leaq    256(%rbp), %rsp
+}
+
+declare i8* @llvm.returnaddress(i32) nounwind readnone
+
+declare void @llvm.va_start(i8*) nounwind